Source code for mindmeld.converter.rasa

# -*- coding: utf-8 -*-
#
# Copyright (c) 2015 Cisco Systems, Inc. and others.  All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This module contains the Rasacoverter class used to convert Rasa projects
into MindMeld projects"""

import copy
import logging
import os
import re
from keyword import iskeyword
import yaml

from mindmeld.converter.converter import Converter
from mindmeld.exceptions import MindMeldError

logger = logging.getLogger(__name__)

RASA_ENTITY_REGEX = re.compile(r"(\[(.+?)\]\((.*?)\))")
MINDMELD_ENTITY_REGEX = re.compile(r"\{.+?\}")


class RasaConverter(Converter):
    """Convert a Rasa project into a MindMeld project.

    This is a subclass of the abstract ``Converter`` class. It reads the Rasa
    ``domain``/``endpoints`` YAML files and the ``data/nlu_data.md`` and
    ``data/stories.md`` markdown files, and writes the MindMeld directory
    layout, training files, gazetteers and an ``__init__.py`` with one dialogue
    handler per Rasa action.

    NOTE(review): ``create_directory`` and ``create_main`` are not defined in
    this class; they are presumably inherited from ``Converter``.
    """

    # Matches a markdown/HTML comment such as ``<!-- text -->`` (any number of
    # dashes after ``<!--`` is covered because the body match is non-greedy).
    _COMMENT_REGEX = re.compile(r"<!--.*?-->")

    def __init__(self, rasa_project_directory, mindmeld_project_directory):
        """Store the source and destination project locations.

        Args:
            rasa_project_directory (str): Path of the Rasa project to convert.
            mindmeld_project_directory (str): Path where the MindMeld project
                is written.

        Raises:
            FileNotFoundError: If ``rasa_project_directory`` is not an existing
                directory.
        """
        # Check the directory itself. Checking only its parent rejects an
        # existing relative path such as "project" (whose dirname is "") and
        # accepts a missing directory whose parent happens to exist.
        if not os.path.isdir(rasa_project_directory):
            msg = "`{rasa_project_directory}` does not exist. Please verify.".format(
                rasa_project_directory=rasa_project_directory
            )
            raise FileNotFoundError(msg)
        self.rasa_project_directory = rasa_project_directory
        self.mindmeld_project_directory = mindmeld_project_directory
        # MindMeld entity annotations ("{text|type}") seen in the training data
        self.all_entities = set()

    def _create_intents_directories(self, mindmeld_project_directory, intents):
        """Create one directory per intent under the 'general' domain.

        Note: Because Rasa does not support multiple domains at this time,
        all intents are placed under a domain named 'general'.
        """
        GENERAL_DOMAIN_LOCATION = "/domains/general/"
        for intent in intents:
            self.create_directory(
                mindmeld_project_directory + GENERAL_DOMAIN_LOCATION + intent
            )

    def _create_entities_directories(self, mindmeld_project_directory, entities):
        """Create each entity directory with an empty gazetteer and a skeleton
        mapping file. Existing files are truncated."""
        for entity in entities:
            entity_path = mindmeld_project_directory + "/entities/" + entity
            self.create_directory(entity_path)
            # Opening in "w" mode is enough to create (or empty) the gazetteer
            with open(entity_path + "/gazetteer.txt", "w"):
                pass
            with open(entity_path + "/mapping.json", "w") as f:
                # skeleton mapping file that a user must fill in
                f.write('{\n "entities":[]\n}')

    @staticmethod
    def _is_line_intent_definiton(line):
        """Return True if the line is an ``## intent:<name>`` header."""
        return line.startswith("## intent:")

    @staticmethod
    def _get_intent_from_line(line):
        """Return the intent name from an ``## intent:<name>`` header line."""
        return line.split(" ")[1].split(":")[1].rstrip()

    def _create_intent_training_file(self, intent_directory):
        """Create the intent directory and an empty ``train.txt`` inside it."""
        self.create_directory(intent_directory)
        with open(intent_directory + "/train.txt", "w"):
            pass

    @staticmethod
    def _remove_comments_from_line(line):
        """Return the line without ``<!-- ... -->`` comments and without
        trailing whitespace.

        A line without a (terminated) comment is only right-stripped.
        """
        return RasaConverter._COMMENT_REGEX.sub("", line).rstrip()

    def _translate_rasa_entry_to_mindmeld_entry(self, rasa_entry: str) -> str:
        """Rewrite Rasa ``[text](type)`` annotations as MindMeld
        ``{text|type}`` annotations (type lower-cased) and record each
        annotation in ``self.all_entities``.
        """
        mindmeld_entry = rasa_entry
        for match, entity, entity_type in RASA_ENTITY_REGEX.findall(rasa_entry):
            mindmeld_entity = f"{{{entity}|{entity_type.lower()}}}"
            mindmeld_entry = mindmeld_entry.replace(match, mindmeld_entity)
            self.all_entities.add(mindmeld_entity)
        return mindmeld_entry

    def _add_example_to_training_file(self, current_intent_path: str, rasa_entry: str):
        """Append one converted training example to the intent's ``train.txt``."""
        rasa_entry = RasaConverter._remove_comments_from_line(rasa_entry)
        mindmeld_entry = self._translate_rasa_entry_to_mindmeld_entry(rasa_entry)
        with open(current_intent_path + "/train.txt", "a") as intent_f:
            intent_f.write(mindmeld_entry + "\n")

    def _get_action_endpoint(self):
        """Return the ``action_endpoint.url`` value from ``endpoints.yaml`` or
        ``endpoints.yml``, or None if it cannot be determined.
        """
        for file_ending in ["yaml", "yml"]:
            file_name = self.rasa_project_directory + "/endpoints." + file_ending
            if os.path.isfile(file_name):
                try:
                    with open(file_name, "r") as stream:
                        data = yaml.safe_load(stream)
                    # An empty file loads as None and a key may be present with
                    # a null value; treat both as "no endpoint configured".
                    return ((data or {}).get("action_endpoint") or {}).get("url")
                except IOError as e:
                    logger.error("Can not open endpoints.yml file at %s", file_name)
                    logger.error(e)
        logger.error("Could not find endpoints.yml file in project directory")
        return None

    def _read_domain_file(self):
        """Load and return the content of ``domain.yaml`` / ``domain.yml``.

        Returns:
            dict: The parsed domain file; an empty dict if the file is empty.

        Raises:
            FileNotFoundError: If no domain file could be read.
        """
        for file_ending in ["yaml", "yml"]:
            file_name = self.rasa_project_directory + "/domain." + file_ending
            if os.path.isfile(file_name):
                try:
                    with open(file_name, "r") as stream:
                        domain_data_loaded = yaml.safe_load(stream)
                    # An empty YAML document loads as None
                    return domain_data_loaded or {}
                except IOError as e:
                    logger.error("Can not open domain.yml file at %s", file_name)
                    logger.error(e)
        logger.error("Could not find domain.yml file in project directory")
        raise FileNotFoundError

    def _read_entities(self):
        """Return the domain's ``entities`` section, or an empty list."""
        return self._read_domain_file().get("entities") or []

    def _read_slots(self):
        """Return the domain's ``slots`` section, or an empty list."""
        return self._read_domain_file().get("slots") or []

    def _read_intents(self):
        """Return the domain's ``intents`` section (KeyError if missing)."""
        domain_file = self._read_domain_file()
        return domain_file["intents"]

    def _read_actions(self):
        """Return the domain's ``actions`` section (KeyError if missing)."""
        domain_file = self._read_domain_file()
        return domain_file["actions"]

    def _read_templates(self):
        """Return the merged ``templates`` and ``responses`` sections of the
        domain file; ``responses`` entries win on key collisions."""
        domain_file = self._read_domain_file()
        templates = {}
        for field in ["templates", "responses"]:
            templates.update(domain_file.get(field) or {})
        return templates

    @staticmethod
    def _append_line(file_path, text):
        """Append ``text`` followed by a newline to ``file_path``."""
        with open(file_path, "a") as f:
            f.write(text + "\n")

    def create_entity_files(self, mm_entry):
        """Append the value of a ``{value|type}`` annotation to the gazetteer
        of its entity type.

        If the entity directory does not exist yet (the entity was not created
        from the domain file), it is created first and an error is logged.

        Args:
            mm_entry (str): MindMeld annotation of the form ``{value|type}``.
        """
        # Split on the last "|" so a value that itself contains "|" still parses
        entity_value, entity = mm_entry.strip("{}").rsplit("|", 1)
        gazetteer_location = os.path.join(
            self.mindmeld_project_directory, "entities", entity, "gazetteer.txt"
        )
        try:
            self._append_line(gazetteer_location, entity_value)
        except FileNotFoundError as e:
            self._create_entities_directories(self.mindmeld_project_directory, [entity])
            self._append_line(gazetteer_location, entity_value)
            logger.error("Domain file may not contain entity %s", entity)
            logger.error(e)

    @staticmethod
    def _is_valid_function_name(name):
        """Return True if ``name`` can be used as a Python function name."""
        return name.isidentifier() and not iskeyword(name)

    @staticmethod
    def _is_story_name(stories_line):
        """Return True if the line is a ``## <story name>`` header."""
        return stories_line.startswith("## ")

    def _get_story_name(self, stories_line):
        """Return the story name from a header line, without the ``## ``
        marker, comments or trailing whitespace."""
        return self._remove_comments_from_line(stories_line.replace("## ", ""))

    @staticmethod
    def _is_intent(stories_line):
        """Return True if the line starts with the ``* `` intent marker."""
        return stories_line.startswith("* ")

    @staticmethod
    def _is_action(stories_line):
        """Return True if the line contains the ``- `` action marker."""
        return "- " in stories_line

    @staticmethod
    def _does_intent_have_entity(stories_line):
        """Return True if the line contains a ``{...}`` entity section."""
        return MINDMELD_ENTITY_REGEX.search(stories_line) is not None

    @staticmethod
    def _clean_up_entities_list(entities_with_values):
        """Split a ``{...}`` entity section into a list of
        ``name: value`` strings with quotes and leading whitespace removed."""
        # trim off { } then split the comma-separated entries
        trimmed = entities_with_values[1:-1]
        return [entity.replace('"', "").lstrip() for entity in trimmed.split(",")]

    def _get_intent_with_entity(self, stories_line):
        """Parse an intent line of a story.

        Returns:
            tuple: ``(intent, entities_list)`` where ``entities_list`` is empty
            when the line has no ``{...}`` section.
        """
        match = MINDMELD_ENTITY_REGEX.search(stories_line)
        if match is None:
            intent = self._remove_comments_from_line(stories_line[2:]).rstrip()
            return intent, []
        entities_with_values = match.group(0)
        entities_list = self._clean_up_entities_list(entities_with_values)
        # Everything between the "* " marker and the entity section is the intent
        start_of_entity = stories_line.find(entities_with_values)
        intent = self._remove_comments_from_line(
            stories_line[2:start_of_entity]
        ).rstrip()
        return intent, entities_list

    def _get_stories(self):
        """Parse ``data/stories.md`` into a dictionary of stories.

        Returns:
            dict: Maps each story name to a list of steps. A step is a dict
            with an ``actions`` list and, when an intent line preceded the
            actions, ``intent`` and ``entities`` keys.

        Raises:
            FileNotFoundError: If the stories file does not exist.
            IOError: If the stories file cannot be read.
        """
        stories_path = self.rasa_project_directory + "/data/stories.md"
        if not os.path.isfile(stories_path):
            logger.error("Could not find stories.md file in %s", stories_path)
            raise FileNotFoundError
        try:
            with open(stories_path, "r") as f:
                stories_lines = f.readlines()
        except IOError as e:
            logger.error("Can not open stories.md file at %s", stories_path)
            logger.error(e)
            # Returning None here would only defer the failure to the caller
            raise

        stories_dictionary = {}
        current_story_name = ""
        steps = []
        current_step = {}
        current_actions = []
        max_lines = len(stories_lines)
        for line_num, line in enumerate(stories_lines):
            if self._is_story_name(line):
                # Stories are not always separated by a blank line; store the
                # previous story before starting a new one.
                if current_story_name or steps:
                    stories_dictionary[current_story_name] = steps
                    steps = []
                current_story_name = self._get_story_name(line)
            elif self._is_intent(line):
                intent, entities = self._get_intent_with_entity(line)
                current_step["intent"] = intent
                current_step["entities"] = entities
            elif self._is_action(line):
                current_actions.append(
                    self._remove_comments_from_line(line[3:]).rstrip()
                )
                next_is_action = (line_num + 1) < max_lines and self._is_action(
                    stories_lines[line_num + 1]
                )
                # Consecutive action lines belong to the same step
                if not next_is_action:
                    current_step["actions"] = current_actions
                    steps.append(current_step)
                    current_step = {}
                    current_actions = []
            elif not line.strip() and current_story_name != "":
                # A blank line terminates the current story
                stories_dictionary[current_story_name] = steps
                steps = []
                current_story_name = ""
        # Store whatever is pending when the file does not end with a blank line
        if current_story_name or steps:
            stories_dictionary[current_story_name] = steps
        return stories_dictionary

    def create_mindmeld_directory(self, mindmeld_project_path):
        """Create the top-level MindMeld project directory structure."""
        self.create_directory(mindmeld_project_path)
        self.create_directory(mindmeld_project_path + "/data")
        self.create_directory(mindmeld_project_path + "/domains")
        self.create_directory(mindmeld_project_path + "/domains/general")
        self.create_directory(mindmeld_project_path + "/entities")

    def create_mindmeld_training_data(self):
        """Method to transfer and reformat the training data in a Rasa Project

        Raises:
            MindMeldError: If ``data/nlu_data.md`` cannot be found.
        """
        # read intents listed in domain.yml
        intents = self._read_intents()

        # create intents subdirectories
        self._create_intents_directories(self.mindmeld_project_directory, intents)

        # read entities in domain.yml
        entities = [entity.lower() for entity in self._read_entities()]

        # create entities subdirectories if entities is not empty
        if entities:
            self._create_entities_directories(self.mindmeld_project_directory, entities)

        # try and open data files from rasa project
        nlu_data_loc = self.rasa_project_directory + "/data/nlu_data.md"
        try:
            with open(nlu_data_loc, "r") as nlu_data_md_file:
                nlu_data_lines = nlu_data_md_file.readlines()
        except FileNotFoundError as error:
            raise MindMeldError(f"Cannot open nlu_data.md file at {nlu_data_loc}") from error

        # iterate through each line
        current_intent_path = ""
        for line in nlu_data_lines:
            if self._is_line_intent_definiton(line):
                current_intent_path = (
                    self.mindmeld_project_directory
                    + "/domains/general/"
                    + RasaConverter._get_intent_from_line(line)
                )
                # create data text file for intent examples
                self._create_intent_training_file(current_intent_path)
            elif line.startswith("## "):
                # Any other section header (e.g. Rasa's synonym, regex or lookup
                # sections) ends the current intent; its list items are not
                # training examples for that intent.
                current_intent_path = ""
            else:
                # We can add an extra space for rasa_entity since rasa_entity is
                # rstripped during its processing
                delimiter, rasa_entity = (line + " ").split(" ", maxsplit=1)
                # Skip list items that appear outside of an intent section;
                # an empty path would otherwise resolve to "/train.txt".
                if delimiter == "-" and current_intent_path:
                    self._add_example_to_training_file(current_intent_path, rasa_entity)

        # create all entity folders
        for entity in self.all_entities:
            self.create_entity_files(entity)

    def _write_init_header(self):
        """Create ``__init__.py`` in the MindMeld project and write its header.

        Returns:
            file object: The still-open ``__init__.py`` handle positioned after
            the header. The caller is responsible for closing it.
        """
        initialization_strings = [
            'from mindmeld import Application',
            'from mindmeld.components.custom_action import CustomAction',
            'from . import custom_features # noqa: F401',
            '\n',
            'app = Application(__name__)',
            "__all__ = ['app']"
        ]
        url = self._get_action_endpoint()
        if url:
            action_config = "action_config = {{'url': '{url}'}}\n".format(url=url)
            initialization_strings.append(action_config)
        # Always terminate the header so the first function definition written
        # afterwards starts on its own line.
        initialization_strings.append("\n")
        f = open(self.mindmeld_project_directory + "/__init__.py", "w+")
        f.write('\n'.join(initialization_strings))
        return f

    @staticmethod
    def _get_app_handle(intent, entities):
        """Build the ``@app.handle(...)`` decorator line for an intent.

        Args:
            intent (str): Intent name.
            entities (list): ``name: value`` strings; only the name is used.

        Returns:
            str: The decorator line, terminated by a newline.
        """
        entity_names = [entity_value.split(":")[0] for entity_value in entities]
        if len(entity_names) > 1:
            quoted_names = ", ".join("'" + name + "'" for name in entity_names)
            entities_string = ", has_entities=[" + quoted_names + "]"
        elif len(entity_names) == 1:
            entities_string = ", has_entity=" + "'" + entity_names[0] + "'"
        else:
            entities_string = ""
        return "@app.handle(intent='" + intent + "'" + entities_string + ")\n"

    def _write_function_declaration(self, action, f):
        """Write the ``def <action>(request, responder):`` line to ``f``.

        Raises:
            SyntaxError: If ``action`` is not a valid Python function name.
        """
        if not self._is_valid_function_name(action):
            logger.error(
                "Action %s is not a valid name for a python function", action
            )
            raise SyntaxError(
                "Action {} is not a valid name for a python function".format(action)
            )
        function_declaration_string = "def {}(request, responder):\n".format(action)
        f.write(function_declaration_string)

    @staticmethod
    def _write_function_body_prompt(prompts, f):
        """Write the entity look-ups and the ``prompts`` list of a handler.

        For every ``{entity}`` placeholder in a prompt, a line extracting that
        entity from ``request.entities`` is written and the placeholder is
        replaced by a positional ``str.format`` field.
        """
        prompts_list = []
        for prompt in prompts:
            entities = MINDMELD_ENTITY_REGEX.findall(prompt)
            # If we have entities, we do string format with entities; otherwise
            # just simple string prompts
            if not entities:
                prompts_list.append('"' + prompt + '"')
                continue
            entities_list = []
            newprompt = prompt
            for i, entity in enumerate(entities):
                # Replace on the running result so that every placeholder, not
                # only the last one, becomes a positional field.
                newprompt = newprompt.replace(entity, "{" + str(i) + "}")
                entities_list.append(entity.replace("{", "").replace("}", ""))
            entities_args = ", ".join(entities_list)
            prompts_list.append(
                '"' + newprompt + '".format({})'.format(entities_args)
            )
            for newentity in entities_list:
                entities_string = " {}_s = [e['text'] for e in ".format(
                    newentity
                ) + "request.entities if e['type'] == '{}']\n".format(newentity)
                entity_string = " {0} = {0}_s[0]\n".format(newentity)
                f.write(entities_string)
                f.write(entity_string)
        prompts_string = " prompts = [{}]\n".format(", ".join(prompts_list))
        f.write(prompts_string)

    @staticmethod
    def _write_default_function():
        """Placeholder; intentionally does nothing."""

    @staticmethod
    def _get_text_prompts_list(action_templates):
        """Return the ``text`` value of every template that has one."""
        return [template["text"] for template in action_templates if "text" in template]

    @staticmethod
    def _write_responder_lines(f):
        """Write the lines that reply with the prompts and listen."""
        responder_string = " responder.reply(prompts)\n responder.listen()\n"
        f.write(responder_string)

    def _read_file_lines(self):
        """Return the lines of the generated ``__init__.py``."""
        with open(self.mindmeld_project_directory + "/__init__.py", "r") as f:
            return f.readlines()

    @staticmethod
    def _is_custom_action(action):
        """Return True if the action name starts with ``action``."""
        return action.startswith("action")

    @staticmethod
    def _get_custom_action(action):
        """Return the handler body lines that invoke a Rasa custom action."""
        lines = [
            " # This is a custom action from rasa\n",
            " action = CustomAction(name='{action}', config=action_config)\n".format(
                action=action
            ),
            " action.invoke(request, responder)\n",
        ]
        return lines

    def _write_functions(self, actions, templates, f):
        """Write one handler function per action to ``f``.

        Actions with templates reply with their text prompts, custom actions
        invoke ``CustomAction``, and all others get an empty body.
        """
        last_index = len(actions) - 1
        for index, action in enumerate(actions):
            self._write_function_declaration(action, f)
            if action in templates:
                # Get list of templates per action
                action_templates = templates[action]
                prompts_list = RasaConverter._get_text_prompts_list(action_templates)
                self._write_function_body_prompt(prompts_list, f)
                self._write_responder_lines(f)
            elif self._is_custom_action(action):
                f.writelines(self._get_custom_action(action))
            else:
                # If no templates, write a blank function
                f.write(" # No templates were provided for action\n")
                f.write(" pass\n")
            # Separate consecutive functions with two blank lines
            if index != last_index:
                f.write("\n")
                f.write("\n")

    @staticmethod
    def _attach_handle_to_function(handle, action, file_lines):
        """Insert ``handle`` directly above the definition of ``action``.

        Nothing is inserted if the same handle already decorates the function
        or if the function is not found. ``file_lines`` is modified in place.
        """
        signature = "def {}(request, responder):".format(action)
        for def_index, line in enumerate(file_lines):
            if signature not in line:
                continue
            # Scan the contiguous non-blank lines above the definition (its
            # existing decorators) for an identical handle.
            cursor = def_index
            while cursor > 0 and file_lines[cursor - 1].strip() != "":
                if file_lines[cursor - 1] == handle:
                    return
                cursor -= 1
            file_lines.insert(def_index, handle)
            return

    @staticmethod
    def _attach_actions_to_function(current_action, actions, file_lines):
        """
        When we have more than one actions in an intent, we want to attach the
        actions to the same intent handler.

        Custom actions are inserted as invocations at the top of the handler
        body; the remaining action names are recorded in an
        ``additional_actions`` list. ``file_lines`` is modified in place.
        """
        # Include the opening parenthesis so that an action whose name is a
        # prefix of another one does not match the wrong function.
        signature = "def {action}(".format(action=current_action)
        current_line = next(
            (i for i, line in enumerate(file_lines) if signature in line), None
        )
        if current_line is None:
            logger.warning("Action handler not found for %s.", current_action)
            return

        additional_actions = []
        # for the rest of the actions, add any custom action here
        for action in actions:
            if RasaConverter._is_custom_action(action):
                custom_action = RasaConverter._get_custom_action(action)
                file_lines[current_line + 1 : current_line + 1] = custom_action
                current_line += len(custom_action)
            else:
                additional_actions.append(action)

        if additional_actions:
            # we note non-custom actions as a string list
            file_lines.insert(
                current_line + 1,
                " additional_actions = {actions}\n".format(
                    actions=additional_actions
                ),
            )

    def create_mindmeld_init(self):
        """Generate the MindMeld ``__init__.py``: a handler function per Rasa
        action, decorated with the intents that trigger it in the stories."""
        f = self._write_init_header()
        try:
            actions = self._read_actions()
            templates = self._read_templates()
            # Write all functions for each action
            self._write_functions(actions, templates, f)
        finally:
            # Close the handle even if the domain file cannot be read
            f.close()

        # Read contents of current file
        file_lines = self._read_file_lines()
        stories_dictionary = self._get_stories()
        # Loop through all stories and create intent-action relationship
        for story_name, steps in stories_dictionary.items():
            # Loop through steps for each story
            for step in steps:
                if "intent" not in step:
                    # Actions that are not preceded by an intent cannot be
                    # bound to a handler.
                    logger.warning(
                        "Skipping a step without intent in story '%s'.", story_name
                    )
                    continue
                # Get intent, any entities, and actions
                intent = step["intent"].strip()
                entities = step["entities"]
                actions = [action.strip() for action in step["actions"]]

                # attach handle to correct function
                app_handle_string = RasaConverter._get_app_handle(intent, entities)
                self._attach_handle_to_function(
                    app_handle_string, actions[0], file_lines
                )

                # check if more than 1 action per intent
                if len(actions) > 1:
                    self._attach_actions_to_function(
                        actions[0], actions[1:], file_lines
                    )

        # write all lines back to file
        with open(self.mindmeld_project_directory + "/__init__.py", "w") as f:
            f.writelines(file_lines)

    @staticmethod
    def create_custom_features(mindmeld_project_directory, main_file_loc):
        """Copy ``rasa_custom_features.txt`` from ``main_file_loc`` to
        ``custom_features.py`` in the MindMeld project."""
        with open(main_file_loc + "/rasa_custom_features.txt", "r") as f:
            string = f.read()
        with open(mindmeld_project_directory + "/custom_features.py", "w") as f:
            f.write(string)

    def convert_project(self):
        """Main function that will convert a Rasa project into a MindMeld project.

        The Rasa project consists of three major files that contain much of data
            that is converted into the MindMeld project:
        /domain.yml - Contains all of the intents, entities, actions, and templates
            used in the rasa project
        /data/stories.md - Contains the stories which are used to match intents and
            actions together
        /data/nlu_data.md - Contains the training data for each intent. Some of the
            training data may contain entities

        limitations:
        - Rasa has the ability to handle multiple intents per query, while MindMeld
        does not.
        - Rasa training data may be json format, which is not currently supported.
        - Rasa has a feature called Rasa forms which is not currently supported.
        - Rasa's configuration files are not transferred, instead generic MindMeld
        configuration files are copied over.
        """
        # Create project directory with sub folders
        self.create_mindmeld_directory(self.mindmeld_project_directory)
        # Transfer over test data from Rasa project and reformat to MindMeld project
        self.create_mindmeld_training_data()
        file_loc = os.path.dirname(os.path.realpath(__file__))
        self.create_main(self.mindmeld_project_directory, file_loc)
        self.create_mindmeld_init()
        self.create_custom_features(self.mindmeld_project_directory, file_loc)