From b62570017628f2c5d6e6f291ed8d4b7288443333 Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Mon, 16 Sep 2019 17:33:32 -0300 Subject: [PATCH 01/20] Add file upload sample --- src/api/views/files.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/api/views/files.py b/src/api/views/files.py index 28d1d2e9..76ec48f5 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -1,4 +1,5 @@ from rest_framework.views import APIView +from rest_framework.parsers import FileUploadParser from django.shortcuts import get_object_or_404 from django.http import JsonResponse, Http404 @@ -52,3 +53,18 @@ def get(self, request, project_id): return JsonResponse({'content': markdown_str}) + + def put(self, request, project_id, format=None): + print("here") + try: + # file_obj = request.data['file'] + # print(file_obj.read() + file_obj = request.data["file"] + print(file_obj) + + + except: + print("error honey") + return JsonResponse({'content': "error honey"}) + + return JsonResponse({'content': "hey"}) \ No newline at end of file From 072290d3093e814404ef81d6d9590efc75c04e1a Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Tue, 17 Sep 2019 20:18:16 -0300 Subject: [PATCH 02/20] Add parser for intent file --- requirements.txt | 4 ++++ src/api/utils/handlers.py | 10 ++++++++ src/api/views/files.py | 50 +++++++++++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 12 deletions(-) create mode 100644 src/api/utils/handlers.py diff --git a/requirements.txt b/requirements.txt index 43eee6db..c6f194f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,7 @@ djangorestframework ~= 3.10.2 djongo ~= 1.2.31 django-cors-headers ~= 3.1.0 requests ~= 2.22.0 +bs4 ~= 0.0.1 +Markdown ~= 3.1.1 +ruamel.yaml ~= 0.16.5 +ruamel.yaml.clib ~= 0.1.2 \ No newline at end of file diff --git a/src/api/utils/handlers.py b/src/api/utils/handlers.py new file mode 100644 index 00000000..2a1eb8b6 --- /dev/null +++ b/src/api/utils/handlers.py @@ -0,0 +1,10 @@ +from tempfile import TemporaryFile + +def handle_uploaded_file(f): + tmp = TemporaryFile(mode='w+b') + for chunk in f.chunks(): + tmp.write(chunk) + + tmp.seek(0) + + return tmp \ No newline at end of file diff --git a/src/api/views/files.py b/src/api/views/files.py index 76ec48f5..172fa902 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -6,7 +6,11 @@ from api.models import Project, Story, Intent from api.parser import StoryParser, IntentParser +from api.utils.handlers import handle_uploaded_file +from ruamel.yaml import YAML +import markdown +from bs4 import BeautifulSoup class StoriesFile(APIView): """ @@ -55,16 +59,38 @@ def get(self, request, project_id): def put(self, request, project_id, format=None): - print("here") - try: - # file_obj = request.data['file'] - # print(file_obj.read() - file_obj = request.data["file"] - print(file_obj) - - - except: - print("error honey") - return JsonResponse({'content': "error honey"}) + project = get_object_or_404(Project, pk=project_id) - return JsonResponse({'content': "hey"}) \ No newline at end of file + try: + # Handle file from request + file_obj = request.data['file'] + file_tmp = handle_uploaded_file(file_obj) + file_content = file_tmp.read().decode('utf-8') + + # Parser markdown to html + html = markdown.markdown(file_content) + html = BeautifulSoup(html, features="html.parser") + intent_names = html.findAll('h2') + intent_list_samples = html.findAll('ul') + + # Extract data + intents = [] + for intent_name, intent_samples in zip(intent_names, intent_list_samples): + intent_name = intent_name.string.split("intent:")[-1] + samples = BeautifulSoup(str(intent_samples), features="html.parser").findAll('li') + samples = [sample.string for sample in samples] + + intent = {"name" : intent_name, + "samples" : samples, + "project" : project } + intents.append(Intent(**intent)) + + + Intent.objects.bulk_create(intents) + file_tmp.close() + + except Exception as e: + return JsonResponse({'content': "File had problems during upload"}) + raise e + + return JsonResponse({'content': "File has been successfully uploaded"}) \ No newline at end of file From 6b5d29aa912253a12c661bc05f30a8c6248da6b7 Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Tue, 17 Sep 2019 23:03:11 -0300 Subject: [PATCH 03/20] Adjusting parsing of item content --- requirements.txt | 3 ++- src/api/views/files.py | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index c6f194f1..1ca18a81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ requests ~= 2.22.0 bs4 ~= 0.0.1 Markdown ~= 3.1.1 ruamel.yaml ~= 0.16.5 -ruamel.yaml.clib ~= 0.1.2 \ No newline at end of file +ruamel.yaml.clib ~= 0.1.2 +html2markdown ~= 0.1.7 \ No newline at end of file diff --git a/src/api/views/files.py b/src/api/views/files.py index 172fa902..c8c8aa1f 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -8,8 +8,9 @@ from api.parser import StoryParser, IntentParser from api.utils.handlers import handle_uploaded_file -from ruamel.yaml import YAML import markdown +import html2markdown +from ruamel.yaml import YAML from bs4 import BeautifulSoup class StoriesFile(APIView): @@ -68,7 +69,10 @@ def put(self, request, project_id, format=None): file_content = file_tmp.read().decode('utf-8') # Parser markdown to html - html = markdown.markdown(file_content) + md = markdown.Markdown() + html = md.convert(file_content) + print("MD", md) + md.reset() html = BeautifulSoup(html, features="html.parser") intent_names = html.findAll('h2') intent_list_samples = html.findAll('ul') @@ -76,21 +80,34 @@ def put(self, request, project_id, format=None): # Extract data intents = [] for intent_name, intent_samples in zip(intent_names, intent_list_samples): - intent_name = intent_name.string.split("intent:")[-1] - samples = BeautifulSoup(str(intent_samples), features="html.parser").findAll('li') - samples = [sample.string for sample in samples] - - intent = {"name" : intent_name, - "samples" : samples, - "project" : project } - intents.append(Intent(**intent)) + if intent_name.string is not None: + if "intent" in intent_name.string: + intent_name = intent_name.string.split("intent:")[-1] + s = BeautifulSoup(str(intent_samples), features="html.parser").findAll('li') + + samples = [] + for sample in s: + sample_string = "" + if sample.string is None: + s = str(sample) + s = s.replace("
  • ", "") + s = s.replace("
  • ", "") + sample_string = html2markdown.convert(s) + else: + sample_string = sample.string + + samples.append(sample_string) + + intent = {"name" : intent_name, + "samples" : samples, + "project" : project } + intents.append(Intent(**intent)) - Intent.objects.bulk_create(intents) file_tmp.close() except Exception as e: - return JsonResponse({'content': "File had problems during upload"}) raise e + return JsonResponse({'content': "File had problems during upload"}) return JsonResponse({'content': "File has been successfully uploaded"}) \ No newline at end of file From 159c198f4c3b40fac799c8c4fcb734bc8ca63bdb Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Wed, 18 Sep 2019 19:28:10 -0300 Subject: [PATCH 04/20] Add utter file parser and pydoc --- src/api/urls.py | 1 + src/api/views/files.py | 49 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/src/api/urls.py b/src/api/urls.py index dfcc6630..2addf71d 100644 --- a/src/api/urls.py +++ b/src/api/urls.py @@ -16,4 +16,5 @@ path('projects//stories/', ListStories.as_view()), path('files//stories/', StoriesFile.as_view(), name='stories-file'), path('files//intents/', IntentsFile.as_view(), name='intents-file'), + path('files//utters/', UttersFile.as_view(), name='utters-file'), ] diff --git a/src/api/views/files.py b/src/api/views/files.py index c8c8aa1f..32db3629 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -4,7 +4,7 @@ from django.shortcuts import get_object_or_404 from django.http import JsonResponse, Http404 -from api.models import Project, Story, Intent +from api.models import Project, Story, Intent, Utter from api.parser import StoryParser, IntentParser from api.utils.handlers import handle_uploaded_file @@ -58,7 +58,9 @@ def get(self, request, project_id): return JsonResponse({'content': markdown_str}) - + """ + Receives a put request with a project id and a Markdown file with intents specs as arguments. Then parse and add this file into DB + """ def put(self, request, project_id, format=None): project = get_object_or_404(Project, pk=project_id) @@ -110,4 +112,47 @@ def put(self, request, project_id, format=None): raise e return JsonResponse({'content': "File had problems during upload"}) + return JsonResponse({'content': "File has been successfully uploaded"}) + + + +class UttersFile(APIView): + """ + Receives a put request with a project id and a YML file with utter specs as arguments. Then parse and add this file into DB + """ + + def put(self, request, project_id, format=None): + project = get_object_or_404(Project, pk=project_id) + + try: + # Handle file from request + file_obj = request.data['file'] + file_tmp = handle_uploaded_file(file_obj) + + # Handle yaml + yaml=YAML(typ="safe") + domain = yaml.load(file_tmp) + + utters_list = domain['templates'] + utters = [] + for utter_name in utters_list.keys(): + alternatives = [] + + for alternative in utters_list[utter_name]: + alternatives.append(alternative['text']) + + utter = {"name" : utter_name, + "alternatives" : [alternatives], + "project" : project } + + utters.append(Utter(**utter)) + + Utter.objects.bulk_create(utters) + + file_tmp.close() + + except Exception as e: + raise e + return JsonResponse({'content': "File had problems during upload"}) + return JsonResponse({'content': "File has been successfully uploaded"}) \ No newline at end of file From f856d86bda311b4c1eea14f4265a63d353e95968 Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Tue, 24 Sep 2019 10:27:25 -0300 Subject: [PATCH 05/20] Adjust utter parser --- src/api/views/files.py | 44 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/src/api/views/files.py b/src/api/views/files.py index 32db3629..21ac94a8 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -8,6 +8,7 @@ from api.parser import StoryParser, IntentParser from api.utils.handlers import handle_uploaded_file +import re import markdown import html2markdown from ruamel.yaml import YAML @@ -36,6 +37,44 @@ def get(self, request, project_id): return JsonResponse({'content': markdown_str}) + """ + Receives a put request with a project id and a Markdown file with story specs as arguments. Then parse and add this file into DB + """ + def put(self, request, project_id, format=None): + project = get_object_or_404(Project, pk=project_id) + + try: + # Handle file from request + file_obj = request.data['file'] + file_tmp = handle_uploaded_file(file_obj) + file_content = file_tmp.read().decode('utf-8') + + # Parser markdown to html + md = markdown.Markdown() + html = md.convert(file_content) + html = BeautifulSoup(html, features="html.parser") + story_names = html.findAll('h2') + + content_list = [] + a = html.findAll(re.compile('\
      ')) + + for content_options in html.findAll(re.compile('\\\w*\')): + pass + + # Extract data + stories = [] + for story_name, contents in zip(story_names, content_list): + pass + file_tmp.close() + + except Exception as e: + raise e + return JsonResponse({'content': "File had problems during upload"}) + + return JsonResponse({'content': "File has been successfully uploaded"}) + + + class IntentsFile(APIView): """ Receives a get request with a project id and returns @@ -73,8 +112,6 @@ def put(self, request, project_id, format=None): # Parser markdown to html md = markdown.Markdown() html = md.convert(file_content) - print("MD", md) - md.reset() html = BeautifulSoup(html, features="html.parser") intent_names = html.findAll('h2') intent_list_samples = html.findAll('ul') @@ -139,10 +176,11 @@ def put(self, request, project_id, format=None): alternatives = [] for alternative in utters_list[utter_name]: - alternatives.append(alternative['text']) + alternatives.append(alternative['text'].split("\n\n")) utter = {"name" : utter_name, "alternatives" : [alternatives], + "multiple_alternatives": True if len(alternatives) > 1 else False, "project" : project } utters.append(Utter(**utter)) From 25861885e0ff18c35983778c2a305ee2eaae20f9 Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Wed, 25 Sep 2019 17:30:45 -0300 Subject: [PATCH 06/20] Add parser for story --- src/api/views/files.py | 64 ++++++++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/src/api/views/files.py b/src/api/views/files.py index 1fe48ac1..cd55df84 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -34,7 +34,47 @@ def get(self, request, project_id): for story in stories: markdown_str += parser.parse(story) - return JsonResponse({'content': markdown_str}) + return JsonResponse({'content': markdown_str}) + + def clean_str(string): + string = string.replace(" ", "") + string = string.replace("\n", "") + + return string + + + def markdown_parser(self, markdown_file): + stories = [] + stories_file = markdown_file.split("## ")[1:] + + for story in stories_file: + story = story.split("\n*") + story_name = story[0] + intents = story[1:] + + content = [] + for intent in intents: + intent = intent.split("\n -") + + intent_name = StoriesFile.clean_str(intent[0]) + utters = intent[1:] + + intent = Intent.objects.get(name=intent_name) + content.append({"id": intent.id, + "type": "intent"}) + + + for utter in utters: + utter_name = StoriesFile.clean_str(utter) + utter = Utter.objects.get(name=utter_name) + + content.append({"id": utter.id, + "type": "utter"}) + + stories.append({"name": story_name, + "content": content}) + + return stories """ @@ -48,23 +88,17 @@ def put(self, request, project_id, format=None): file_obj = request.data['file'] file_tmp = handle_uploaded_file(file_obj) file_content = file_tmp.read().decode('utf-8') + + stories_dicts = StoriesFile().markdown_parser(file_content) - # Parser markdown to html - md = markdown.Markdown() - html = md.convert(file_content) - html = BeautifulSoup(html, features="html.parser") - story_names = html.findAll('h2') - - content_list = [] - a = html.findAll(re.compile('\
        ')) + stories = [] + for story in stories_dicts: + story.update({"project": project}) + print(story) + stories.append(Story(**story)) - for content_options in html.findAll(re.compile('\\\w*\')): - pass + Story.objects.bulk_create(stories) - # Extract data - stories = [] - for story_name, contents in zip(story_names, content_list): - pass file_tmp.close() except Exception as e: From e744a1c4b7e92a440823899a4c757b3f91859cea Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Wed, 25 Sep 2019 19:02:16 -0300 Subject: [PATCH 07/20] New parsers and more things --- src/api/models/intent.py | 1 + src/api/parsers.py | 98 ++++++++++++++++++++++++ src/api/views/files.py | 158 ++++++++++++++++++++++++++++++--------- 3 files changed, 221 insertions(+), 36 deletions(-) create mode 100644 src/api/parsers.py diff --git a/src/api/models/intent.py b/src/api/models/intent.py index 7419de1f..7b66e14d 100644 --- a/src/api/models/intent.py +++ b/src/api/models/intent.py @@ -4,6 +4,7 @@ import random class Intent(models.Model): + # TODO add unique=True to name field name = models.TextField() samples = models.ListField(default=[]) project = models.EmbeddedModelField( diff --git a/src/api/parsers.py b/src/api/parsers.py new file mode 100644 index 00000000..3a93dc94 --- /dev/null +++ b/src/api/parsers.py @@ -0,0 +1,98 @@ + +from collections import deque, namedtuple + +Token = namedtuple('Token', ['type', 'data']) + + +def parse_story(src): + """ + Parse a story file and return a list of documents with the format: + + [{ + 'story': , + 'intents': [ + { + 'intent': , + 'utters': [, , ...], + } + ], + }, + ... + ] + """ + parser = StoryParser(src) + return parser.parse() + + +# +# Auxiliary classes +# +class StoryParser: + """ + Recursive desecent parser for stories (not part of public API) + """ + def __init__(self, src): + self.tokens = deque(self.lex(src)) + + def lex(self, src): + for line in src.splitlines(): + line = line.strip() + if line.startswith(' +## Money 8.3 +* captacao + - utter_captacao + - utter_continuar_conversa + +## Money 10 +* lei_rouanet_valor_maximo_projeto + - utter_lei_rouanet_valor_maximo_projeto +* lei_rouanet_valor_maximo_geral + - utter_lei_rouanet_valor_minimo + - utter_lei_rouanet_valor_maximo_pessoa_fisica + - utter_lei_rouanet_valor_maximo_pessoa_juridica + - utter_lei_rouanet_valor_maximo_regiao + - utter_continuar_conversa +''' + pprint(parse_story(src)) \ No newline at end of file diff --git a/src/api/views/files.py b/src/api/views/files.py index 573654cb..9586665d 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -139,55 +139,67 @@ def get(self, request, project_id): def put(self, request, project_id, format=None): project = get_object_or_404(Project, pk=project_id) - try: + file_obj = request.data['file'] + with handle_uploaded_file(file_obj) as file_tmp: # Handle file from request - file_obj = request.data['file'] - file_tmp = handle_uploaded_file(file_obj) file_content = file_tmp.read().decode('utf-8') # Parser markdown to html md = markdown.Markdown() html = md.convert(file_content) html = BeautifulSoup(html, features="html.parser") - intent_names = html.findAll('h2') - intent_list_samples = html.findAll('ul') + names = html.findAll('h2') + list_samples = html.findAll('ul') # Extract data intents = [] - for intent_name, intent_samples in zip(intent_names, intent_list_samples): - if intent_name.string is not None: - if "intent" in intent_name.string: - intent_name = intent_name.string.split("intent:")[-1] - s = BeautifulSoup(str(intent_samples), features="html.parser").findAll('li') - - samples = [] - for sample in s: - sample_string = "" - if sample.string is None: - s = str(sample) - s = s.replace("
      • ", "") - s = s.replace("
      • ", "") - sample_string = html2markdown.convert(s) - else: - sample_string = sample.string - - samples.append(sample_string) - - intent = {"name" : intent_name, - "samples" : samples, - "project" : project } - intents.append(Intent(**intent)) - - Intent.objects.bulk_create(intents) - file_tmp.close() + for name, samples in zip(names, list_samples): + if name.string is not None and "intent" in name.string: + name = name.string.split("intent:")[-1] + intents.append(Intent( + name=name, + samples=[ + li.string or html2markdown.convert(innerHTML(li)).replace('# ', '#') + for li in samples.findAll('li')], + project=project, + )) + + bulk_update_unique(intents, 'name') + return JsonResponse({'content': "File has been successfully uploaded"}) - except Exception as e: - raise e - return JsonResponse({'content': "File had problems during upload"}) - return JsonResponse({'content': "File has been successfully uploaded"}) +def bulk_update_unique(items, attr='name'): + """ + Save a list of elements that have a new value for the given attribute. + """ + if not items: + return [] + + objects = type(items[0]).objects + query = {attr + '__in': [x.name for x in items]} + repeated = objects.filter(**query).values_list(attr, flat=True) + + # Check the database + if repeated: + print(f'Repeated values for {attr}:', ', '.join(repeated)) + repeated = set(repeated) + items = [x for x in items if getattr(x, attr) not in repeated] + + # Check internal consistency + values = set() + items_final = [] + for item in items: + value = getattr(item, attr) + if value in values: + print(f'Duplicated entry:', value) + else: + items_final.append(item) + + return objects.bulk_create(items) +def innerHTML(element): + return element.decode_contents(formatter="html") class UttersFile(APIView): """ @@ -290,4 +302,78 @@ def get(self, request, project_id): return response else: - raise Http404 \ No newline at end of file + raise Http404 + + +from collections import deque, namedtuple +Token = namedtuple('Token', ['type', 'data']) + + +def lex(story): + for line in story.splitlines(): + line = line.strip() + if line.startwith(' +## Money 8.3 +* captacao + - utter_captacao + - utter_continuar_conversa + +## Money 10 +* lei_rouanet_valor_maximo_projeto + - utter_lei_rouanet_valor_maximo_projeto +* lei_rouanet_valor_maximo_geral + - utter_lei_rouanet_valor_minimo + - utter_lei_rouanet_valor_maximo_pessoa_fisica + - utter_lei_rouanet_valor_maximo_pessoa_juridica + - utter_lei_rouanet_valor_maximo_regiao + - utter_continuar_conversa +''' + print(parse(src)) \ No newline at end of file From 93df7ba3a0a677a01d3fac60446468ee7467ee83 Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Thu, 26 Sep 2019 15:40:54 -0300 Subject: [PATCH 08/20] Update utter parser --- src/api/{parsers.py => decoder.py} | 33 +---- src/api/views/files.py | 197 +++++++---------------------- 2 files changed, 54 insertions(+), 176 deletions(-) rename src/api/{parsers.py => decoder.py} (74%) diff --git a/src/api/parsers.py b/src/api/decoder.py similarity index 74% rename from src/api/parsers.py rename to src/api/decoder.py index 3a93dc94..52fcbe36 100644 --- a/src/api/parsers.py +++ b/src/api/decoder.py @@ -1,10 +1,8 @@ - from collections import deque, namedtuple Token = namedtuple('Token', ['type', 'data']) - -def parse_story(src): +def decode_story_file(src): """ Parse a story file and return a list of documents with the format: @@ -20,14 +18,14 @@ def parse_story(src): ... ] """ - parser = StoryParser(src) - return parser.parse() + decoder = StoryDecoder(src) + return decoder.decode() # # Auxiliary classes # -class StoryParser: +class StoryDecoder: """ Recursive desecent parser for stories (not part of public API) """ @@ -51,7 +49,7 @@ def lex(self, src): else: raise ValueError(f'invalid line: {line}') - def parse(self): + def decode(self): stories = [] while self.tokens: stories.append(self.story()) @@ -75,24 +73,3 @@ def intent(self): utter = tks.popleft() utters.append(utter.data) return {'intent': intent.data, 'utters': utters} - -if __name__ == '__main__': - from pprint import pprint - src = ''' - -## Money 8.3 -* captacao - - utter_captacao - - utter_continuar_conversa - -## Money 10 -* lei_rouanet_valor_maximo_projeto - - utter_lei_rouanet_valor_maximo_projeto -* lei_rouanet_valor_maximo_geral - - utter_lei_rouanet_valor_minimo - - utter_lei_rouanet_valor_maximo_pessoa_fisica - - utter_lei_rouanet_valor_maximo_pessoa_juridica - - utter_lei_rouanet_valor_maximo_regiao - - utter_continuar_conversa -''' - pprint(parse_story(src)) \ No newline at end of file diff --git a/src/api/views/files.py b/src/api/views/files.py index 9586665d..b435c2ae 100644 --- a/src/api/views/files.py +++ b/src/api/views/files.py @@ -9,6 +9,7 @@ from api.parser import StoryParser, IntentParser, DomainParser from api.utils.handlers import handle_uploaded_file from api.utils import get_zipped_files +from api.decoder import decode_story_file import os import markdown @@ -38,47 +39,6 @@ def get(self, request, project_id): return JsonResponse({'content': markdown_str}) - def clean_str(string): - string = string.replace(" ", "") - string = string.replace("\n", "") - - return string - - - def markdown_parser(self, markdown_file): - stories = [] - stories_file = markdown_file.split("## ")[1:] - - for story in stories_file: - story = story.split("\n*") - story_name = story[0] - intents = story[1:] - - content = [] - for intent in intents: - intent = intent.split("\n -") - - intent_name = StoriesFile.clean_str(intent[0]) - utters = intent[1:] - - intent = Intent.objects.get(name=intent_name) - content.append({"id": intent.id, - "type": "intent"}) - - - for utter in utters: - utter_name = StoriesFile.clean_str(utter) - utter = Utter.objects.get(name=utter_name) - - content.append({"id": utter.id, - "type": "utter"}) - - stories.append({"name": story_name, - "content": content}) - - return stories - - """ Receives a put request with a project id and a Markdown file with story specs as arguments. Then parse and add this file into DB """ @@ -88,20 +48,42 @@ def put(self, request, project_id, format=None): try: # Handle file from request file_obj = request.data['file'] - file_tmp = handle_uploaded_file(file_obj) - file_content = file_tmp.read().decode('utf-8') - stories_dicts = StoriesFile().markdown_parser(file_content) - - stories = [] - for story in stories_dicts: - story.update({"project": project}) - print(story) - stories.append(Story(**story)) + with handle_uploaded_file(file_obj) as file_tmp: + file_content = file_tmp.read().decode('utf-8') + + stories_dicts = decode_story_file(file_content) + + stories = [] + for story in stories_dicts: + content = [] + for intent in story['intents']: + content.append( + { + "id": Intent.objects.get(name=intent['intent']).id, + "type": "intent" + } + ) + for utter in intent['utters']: + content.append( + { + "id": Utter.objects.get(name=utter).id, + "type": "utter" + } + ) + + print(content) + stories.append( + { + "name": story['story'], + "content": content, + "project": project + } + ) + + + # Story.objects.bulk_create(stories) - Story.objects.bulk_create(stories) - - file_tmp.close() except Exception as e: raise e @@ -209,38 +191,30 @@ class UttersFile(APIView): def put(self, request, project_id, format=None): project = get_object_or_404(Project, pk=project_id) - try: - # Handle file from request - file_obj = request.data['file'] - file_tmp = handle_uploaded_file(file_obj) + # Handle file from request + file_obj = request.data['file'] + file_tmp = handle_uploaded_file(file_obj) + with handle_uploaded_file(file_obj) as file_tmp: # Handle yaml yaml=YAML(typ="safe") domain = yaml.load(file_tmp) utters_list = domain['templates'] utters = [] - for utter_name in utters_list.keys(): - alternatives = [] - - for alternative in utters_list[utter_name]: - alternatives.append(alternative['text'].split("\n\n")) - - utter = {"name" : utter_name, - "alternatives" : [alternatives], - "multiple_alternatives": True if len(alternatives) > 1 else False, - "project" : project } - - utters.append(Utter(**utter)) - Utter.objects.bulk_create(utters) - - file_tmp.close() + for utter_name in utters_list.keys(): + alternatives = [x['text'].split("\n\n") for x in utters_list[utter_name]] - except Exception as e: - raise e - return JsonResponse({'content': "File had problems during upload"}) + utters.append(Utter( + name= utter_name, + alternatives=[alternatives], + multiple_alternatives=True if len(alternatives) > 1 else False, + project=project + )) + + bulk_update_unique(utters, 'name') return JsonResponse({'content': "File has been successfully uploaded"}) class DomainFile(APIView): @@ -304,76 +278,3 @@ def get(self, request, project_id): else: raise Http404 - -from collections import deque, namedtuple -Token = namedtuple('Token', ['type', 'data']) - - -def lex(story): - for line in story.splitlines(): - line = line.strip() - if line.startwith(' -## Money 8.3 -* captacao - - utter_captacao - - utter_continuar_conversa - -## Money 10 -* lei_rouanet_valor_maximo_projeto - - utter_lei_rouanet_valor_maximo_projeto -* lei_rouanet_valor_maximo_geral - - utter_lei_rouanet_valor_minimo - - utter_lei_rouanet_valor_maximo_pessoa_fisica - - utter_lei_rouanet_valor_maximo_pessoa_juridica - - utter_lei_rouanet_valor_maximo_regiao - - utter_continuar_conversa -''' - print(parse(src)) \ No newline at end of file From f407c8688af96f3f9af7f61514dd7d4d84f0c902 Mon Sep 17 00:00:00 2001 From: Hugo Carvalho Date: Thu, 26 Sep 2019 16:01:45 -0300 Subject: [PATCH 09/20] Refactor intent parser --- src/api/decoder.py | 54 ++++++++++++++++++++++++++++++++++++++++++ src/api/views/files.py | 31 ++++++++---------------- 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/src/api/decoder.py b/src/api/decoder.py index 52fcbe36..b5220531 100644 --- a/src/api/decoder.py +++ b/src/api/decoder.py @@ -22,6 +22,21 @@ def decode_story_file(src): return decoder.decode() +def decode_intent_file(src): + """ + Parse a intent file and return a list of documents with the format: + + [{ + 'intent': , + 'utters': [, , ...] + }, + ... + ] + """ + decoder = IntentDecoder(src) + return decoder.decode() + + # # Auxiliary classes # @@ -73,3 +88,42 @@ def intent(self): utter = tks.popleft() utters.append(utter.data) return {'intent': intent.data, 'utters': utters} + + + +class IntentDecoder: + """ + Recursive desecent parser for intents (not part of public API) + """ + def __init__(self, src): + self.tokens = deque(self.lex(src)) + + def lex(self, src): + for line in src.splitlines(): + line = line.strip() + if line.startswith('