From 183846d99faeea5cab1e8577126f06f4715d4a64 Mon Sep 17 00:00:00 2001
From: Masoud Abdi
Date: Wed, 23 Jun 2021 22:05:08 +1000
Subject: [PATCH] GCP custom entity extractor added (AutoML)

---
Review notes (this section is ignored when the patch is applied):
  * Line structure restored; the patch had been collapsed onto long lines.
  * gcp_custom_entities.jinja2: entities whose start offset is 0 or absent
    are no longer dropped, and the separator comma is computed over the
    entities that are actually emitted, so the output is always valid JSON.
  * New classes are surrounded by two blank lines, matching the files.
  * The "index" blob ids below are those of the original commit.

 auto_labeling_pipeline/mappings.py            |  6 ++++
 auto_labeling_pipeline/menu.py                |  5 +++
 auto_labeling_pipeline/models.py              | 30 +++++++++++++++++++
 .../templates/gcp_custom_entities.jinja2      | 12 ++++++++
 4 files changed, 53 insertions(+)
 create mode 100644 auto_labeling_pipeline/templates/gcp_custom_entities.jinja2

diff --git a/auto_labeling_pipeline/mappings.py b/auto_labeling_pipeline/mappings.py
index 6cb34bd..faec490 100644
--- a/auto_labeling_pipeline/mappings.py
+++ b/auto_labeling_pipeline/mappings.py
@@ -58,6 +58,12 @@ class GCPEntitiesTemplate(MappingTemplate):
     label_collection = SequenceLabels
     template_file = 'gcp_entities.jinja2'
 
+
+# Renders gcp_custom_entities.jinja2 into SequenceLabels.
+class GCPCustomEntitiesTemplate(MappingTemplate):
+    label_collection = SequenceLabels
+    template_file = 'gcp_custom_entities.jinja2'
+
 
 class AmazonRekognitionLabelDetectionTemplate(MappingTemplate):
     label_collection = ClassificationLabels
diff --git a/auto_labeling_pipeline/menu.py b/auto_labeling_pipeline/menu.py
index ba28541..bcfbd52 100644
--- a/auto_labeling_pipeline/menu.py
+++ b/auto_labeling_pipeline/menu.py
@@ -70,6 +70,11 @@ def register(cls, task: Type[t.Task], model: Type[mo.RequestModel], template: Ty
     mo.GCPEntitiesRequestModel,
     mp.GCPEntitiesTemplate
 )
+Options.register(
+    t.SequenceLabeling,
+    mo.GCPCustomEntitiesRequestModel,
+    mp.GCPCustomEntitiesTemplate
+)
 Options.register(
     t.SequenceLabeling,
     mo.AmazonComprehendEntityRequestModel,
diff --git a/auto_labeling_pipeline/models.py b/auto_labeling_pipeline/models.py
index bb06f99..f3185b5 100644
--- a/auto_labeling_pipeline/models.py
+++ b/auto_labeling_pipeline/models.py
@@ -107,6 +107,36 @@ def send(self, text: str):
         response = requests.post(url, headers=headers, params=params, json=body).json()
         return response
 
+
+class GCPCustomEntitiesRequestModel(RequestModel):
+    """
+    This allows you to analyze entities in a text using a custom entity
+    extractor trained with the AutoML Natural Language API.
+    """
+    # URL the request is POSTed to (presumably the model's predict endpoint).
+    url: str
+    # Sent verbatim as the value of the ``Authorization`` header.
+    authorization: str
+
+    class Config:
+        title = 'GCP Custom Entity Analysis'
+
+    def send(self, text: str):
+        """POST ``text`` to ``self.url`` and return the decoded JSON response."""
+        headers = {
+            'Content-Type': 'application/json',
+            'Authorization': self.authorization,
+        }
+        body = {
+            "payload": {
+                "textSnippet": {
+                    "content": text,
+                    "mime_type": "text/plain"
+                }
+            }
+        }
+        return requests.post(self.url, headers=headers, json=body).json()
+
 
 class AWSMixin(BaseModel):
     aws_access_key: str
diff --git a/auto_labeling_pipeline/templates/gcp_custom_entities.jinja2 b/auto_labeling_pipeline/templates/gcp_custom_entities.jinja2
new file mode 100644
index 0000000..865d45f
--- /dev/null
+++ b/auto_labeling_pipeline/templates/gcp_custom_entities.jinja2
@@ -0,0 +1,12 @@
+[
+  {#- Only results that carry an end offset are emitted. A missing startOffset is
+      rendered as 0 (presumably the API omits zero-valued fields; confirm). -#}
+  {% for entity in input.payload | selectattr('textExtraction.textSegment.endOffset', 'defined') %}
+  {%- set segment = entity.textExtraction.textSegment %}
+  {
+    "label": "{{ entity.displayName }}",
+    "start_offset": {{ segment.startOffset | default(0) }},
+    "end_offset": {{ segment.endOffset }}
+  }{% if not loop.last %},{% endif %}
+  {% endfor %}
+]
\ No newline at end of file