From 67c80a5c70013fd899be1b2e5474874fd42917e6 Mon Sep 17 00:00:00 2001 From: sonurdogan Date: Wed, 10 Jan 2024 23:00:19 +0300 Subject: [PATCH 1/5] Added new healthcare pipelines --- nlu/spellbook.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nlu/spellbook.py b/nlu/spellbook.py index 4e993451..6fb87f36 100644 --- a/nlu/spellbook.py +++ b/nlu/spellbook.py @@ -10591,6 +10591,8 @@ class Spellbook: healthcare_component_alias_references = {} pretrained_healthcare_pipe_references = { + 'ar': {'ar.deid.clinical': 'clinical_deidentification' + }, 'de': {'de.deid.clinical': 'clinical_deidentification', 'de.deid.ner_subentity.pipeline': 'ner_deid_subentity_pipeline', 'de.deid.pipeline': 'german_deid_pipeline_spark24', @@ -10598,6 +10600,8 @@ class Spellbook: 'en': { 'en.classify.bert_sequence.binary_rct_biobert.pipeline': 'bert_sequence_classifier_binary_rct_biobert_pipeline', + 'en.classify.bert_sequence.vop_hcp_consult.pipeline': 'bert_sequence_classifier_vop_hcp_consult_pipeline', + 'en.classify.bert_sequence.vop_drug_side_effect.pipeline': 'bert_sequence_classifier_vop_drug_side_effect_pipeline', 'en.classify.bert_token_ner_jsl.pipeline': 'bert_token_classifier_ner_jsl_pipeline', 'en.classify.rct_binary_biobert.pipeline': 'rct_binary_classifier_biobert_pipeline', 'en.classify.rct_binary_use.pipeline': 'rct_binary_classifier_use_pipeline', @@ -10636,6 +10640,7 @@ class Spellbook: 'en.map_entity.icd10cm_to_snomed.pipe': 'icd10cm_snomed_mapping', 'en.map_entity.icdo_to_snomed.pipe': 'icdo_snomed_mapping', 'en.map_entity.rxnorm_to_ndc.pipe': 'rxnorm_ndc_mapping', + 'en.map_entity.rxnorm_resolver.pipe': 'rxnorm_resolver_pipeline', 'en.map_entity.snomed_to_icd10cm.pipe': 'snomed_icd10cm_mapping', 'en.map_entity.snomed_to_icdo.pipe': 'snomed_icdo_mapping', 'en.map_entity.umls_clinical_findings_resolver': 'umls_clinical_findings_resolver_pipeline', @@ -10729,9 +10734,15 @@ class Spellbook: 'en.med_ner.wip_greedy_biobert.pipeline': 
'jsl_rd_ner_wip_greedy_biobert_pipeline', 'en.med_ner.wip_greedy_clinical.pipeline': 'jsl_ner_wip_greedy_clinical_pipeline', 'en.med_ner.wip_modifier_clinical.pipeline': 'jsl_ner_wip_modifier_clinical_pipeline', + 'en.med_ner.vop.pipeline': 'ner_vop_pipeline', + 'en.med_ner.vop_demographic.pipeline': 'ner_vop_demographic_pipeline', + 'en.med_ner.vop_treatment.pipeline': 'ner_vop_treatment_pipeline', + 'en.med_ner.vop_problem.pipeline': 'ner_vop_problem_pipeline', 'en.mesh.umls.mapping': 'mesh_umls_mapping', 'en.ner.medication': 'ner_medication_pipeline', 'en.ner.model_finder.pipeline': 'ner_model_finder', + 'en.ner.oncology.pipeline': 'ner_oncology_pipeline', + 'en.ner.oncology_response_to_treatment.pipeline': 'ner_oncology_response_to_treatment_pipeline', 'en.oncology_biomarker.pipeline': 'oncology_biomarker_pipeline', 'en.oncology_diagnosis.pipeline': 'oncology_diagnosis_pipeline', 'en.oncology_general.pipeline': 'oncology_general_pipeline', @@ -10752,6 +10763,7 @@ class Spellbook: 'en.rxnorm.umls.mapping': 'rxnorm_umls_mapping', 'en.snomed.umls.mapping': 'snomed_umls_mapping', 'en.spell.clinical.pipeline': 'spellcheck_clinical_pipeline', + 'en.summarize.biomedical_pubmed.pipeline':'summarizer_biomedical_pubmed_pipeline', 'en.summarize.clinical_guidelines_large.pipeline': 'summarizer_clinical_guidelines_large_pipeline', 'en.summarize.clinical_jsl_augmented.pipeline': 'summarizer_clinical_jsl_augmented_pipeline', 'en.summarize.clinical_questions.pipeline': 'summarizer_clinical_questions_pipeline', From d1c2d7256510860b2db9c22bc9751d14339aaa8a Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Thu, 11 Jan 2024 03:28:52 +0100 Subject: [PATCH 2/5] Fix bug which caused some pretrained pipes with sentence embed converters to crash sometimes --- nlu/pipe/utils/component_utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nlu/pipe/utils/component_utils.py b/nlu/pipe/utils/component_utils.py index f9d174d6..809cdf8a 100644 ---
a/nlu/pipe/utils/component_utils.py +++ b/nlu/pipe/utils/component_utils.py @@ -186,12 +186,11 @@ def set_storage_ref_attribute_of_embedding_converters(pipe_list: List[NluCompone for converter in pipe_list: if ComponentUtils.is_embedding_provider(converter) and ComponentUtils.is_embedding_converter(converter): # First find the embed col of the converter - embed_col = ComponentUtils.extract_embed_col(converter) + embed_col = ComponentUtils.extract_embed_col(converter, column='input') for provider in pipe_list: # Now find the Embedding generator that is feeding the converter - if embed_col in provider.spark_input_column_names: - converter.storage_ref = StorageRefUtils.nlp_extract_storage_ref_nlp_model(provider.model) - # converter.storage_ref = StorageRefUtils.extract_storage_ref(provider) + if embed_col in provider.spark_output_column_names: + converter.storage_ref = StorageRefUtils.nlp_extract_storage_ref_nlp_model(provider) return pipe_list @staticmethod From 8e5a21c43f0500367c56d42d62e407948be6d2e5 Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Fri, 12 Jan 2024 00:04:07 +0100 Subject: [PATCH 3/5] disable chunkmapper LP preference --- nlu/pipe/utils/pipe_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nlu/pipe/utils/pipe_utils.py b/nlu/pipe/utils/pipe_utils.py index cddad1e2..e181caea 100644 --- a/nlu/pipe/utils/pipe_utils.py +++ b/nlu/pipe/utils/pipe_utils.py @@ -688,8 +688,8 @@ def add_metadata_to_pipe(pipe: NLUPipeline): if c.type == AnnoTypes.QUESTION_TABLE_ANSWERER: pipe.has_table_qa_models = True - if c.type == AnnoTypes.CHUNK_MAPPER: - pipe.prefer_light = True + # if c.type == AnnoTypes.CHUNK_MAPPER: + # pipe.prefer_light = True if c.type == AnnoTypes.QUESTION_SPAN_CLASSIFIER: pipe.has_span_classifiers = True From ef0f5bf488fe2f2142f746c17626f7b73b0d4501 Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Fri, 12 Jan 2024 00:13:48 +0100 Subject: [PATCH 4/5] bump version --- nlu/__init__.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/nlu/__init__.py b/nlu/__init__.py index 07d73006..cb7355ee 100644 --- a/nlu/__init__.py +++ b/nlu/__init__.py @@ -1,4 +1,4 @@ -__version__ = '5.1.1' +__version__ = '5.1.2' import nlu.utils.environment.env_utils as env_utils From 0efbdb9e77eef57af6609978b38740b9509d630c Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Fri, 12 Jan 2024 00:18:47 +0100 Subject: [PATCH 5/5] bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9621127e..ff40fe74 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ name='nlu', - version='5.1.1', + version='5.1.2', description='John Snow Labs NLU provides state of the art algorithms for NLP&NLU with 20000+ of pretrained models in 200+ languages. It enables swift and simple development and research with its powerful Pythonic and Keras inspired API. It is powerd by John Snow Labs powerful Spark NLP library.',