From ffb89964e7bfe4486d375532605d6737e7a988bc Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 19 Jan 2024 16:45:29 +0100 Subject: [PATCH] add fewshot config with NER.v3 --- .../configs/fewshot_drugs_dose.json | 48 +++++++++++++++++++ .../configs/ner_fewshot_openai.cfg | 27 +++++++++++ .../configs/ner_openai.cfg | 17 ------- .../configs/ner_zeroshot_openai.cfg | 2 +- tutorials/llm_clinical_trials/project.yml | 2 +- .../scripts/visualise_entities.py | 12 ++--- 6 files changed, 83 insertions(+), 25 deletions(-) create mode 100644 tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json create mode 100644 tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg delete mode 100644 tutorials/llm_clinical_trials/configs/ner_openai.cfg diff --git a/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json b/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json new file mode 100644 index 00000000..91248318 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json @@ -0,0 +1,48 @@ +[ + { + "text": "The patient was given 1mg of paracetamol.", + "spans": [ + { + "text": "paracetamol", + "is_entity": true, + "label": "Drug", + "reason": "is a drug name, used as medication" + }, + { + "text": "1mg", + "is_entity": true, + "label": "Dose", + "reason": "is the quantity or dose of the given medication" + }, + { + "text": "patient", + "is_entity": false, + "label": "==NONE==", + "reason": "is a person, not a drug or dose" + } + ] + }, + { + "text": "Throughout the treatment, they received Aspirin 1mg/kg.", + "spans": [ + { + "text": "Aspirin", + "is_entity": true, + "label": "Drug", + "reason": "is a drug brand, used as medication" + }, + { + "text": "1mg/kg", + "is_entity": true, + "label": "Dose", + "reason": "is the quantity or dose of the given drug" + }, + { + "text": "Aspirin 1mg/kg", + "is_entity": false, + "label": "==NONE==", + "reason": "contains both the drug and the dose - these should be two entities instead" + } + ] + } +] \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg new file mode 100644 index 00000000..e8f85033 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg @@ -0,0 +1,27 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.GPT-4.v2" +config = {"seed": 342, "temperature": 0.0} + +[components.llm.task] +@llm_tasks = "spacy.NER.v3" +labels = ["Drug", "Dose"] +description = Entities are drugs or their doses. They can be uppercased, title-cased, or lowercased. + Each occurrence of an entity in the text should be extracted. + +[components.llm.task.label_definitions] +Drug = "A medicine or drug given to a patient as a treatment. Can be a generic name or brand name, e.g. paracetamol, Aspirin" +Dose = "The measured quantity (dose) of a certain medicine given to patients, e.g. 1mg. This should exclude the drug name." + +[components.llm.task.examples] +@misc = "spacy.FewShotReader.v1" +path = "configs/fewshot_drugs_dose.json" \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/configs/ner_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_openai.cfg deleted file mode 100644 index 643e083b..00000000 --- a/tutorials/llm_clinical_trials/configs/ner_openai.cfg +++ /dev/null @@ -1,17 +0,0 @@ -[nlp] -lang = "en" -pipeline = ["llm"] -batch_size = 128 - -[components] - -[components.llm] -factory = "llm" - -[components.llm.model] -@llm_models = "spacy.GPT-3-5.v1" -name = "gpt-3.5-turbo" - -[components.llm.task] -@llm_tasks = "spacy.NER.v2" -labels = "Drug,Dose" diff --git a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg index 33e1bdb8..98e4d1e5 100644 --- a/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg +++ b/tutorials/llm_clinical_trials/configs/ner_zeroshot_openai.cfg @@ -14,4 +14,4 @@ config = {"seed": 342, "temperature": 0.0} [components.llm.task] @llm_tasks = "spacy.NER.v2" -labels = "Drug,Dose" +labels = ["Drug", "Dose"] diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml index 4448551c..965d8b81 100644 --- a/tutorials/llm_clinical_trials/project.yml +++ b/tutorials/llm_clinical_trials/project.yml @@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs' description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials." vars: - ner_config: "ner_zeroshot_openai.cfg" # "ner_dolly.cfg" # "ner_falcon.cfg" + ner_config: "ner_fewshot_openai.cfg" # "ner_zeroshot_openai.cfg" trial_config: "trial_openai.cfg" pmid: 27144689 diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py index 0030c779..5ef3af55 100644 --- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py +++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py @@ -9,8 +9,8 @@ from wasabi import msg DEBUG = False -PRINT_CONSOLE = False -PRINT_DISPLACY = True +PRINT_CONSOLE = True +PRINT_DISPLACY = False def visualise_entities(pmid: int, config_path: Path, verbose: bool = False): @@ -18,16 +18,16 @@ def visualise_entities(pmid: int, config_path: Path, verbose: bool = False): if DEBUG: spacy_llm.logger.setLevel(logging.DEBUG) - msg.text(f"Processing PMID {pmid}", show=verbose) - msg.text(f"Loading config from {config_path}", show=verbose) + msg.info(f"Processing PMID {pmid}", show=verbose) + msg.info(f"Loading config from {config_path}", show=verbose) text = read_trial(pmid, verbose=verbose) nlp = assemble(config_path) doc = nlp(text) ents = list(doc.ents) if PRINT_CONSOLE: - print("ents", len(ents)) + msg.text(f" - Number of entities: {len(ents)}") for ent in ents: - print(ent.text, ent.label_) + msg.text(f" - {ent.text} [{ent.label_}]") if PRINT_DISPLACY: options = { "ents": ["Drug", "Dose"],