Skip to content

Commit

Permalink
add fewshot config with NER.v3
Browse files Browse the repository at this point in the history
  • Loading branch information
svlandeg committed Jan 19, 2024
1 parent efbae95 commit ffb8996
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 25 deletions.
48 changes: 48 additions & 0 deletions tutorials/llm_clinical_trials/configs/fewshot_drugs_dose.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
[
{
"text": "The patient was given 1mg of paracetamol.",
"spans": [
{
"text": "paracetamol",
"is_entity": true,
"label": "Drug",
"reason": "is a drug name, used as medication"
},
{
"text": "1mg",
"is_entity": true,
"label": "Dose",
"reason": "is the quantity or dose of the given medication"
},
{
"text": "patient",
"is_entity": false,
"label": "==NONE==",
"reason": "is a person, not a drug or dose"
}
]
},
{
"text": "Throughout the treatment, they received Aspirin 1mg/kg.",
"spans": [
{
"text": "Aspirin",
"is_entity": true,
"label": "Drug",
"reason": "is a drug brand, used as medication"
},
{
"text": "1mg/kg",
"is_entity": true,
"label": "Dose",
"reason": "is the quantity or dose of the given drug"
},
{
"text": "Aspirin 1mg/kg",
"is_entity": false,
"label": "==NONE==",
"reason": "contains both the drug and the dose - these should be two entities instead"
}
]
}
]
27 changes: 27 additions & 0 deletions tutorials/llm_clinical_trials/configs/ner_fewshot_openai.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# spaCy pipeline config with a single "llm" component that runs the
# "spacy.NER.v3" task for the labels "Drug" and "Dose", using few-shot examples.
[nlp]
lang = "en"
pipeline = ["llm"]
batch_size = 128

[components]

[components.llm]
factory = "llm"

# Model backend. A fixed seed and temperature 0.0 are set, presumably to keep
# the output as reproducible as possible - confirm against the model docs.
[components.llm.model]
@llm_models = "spacy.GPT-4.v2"
config = {"seed": 342, "temperature": 0.0}

[components.llm.task]
@llm_tasks = "spacy.NER.v3"
labels = ["Drug", "Dose"]
# Multi-line value: every continuation line must be indented, otherwise the
# config parser treats it as a new (invalid) entry and fails to load the file.
description = Entities are drugs or their doses. They can be uppercased, title-cased, or lowercased.
    Each occurrence of an entity in the text should be extracted.

# One definition per label listed in `labels` above.
[components.llm.task.label_definitions]
Drug = "A medicine or drug given to a patient as a treatment. Can be a generic name or brand name, e.g. paracetamol, Aspirin"
Dose = "The measured quantity (dose) of a certain medicine given to patients, e.g. 1mg. This should exclude the drug name."

# Few-shot examples are read from a JSON file. The path is relative,
# presumably to the directory the project commands are run from - confirm.
[components.llm.task.examples]
@misc = "spacy.FewShotReader.v1"
path = "configs/fewshot_drugs_dose.json"
17 changes: 0 additions & 17 deletions tutorials/llm_clinical_trials/configs/ner_openai.cfg

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ config = {"seed": 342, "temperature": 0.0}

[components.llm.task]
@llm_tasks = "spacy.NER.v2"
labels = "Drug,Dose"
labels = ["Drug", "Dose"]
2 changes: 1 addition & 1 deletion tutorials/llm_clinical_trials/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs'
description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials."

vars:
ner_config: "ner_zeroshot_openai.cfg" # "ner_dolly.cfg" # "ner_falcon.cfg"
ner_config: "ner_fewshot_openai.cfg" # "ner_zeroshot_openai.cfg"
trial_config: "trial_openai.cfg"
pmid: 27144689

Expand Down
12 changes: 6 additions & 6 deletions tutorials/llm_clinical_trials/scripts/visualise_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@
from wasabi import msg

DEBUG = False
PRINT_CONSOLE = False
PRINT_DISPLACY = True
PRINT_CONSOLE = True
PRINT_DISPLACY = False


def visualise_entities(pmid: int, config_path: Path, verbose: bool = False):
spacy_llm.logger.addHandler(logging.StreamHandler())
if DEBUG:
spacy_llm.logger.setLevel(logging.DEBUG)

msg.text(f"Processing PMID {pmid}", show=verbose)
msg.text(f"Loading config from {config_path}", show=verbose)
msg.info(f"Processing PMID {pmid}", show=verbose)
msg.info(f"Loading config from {config_path}", show=verbose)
text = read_trial(pmid, verbose=verbose)
nlp = assemble(config_path)
doc = nlp(text)
ents = list(doc.ents)
if PRINT_CONSOLE:
print("ents", len(ents))
msg.text(f" - Number of entities: {len(ents)}")
for ent in ents:
print(ent.text, ent.label_)
msg.text(f" - {ent.text} [{ent.label_}]")
if PRINT_DISPLACY:
options = {
"ents": ["Drug", "Dose"],
Expand Down

0 comments on commit ffb8996

Please sign in to comment.