Skip to content

Commit

Permalink
Add train-curve command
Browse files Browse the repository at this point in the history
  • Loading branch information
ljvmiranda921 committed Feb 23, 2023
1 parent 8c984fc commit c16aa00
Showing 1 changed file with 21 additions and 2 deletions.
23 changes: 21 additions & 2 deletions integrations/prodigy_openai/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ directories:

vars:
gpu_id: 0
prodigy_dataset: "prodigy_openai"
config: "ner.cfg"
prompt_template: "ner_prompt.jinja2"
seed: 42
Expand Down Expand Up @@ -114,6 +115,24 @@ commands:
- metrics/scores_gpt.json

- name: "train-curve"
  help: "Train a model at varying portions of the training data"
  script:
    # Convert the binary spaCy corpora to JSONL so they can be loaded
    # into Prodigy datasets.
    - python -m scripts.convert_to_jsonl corpus/anem-train.spacy corpus/anem-train.jsonl
    - python -m scripts.convert_to_jsonl corpus/anem-dev.spacy corpus/anem-dev.jsonl
    # Hydrate the Prodigy datasets: the training split goes into
    # ${vars.prodigy_dataset}, the dev split into ${vars.prodigy_dataset}_eval.
    - python -m prodigy db-in ${vars.prodigy_dataset} corpus/anem-train.jsonl
    - python -m prodigy db-in ${vars.prodigy_dataset}_eval corpus/anem-dev.jsonl
    # Run the train-curve command. The folded scalar (>-) joins the lines
    # below into a single command line, so every continuation line must
    # stay at the same indentation.
    - >-
      python -m prodigy train-curve
      --ner ${vars.prodigy_dataset},eval:${vars.prodigy_dataset}_eval
      --config ${vars.config}
      --gpu-id ${vars.gpu_id}
      --show-plot
  deps:
    - corpus/anem-train.spacy
    - corpus/anem-dev.spacy
  outputs:
    - corpus/anem-train.jsonl
    - corpus/anem-dev.jsonl

0 comments on commit c16aa00

Please sign in to comment.