Skip to content

Commit

Permalink
Sync recipes based on v1.12 PR
Browse files (browse the repository at this point in the history)
  • Loading branch information
ljvmiranda921 committed Feb 27, 2023
1 parent 95ccb15 commit edb2591
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 14 deletions.
42 changes: 29 additions & 13 deletions integrations/prodigy_openai/openai/recipes/ner.py
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union

import spacy
Expand Down Expand Up @@ -154,10 +155,10 @@ def _unique(items: List[str]) -> List[str]:
source=("Data to annotate (file path or '-' to read from standard input)", "positional", None, str),
labels=("Labels (comma delimited)", "option", "L", lambda s: s.split(",")),
model=("GPT-3 model to use for initial predictions", "option", "m", str),
examples_path=("Path to examples to help define the task", "option", "e", str),
examples_path=("Path to examples to help define the task", "option", "e", Path),
lang=("Language to use for tokenizer", "option", "l", str),
max_examples=("Max examples to include in prompt", "option", "n", int),
prompt_path=("Path to jinja2 prompt template", "option", "p", str),
prompt_path=("Path to jinja2 prompt template", "option", "p", Path),
batch_size=("Batch size to send to OpenAI API", "option", "b", int),
segment=("Split articles into sentences", "flag", "S", bool),
loader=("Loader (guessed from file extension if not set)", "option", "lo", str),
Expand All @@ -172,8 +173,8 @@ def openai_correct_ner(
model: str = "text-davinci-003",
batch_size: int = 10,
segment: bool = False,
examples_path: Optional[str] = None,
prompt_path: str = OPENAI_DEFAULTS.NER_PROMPT_PATH,
examples_path: Optional[Path] = None,
prompt_path: Path = OPENAI_DEFAULTS.NER_PROMPT_PATH,
max_examples: int = 2,
loader: Optional[str] = None,
verbose: bool = False,
Expand All @@ -187,6 +188,11 @@ def openai_correct_ner(
if segment:
nlp.add_pipe("sentencizer")
api_key, api_org = get_api_credentials(model)

if not labels:
msg.fail("No --label argument set", exits=1)
msg.text(f"Using {len(labels)} labels from model: {', '.join(labels)}")

labels = [normalize_label(label) for label in labels]
openai = OpenAISuggester(
response_parser=make_ner_response_parser(labels=labels, lang=lang),
Expand Down Expand Up @@ -239,31 +245,32 @@ def openai_correct_ner(

@recipe(
"ner.openai.fetch",
file_path=("Path to jsonl data to annotate", "positional", None, str),
output_path=("Path to save the output", "positional", None, str),
source=("Path to jsonl data to annotate", "positional", None, str),
output_path=("Path to save the output", "positional", None, Path),
labels=("Labels (comma delimited)", "option", "L", lambda s: s.split(",")),
lang=("Language to use for tokenizer.", "option", "l", str),
model=("GPT-3 model to use for completion", "option", "m", str),
examples_path=("Examples file to help define the task", "option", "e", str),
examples_path=("Examples file to help define the task", "option", "e", Path),
max_examples=("Max examples to include in prompt", "option", "n", int),
prompt_path=("Path to jinja2 prompt template", "option", "p", str),
prompt_path=("Path to jinja2 prompt template", "option", "p", Path),
batch_size=("Batch size to send to OpenAI API", "option", "b", int),
segment=("Split sentences", "flag", "S", bool),
resume=("Resume fetch by passing a path to a cache", "flag", "r", bool),
verbose=("Print extra information to terminal", "flag", "v", bool),
)
def openai_fetch_ner(
file_path: str,
output_path: str,
source: str,
output_path: Path,
labels: List[str],
lang: str = "en",
model: str = "text-davinci-003",
batch_size: int = 10,
segment: bool = False,
examples_path: Optional[str] = None,
prompt_path: str = OPENAI_DEFAULTS.NER_PROMPT_PATH,
examples_path: Optional[Path] = None,
prompt_path: Path = OPENAI_DEFAULTS.NER_PROMPT_PATH,
max_examples: int = 2,
resume: bool = False,
loader=("Loader (guessed from file extension if not set)", "option", "lo", str),
verbose: bool = False,
):
"""
Expand All @@ -280,6 +287,10 @@ def openai_fetch_ner(
if segment:
nlp.add_pipe("sentencizer")

if not labels:
msg.fail("No --label argument set", exits=1)
msg.text(f"Using {len(labels)} labels from model: {', '.join(labels)}")

labels = [normalize_label(label) for label in labels]
openai = OpenAISuggester(
response_parser=make_ner_response_parser(labels=labels, lang=lang),
Expand All @@ -299,7 +310,12 @@ def openai_fetch_ner(
)
for eg in examples:
openai.add_example(eg)
stream = list(srsly.read_jsonl(file_path))

# Set up the stream
stream = get_stream(
source, loader=loader, rehash=False, dedup=False, input_key="text"
)

# If we want to resume, we take the path to the cache and
# compare the hashes with respect to our inputs.
if resume:
Expand Down
2 changes: 1 addition & 1 deletion integrations/prodigy_openai/openai/recipes/openai.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -108,7 +108,7 @@ class OPENAI_DEFAULTS:
# prompt paths #
# fmt: off
TEMPLATES_DIR = "templates"
NER_PROMPT_PATH = str(Path(__file__).parent / TEMPLATES_DIR / "ner_prompt.jinja2")
NER_PROMPT_PATH = Path(__file__).parent / TEMPLATES_DIR / "ner_prompt.jinja2"
# fmt: on

# html templates #
Expand Down

0 comments on commit edb2591

Please sign in to comment.