diff --git a/etc/scallopy-plugins/gpt/pyproject.toml b/etc/scallopy-plugins/gpt/pyproject.toml index 7b66e3e..ec27221 100644 --- a/etc/scallopy-plugins/gpt/pyproject.toml +++ b/etc/scallopy-plugins/gpt/pyproject.toml @@ -2,7 +2,7 @@ name = "scallop-gpt" version = "0.0.1" dependencies = [ - "openai", + "openai == 0.28", "torch", ] diff --git a/etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py b/etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py index 71316db..d151d9f 100644 --- a/etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py +++ b/etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py @@ -1,11 +1,7 @@ from typing import * -from openai import OpenAI - -client = OpenAI() +import openai import torch -import pickle -import os import scallopy from scallop_gpu import get_device @@ -15,8 +11,9 @@ FA_NAME = "gpt_encoder" ERR_HEAD = f"[@{FA_NAME}]" + @scallopy.foreign_attribute -def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-002", storage_path: str = None): +def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-002"): # Check if the annotation is on function type decl assert item.is_function_decl(), f"{ERR_HEAD} has to be an attribute of a function type declaration" @@ -25,19 +22,13 @@ def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-0 assert len(arg_types) == 1 and arg_types[0].is_string(), f"{ERR_HEAD} expects only one `String` argument" assert item.function_decl_ret_type().is_tensor(), f"{ERR_HEAD} expects that the return type is `Tensor`" - if not storage_path is None and os.path.exists(storage_path): - STORAGE = pickle.load(open(storage_path, "rb")) - else: - STORAGE = {} + STORAGE = {} # Generate foreign function @scallopy.foreign_function(name=item.function_decl_name()) def encode_text(text: str) -> scallopy.Tensor: - # print("fa encoder encode text start") - # Check memoization if text in STORAGE: - # print("no need to query") pass else: # Make sure that we can do request @@ -47,15 +38,13 @@ def encode_text(text: str) -> scallopy.Tensor: print(f"{ERR_HEAD} Querying `{model}` for text `{text}`") # Memoize the response - response = client.embeddings.create(input=[text], model=model) - embedding = response.data[0].embedding + response = openai.Embedding.create(input=[text], model=model) + embedding = response['data'][0]['embedding'] if debug: print(f"{ERR_HEAD} Obtaining response: {response}") STORAGE[text] = embedding - if not storage_path is None: - pickle.dump(STORAGE, open(storage_path, 'wb')) # Return device = get_device() @@ -63,4 +52,4 @@ def encode_text(text: str) -> scallopy.Tensor: result = torch.tensor(result_embedding).to(device=device) return result - return encode_text + return encode_text \ No newline at end of file diff --git a/etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py b/etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py index c456a3e..d943073 100644 --- a/etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py +++ b/etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py @@ -6,21 +6,23 @@ # For memoization STORAGE = {} + @scallopy.foreign_function def gpt(s: str) -> str: - if s in STORAGE: - return STORAGE[s] - else: - # Make sure that we can do so - config.assert_can_request() + if s in STORAGE: + return STORAGE[s] + else: + # Make sure that we can do so + config.assert_can_request() - # Add performed requests - config.NUM_PERFORMED_REQUESTS += 1 - response = openai.ChatCompletion.create( - model=config.MODEL, - prompt=s, - temperature=config.TEMPERATURE) - choice = response["choices"][0] - result = choice["text"].strip() - STORAGE[s] = result - return result + # Add performed requests + config.NUM_PERFORMED_REQUESTS += 1 + response = openai.ChatCompletion.create( + model=config.MODEL, + messages=[{"role": "user", "content": s}], + temperature=config.TEMPERATURE, + ) + choice = response["choices"][0] + result = choice["message"]["content"].strip() + STORAGE[s] = result + return result \ No newline at end of file diff --git a/etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py b/etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py index 1c6615e..e222ae7 100644 --- a/etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py +++ b/etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py @@ -1,4 +1,4 @@ -from typing import Tuple +from typing import Tuple, Generator import openai import scallopy @@ -7,24 +7,26 @@ STORAGE = {} + @scallopy.foreign_predicate def gpt(s: str) -> scallopy.Facts[None, str]: - # Check if the storage already contains the response - if s in STORAGE: - response = STORAGE[s] - else: - # Make sure that we can do so - config.assert_can_request() + # Check if the storage already contains the response + if s in STORAGE: + response = STORAGE[s] + else: + # Make sure that we can do so + config.assert_can_request() - # Memoize the response - config.NUM_PERFORMED_REQUESTS += 1 - response = openai.ChatCompletion.create( - model=config.MODEL, - prompt=s, - temperature=config.TEMPERATURE) - STORAGE[s] = response + # Memoize the response + config.NUM_PERFORMED_REQUESTS += 1 + response = openai.ChatCompletion.create( + model=config.MODEL, + messages=[{"role": "user", "content": s}], + temperature=config.TEMPERATURE, + ) + STORAGE[s] = response - # Iterate through all the choices - for choice in response["choices"]: - result = choice["text"].strip() - yield (result,) + # Iterate through all the choices + for choice in response["choices"]: + result = choice["message"]["content"].strip() + yield (result,) \ No newline at end of file diff --git a/etc/scallopy-plugins/opencv/MANIFEST.in b/etc/scallopy-plugins/opencv/MANIFEST.in new file mode 100644 index 0000000..5f9051c --- /dev/null +++ b/etc/scallopy-plugins/opencv/MANIFEST.in @@ -0,0 +1 @@ +recursive-include src *.ttf \ No newline at end of file diff --git a/etc/scallopy-plugins/plip/src/scallop_plip/plip.py b/etc/scallopy-plugins/plip/src/scallop_plip/plip.py index b976ece..fe228fa 100644 --- a/etc/scallopy-plugins/plip/src/scallop_plip/plip.py +++ b/etc/scallopy-plugins/plip/src/scallop_plip/plip.py @@ -40,7 +40,7 @@ def plip( assert args[2].ty.is_string() and (args[2].adornment is None or args[2].adornment.is_free()), f"{ERR_HEAD} third argument has to be of free type `String`" @scallopy.foreign_predicate(name=relation_decl.name.name) - def plip_classify(img: scallopy.Tensor) -> scallopy.Generator[float, Tuple[str]]: + def plip_classify(img: scallopy.Tensor) -> scallopy.Facts[float, Tuple[str]]: device = get_device() maybe_plip_model = get_plip_model(debug=debug) if maybe_plip_model is None: @@ -64,7 +64,7 @@ def plip_classify(img: scallopy.Tensor) -> scallopy.Generator[float, Tuple[str]] # Generate the foreign predicate for dynamic labels @scallopy.foreign_predicate(name=relation_decl.name.name) - def plip_classify_with_labels(img: scallopy.Tensor, list: scallopy.String) -> scallopy.Generator[float, Tuple[str]]: + def plip_classify_with_labels(img: scallopy.Tensor, list: scallopy.String) -> scallopy.Facts[float, Tuple[str]]: nonlocal labels labels = [item.strip() for item in list.split(DELIMITER)] return plip_classify(img) diff --git a/experiments/clutrr-v2/kinship.scl b/experiments/clutrr-v2/kinship.scl index 794a190..f3a4b4c 100644 --- a/experiments/clutrr-v2/kinship.scl +++ b/experiments/clutrr-v2/kinship.scl @@ -1,58 +1,52 @@ type context(ctx: String) +// Few-shot prompt GPT to extract structured relational data from the natural language question +// Note that given mentioned kinships and the goal, we can deduce the final answer via a knowledge base of kinship compositions @gpt_extract_info( - header=""" -In this task, you will be given a question regarding kinships between characters in a story. -Please extract individual kinships mentioned in the story and the characters whose kinship is desired in the question. - -Formally, you should extract two types of information in JSON format: -1. Mentioned kinships. This should be a JSON list covering all the kinships mentioned, where each element -is a JSON object with `p1`, `p2`, and `rela` fields, denoting that `p1` is `p2`'s' `rela`. -Please pay extra attention to the directionality of the relation. For a pair of people, -you should generate two relations, one between p1 and p2 and another between p2 and p1. Please only extract direct information mentioned in the question and avoid -doing any computation except for making sure you get both directions. -2. The query. It should be a JSON object with `p1` and `p2` fields, between which is the -relationship we want to derived. - -Examples: - -Question: Dorothy's brother Michael and her went to get ice cream. Michael is the proud father of the lovely Donald. Who is Dorothy to Donald? -What are the mentioned kinships in JSON format? -[{"p1": "Michael", "p2": "Dorothy", "rela": "brother"}, {"p1": "Dorothy", "p2": "Michael", "rela": "sister"}, {"p1": "Michael", "p2": "Donald", "rela": "father"}, {"p1": "Donald", "p2": "Michael", "rela": "son"}] -Whose kinship do we want to find? -[{"p1": "Dorothy", "p2": "Donald"}] - - -Question: Michael and his daughter Jennifer like to read poems together. Jason is the proud father of the lovely Michael. Who is Jason to Jennifer? -What are the mentioned kinships in JSON format? -[{"p1": "Jennifer", "p2": "Michael", "rela": "daughter"}, {"p1": "Michael", "p2": "Jennifer", "rela": "father"}, {"p1": "Jason", "p2": "Michael", "rela": "father"}, {"p1": "Michael", "p2": "Jason", "rela": "son"}] -Whose kinship do we want to find? -[{"p1": "Jason", "p2": "Jennifer"}] - - -Question: Kevin loves going to plays with his wife Aida. Aida's dad James, however, does not like them at all. Who is James to Kevin? -What are the mentioned kinships in JSON format? -[{"p1": "Aida", "p2": "Kevin", "rela": "wife"}, {"p1": "Kevin", "p2": "Aida", "rela": "husband"}, {"p1": "James", "p2": "Aida", "rela": "father"}, {"p1": "Aida", "p2": "James", "rela": "daughter"}] -Whose kinship do we want to find? -[{"p1": "James", "p2": "Kevin"}] - - -Now, look at the following context. - -Question: {{context}} - """, + header="Please extract kinships mentioned in the story and the characters whose kinship is desired in the question.", prompts=[ - "Now, first give me the kinships mentioned in this question in JSON format", - "Good, now please tell me the two people whose kinship we want to find in JSON format" + "What are the mentioned kinships in JSON format (with `p1`, `p2`, and `rela` fields, denoting `p1` is `p2`'s' `rela`)?", + "Whose kinship do we want to find?" + ], + examples=[ + ( + ["Dorothy's brother Michael and her went to get ice cream. Michael is the proud father of the lovely Donald. + Who is Dorothy to Donald?"], + [ + [("Michael", "Dorothy", "brother"), ("Dorothy", "Michael", "sister"), + ("Michael", "Donald", "father"), ("Donald", "Michael", "son")], + [("Dorothy", "Donald")] + ] + ), + ( + ["Michael and his daughter Jennifer like to read poems together. Jason is the proud father of the lovely Michael. + Who is Jason to Jennifer?"], + [ + [("Jennifer", "Michael", "daughter"), ("Michael", "Jennifer", "father"), + ("Jason", "Michael", "father"), ("Michael", "Jason", "son")], + [("Jason", "Jennifer")] + ] + ), + ( + ["Kevin loves going to plays with his wife Aida. Aida's dad James, however, does not like them at all. + Who is James to Kevin?"], + [ + [("Aida", "Kevin", "wife"), ("Kevin", "Aida", "husband"), + ("James", "Aida", "father"), ("Aida", "James", "daughter")], + [("James", "Kevin")] + ] + ) ], model="gpt-4", ) -type extract_kinship(bound context: String, p1: String, p2: String, rela: String), - extract_question(bound context: String, p1: String, p2: String) +type extract_kinship (bound context: String, p1: String, p2: String, rela: String), + extract_question (bound context: String, p1: String, p2: String) +// Extract information from the context rel kinship(p1, p2, rela) = context(ctx) and extract_kinship(ctx, p1, p2, rela) -rel question(p1, p2) = context(ctx) and extract_question(ctx, p1, p2) +rel question(p1, p2) = context(ctx) and extract_question(ctx, p1, p2) +// Knowledge base for composing kinship relations rel composition = { ("daughter", "daughter", "granddaughter"), ("daughter", "sister", "daughter"), @@ -186,6 +180,10 @@ rel composition = { ("grandson", "brother", "grandson"), } +// Perform computation on the kinship graph rel derived_kinship(p1, p2, rela) = kinship(p1, p2, rela) -rel derived_kinship(p1, p3, r3) = p1 != p3 and derived_kinship(p1, p2, r1) and derived_kinship(p2, p3, r2) and composition(r2, r1, r3) -rel result(r) = question(p1, p2) and derived_kinship(p1, p2, r) +rel derived_kinship(p1, p3, r3) = p1 != p3 and derived_kinship(p1, p2, r1) and derived_kinship(p2, p3, r2) + and composition(r2, r1, r3) + +// Get the answer +rel result(r) = question(p1, p2) and derived_kinship(p1, p2, r) \ No newline at end of file diff --git a/experiments/clutrr-v2/kinship_scallop.py b/experiments/clutrr-v2/kinship_scallop.py index 17eae7f..c009d40 100644 --- a/experiments/clutrr-v2/kinship_scallop.py +++ b/experiments/clutrr-v2/kinship_scallop.py @@ -1,8 +1,8 @@ -import openai import json from tqdm import tqdm from io import StringIO import sys +import argparse import scallopy import scallopy_ext @@ -23,6 +23,12 @@ def __init__(self): def test_kinship(range=range(N)): out = {"score": 0, "data": [], "logs": []} + + plugins = scallopy_ext.PluginRegistry() + parser = argparse.ArgumentParser() + plugins.setup_argument_parser(parser) + known_args, unknown_args = parser.parse_known_args() + plugins.configure(known_args, unknown_args) for i in tqdm(range): (ctx, query), ans = TASK[i] @@ -32,8 +38,7 @@ def test_kinship(range=range(N)): sys.stdout = buffer try: ctx = scallopy.ScallopContext(provenance="unit") - scallopy_ext.config.configure(Args(), []) - scallopy_ext.extlib.load_extlib(ctx) + plugins.load_into_ctx(ctx) ctx.import_file(SCALLOP_FILE) ctx.add_facts("context", [(input,)]) ctx.run()