Merge pull request #25 from scallop-lang/gpt-fix

GPT plugin fix
scallop-lang · Sep 14, 2024 · 7b1a8cc
2 parents 824365b + d9906f0
commit 7b1a8cc
Show file tree

Hide file tree

Showing 8 changed files with 101 additions and 104 deletions.
diff --git a/etc/scallopy-plugins/gpt/pyproject.toml b/etc/scallopy-plugins/gpt/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scallop-gpt"
 version = "0.0.1"
 dependencies = [
-  "openai",
+  "openai == 0.28",
   "torch",
 ]
 

diff --git a/etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py b/etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py
@@ -1,11 +1,7 @@
 from typing import *
 
-from openai import OpenAI
-
-client = OpenAI()
+import openai
 import torch
-import pickle
-import os 
 
 import scallopy
 from scallop_gpu import get_device
@@ -15,8 +11,9 @@
 FA_NAME = "gpt_encoder"
 ERR_HEAD = f"[@{FA_NAME}]"
 
+
 @scallopy.foreign_attribute
-def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-002", storage_path: str = None):
+def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-002"):
   # Check if the annotation is on function type decl
   assert item.is_function_decl(), f"{ERR_HEAD} has to be an attribute of a function type declaration"
 
@@ -25,19 +22,13 @@ def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-0
   assert len(arg_types) == 1 and arg_types[0].is_string(), f"{ERR_HEAD} expects only one `String` argument"
   assert item.function_decl_ret_type().is_tensor(), f"{ERR_HEAD} expects that the return type is `Tensor`"
 
-  if not storage_path is None and os.path.exists(storage_path):
-    STORAGE = pickle.load(open(storage_path, "rb"))
-  else:
-    STORAGE = {}
+  STORAGE = {}
 
   # Generate foreign function
   @scallopy.foreign_function(name=item.function_decl_name())
   def encode_text(text: str) -> scallopy.Tensor:
-    # print("fa encoder encode text start")
-
     # Check memoization
     if text in STORAGE:
-      # print("no need to query")
       pass
     else:
       # Make sure that we can do request
@@ -47,20 +38,18 @@ def encode_text(text: str) -> scallopy.Tensor:
         print(f"{ERR_HEAD} Querying `{model}` for text `{text}`")
 
       # Memoize the response
-      response = client.embeddings.create(input=[text], model=model)
-      embedding = response.data[0].embedding
+      response = openai.Embedding.create(input=[text], model=model)
+      embedding = response['data'][0]['embedding']
 
       if debug:
         print(f"{ERR_HEAD} Obtaining response: {response}")
 
       STORAGE[text] = embedding
-      if not storage_path is None:
-        pickle.dump(STORAGE, open(storage_path, 'wb'))
 
     # Return
     device = get_device()
     result_embedding = STORAGE[text]
     result = torch.tensor(result_embedding).to(device=device)
     return result
 
-  return encode_text
+  return encode_text
diff --git a/etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py b/etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py
@@ -6,21 +6,23 @@
 # For memoization
 STORAGE = {}
 
+
 @scallopy.foreign_function
 def gpt(s: str) -> str:
-  if s in STORAGE:
-    return STORAGE[s]
-  else:
-    # Make sure that we can do so
-    config.assert_can_request()
+    if s in STORAGE:
+        return STORAGE[s]
+    else:
+        # Make sure that we can do so
+        config.assert_can_request()
 
-    # Add performed requests
-    config.NUM_PERFORMED_REQUESTS += 1
-    response = openai.ChatCompletion.create(
-      model=config.MODEL,
-      prompt=s,
-      temperature=config.TEMPERATURE)
-    choice = response["choices"][0]
-    result = choice["text"].strip()
-    STORAGE[s] = result
-    return result
+        # Add performed requests
+        config.NUM_PERFORMED_REQUESTS += 1
+        response = openai.ChatCompletion.create(
+            model=config.MODEL,
+            messages=[{"role": "user", "content": s}],
+            temperature=config.TEMPERATURE,
+        )
+        choice = response["choices"][0]
+        result = choice["message"]["content"].strip()
+        STORAGE[s] = result
+        return result
diff --git a/etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py b/etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py
@@ -1,4 +1,4 @@
-from typing import Tuple
+from typing import Tuple, Generator
 
 import openai
 import scallopy
@@ -7,24 +7,26 @@
 
 STORAGE = {}
 
+
 @scallopy.foreign_predicate
 def gpt(s: str) -> scallopy.Facts[None, str]:
-  # Check if the storage already contains the response
-  if s in STORAGE:
-    response = STORAGE[s]
-  else:
-    # Make sure that we can do so
-    config.assert_can_request()
+    # Check if the storage already contains the response
+    if s in STORAGE:
+        response = STORAGE[s]
+    else:
+        # Make sure that we can do so
+        config.assert_can_request()
 
-    # Memoize the response
-    config.NUM_PERFORMED_REQUESTS += 1
-    response = openai.ChatCompletion.create(
-      model=config.MODEL,
-      prompt=s,
-      temperature=config.TEMPERATURE)
-    STORAGE[s] = response
+        # Memoize the response
+        config.NUM_PERFORMED_REQUESTS += 1
+        response = openai.ChatCompletion.create(
+            model=config.MODEL,
+            messages=[{"role": "user", "content": s}],
+            temperature=config.TEMPERATURE,
+        )
+        STORAGE[s] = response
 
-  # Iterate through all the choices
-  for choice in response["choices"]:
-    result = choice["text"].strip()
-    yield (result,)
+    # Iterate through all the choices
+    for choice in response["choices"]:
+        result = choice["message"]["content"].strip()
+        yield (result,)
diff --git a/etc/scallopy-plugins/opencv/MANIFEST.in b/etc/scallopy-plugins/opencv/MANIFEST.in
@@ -0,0 +1 @@
+recursive-include src *.ttf
diff --git a/etc/scallopy-plugins/plip/src/scallop_plip/plip.py b/etc/scallopy-plugins/plip/src/scallop_plip/plip.py
@@ -40,7 +40,7 @@ def plip(
     assert args[2].ty.is_string() and (args[2].adornment is None or args[2].adornment.is_free()), f"{ERR_HEAD} third argument has to be of free type `String`"
 
   @scallopy.foreign_predicate(name=relation_decl.name.name)
-  def plip_classify(img: scallopy.Tensor) -> scallopy.Generator[float, Tuple[str]]:
+  def plip_classify(img: scallopy.Tensor) -> scallopy.Facts[float, Tuple[str]]:
     device = get_device()
     maybe_plip_model = get_plip_model(debug=debug)
     if maybe_plip_model is None:
@@ -64,7 +64,7 @@ def plip_classify(img: scallopy.Tensor) -> scallopy.Generator[float, Tuple[str]]
 
   # Generate the foreign predicate for dynamic labels
   @scallopy.foreign_predicate(name=relation_decl.name.name)
-  def plip_classify_with_labels(img: scallopy.Tensor, list: scallopy.String) -> scallopy.Generator[float, Tuple[str]]:
+  def plip_classify_with_labels(img: scallopy.Tensor, list: scallopy.String) -> scallopy.Facts[float, Tuple[str]]:
     nonlocal labels
     labels = [item.strip() for item in list.split(DELIMITER)]
     return plip_classify(img)

diff --git a/experiments/clutrr-v2/kinship.scl b/experiments/clutrr-v2/kinship.scl
@@ -1,58 +1,52 @@
 type context(ctx: String)
 
+// Few-shot prompt GPT to extract structured relational data from the natural language question
+// Note that given mentioned kinships and the goal, we can deduce the final answer via a knowledge base of kinship compositions
 @gpt_extract_info(
-  header="""
-In this task, you will be given a question regarding kinships between characters in a story.
-Please extract individual kinships mentioned in the story and the characters whose kinship is desired in the question.
-
-Formally, you should extract two types of information in JSON format:
-1. Mentioned kinships. This should be a JSON list covering all the kinships mentioned, where each element
-is a JSON object with `p1`, `p2`, and `rela` fields, denoting that `p1` is `p2`'s' `rela`.
-Please pay extra attention to the directionality of the relation. For a pair of people,
-you should generate two relations, one between p1 and p2 and another between p2 and p1. Please only extract direct information mentioned in the question and avoid
-doing any computation except for making sure you get both directions.
-2. The query. It should be a JSON object with `p1` and `p2` fields, between which is the
-relationship we want to derived.
-
-Examples:
-
-Question: Dorothy's brother Michael and her went to get ice cream. Michael is the proud father of the lovely Donald. Who is Dorothy to Donald?
-What are the mentioned kinships in JSON format?
-[{"p1": "Michael", "p2": "Dorothy", "rela": "brother"}, {"p1": "Dorothy", "p2": "Michael", "rela": "sister"}, {"p1": "Michael", "p2": "Donald", "rela": "father"}, {"p1": "Donald", "p2": "Michael", "rela": "son"}]
-Whose kinship do we want to find?
-[{"p1": "Dorothy", "p2": "Donald"}]
-
-
-Question: Michael and his daughter Jennifer like to read poems together. Jason is the proud father of the lovely Michael. Who is Jason to Jennifer?
-What are the mentioned kinships in JSON format?
-[{"p1": "Jennifer", "p2": "Michael", "rela": "daughter"}, {"p1": "Michael", "p2": "Jennifer", "rela": "father"}, {"p1": "Jason", "p2": "Michael", "rela": "father"}, {"p1": "Michael", "p2": "Jason", "rela": "son"}]
-Whose kinship do we want to find?
-[{"p1": "Jason", "p2": "Jennifer"}]
-
-
-Question: Kevin loves going to plays with his wife Aida. Aida's dad James, however, does not like them at all. Who is James to Kevin?
-What are the mentioned kinships in JSON format?
-[{"p1": "Aida", "p2": "Kevin", "rela": "wife"}, {"p1": "Kevin", "p2": "Aida", "rela": "husband"}, {"p1": "James", "p2": "Aida", "rela": "father"}, {"p1": "Aida", "p2": "James", "rela": "daughter"}]
-Whose kinship do we want to find?
-[{"p1": "James", "p2": "Kevin"}]
-
-
-Now, look at the following context.
-
-Question: {{context}}
-  """,
+  header="Please extract kinships mentioned in the story and the characters whose kinship is desired in the question.",
   prompts=[
-    "Now, first give me the kinships mentioned in this question in JSON format",
-    "Good, now please tell me the two people whose kinship we want to find in JSON format"
+    "What are the mentioned kinships in JSON format (with `p1`, `p2`, and `rela` fields, denoting `p1` is `p2`'s' `rela`)?",
+    "Whose kinship do we want to find?"
+  ],
+  examples=[
+    (
+      ["Dorothy's brother Michael and her went to get ice cream. Michael is the proud father of the lovely Donald.
+        Who is Dorothy to Donald?"],
+      [
+        [("Michael", "Dorothy", "brother"), ("Dorothy", "Michael", "sister"),
+         ("Michael", "Donald", "father"), ("Donald", "Michael", "son")],
+        [("Dorothy", "Donald")]
+      ]
+    ),
+    (
+      ["Michael and his daughter Jennifer like to read poems together. Jason is the proud father of the lovely Michael.
+        Who is Jason to Jennifer?"],
+      [
+        [("Jennifer", "Michael", "daughter"), ("Michael", "Jennifer", "father"),
+         ("Jason", "Michael", "father"), ("Michael", "Jason", "son")],
+        [("Jason", "Jennifer")]
+      ]
+    ),
+    (
+      ["Kevin loves going to plays with his wife Aida. Aida's dad James, however, does not like them at all.
+        Who is James to Kevin?"],
+      [
+        [("Aida", "Kevin", "wife"), ("Kevin", "Aida", "husband"),
+         ("James", "Aida", "father"), ("Aida", "James", "daughter")],
+        [("James", "Kevin")]
+      ]
+    )
   ],
   model="gpt-4",
 )
-type extract_kinship(bound context: String, p1: String, p2: String, rela: String),
-     extract_question(bound context: String, p1: String, p2: String)
+type extract_kinship  (bound context: String, p1: String, p2: String, rela: String),
+     extract_question (bound context: String, p1: String, p2: String)
 
+// Extract information from the context
 rel kinship(p1, p2, rela) = context(ctx) and extract_kinship(ctx, p1, p2, rela)
-rel question(p1, p2) = context(ctx) and extract_question(ctx, p1, p2)
+rel question(p1, p2)      = context(ctx) and extract_question(ctx, p1, p2)
 
+// Knowledge base for composing kinship relations
 rel composition = {
   ("daughter", "daughter", "granddaughter"),
   ("daughter", "sister", "daughter"),
@@ -186,6 +180,10 @@ rel composition = {
   ("grandson", "brother", "grandson"),
 }
 
+// Perform computation on the kinship graph
 rel derived_kinship(p1, p2, rela) = kinship(p1, p2, rela)
-rel derived_kinship(p1, p3, r3) = p1 != p3 and derived_kinship(p1, p2, r1) and derived_kinship(p2, p3, r2) and composition(r2, r1, r3)
-rel result(r) = question(p1, p2) and derived_kinship(p1, p2, r)
+rel derived_kinship(p1, p3, r3)   = p1 != p3 and derived_kinship(p1, p2, r1) and derived_kinship(p2, p3, r2)
+                                    and composition(r2, r1, r3)
+
+// Get the answer
+rel result(r) = question(p1, p2) and derived_kinship(p1, p2, r)
diff --git a/experiments/clutrr-v2/kinship_scallop.py b/experiments/clutrr-v2/kinship_scallop.py
@@ -1,8 +1,8 @@
-import openai
 import json
 from tqdm import tqdm
 from io import StringIO
 import sys
+import argparse
 
 import scallopy
 import scallopy_ext
@@ -23,6 +23,12 @@ def __init__(self):
 
 def test_kinship(range=range(N)):
     out = {"score": 0, "data": [], "logs": []}
+
+    plugins = scallopy_ext.PluginRegistry()
+    parser = argparse.ArgumentParser()
+    plugins.setup_argument_parser(parser)
+    known_args, unknown_args = parser.parse_known_args()
+    plugins.configure(known_args, unknown_args)
 
     for i in tqdm(range):
         (ctx, query), ans = TASK[i]
@@ -32,8 +38,7 @@ def test_kinship(range=range(N)):
         sys.stdout = buffer
         try:
             ctx = scallopy.ScallopContext(provenance="unit")
-            scallopy_ext.config.configure(Args(), [])
-            scallopy_ext.extlib.load_extlib(ctx)
+            plugins.load_into_ctx(ctx)
             ctx.import_file(SCALLOP_FILE)
             ctx.add_facts("context", [(input,)])
             ctx.run()