Skip to content

Commit

Permalink
Merge pull request #25 from scallop-lang/gpt-fix
Browse files Browse the repository at this point in the history
GPT plugin fix
  • Loading branch information
Liby99 authored Sep 14, 2024
2 parents 824365b + d9906f0 commit 7b1a8cc
Show file tree
Hide file tree
Showing 8 changed files with 101 additions and 104 deletions.
2 changes: 1 addition & 1 deletion etc/scallopy-plugins/gpt/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "scallop-gpt"
version = "0.0.1"
dependencies = [
"openai",
"openai == 0.28",
"torch",
]

Expand Down
25 changes: 7 additions & 18 deletions etc/scallopy-plugins/gpt/src/scallop_gpt/fa_encoder.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
from typing import *

from openai import OpenAI

client = OpenAI()
import openai
import torch
import pickle
import os

import scallopy
from scallop_gpu import get_device
Expand All @@ -15,8 +11,9 @@
FA_NAME = "gpt_encoder"
ERR_HEAD = f"[@{FA_NAME}]"


@scallopy.foreign_attribute
def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-002", storage_path: str = None):
def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-002"):
# Check if the annotation is on function type decl
assert item.is_function_decl(), f"{ERR_HEAD} has to be an attribute of a function type declaration"

Expand All @@ -25,19 +22,13 @@ def gpt_encoder(item, *, debug: bool = False, model: str = "text-embedding-ada-0
assert len(arg_types) == 1 and arg_types[0].is_string(), f"{ERR_HEAD} expects only one `String` argument"
assert item.function_decl_ret_type().is_tensor(), f"{ERR_HEAD} expects that the return type is `Tensor`"

if not storage_path is None and os.path.exists(storage_path):
STORAGE = pickle.load(open(storage_path, "rb"))
else:
STORAGE = {}
STORAGE = {}

# Generate foreign function
@scallopy.foreign_function(name=item.function_decl_name())
def encode_text(text: str) -> scallopy.Tensor:
# print("fa encoder encode text start")

# Check memoization
if text in STORAGE:
# print("no need to query")
pass
else:
# Make sure that we can perform a request
Expand All @@ -47,20 +38,18 @@ def encode_text(text: str) -> scallopy.Tensor:
print(f"{ERR_HEAD} Querying `{model}` for text `{text}`")

# Memoize the response
response = client.embeddings.create(input=[text], model=model)
embedding = response.data[0].embedding
response = openai.Embedding.create(input=[text], model=model)
embedding = response['data'][0]['embedding']

if debug:
print(f"{ERR_HEAD} Obtaining response: {response}")

STORAGE[text] = embedding
if not storage_path is None:
pickle.dump(STORAGE, open(storage_path, 'wb'))

# Return
device = get_device()
result_embedding = STORAGE[text]
result = torch.tensor(result_embedding).to(device=device)
return result

return encode_text
return encode_text
32 changes: 17 additions & 15 deletions etc/scallopy-plugins/gpt/src/scallop_gpt/ff_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,23 @@
# For memoization
STORAGE = {}


@scallopy.foreign_function
def gpt(s: str) -> str:
if s in STORAGE:
return STORAGE[s]
else:
# Make sure that we can do so
config.assert_can_request()
if s in STORAGE:
return STORAGE[s]
else:
# Make sure that we can do so
config.assert_can_request()

# Add performed requests
config.NUM_PERFORMED_REQUESTS += 1
response = openai.ChatCompletion.create(
model=config.MODEL,
prompt=s,
temperature=config.TEMPERATURE)
choice = response["choices"][0]
result = choice["text"].strip()
STORAGE[s] = result
return result
# Add performed requests
config.NUM_PERFORMED_REQUESTS += 1
response = openai.ChatCompletion.create(
model=config.MODEL,
messages=[{"role": "user", "content": s}],
temperature=config.TEMPERATURE,
)
choice = response["choices"][0]
result = choice["message"]["content"].strip()
STORAGE[s] = result
return result
38 changes: 20 additions & 18 deletions etc/scallopy-plugins/gpt/src/scallop_gpt/fp_gpt.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Tuple
from typing import Tuple, Generator

import openai
import scallopy
Expand All @@ -7,24 +7,26 @@

STORAGE = {}


@scallopy.foreign_predicate
def gpt(s: str) -> scallopy.Facts[None, str]:
# Check if the storage already contains the response
if s in STORAGE:
response = STORAGE[s]
else:
# Make sure that we can do so
config.assert_can_request()
# Check if the storage already contains the response
if s in STORAGE:
response = STORAGE[s]
else:
# Make sure that we can do so
config.assert_can_request()

# Memoize the response
config.NUM_PERFORMED_REQUESTS += 1
response = openai.ChatCompletion.create(
model=config.MODEL,
prompt=s,
temperature=config.TEMPERATURE)
STORAGE[s] = response
# Memoize the response
config.NUM_PERFORMED_REQUESTS += 1
response = openai.ChatCompletion.create(
model=config.MODEL,
messages=[{"role": "user", "content": s}],
temperature=config.TEMPERATURE,
)
STORAGE[s] = response

# Iterate through all the choices
for choice in response["choices"]:
result = choice["text"].strip()
yield (result,)
# Iterate through all the choices
for choice in response["choices"]:
result = choice["message"]["content"].strip()
yield (result,)
1 change: 1 addition & 0 deletions etc/scallopy-plugins/opencv/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
recursive-include src *.ttf
4 changes: 2 additions & 2 deletions etc/scallopy-plugins/plip/src/scallop_plip/plip.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def plip(
assert args[2].ty.is_string() and (args[2].adornment is None or args[2].adornment.is_free()), f"{ERR_HEAD} third argument has to be of free type `String`"

@scallopy.foreign_predicate(name=relation_decl.name.name)
def plip_classify(img: scallopy.Tensor) -> scallopy.Generator[float, Tuple[str]]:
def plip_classify(img: scallopy.Tensor) -> scallopy.Facts[float, Tuple[str]]:
device = get_device()
maybe_plip_model = get_plip_model(debug=debug)
if maybe_plip_model is None:
Expand All @@ -64,7 +64,7 @@ def plip_classify(img: scallopy.Tensor) -> scallopy.Generator[float, Tuple[str]]

# Generate the foreign predicate for dynamic labels
@scallopy.foreign_predicate(name=relation_decl.name.name)
def plip_classify_with_labels(img: scallopy.Tensor, list: scallopy.String) -> scallopy.Generator[float, Tuple[str]]:
def plip_classify_with_labels(img: scallopy.Tensor, list: scallopy.String) -> scallopy.Facts[float, Tuple[str]]:
nonlocal labels
labels = [item.strip() for item in list.split(DELIMITER)]
return plip_classify(img)
Expand Down
92 changes: 45 additions & 47 deletions experiments/clutrr-v2/kinship.scl
Original file line number Diff line number Diff line change
@@ -1,58 +1,52 @@
type context(ctx: String)

// Few-shot prompt GPT to extract structured relational data from the natural language question
// Note that, given the mentioned kinships and the goal, we can deduce the final answer via a knowledge base of kinship compositions
@gpt_extract_info(
header="""
In this task, you will be given a question regarding kinships between characters in a story.
Please extract individual kinships mentioned in the story and the characters whose kinship is desired in the question.

Formally, you should extract two types of information in JSON format:
1. Mentioned kinships. This should be a JSON list covering all the kinships mentioned, where each element
is a JSON object with `p1`, `p2`, and `rela` fields, denoting that `p1` is `p2`'s' `rela`.
Please pay extra attention to the directionality of the relation. For a pair of people,
you should generate two relations, one between p1 and p2 and another between p2 and p1. Please only extract direct information mentioned in the question and avoid
doing any computation except for making sure you get both directions.
2. The query. It should be a JSON object with `p1` and `p2` fields, between which is the
relationship we want to derived.

Examples:

Question: Dorothy's brother Michael and her went to get ice cream. Michael is the proud father of the lovely Donald. Who is Dorothy to Donald?
What are the mentioned kinships in JSON format?
[{"p1": "Michael", "p2": "Dorothy", "rela": "brother"}, {"p1": "Dorothy", "p2": "Michael", "rela": "sister"}, {"p1": "Michael", "p2": "Donald", "rela": "father"}, {"p1": "Donald", "p2": "Michael", "rela": "son"}]
Whose kinship do we want to find?
[{"p1": "Dorothy", "p2": "Donald"}]


Question: Michael and his daughter Jennifer like to read poems together. Jason is the proud father of the lovely Michael. Who is Jason to Jennifer?
What are the mentioned kinships in JSON format?
[{"p1": "Jennifer", "p2": "Michael", "rela": "daughter"}, {"p1": "Michael", "p2": "Jennifer", "rela": "father"}, {"p1": "Jason", "p2": "Michael", "rela": "father"}, {"p1": "Michael", "p2": "Jason", "rela": "son"}]
Whose kinship do we want to find?
[{"p1": "Jason", "p2": "Jennifer"}]


Question: Kevin loves going to plays with his wife Aida. Aida's dad James, however, does not like them at all. Who is James to Kevin?
What are the mentioned kinships in JSON format?
[{"p1": "Aida", "p2": "Kevin", "rela": "wife"}, {"p1": "Kevin", "p2": "Aida", "rela": "husband"}, {"p1": "James", "p2": "Aida", "rela": "father"}, {"p1": "Aida", "p2": "James", "rela": "daughter"}]
Whose kinship do we want to find?
[{"p1": "James", "p2": "Kevin"}]


Now, look at the following context.

Question: {{context}}
""",
header="Please extract kinships mentioned in the story and the characters whose kinship is desired in the question.",
prompts=[
"Now, first give me the kinships mentioned in this question in JSON format",
"Good, now please tell me the two people whose kinship we want to find in JSON format"
"What are the mentioned kinships in JSON format (with `p1`, `p2`, and `rela` fields, denoting `p1` is `p2`'s' `rela`)?",
"Whose kinship do we want to find?"
],
examples=[
(
["Dorothy's brother Michael and her went to get ice cream. Michael is the proud father of the lovely Donald.
Who is Dorothy to Donald?"],
[
[("Michael", "Dorothy", "brother"), ("Dorothy", "Michael", "sister"),
("Michael", "Donald", "father"), ("Donald", "Michael", "son")],
[("Dorothy", "Donald")]
]
),
(
["Michael and his daughter Jennifer like to read poems together. Jason is the proud father of the lovely Michael.
Who is Jason to Jennifer?"],
[
[("Jennifer", "Michael", "daughter"), ("Michael", "Jennifer", "father"),
("Jason", "Michael", "father"), ("Michael", "Jason", "son")],
[("Jason", "Jennifer")]
]
),
(
["Kevin loves going to plays with his wife Aida. Aida's dad James, however, does not like them at all.
Who is James to Kevin?"],
[
[("Aida", "Kevin", "wife"), ("Kevin", "Aida", "husband"),
("James", "Aida", "father"), ("Aida", "James", "daughter")],
[("James", "Kevin")]
]
)
],
model="gpt-4",
)
type extract_kinship(bound context: String, p1: String, p2: String, rela: String),
extract_question(bound context: String, p1: String, p2: String)
type extract_kinship (bound context: String, p1: String, p2: String, rela: String),
extract_question (bound context: String, p1: String, p2: String)

// Extract information from the context
rel kinship(p1, p2, rela) = context(ctx) and extract_kinship(ctx, p1, p2, rela)
rel question(p1, p2) = context(ctx) and extract_question(ctx, p1, p2)
rel question(p1, p2) = context(ctx) and extract_question(ctx, p1, p2)

// Knowledge base for composing kinship relations
rel composition = {
("daughter", "daughter", "granddaughter"),
("daughter", "sister", "daughter"),
Expand Down Expand Up @@ -186,6 +180,10 @@ rel composition = {
("grandson", "brother", "grandson"),
}

// Perform computation on the kinship graph
rel derived_kinship(p1, p2, rela) = kinship(p1, p2, rela)
rel derived_kinship(p1, p3, r3) = p1 != p3 and derived_kinship(p1, p2, r1) and derived_kinship(p2, p3, r2) and composition(r2, r1, r3)
rel result(r) = question(p1, p2) and derived_kinship(p1, p2, r)
rel derived_kinship(p1, p3, r3) = p1 != p3 and derived_kinship(p1, p2, r1) and derived_kinship(p2, p3, r2)
and composition(r2, r1, r3)

// Get the answer
rel result(r) = question(p1, p2) and derived_kinship(p1, p2, r)
11 changes: 8 additions & 3 deletions experiments/clutrr-v2/kinship_scallop.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import openai
import json
from tqdm import tqdm
from io import StringIO
import sys
import argparse

import scallopy
import scallopy_ext
Expand All @@ -23,6 +23,12 @@ def __init__(self):

def test_kinship(range=range(N)):
out = {"score": 0, "data": [], "logs": []}

plugins = scallopy_ext.PluginRegistry()
parser = argparse.ArgumentParser()
plugins.setup_argument_parser(parser)
known_args, unknown_args = parser.parse_known_args()
plugins.configure(known_args, unknown_args)

for i in tqdm(range):
(ctx, query), ans = TASK[i]
Expand All @@ -32,8 +38,7 @@ def test_kinship(range=range(N)):
sys.stdout = buffer
try:
ctx = scallopy.ScallopContext(provenance="unit")
scallopy_ext.config.configure(Args(), [])
scallopy_ext.extlib.load_extlib(ctx)
plugins.load_into_ctx(ctx)
ctx.import_file(SCALLOP_FILE)
ctx.add_facts("context", [(input,)])
ctx.run()
Expand Down

0 comments on commit 7b1a8cc

Please sign in to comment.