-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
886 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.json | ||
extra*.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import os | ||
import json | ||
|
||
class GSM8KDataset:
    """In-memory GSM8K split exposed as ``(question, answer)`` pairs.

    The file is read as JSON Lines: every non-blank line must be a JSON
    object with a ``"question"`` string and an ``"answer"`` string whose last
    whitespace-separated word is the numeric answer.

    Args:
        dataset_loc: Path to the ``.jsonl`` file. Defaults to the location
            the evaluation scripts have always used (relative to the
            current working directory).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """

    DEFAULT_DATASET_LOC = "../../../gsm8k/test.jsonl"

    def __init__(self, dataset_loc=DEFAULT_DATASET_LOC):
        self.dataset_loc = dataset_loc

        # Explicit encoding so the result does not depend on the platform
        # default; blank lines (e.g. a trailing newline) are skipped instead
        # of crashing json.loads.
        with open(self.dataset_loc, encoding="utf-8") as dataset:
            self.data = [json.loads(line) for line in dataset if line.strip()]

    def __len__(self):
        """Return the number of loaded problems."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(question, answer)`` for problem ``i``, answer as a float."""
        record = self.data[i]
        # The answer is always given at the end as the last word; thousands
        # separators are removed so "1,234" parses as 1234.0.
        answer = float(record["answer"].split()[-1].replace(",", ""))

        return (record["question"], answer)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import openai | ||
import json | ||
from tqdm import tqdm | ||
from io import StringIO | ||
import sys | ||
|
||
from dataset_loader import GSM8KDataset | ||
|
||
# Dataset is loaded once at import time; the evaluation loop indexes into it.
TASK = GSM8KDataset()
# Number of problems in the loaded dataset.
N = len(TASK)
# Absolute tolerance used for the lenient "margin" score.
MARGIN = 0.001
|
||
# Few-shot prompt prefix; the question text is appended directly after it.
# The final line ends with an explicit space written as \x20 (a valid escape
# that survives trailing-whitespace stripping). The previous "\s" is not a
# Python escape sequence: it put a literal backslash-s into the prompt and
# triggers a SyntaxWarning on Python 3.12+.
HEADER = '''
In this task, you will be given a math word problem to solve.
Please output your reasoning as a chain of thought and then output your answer on a new line at the end. For the final answer, do not include any non-numerical digits except for a decimal point when applicable.
Here are some examples:
Question: Lisa, Jack, and Tommy earned $60 from washing cars all week. However, half of the $60 was earned by Lisa. Tommy earned half of what Lisa earned. How much more money did Lisa earn than Tommy?
Answer: Lisa earned $60 * 1/2 = $30.\nTommy earned $30 * 1/2 = $15.\nLisa earned $30 - $15 = $15 more than Tommy.
15
Now, answer the following question:
\n
Question:\x20
'''
|
||
def run_gpt(question):
    """Send one word problem to GPT-4 and return the text of its reply.

    The prompt is ``HEADER`` followed by the question and an ``Answer: `` cue,
    sent as a single user message with temperature 0.
    """
    prompt = "".join((HEADER, question, "\nAnswer: "))
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    # Only the first returned choice is used.
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
|
||
def test_gsm8k(range):
    """Evaluate GPT-4 on the given problems and write ``data_baseline.json``.

    For each index the model reply is fetched with ``run_gpt``; its last line
    is parsed as the numeric answer and compared with the reference, both
    exactly and within ``MARGIN``. Any failure for a problem (request error,
    unparseable answer, ...) is recorded with its message and scored 0.

    Args:
        range: Iterable of dataset indices to evaluate. The name shadows the
            builtin but is kept so existing callers keep working.
    """
    out = {"exact_score": 0, "margin_score": 0, "data": []}

    for i in tqdm(range):
        question, ans = TASK[i]

        try:
            output_ans = run_gpt(question)
            # Strip first: a reply ending in a newline would otherwise yield
            # an empty last line and be scored as a failure.
            final_ans = output_ans.strip().split("\n")[-1].replace(",", "")
            predicted = float(final_ans)
            expected = float(ans)
            exact_score = int(predicted == expected)
            margin_score = int(abs(predicted - expected) <= MARGIN)
            out["exact_score"] += exact_score
            out["margin_score"] += margin_score
            out["data"].append(
                {
                    "id": i,
                    "question": question,
                    "reasoning": output_ans,
                    "answer": final_ans,
                    "correct_answer": ans,
                    "exact_score": exact_score,
                    "margin_score": margin_score,
                }
            )
        except Exception as e:
            # Deliberate best effort: keep going and record why this problem
            # could not be scored.
            out["data"].append(
                {"id": i, "question": question, "exception": str(e), "exact_score": 0, "margin_score": 0}
            )

        # Rewrite the results file after every problem so an interrupted run
        # keeps the progress made so far.
        with open("data_baseline.json", "w") as outfile:
            outfile.write(json.dumps(out, indent=4))
|
||
if __name__ == "__main__":
    # Only evaluate when executed as a script: importing this module should
    # not launch a full run of GPT-4 requests over the whole dataset.
    test_gsm8k(range(N))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import openai | ||
import json | ||
from tqdm import tqdm | ||
from io import StringIO | ||
import sys | ||
|
||
from dataset_loader import GSM8KDataset | ||
|
||
# Dataset is loaded once at import time; the evaluation loop indexes into it.
TASK = GSM8KDataset()
# Number of problems in the loaded dataset.
N = len(TASK)
# Absolute tolerance used for the lenient "margin" score.
MARGIN = 0.001
|
||
# Few-shot chain-of-thought prompt prefix; the question text is appended
# directly after it. The final line ends with an explicit space written as
# \x20 (a valid escape that survives trailing-whitespace stripping). The
# previous "\s" is not a Python escape sequence: it put a literal backslash-s
# into the prompt and triggers a SyntaxWarning on Python 3.12+.
HEADER = '''
In this task, you will be given a math word problem to solve.
Please output your reasoning as a chain of thought and then output your answer on a new line at the end. For the final answer, do not include any non-numerical digits except for a decimal point when applicable.
Here are some examples:
Question: Lisa, Jack, and Tommy earned $60 from washing cars all week. However, half of the $60 was earned by Lisa. Tommy earned half of what Lisa earned. How much more money did Lisa earn than Tommy?
Answer: Let's think step by step. First, we know that Lisa earned half of the $60, which is $60 / 2 = $30. Then, we know that Tommy earned half of what Lisa earned, which is $30 / 2 = $15. So, Lisa earned $30 - $15 = $15 more than Tommy.
15
Now, answer the following question:
\n
Question:\x20
'''
|
||
def run_gpt(question):
    """Send one word problem to GPT-4 and return the text of its reply.

    The prompt is ``HEADER`` followed by the question and an answer cue that
    already starts the reply with "Let's think step by step.", sent as a
    single user message with temperature 0.
    """
    prompt = "".join((HEADER, question, "\nAnswer: Let's think step by step. "))
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    # Only the first returned choice is used.
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]
|
||
def test_gsm8k(range):
    """Evaluate GPT-4 on the given problems and write ``data_baseline.json``.

    For each index the model reply is fetched with ``run_gpt``; its last line
    is parsed as the numeric answer and compared with the reference, both
    exactly and within ``MARGIN``. Any failure for a problem (request error,
    unparseable answer, ...) is recorded with its message and scored 0.

    Args:
        range: Iterable of dataset indices to evaluate. The name shadows the
            builtin but is kept so existing callers keep working.
    """
    out = {"exact_score": 0, "margin_score": 0, "data": []}

    for i in tqdm(range):
        question, ans = TASK[i]

        try:
            output_ans = run_gpt(question)
            # Strip first: a reply ending in a newline would otherwise yield
            # an empty last line and be scored as a failure.
            final_ans = output_ans.strip().split("\n")[-1].replace(",", "")
            predicted = float(final_ans)
            expected = float(ans)
            exact_score = int(predicted == expected)
            margin_score = int(abs(predicted - expected) <= MARGIN)
            out["exact_score"] += exact_score
            out["margin_score"] += margin_score
            out["data"].append(
                {
                    "id": i,
                    "question": question,
                    "reasoning": output_ans,
                    "answer": final_ans,
                    "correct_answer": ans,
                    "exact_score": exact_score,
                    "margin_score": margin_score,
                }
            )
        except Exception as e:
            # Deliberate best effort: keep going and record why this problem
            # could not be scored.
            out["data"].append(
                {"id": i, "question": question, "exception": str(e), "exact_score": 0, "margin_score": 0}
            )

        # Rewrite the results file after every problem so an interrupted run
        # keeps the progress made so far.
        # NOTE(review): this chain-of-thought prompt variant writes to
        # "data_baseline.json" -- if another script in the same directory uses
        # that name, the runs overwrite each other. Confirm whether a distinct
        # file name was intended before changing it.
        with open("data_baseline.json", "w") as outfile:
            outfile.write(json.dumps(out, indent=4))
|
||
if __name__ == "__main__":
    # Only evaluate when executed as a script: importing this module should
    # not launch a full run of GPT-4 requests over the whole dataset.
    test_gsm8k(range(N))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import json | ||
from tqdm import tqdm | ||
from io import StringIO | ||
import sys | ||
|
||
import scallopy | ||
import scallopy_ext | ||
|
||
from dataset_loader import GSM8KDataset | ||
|
||
# Dataset is loaded once at import time; the evaluation loop indexes into it.
TASK = GSM8KDataset()
# Number of problems in the loaded dataset.
N = len(TASK)
# Scallop program imported into a fresh context for every question.
SCALLOP_FILE = "py_expr_steps.scl"
# Absolute tolerance used for the lenient "margin" score.
MARGIN = 0.001
|
||
class Args:
    """Hard-coded settings object passed to ``scallopy_ext.config.configure``.

    The attribute names presumably mirror the options the Scallop extension
    library reads -- confirm against scallopy_ext before renaming any.
    """

    def __init__(self):
        # Device-related settings: CUDA off, no GPU chosen.
        self.cuda, self.gpu = False, None

        # OpenAI-related settings.
        openai_settings = (
            ("num_allowed_openai_request", 100),
            ("openai_gpt_model", "gpt-4"),
            ("openai_gpt_temperature", 0),
        )
        for option, value in openai_settings:
            setattr(self, option, value)
|
||
def _unescape_log(text):
    """Interpret backslash escapes in captured output, or return it unchanged."""
    try:
        return text.encode("utf-8").decode("unicode_escape")
    except UnicodeDecodeError:
        # Captured text may contain sequences that are not valid escapes
        # (a stray "\u", "\x" or trailing backslash); keep the raw text
        # rather than aborting the whole evaluation.
        return text


def test_py_expr_parsing(range=range(N)):
    """Run the Scallop program on the given problems and write ``data_py_expr.json``.

    For every index a fresh ``ScallopContext`` is created, ``SCALLOP_FILE`` is
    imported, the question is added as a ``question`` fact and the ``result``
    relation is compared with the reference answer, exactly and within
    ``MARGIN``. Anything printed while a problem is processed is captured and
    stored under ``"logs"``. A failure for a problem is recorded with its
    message and scored 0. The total exact score is printed at the end.

    Args:
        range: Iterable of dataset indices to evaluate. The name shadows the
            builtin but is kept so existing callers keep working.
    """
    # Local import keeps this block self-contained.
    from contextlib import redirect_stdout

    out = {"exact_score": 0, "margin_score": 0, "data": [], "logs": []}

    for i in tqdm(range):
        question, answer = TASK[i]

        buffer = StringIO()
        # redirect_stdout restores the previous stream even if something
        # unexpected escapes the block.
        with redirect_stdout(buffer):
            try:
                ctx = scallopy.ScallopContext(provenance="unit")
                scallopy_ext.config.configure(Args(), [])
                scallopy_ext.extlib.load_extlib(ctx)
                ctx.import_file(SCALLOP_FILE)
                ctx.add_facts("question", [(question,)])
                ctx.run()
                res = list(ctx.relation("result"))
                exact_score = 0
                margin_score = 0
                final_answer = None
                for (output_ans,) in res:
                    value = float(output_ans)
                    if value == answer:
                        exact_score = 1
                        margin_score = 1
                        final_answer = output_ans
                    # A near miss only counts while no exact match was seen.
                    if exact_score == 0 and abs(value - answer) <= MARGIN:
                        margin_score = 1
                        final_answer = output_ans
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "correct_answer": answer,
                        "final_answer": final_answer,
                        "exact_score": exact_score,
                        "margin_score": margin_score,
                        "steps": list(ctx.relation("step")),
                        "outputted_answers": res,
                        "num_answers": len(res),
                    }
                )
                out["exact_score"] += exact_score
                out["margin_score"] += margin_score
            except Exception as e:
                # Deliberate best effort: keep going and record why this
                # problem could not be scored.
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "exception": str(e),
                        "exact_score": 0,
                        "margin_score": 0,
                    }
                )

        out["logs"].append({"id": i, "log": _unescape_log(buffer.getvalue())})

        # Rewrite the results file after every problem so an interrupted run
        # keeps the progress made so far.
        with open("data_py_expr.json", "w") as outfile:
            outfile.write(json.dumps(out, indent=2))

    print(out["exact_score"])
|
||
# Run the evaluation over every problem only when executed as a script.
if __name__ == "__main__":
    test_py_expr_parsing(range(N))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import json | ||
from tqdm import tqdm | ||
from io import StringIO | ||
import sys | ||
|
||
import scallopy | ||
import scallopy_ext | ||
|
||
from dataset_loader import GSM8KDataset | ||
|
||
# Dataset is loaded once at import time; the evaluation loop indexes into it.
TASK = GSM8KDataset()
# Number of problems in the loaded dataset.
N = len(TASK)
# Scallop program imported into a fresh context for every question.
SCALLOP_FILE = "semantic_parser.scl"
# Absolute tolerance used for the lenient "margin" score.
MARGIN = 0.001
|
||
class Args:
    """Hard-coded settings object passed to ``scallopy_ext.config.configure``.

    The attribute names presumably mirror the options the Scallop extension
    library reads -- confirm against scallopy_ext before renaming any.
    """

    def __init__(self):
        # Device-related settings: CUDA off, no GPU chosen.
        self.cuda, self.gpu = False, None

        # OpenAI-related settings.
        openai_settings = (
            ("num_allowed_openai_request", 100),
            ("openai_gpt_model", "gpt-4"),
            ("openai_gpt_temperature", 0),
        )
        for option, value in openai_settings:
            setattr(self, option, value)
|
||
def _unescape_log(text):
    """Interpret backslash escapes in captured output, or return it unchanged."""
    try:
        return text.encode("utf-8").decode("unicode_escape")
    except UnicodeDecodeError:
        # Captured text may contain sequences that are not valid escapes
        # (a stray "\u", "\x" or trailing backslash); keep the raw text
        # rather than aborting the whole evaluation.
        return text


def test_semantic_parser(range=range(N)):
    """Run the Scallop program on the given problems and write ``data_scallop.json``.

    For every index a fresh ``ScallopContext`` is created, ``SCALLOP_FILE`` is
    imported, the question is added as a ``question`` fact and the ``result``
    relation is compared with the reference answer, exactly and within
    ``MARGIN``. Anything printed while a problem is processed is captured and
    stored under ``"logs"``. A failure for a problem is recorded with its
    message and scored 0. The total exact score is printed at the end.

    Args:
        range: Iterable of dataset indices to evaluate; defaults to every
            problem. The name shadows the builtin but is kept so existing
            callers keep working.
    """
    # Local import keeps this block self-contained.
    from contextlib import redirect_stdout

    out = {"exact_score": 0, "margin_score": 0, "data": [], "logs": []}

    for i in tqdm(range):
        question, answer = TASK[i]

        buffer = StringIO()
        # redirect_stdout restores the previous stream even if something
        # unexpected escapes the block.
        with redirect_stdout(buffer):
            try:
                ctx = scallopy.ScallopContext(provenance="unit")
                scallopy_ext.config.configure(Args(), [])
                scallopy_ext.extlib.load_extlib(ctx)
                ctx.import_file(SCALLOP_FILE)
                ctx.add_facts("question", [(question,)])
                ctx.run()
                res = list(ctx.relation("result"))
                exact_score = 0
                margin_score = 0
                final_answer = None
                for (output_ans,) in res:
                    value = float(output_ans)
                    if value == answer:
                        exact_score = 1
                        margin_score = 1
                        final_answer = output_ans
                    # A near miss only counts while no exact match was seen.
                    if exact_score == 0 and abs(value - answer) <= MARGIN:
                        margin_score = 1
                        final_answer = output_ans
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "correct_answer": answer,
                        "final_answer": final_answer,
                        "exact_score": exact_score,
                        "margin_score": margin_score,
                        "parsed_expr": list(ctx.relation("parsed_expr")),
                        "outputted_answers": res,
                        "num_answers": len(res),
                    }
                )
                out["exact_score"] += exact_score
                out["margin_score"] += margin_score
            except Exception as e:
                # Deliberate best effort: keep going and record why this
                # problem could not be scored.
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "exception": str(e),
                        "exact_score": 0,
                        "margin_score": 0,
                    }
                )

        out["logs"].append({"id": i, "log": _unescape_log(buffer.getvalue())})

        # Rewrite the results file after every problem so an interrupted run
        # keeps the progress made so far.
        with open("data_scallop.json", "w") as outfile:
            outfile.write(json.dumps(out, indent=2))

    print(out["exact_score"])
|
||
if __name__ == "__main__":
    # Only evaluate when executed as a script: importing this module should
    # not launch a full run over the whole dataset.
    test_semantic_parser()
Oops, something went wrong.