Skip to content

Commit

Permalink
Adding gsm8k experiments
Browse files Browse the repository at this point in the history
  • Loading branch information
Liby99 committed Feb 23, 2024
1 parent 3dbe1e9 commit 2f01a6f
Show file tree
Hide file tree
Showing 12 changed files with 886 additions and 0 deletions.
2 changes: 2 additions & 0 deletions experiments/gsm8k/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.json
extra*.py
18 changes: 18 additions & 0 deletions experiments/gsm8k/dataset_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
import json

class GSM8KDataset:
    """In-memory view of a GSM8K split stored as a JSON Lines file.

    Each non-blank line of the file is parsed as one JSON object that must
    provide a ``"question"`` and an ``"answer"`` string.

    Args:
        dataset_loc: Path of the ``.jsonl`` file to load. Defaults to the
            location the experiment scripts have always used, which is
            relative to the current working directory.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """

    def __init__(self, dataset_loc="../../../gsm8k/test.jsonl"):
        self.dataset_loc = dataset_loc

        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(self.dataset_loc, encoding="utf-8") as dataset:
            # Blank lines (e.g. an extra newline at the end of the file)
            # carry no record and would make json.loads raise, so skip them.
            self.data = [json.loads(line) for line in dataset if line.strip()]

    def __len__(self):
        """Return the number of loaded problems."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(question, answer)`` for problem ``i``.

        The numeric answer is taken from the last whitespace-separated word
        of the ``"answer"`` field, with thousands separators removed, and is
        returned as a ``float``.
        """
        record = self.data[i]
        question = record["question"]
        # The answer is always given at the end as the last word
        answer = float(record["answer"].split()[-1].replace(",", ""))

        return (question, answer)
72 changes: 72 additions & 0 deletions experiments/gsm8k/gsm8k_baseline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import openai
import json
from tqdm import tqdm
from io import StringIO
import sys

from dataset_loader import GSM8KDataset

# Dataset instance, built once at import time and indexed by the evaluation loop.
TASK = GSM8KDataset()
# Number of problems in the dataset; the script evaluates indices 0..N-1.
N = len(TASK)
# Absolute tolerance used when computing the "margin" score.
MARGIN = 0.001

# Few-shot prompt prefix. run_gpt() appends the question text and a trailing
# "\nAnswer: " to this string before sending it to the model.
#
# The last prompt line ends with a literal backslash followed by "s". It used
# to be written as the invalid escape "\s" (a SyntaxWarning on Python 3.12+);
# it is now spelled "\\s", which yields exactly the same prompt text, so
# previously recorded results stay comparable.
# NOTE(review): the author probably meant a trailing space here - confirm
# before changing the prompt itself.
HEADER = '''
In this task, you will be given a math word problem to solve.
Please output your reasoning as a chain of thought and then output your answer on a new line at the end. For the final answer, do not include any non-numerical digits except for a decimal point when applicable.
Here are some examples:
Question: Lisa, Jack, and Tommy earned $60 from washing cars all week. However, half of the $60 was earned by Lisa. Tommy earned half of what Lisa earned. How much more money did Lisa earn than Tommy?
Answer: Lisa earned $60 * 1/2 = $30.\nTommy earned $30 * 1/2 = $15.\nLisa earned $30 - $15 = $15 more than Tommy.
15
Now, answer the following question:
\n
Question:\\s
'''

def run_gpt(question):
    """Ask the chat model to solve one word problem.

    The prompt is ``HEADER`` followed by ``question`` and a trailing
    ``"\\nAnswer: "``, sent as a single user message to ``gpt-4`` with
    temperature 0.

    Args:
        question: Text of the problem to solve.

    Returns:
        The message content of the first choice in the response.
    """
    prompt = HEADER + question + "\nAnswer: "
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def test_gsm8k(range):
    """Evaluate ``run_gpt`` on the selected problems and save the results.

    For every index the model's reply is fetched, its last line (commas
    removed) is parsed as a number and compared with the reference answer,
    both exactly and within ``MARGIN``. Any exception raised while querying
    or parsing is recorded for that problem, which then scores 0.

    Args:
        range: Iterable of dataset indices to evaluate.

    Side effects:
        Writes the accumulated scores and per-problem records to
        ``data_baseline.json`` in the current working directory.
    """
    out = {"exact_score": 0, "margin_score": 0, "data": []}

    for i in tqdm(range):
        problem, expected = TASK[i]

        try:
            reasoning = run_gpt(problem)
            # The prompt asks for the final number alone on the last line.
            last_line = reasoning.split('\n')[-1].replace(',', '')
            predicted = float(last_line)
            target = float(expected)
            exact_score = int(predicted == target)
            margin_score = int(abs(predicted - target) <= MARGIN)
        except Exception as e:
            out["data"].append(
                {"id": i, "question": problem, "exception": str(e), "exact_score": 0, "margin_score": 0}
            )
        else:
            out["exact_score"] += exact_score
            out["margin_score"] += margin_score
            out["data"].append(
                {
                    "id": i,
                    "question": problem,
                    "reasoning": reasoning,
                    "answer": last_line,
                    "correct_answer": expected,
                    "exact_score": exact_score,
                    "margin_score": margin_score,
                }
            )

    serialized = json.dumps(out, indent=4)
    with open("data_baseline.json", "w") as outfile:
        outfile.write(serialized)


# Run the evaluation over the whole dataset.
test_gsm8k(range(N))
72 changes: 72 additions & 0 deletions experiments/gsm8k/gsm8k_cot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import openai
import json
from tqdm import tqdm
from io import StringIO
import sys

from dataset_loader import GSM8KDataset

# Dataset instance, built once at import time and indexed by the evaluation loop.
TASK = GSM8KDataset()
# Number of problems in the dataset; the script evaluates indices 0..N-1.
N = len(TASK)
# Absolute tolerance used when computing the "margin" score.
MARGIN = 0.001

# Few-shot chain-of-thought prompt prefix. run_gpt() appends the question text
# and a trailing "\nAnswer: Let's think step by step. " to this string before
# sending it to the model.
#
# The last prompt line ends with a literal backslash followed by "s". It used
# to be written as the invalid escape "\s" (a SyntaxWarning on Python 3.12+);
# it is now spelled "\\s", which yields exactly the same prompt text, so
# previously recorded results stay comparable.
# NOTE(review): the author probably meant a trailing space here - confirm
# before changing the prompt itself.
HEADER = '''
In this task, you will be given a math word problem to solve.
Please output your reasoning as a chain of thought and then output your answer on a new line at the end. For the final answer, do not include any non-numerical digits except for a decimal point when applicable.
Here are some examples:
Question: Lisa, Jack, and Tommy earned $60 from washing cars all week. However, half of the $60 was earned by Lisa. Tommy earned half of what Lisa earned. How much more money did Lisa earn than Tommy?
Answer: Let's think step by step. First, we know that Lisa earned half of the $60, which is $60 / 2 = $30. Then, we know that Tommy earned half of what Lisa earned, which is $30 / 2 = $15. So, Lisa earned $30 - $15 = $15 more than Tommy.
15
Now, answer the following question:
\n
Question:\\s
'''

def run_gpt(question):
    """Ask the chat model to solve one word problem step by step.

    The prompt is ``HEADER`` followed by ``question`` and a trailing
    ``"\\nAnswer: Let's think step by step. "``, sent as a single user
    message to ``gpt-4`` with temperature 0.

    Args:
        question: Text of the problem to solve.

    Returns:
        The message content of the first choice in the response.
    """
    prompt = HEADER + question + "\nAnswer: Let's think step by step. "
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]

def test_gsm8k(range):
    """Evaluate the chain-of-thought ``run_gpt`` prompt and save the results.

    For every index the model's reply is fetched, its last line (commas
    removed) is parsed as a number and compared with the reference answer,
    both exactly and within ``MARGIN``. Any exception raised while querying
    or parsing is recorded for that problem, which then scores 0.

    Args:
        range: Iterable of dataset indices to evaluate.

    Side effects:
        Writes the accumulated scores and per-problem records to
        ``data_cot.json`` in the current working directory.
    """
    out = {"exact_score": 0, "margin_score": 0, "data": []}

    for i in tqdm(range):
        problem, expected = TASK[i]

        try:
            reasoning = run_gpt(problem)
            # The prompt asks for the final number alone on the last line.
            last_line = reasoning.split('\n')[-1].replace(',', '')
            predicted = float(last_line)
            target = float(expected)
            exact_score = int(predicted == target)
            margin_score = int(abs(predicted - target) <= MARGIN)
        except Exception as e:
            out["data"].append(
                {"id": i, "question": problem, "exception": str(e), "exact_score": 0, "margin_score": 0}
            )
        else:
            out["exact_score"] += exact_score
            out["margin_score"] += margin_score
            out["data"].append(
                {
                    "id": i,
                    "question": problem,
                    "reasoning": reasoning,
                    "answer": last_line,
                    "correct_answer": expected,
                    "exact_score": exact_score,
                    "margin_score": margin_score,
                }
            )

    serialized = json.dumps(out, indent=4)
    # This script used to write to "data_baseline.json", the same file the
    # baseline script produces, so a chain-of-thought run silently overwrote
    # the baseline results. It now has its own output file.
    with open("data_cot.json", "w") as outfile:
        outfile.write(serialized)


# Run the evaluation over the whole dataset.
test_gsm8k(range(N))
92 changes: 92 additions & 0 deletions experiments/gsm8k/gsm8k_py_expr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import json
from tqdm import tqdm
from io import StringIO
import sys

import scallopy
import scallopy_ext

from dataset_loader import GSM8KDataset

# Dataset instance, built once at import time and indexed by the evaluation loop.
TASK = GSM8KDataset()
# Number of problems in the dataset; also the default evaluation range.
N = len(TASK)
# Scallop program imported into a fresh context for every problem.
SCALLOP_FILE = "py_expr_steps.scl"
# Absolute tolerance used when computing the "margin" score.
MARGIN = 0.001

class Args:
    """Fixed settings object passed to ``scallopy_ext.config.configure``.

    Every instance carries the same five attributes: ``cuda``, ``gpu``,
    ``num_allowed_openai_request``, ``openai_gpt_model`` and
    ``openai_gpt_temperature``.
    NOTE(review): how scallopy_ext interprets each field is not visible
    here - confirm against that package before changing a value.
    """

    def __init__(self):
        settings = {
            "cuda": False,
            "gpu": None,
            "num_allowed_openai_request": 100,
            "openai_gpt_model": "gpt-4",
            "openai_gpt_temperature": 0,
        }
        for name, value in settings.items():
            setattr(self, name, value)

def _unescape_log(raw):
    """Turn backslash escapes in captured output into the characters they name.

    If the text contains a sequence the ``unicode_escape`` codec rejects
    (for instance a truncated hex or unicode escape), the text is returned
    unchanged so that one odd log line cannot abort the whole run.
    """
    try:
        return raw.encode("utf-8").decode("unicode_escape")
    except UnicodeError:
        return raw


def test_py_expr_parsing(range=range(N)):
    """Run the Scallop program in ``SCALLOP_FILE`` on the selected problems.

    For each index a fresh Scallop context is created, the question is added
    as a ``question`` fact, and every tuple of the ``result`` relation is
    compared with the reference answer, both exactly and within ``MARGIN``.
    Anything printed to stdout while a problem is processed is captured and
    stored as that problem's log. An exception raised while processing a
    problem is recorded for it, and the problem scores 0.

    Args:
        range: Iterable of dataset indices to evaluate. Defaults to every
            problem in the dataset.

    Side effects:
        Writes scores, per-problem records and logs to ``data_py_expr.json``
        in the current working directory and prints the exact-match count.
    """
    # Imported locally so the function does not depend on a new module-level
    # import being added to the file.
    from contextlib import redirect_stdout

    out = {"exact_score": 0, "margin_score": 0, "data": [], "logs": []}

    for i in tqdm(range):
        question, answer = TASK[i]

        buffer = StringIO()
        # The context manager always puts the previous stdout back, even if
        # something other than Exception (e.g. KeyboardInterrupt) escapes.
        with redirect_stdout(buffer):
            try:
                ctx = scallopy.ScallopContext(provenance="unit")
                scallopy_ext.config.configure(Args(), [])
                scallopy_ext.extlib.load_extlib(ctx)
                ctx.import_file(SCALLOP_FILE)
                ctx.add_facts("question", [(question,)])
                ctx.run()
                res = list(ctx.relation("result"))
                exact_score = 0
                margin_score = 0
                final_answer = None
                for output_ans, in res:
                    value = float(output_ans)
                    if value == answer:
                        exact_score = 1
                        margin_score = 1
                        final_answer = output_ans
                    elif exact_score == 0 and abs(value - answer) <= MARGIN:
                        # Near misses only count while no exact match has
                        # been seen.
                        margin_score = 1
                        final_answer = output_ans
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "correct_answer": answer,
                        "final_answer": final_answer,
                        "exact_score": exact_score,
                        "margin_score": margin_score,
                        "steps": list(ctx.relation("step")),
                        "outputted_answers": list(ctx.relation("result")),
                        "num_answers": len(res),
                    }
                )
                out["exact_score"] += exact_score
                out["margin_score"] += margin_score
            except Exception as e:
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "exception": str(e),
                        "exact_score": 0,
                        "margin_score": 0,
                    }
                )

        out["logs"].append(
            {
                "id": i,
                "log": _unescape_log(buffer.getvalue()),
            }
        )

    json_object = json.dumps(out, indent=2)
    with open("data_py_expr.json", "w") as outfile:
        outfile.write(json_object)

    print(out["exact_score"])


if __name__ == "__main__":
    test_py_expr_parsing(range(N))
91 changes: 91 additions & 0 deletions experiments/gsm8k/gsm8k_scallop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import json
from tqdm import tqdm
from io import StringIO
import sys

import scallopy
import scallopy_ext

from dataset_loader import GSM8KDataset

# Dataset instance, built once at import time and indexed by the evaluation loop.
TASK = GSM8KDataset()
# Number of problems in the dataset; also the default evaluation range.
N = len(TASK)
# Scallop program imported into a fresh context for every problem.
SCALLOP_FILE = "semantic_parser.scl"
# Absolute tolerance used when computing the "margin" score.
MARGIN = 0.001

class Args:
    """Fixed settings object passed to ``scallopy_ext.config.configure``.

    Every instance carries the same five attributes: ``cuda``, ``gpu``,
    ``num_allowed_openai_request``, ``openai_gpt_model`` and
    ``openai_gpt_temperature``.
    NOTE(review): how scallopy_ext interprets each field is not visible
    here - confirm against that package before changing a value.
    """

    def __init__(self):
        vars(self).update(
            {
                "cuda": False,
                "gpu": None,
                "num_allowed_openai_request": 100,
                "openai_gpt_model": "gpt-4",
                "openai_gpt_temperature": 0,
            }
        )

def _unescape_log(raw):
    """Turn backslash escapes in captured output into the characters they name.

    If the text contains a sequence the ``unicode_escape`` codec rejects
    (for instance a truncated hex or unicode escape), the text is returned
    unchanged so that one odd log line cannot abort the whole run.
    """
    try:
        return raw.encode("utf-8").decode("unicode_escape")
    except UnicodeError:
        return raw


def test_semantic_parser(range=range(N)):
    """Run the Scallop program in ``SCALLOP_FILE`` on the selected problems.

    For each index a fresh Scallop context is created, the question is added
    as a ``question`` fact, and every tuple of the ``result`` relation is
    compared with the reference answer, both exactly and within ``MARGIN``.
    Anything printed to stdout while a problem is processed is captured and
    stored as that problem's log. An exception raised while processing a
    problem is recorded for it, and the problem scores 0.

    Args:
        range: Iterable of dataset indices to evaluate. Defaults to every
            problem in the dataset.

    Side effects:
        Writes scores, per-problem records and logs to ``data_scallop.json``
        in the current working directory and prints the exact-match count.
    """
    # Imported locally so the function does not depend on a new module-level
    # import being added to the file.
    from contextlib import redirect_stdout

    out = {"exact_score": 0, "margin_score": 0, "data": [], "logs": []}

    for i in tqdm(range):
        question, answer = TASK[i]

        buffer = StringIO()
        # The context manager always puts the previous stdout back, even if
        # something other than Exception (e.g. KeyboardInterrupt) escapes.
        with redirect_stdout(buffer):
            try:
                ctx = scallopy.ScallopContext(provenance="unit")
                scallopy_ext.config.configure(Args(), [])
                scallopy_ext.extlib.load_extlib(ctx)
                ctx.import_file(SCALLOP_FILE)
                ctx.add_facts("question", [(question,)])
                ctx.run()
                res = list(ctx.relation("result"))
                exact_score = 0
                margin_score = 0
                final_answer = None
                for output_ans, in res:
                    value = float(output_ans)
                    if value == answer:
                        exact_score = 1
                        margin_score = 1
                        final_answer = output_ans
                    elif exact_score == 0 and abs(value - answer) <= MARGIN:
                        # Near misses only count while no exact match has
                        # been seen.
                        margin_score = 1
                        final_answer = output_ans
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "correct_answer": answer,
                        "final_answer": final_answer,
                        "exact_score": exact_score,
                        "margin_score": margin_score,
                        "parsed_expr": list(ctx.relation("parsed_expr")),
                        "outputted_answers": list(ctx.relation("result")),
                        "num_answers": len(res),
                    }
                )
                out["exact_score"] += exact_score
                out["margin_score"] += margin_score
            except Exception as e:
                out["data"].append(
                    {
                        "id": i,
                        "question": question,
                        "exception": str(e),
                        "exact_score": 0,
                        "margin_score": 0,
                    }
                )

        out["logs"].append(
            {
                "id": i,
                "log": _unescape_log(buffer.getvalue()),
            }
        )

    json_object = json.dumps(out, indent=2)
    with open("data_scallop.json", "w") as outfile:
        outfile.write(json_object)

    print(out["exact_score"])


# Run the evaluation over the whole dataset.
test_semantic_parser()
Loading

0 comments on commit 2f01a6f

Please sign in to comment.