Skip to content

Commit 76837e4

Browse files
committed
code clean
1 parent c7ed3da commit 76837e4

File tree

2 files changed

+4
-57
lines changed

2 files changed

+4
-57
lines changed

src/agents/datasets/hotpotqa.py

-8
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,3 @@ def evaluate(self, idx: int, answer: str):
114 114
em = pred == gt
115 115
f1 = f1_score(pred, gt)[0]
116 116
return em, {"em": em, "f1": f1, "gt": gt, "pred": pred}
117-
118-
def mertirc(self, gold, pred, trace=None):
119-
gt = gold.answer
120-
answer = pred.answer
121-
pred = normalize_answer(answer)
122-
gt = normalize_answer(gt)
123-
f1 = f1_score(pred, gt)[0]
124-
return f1

src/agents/datasets/math.py

+4 -49
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ def get_content_between_a_b(start_tag, end_tag, text):
13 13
while start_index != -1:
14 14
end_index = text.find(end_tag, start_index + len(start_tag))
15 15
if end_index != -1:
16-
extracted_text += text[start_index + len(start_tag): end_index] + " "
16+
extracted_text += text[start_index +
17+
len(start_tag): end_index] + " "
17 18
start_index = text.find(start_tag, end_index + len(end_tag))
18 19
else:
19 20
break
@@ -65,7 +66,8 @@ def _load_data(self, root_dir: str) -> List[Dict[str, Any]]:
65 66
data_file = os.path.join(root, file)
66 67
data_point = self.load(data_file)
67 68
data.extend(
68-
data_point if isinstance(data_point, list) else [data_point]
69+
data_point if isinstance(data_point, list) else [
70+
data_point]
69 71
)
70 72

71 73
df = pd.DataFrame(data)
@@ -153,50 +155,3 @@ def evaluate(self, idx: int, answer: str):
153 155
return 1, {"score": 1}
154 156
else:
155 157
return 0, {"score": 0}
156-
157-
def mertirc(self, gold, pred, trace=None):
158-
problem = gold.problem
159-
solution = gold.solution
160-
answer = pred.answer
161-
prompt = f"""
162-
You are the wise mathematics answer verifier:
163-
You identify as math word problem answer verifier, not an assistant.
164-
You will be provided an math word problem, the real answer for this math word problem, and the predicted answer from a generation model. You should understand the problem and validate the correctness of the generated answer in the context of the provided math word problem and the real answer.
165-
You should not solve the problem by yourself, you only job is to act as a verifier.
166-
167-
On your profile and general capabilities:
168-
Your responses should avoid being vague, controversial or off-topic.
169-
Your logic and reasoning should be rigorous and intelligent.
170-
171-
The problem: {problem}
172-
173-
The standard solution: {solution}
174-
175-
The output of generation model: {answer}
176-
177-
Now, please give your verdict(You should first show your thinking of your verification logic and then output your final verdict,You final verdict is limited to correct or incorrect,and wrapped into the <verdict></verdict>, such as <verdict>correct</verdict>):
178-
"""
179-
180-
messages = [{"role": "user", "content": prompt}]
181-
flag = True
182-
cnt = 0
183-
while flag and cnt < 20:
184-
try:
185-
result_outputs = (
186-
completion_with_backoff(
187-
messages=messages, model="gpt-4-turbo-2024-04-09"
188-
)
189-
.choices[0]
190-
.message.content
191-
)
192-
verdict = extract(result_outputs, "verdict")
193-
flag = False
194-
except Exception as e:
195-
print(e)
196-
time.sleep(10)
197-
cnt += 1
198-
199-
if verdict == "correct":
200-
return 1
201-
else:
202-
return 0

0 commit comments

Comments
 (0)