diff --git a/swe_bench_pro_eval.py b/swe_bench_pro_eval.py index cac63a62..302fa132 100644 --- a/swe_bench_pro_eval.py +++ b/swe_bench_pro_eval.py @@ -552,9 +552,9 @@ def main(): eval_results[instance_id] = False else: raw_sample = raw_sample_df.loc[instance_id] - passed_tests = {x["name"] for x in output["tests"] if x["status"] == "PASSED"} - f2p = set(eval(raw_sample["fail_to_pass"])) - p2p = set(eval(raw_sample["pass_to_pass"])) + passed_tests = {x["name"].strip() for x in output["tests"] if x["status"] == "PASSED"} + f2p = {t.strip() for t in eval(raw_sample["fail_to_pass"])} + p2p = {t.strip() for t in eval(raw_sample["pass_to_pass"])} result = (f2p | p2p) <= passed_tests eval_results[instance_id] = result