-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy pathevaluate_instance.py
101 lines (83 loc) · 3.65 KB
/
evaluate_instance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import base64
import json
import logging
import os
import sys
from swebench_docker.constants import PatchType, KEY_PREDICTION
from swebench_docker.context_manager import TaskEnvContextManager
from swebench_docker.utils import extract_minimal_patch
# Configure the root logger once for the whole script: INFO level with a
# timestamped "time - level - message" record layout.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# basicConfig() does nothing when the root logger already has handlers, so the
# level is also set explicitly on the root logger.
logging.getLogger().setLevel(logging.INFO)

# Named logger used by everything in this script.
logger = logging.getLogger("evaluate_instance")
def main(
    task_instance: dict,
    testbed_name: str,
    repo_dir: str,
    log_dir: str,
    timeout: int,
    log_suffix: str = None,
    image_type: str = 'conda'
):
    """Evaluate one task instance inside a ``TaskEnvContextManager``.

    The prediction patch is first applied as a trial. If that fails and the
    prediction is neither ``None`` nor empty, it is replaced in
    ``task_instance`` by the result of ``extract_minimal_patch`` and tried
    again. The trial application is then reverted, after which the prediction
    patch (if any), the test patch (if any) and the task's tests are run.

    Args:
        task_instance: Task description; reads ``instance_id``,
            ``KEY_PREDICTION`` and ``test_patch``. ``KEY_PREDICTION`` may be
            overwritten with the minimal patch.
        testbed_name: Forwarded to ``TaskEnvContextManager``.
        repo_dir: Forwarded to ``TaskEnvContextManager``.
        log_dir: Forwarded to ``TaskEnvContextManager``.
        timeout: Forwarded to ``TaskEnvContextManager`` as ``timeout``.
        log_suffix: Forwarded to ``TaskEnvContextManager`` as ``log_suffix``.
        image_type: Forwarded to ``TaskEnvContextManager`` as ``image_type``.

    Raises:
        SystemExit: With status 1 when the minimal patch fails to apply, or
            when applying the patches / running the tests reports failure.
    """
    logger.info(
        "Instance ID: " + task_instance['instance_id']
        + "\nTestbed: " + testbed_name
        + "\nLog dir: " + log_dir
    )

    env_manager = TaskEnvContextManager(
        task_instance,
        testbed_name,
        repo_dir,
        log_dir,
        timeout=timeout,
        log_suffix=log_suffix,
        image_type=image_type,
    )
    with env_manager as env:
        # Trial application of the prediction exactly as provided.
        trial_label = PatchType.PATCH_PRED_TRY.value
        first_try_ok = env.apply_patch(
            task_instance[KEY_PREDICTION], patch_type=trial_label
        )

        # Fall back to a minimal version of the patch only when there is an
        # actual (non-None, non-empty) prediction that failed to apply.
        raw_prediction = task_instance[KEY_PREDICTION]
        needs_minimal_patch = (
            not first_try_ok
            and raw_prediction is not None
            and raw_prediction != ""
        )
        if needs_minimal_patch:
            task_instance[KEY_PREDICTION] = extract_minimal_patch(raw_prediction)
            trial_label = PatchType.PATCH_PRED_MINIMAL_TRY.value
            if not env.apply_patch(
                task_instance[KEY_PREDICTION], patch_type=trial_label
            ):
                logger.warning("Failed to apply prediction patch")
                sys.exit(1)

        # Undo the trial application before the real evaluation run.
        env.apply_patch(
            task_instance[KEY_PREDICTION], patch_type=trial_label, revert=True
        )

        # Label the final application according to whether the patch was edited.
        final_label = (
            PatchType.PATCH_PRED_MINIMAL.value
            if trial_label == PatchType.PATCH_PRED_MINIMAL_TRY.value
            else PatchType.PATCH_PRED.value
        )

        # Apply prediction, then test patch, then run the tests; each step is
        # attempted only if the previous one did not fail, and empty patches
        # are skipped.
        prediction = task_instance[KEY_PREDICTION]
        tests_diff = task_instance["test_patch"]
        succeeded = (
            (not prediction or env.apply_patch(prediction, patch_type=final_label))
            and (
                not tests_diff
                or env.apply_patch(tests_diff, patch_type=PatchType.PATCH_TEST.value)
            )
            and env.run_tests_task(task_instance)
        )
        if not succeeded:
            logger.warning("Evaluation failed")
            sys.exit(1)

        logger.info("Evaluation succeeded")
if __name__ == "__main__":
    # Script entry point.
    #
    # The task instance is loaded from a JSON file at a fixed path when that
    # file exists; otherwise it is decoded from the base64-encoded INSTANCE
    # environment variable. The remaining settings come from the environment:
    #   LOG_DIR, TESTBED_NAME  - required
    #   REPO_DIR               - required, falls back to TESTBED when unset/empty
    #   TIMEOUT                - optional integer
    #   LOG_SUFFIX             - optional
    #   IMAGE_TYPE             - optional, defaults to 'conda'
    #
    # Required settings are validated with explicit checks rather than
    # `assert`, because assertions are removed when Python runs with -O and a
    # missing variable would then only surface later as an unrelated TypeError.

    def _require_env(name: str) -> str:
        """Return environment variable *name*, exiting with status 1 if unset."""
        value = os.getenv(name)
        if value is None:
            logger.error(name + " environment variable is not set")
            sys.exit(1)
        return value

    TASK_INSTANCE_JSON = "/home/swe-bench/task_instance.json"
    if os.path.exists(TASK_INSTANCE_JSON):
        # Read as UTF-8 explicitly, matching the decoding used for INSTANCE
        # below, instead of depending on the locale's default encoding.
        with open(TASK_INSTANCE_JSON, "r", encoding="utf-8") as f:
            task_instance = json.load(f)
    else:
        encoded_instance = _require_env('INSTANCE')
        task_instance = json.loads(base64.b64decode(encoded_instance).decode('utf-8'))

    log_dir = _require_env('LOG_DIR')
    testbed_name = _require_env('TESTBED_NAME')

    # An unset or empty REPO_DIR falls back to TESTBED.
    repo_dir = os.getenv('REPO_DIR') or os.getenv('TESTBED')
    if not repo_dir:
        logger.error("REPO_DIR environment variable is not set")
        sys.exit(1)

    timeout_env = os.getenv('TIMEOUT')

    main(
        task_instance=task_instance,
        testbed_name=testbed_name,
        repo_dir=repo_dir,
        log_dir=log_dir,
        timeout=int(timeout_env) if timeout_env is not None else None,
        log_suffix=os.getenv('LOG_SUFFIX'),
        image_type=os.getenv('IMAGE_TYPE', 'conda')
    )