-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
235 lines (193 loc) · 8.05 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import sys
import math
from botleague_helpers.utils import find_replace
from datetime import datetime
import time
import traceback
import json
from box import Box
from flask import Flask, jsonify, request, current_app
from google.cloud import firestore
from google.cloud.firestore_v1 import SERVER_TIMESTAMP
from problem_constants import constants
from problem_constants.constants import DIR_DATE_FORMAT
from botleague_helpers.utils import box2json
import common
from common import get_jobs_db, get_config_db
from logs import log
from constants import ON_GAE
from utils import dbox
# Attach the Google Cloud Debugger when its package is installed (i.e. on
# App Engine); silently skip it in local/dev environments where it isn't.
try:
    import googleclouddebugger
    googleclouddebugger.enable()
except ImportError:
    pass
# WSGI application object served by App Engine / the local dev server.
app = Flask(__name__)
def make_error(err, code=400):
    """Build a JSON error response ``{"error": <message>}`` with *code* as
    the HTTP status (default 400)."""
    body = {'error': str(err)}
    response = jsonify(body)
    response.status_code = code
    return response
@log.catch(reraise=True)
@app.route("/")
def index():
    """Landing page describing this service."""
    return ('Deepdrive sim service that serves as a Botleague problem '
            'endpoint and CI service.<br>'
            'Source https://github.com/deepdrive/problem-endpoint <br>'
            f'Botleague host: {constants.BOTLEAGUE_LIAISON_HOST}')
@log.catch(reraise=True)
@app.route('/job/status', methods=['POST'])
def handle_job_status_request():
    """Return the stored status, timestamps, and results for one job."""
    job_id = request.json['job_id']
    # dbox gives default-Box semantics, so missing fields come back empty
    # rather than raising.
    job = dbox(common.get_jobs_db().get(job_id))
    return jsonify(dict(
        id=job_id,
        status=job.status,
        created_at=job.created_at,
        started_at=job.started_at,
        finished_at=job.finished_at,
        results=job.results,
    ))
# Interpreters allowed to serve the localhost-only /jobs debug endpoint.
LOCAL_EXE_WHITELIST = ['/home/c2/anaconda3/envs/bl2/bin/python']
@log.catch(reraise=True)
@app.route('/jobs')
def handle_jobs_request():
    """Localhost-only debug listing of the 100 most recent jobs as JSON.

    Guarded so that the eval keys stored on jobs are not exposed publicly:
    the request must originate from 127.0.0.1, target a local host:port,
    and be served by a whitelisted interpreter.
    """
    from_local = request.remote_addr == '127.0.0.1'
    to_local = request.host in ['0.0.0.0:8000', 'localhost:8000']
    in_whitelist = sys.executable in LOCAL_EXE_WHITELIST
    if not (from_local and to_local and in_whitelist):
        # TODO: Add user-auth or here to protect eval keys
        # https://cloud.google.com/appengine/docs/standard/python/users/
        return 'Only available on localhost'
    db = common.get_jobs_db()
    jobs_ref = db.collection
    query = jobs_ref.order_by(
        'created_at', direction=firestore.Query.DESCENDING).order_by(
        'id', direction=firestore.Query.DESCENDING).limit(100)
    # No need to materialize query.stream() into a list before iterating.
    jobs = [j.to_dict() for j in query.stream()]
    # Replace +/-inf in place so the payload serializes as valid JSON.
    # Plain loop instead of the original side-effect-only comprehensions.
    for job in jobs:
        find_replace(job, math.inf, replace="Infinity")
        find_replace(job, -math.inf, replace="-Infinity")
    ret = json.dumps(jobs, indent=2, default=str, sort_keys=True)
    return current_app.response_class(ret, mimetype="application/json")
@log.catch(reraise=True)
@app.route('/build-deepdrive', methods=['POST'])
def handle_deepdrive_build_request():
    """Kick off a deepdrive build job from a CI webhook."""
    # TODO: Verify that CircleCI initiated the request with some shared secret.
    return start_build(build_type=constants.JOB_TYPE_DEEPDRIVE_BUILD,
                       job_abbr='bdd')
@log.catch(reraise=True)
@app.route('/build', methods=['POST'])
def handle_sim_build_request():
    """Kick off a sim build job from a CI webhook."""
    # TODO: Verify that CircleCI initiated the request with some shared secret.
    return start_build(build_type=constants.JOB_TYPE_SIM_BUILD,
                       job_abbr='bsim')
@log.catch(reraise=True)
@app.route('/eval/<problem_name>', methods=['POST'])
def handle_eval_request(problem_name):
    """Botleague problem endpoint: validate the eval payload and submit
    an eval job for *problem_name*."""
    request_start = time.time()
    log.info(f'Starting eval request {json.dumps(request.json, indent=2)}')
    config = get_config_db()
    if ON_GAE and config.get('DISABLE_EVAL') is True:
        return make_error('Evals are disabled', 423)
    try:
        # Required endpoint parameters; a missing key raises KeyError.
        eval_id = request.json['eval_id']
        eval_key = request.json['eval_key']
        seed = request.json['seed']
        docker_tag = request.json['docker_tag']
        eval_request = Box(request.json, default_box=True)
        max_seconds = eval_request.problem_def.max_seconds or None
        botleague_liaison_host = eval_request.botleague_liaison_host or None
        pull_request = request.json.get('pull_request', None)
    except KeyError as err:
        log.error(traceback.format_exc())
        log.exception('Error getting required params')
        # A required parameter was missing => "400 Bad Request".
        ret = make_error('the parameter {} is required'.format(err.args[0]),
                         400)
    else:
        try:
            ret = submit_eval_job(
                docker_tag, eval_id, eval_key, problem_name,
                pull_request, seed, max_seconds,
                problem_def=eval_request.problem_def,
                full_eval_request=eval_request,
                botleague_liaison_host=botleague_liaison_host)
        except Exception as err:
            # Anything that went wrong inside the endpoint logic
            # => "500 Internal Server Error".
            log.error(traceback.format_exc())
            log.exception('Problem submitting job')
            ret = make_error(err, 500)
    log.info(ret)
    log.info(f'Eval request took {time.time() - request_start} seconds')
    return ret
def submit_eval_job(docker_tag, eval_id, eval_key, problem_name: str,
                    pull_request, seed, max_seconds,
                    problem_def,
                    full_eval_request,
                    botleague_liaison_host=None):
    """Write the eval job record to the jobs DB, exactly once per eval_id.

    Returns a flask response: success JSON when the job was newly created,
    a 403 error when this eval_id was already processed.
    """
    messages = []
    submit_start = time.time()
    jobs_db = get_jobs_db()
    if not max_seconds:
        messages.append(f'max_seconds not set in problem definition, '
                        f'defaulting to '
                        f'{constants.MAX_EVAL_SECONDS_DEFAULT} seconds')
        max_seconds = constants.MAX_EVAL_SECONDS_DEFAULT
    job_id = f'{datetime.utcnow().strftime(DIR_DATE_FORMAT)}_{eval_id}'
    liaison_host = (botleague_liaison_host or
                    constants.BOTLEAGUE_LIAISON_HOST)
    job = Box(
        id=job_id,
        status=constants.JOB_STATUS_CREATED,
        job_type=constants.JOB_TYPE_EVAL,
        botleague_liaison_host=liaison_host,
        created_at=SERVER_TIMESTAMP,
        eval_spec=dict(
            problem=problem_name,
            eval_id=eval_id,
            eval_key=eval_key,
            seed=seed,
            docker_tag=docker_tag,
            pull_request=pull_request,
            max_seconds=max_seconds,
            problem_def=problem_def,
            full_eval_request=full_eval_request,  # TODO: Clean this up.
        ))
    log.info(f'Submitting job {eval_id}: {box2json(job)}')
    # compare_and_swap only writes when no record exists under this key,
    # which makes the submit idempotent per eval_id.
    submitted = jobs_db.compare_and_swap(key=job_id,
                                         expected_current_value=None,
                                         new_value=job.to_dict())
    if submitted:
        for msg in messages:
            log.info(msg)
        ret = jsonify({'success': True, 'message': ' '.join(messages)})
    else:
        ret = make_error(f'eval_id {eval_id} has already been processed', 403)
    log.info(f'Save submitted job took {time.time() - submit_start} '
             f'seconds')
    return ret
def start_build(build_type, job_abbr):
    """Record a new build job from a CI webhook payload.

    *build_type* is one of the constants.JOB_TYPE_* build types and
    *job_abbr* is the short tag embedded in the job id.
    """
    payload = request.json
    commit = payload['commit']
    stamp = datetime.utcnow().strftime(DIR_DATE_FORMAT)
    job_id = f'{stamp}_{job_abbr}_{commit}'
    run_local_debug = dbox(payload).run_local_debug or False
    job = Box(id=job_id,
              commit=commit,
              branch=payload['branch'],
              build_id=payload['build_id'],
              status=constants.JOB_STATUS_CREATED,
              job_type=build_type,
              created_at=SERVER_TIMESTAMP,
              run_local_debug=run_local_debug)
    common.get_jobs_db().set(job_id, job)
    log.success(f'Created job {box2json(job)}')
    return jsonify({'job_id': job_id})
# Module import side effect: register the watch on the botleague host config.
common.add_botleague_host_watch()
if __name__ == "__main__":
    # Local development entry point (App Engine serves `app` via WSGI).
    # Don't use debug mode in production or if you don't want to
    # reload on change.
    app.run(host="0.0.0.0", port=8000, debug=False)