8
8
from urllib .parse import urljoin
9
9
from datetime import datetime
10
10
11
- import teuthology
11
+ import teuthology .lock .ops as lock_ops
12
+ import teuthology .nuke as nuke
13
+
12
14
from teuthology import report
13
15
from teuthology import safepath
14
16
from teuthology .config import config as teuth_config
15
17
from teuthology .exceptions import SkipJob , MaxWhileTries
16
18
from teuthology import setup_log_file , install_except_hook
17
- from teuthology .lock .ops import reimage_machines
18
19
from teuthology .misc import get_user , archive_logs , compress_logs
19
20
from teuthology .config import FakeNamespace
20
21
from teuthology .job_status import get_status
21
- from teuthology .nuke import nuke
22
22
from teuthology .kill import kill_job
23
23
from teuthology .task .internal import add_remotes
24
24
from teuthology .misc import decanonicalize_hostname as shortname
@@ -165,6 +165,7 @@ def failure_is_reimage(failure_reason):
165
165
else :
166
166
return False
167
167
168
+
168
169
def check_for_reimage_failures_and_mark_down (targets , count = 10 ):
169
170
# Grab paddles history of jobs in the machine
170
171
# and count the number of reimaging errors
@@ -173,9 +174,8 @@ def check_for_reimage_failures_and_mark_down(targets, count=10):
173
174
for k , _ in targets .items ():
174
175
machine = k .split ('@' )[- 1 ]
175
176
url = urljoin (
176
- base_url ,
177
- '/nodes/{0}/jobs/?count={1}' .format (
178
- machine , count )
177
+ base_url ,
178
+ '/nodes/{0}/jobs/?count={1}' .format (machine , count )
179
179
)
180
180
resp = requests .get (url )
181
181
jobs = resp .json ()
@@ -189,15 +189,16 @@ def check_for_reimage_failures_and_mark_down(targets, count=10):
189
189
continue
190
190
# Mark machine down
191
191
machine_name = shortname (k )
192
- teuthology . lock . ops .update_lock (
193
- machine_name ,
194
- description = 'reimage failed {0} times' .format (count ),
195
- status = 'down' ,
196
- )
192
+ lock_ops .update_lock (
193
+ machine_name ,
194
+ description = 'reimage failed {0} times' .format (count ),
195
+ status = 'down' ,
196
+ )
197
197
log .error (
198
198
'Reimage failed {0} times ... marking machine down' .format (count )
199
199
)
200
200
201
+
201
202
def reimage (job_config ):
202
203
# Reimage the targets specified in job config
203
204
# and update their keys in config after reimaging
@@ -206,12 +207,15 @@ def reimage(job_config):
206
207
report .try_push_job_info (ctx .config , dict (status = 'waiting' ))
207
208
targets = job_config ['targets' ]
208
209
try :
209
- reimaged = reimage_machines (ctx , targets , job_config ['machine_type' ])
210
+ reimaged = lock_ops . reimage_machines (ctx , targets , job_config ['machine_type' ])
210
211
except Exception as e :
211
212
log .exception ('Reimaging error. Nuking machines...' )
212
213
# Reimage failures should map to the 'dead' status instead of 'fail'
213
- report .try_push_job_info (ctx .config , dict (status = 'dead' , failure_reason = 'Error reimaging machines: ' + str (e )))
214
- nuke (ctx , True )
214
+ report .try_push_job_info (
215
+ ctx .config ,
216
+ dict (status = 'dead' , failure_reason = 'Error reimaging machines: ' + str (e ))
217
+ )
218
+ nuke .nuke (ctx , True )
215
219
# Machine that fails to reimage after 10 times will be marked down
216
220
check_for_reimage_failures_and_mark_down (targets )
217
221
raise
@@ -241,18 +245,20 @@ def unlock_targets(job_config):
241
245
if not locked :
242
246
return
243
247
job_status = get_status (job_info )
244
- if job_status == 'pass' or \
245
- ( job_config . get ( 'unlock_on_failure' , False ) and not job_config .get ('nuke-on-error' , False )):
248
+ if job_status == 'pass' or ( job_config . get ( 'unlock_on_failure' , False )
249
+ and not job_config .get ('nuke-on-error' , False )):
246
250
log .info ('Unlocking machines...' )
247
251
fake_ctx = create_fake_context (job_config )
248
252
for machine in locked :
249
- teuthology .lock .ops .unlock_one (fake_ctx ,
250
- machine , job_info ['owner' ],
251
- job_info ['archive_path' ])
253
+ lock_ops .unlock_one (
254
+ fake_ctx ,
255
+ machine , job_info ['owner' ],
256
+ job_info ['archive_path' ]
257
+ )
252
258
if job_status != 'pass' and job_config .get ('nuke-on-error' , False ):
253
259
log .info ('Nuking machines...' )
254
260
fake_ctx = create_fake_context (job_config )
255
- nuke (fake_ctx , True )
261
+ nuke . nuke (fake_ctx , True )
256
262
257
263
258
264
def run_with_watchdog (process , job_config ):
@@ -316,7 +322,8 @@ def run_with_watchdog(process, job_config):
316
322
extra_info = dict (status = 'dead' )
317
323
if hit_max_timeout :
318
324
extra_info ['failure_reason' ] = 'hit max job timeout'
319
- report .try_push_job_info (job_info , extra_info )
325
+ if not (job_config .get ('first_in_suite' ) or job_config .get ('last_in_suite' )):
326
+ report .try_push_job_info (job_info , extra_info )
320
327
321
328
322
329
def create_fake_context (job_config , block = False ):
0 commit comments