@@ -189,12 +189,12 @@ def main(args_in: list[str] | None = None) -> None:
189
189
"pp" : {
190
190
"p95" : round (data ['metrics' ]["llamacpp_prompt_processing_second" ]["p(95)" ], 2 ),
191
191
"avg" : round (data ['metrics' ]["llamacpp_prompt_processing_second" ]["avg" ], 2 ),
192
- "0" : round (mean (prometheus_metrics ['prompt_tokens_seconds' ]), 2 ),
192
+ "0" : round (mean (prometheus_metrics ['prompt_tokens_seconds' ]), 2 ) if 'prompt_tokens_seconds' in prometheus_metrics else 0 ,
193
193
},
194
194
"tg" : {
195
195
"p95" : round (data ['metrics' ]["llamacpp_tokens_second" ]["p(95)" ], 2 ),
196
196
"avg" : round (data ['metrics' ]["llamacpp_tokens_second" ]["avg" ], 2 ),
197
- "0" : round (mean (prometheus_metrics ['predicted_tokens_seconds' ]), 2 ),
197
+ "0" : round (mean (prometheus_metrics ['predicted_tokens_seconds' ]), 2 ) if 'predicted_tokens_seconds' in prometheus_metrics else 0 ,
198
198
},
199
199
}
200
200
with open ("results.github.env" , 'a' ) as github_env :
@@ -234,7 +234,7 @@ def start_server(args):
234
234
server_process = start_server_background (args )
235
235
236
236
attempts = 0
237
- max_attempts = 20
237
+ max_attempts = 600
238
238
if 'GITHUB_ACTIONS' in os .environ :
239
239
max_attempts *= 2
240
240
@@ -245,7 +245,15 @@ def start_server(args):
245
245
print (f"bench: waiting for server to start ..." )
246
246
time .sleep (0.5 )
247
247
248
- print ("bench: server started." )
248
+ attempts = 0
249
+ while not is_server_ready (args .host , args .port ):
250
+ attempts += 1
251
+ if attempts > max_attempts :
252
+ assert False , "server not ready"
253
+ print (f"bench: waiting for server to be ready ..." )
254
+ time .sleep (0.5 )
255
+
256
+ print ("bench: server started and ready." )
249
257
return server_process
250
258
251
259
@@ -258,11 +266,6 @@ def start_server_background(args):
258
266
'--host' , args .host ,
259
267
'--port' , args .port ,
260
268
]
261
- model_file = args .model_path_prefix + os .path .sep + args .hf_file
262
- model_dir = os .path .dirname (model_file )
263
- if not os .path .exists (model_dir ):
264
- os .makedirs (model_dir )
265
- server_args .extend (['--model' , model_file ])
266
269
server_args .extend (['--hf-repo' , args .hf_repo ])
267
270
server_args .extend (['--hf-file' , args .hf_file ])
268
271
server_args .extend (['--n-gpu-layers' , args .n_gpu_layers ])
@@ -306,6 +309,12 @@ def is_server_listening(server_fqdn, server_port):
306
309
return _is_server_listening
307
310
308
311
312
def is_server_ready(server_fqdn, server_port, timeout=5.0):
    """Return True when the server's ``/health`` endpoint answers HTTP 200.

    Args:
        server_fqdn: Host name or address of the server.
        server_port: TCP port the server listens on.
        timeout: Seconds to wait for the HTTP request before giving up on
            this probe.

    Returns:
        True if ``GET /health`` returned status 200; False for any other
        status code or if the request failed (connection refused, timeout,
        and so on).
    """
    # No whitespace may appear inside the URL, otherwise the host/port parse
    # incorrectly and the request can never succeed.
    url = f"http://{server_fqdn}:{server_port}/health"
    try:
        # A bounded timeout keeps a stalled server from hanging the caller's
        # polling loop on a single probe.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # A failed probe means "not ready yet"; the caller decides how many
        # times to retry.
        return False
    return response.status_code == 200
316
+
317
+
309
318
def escape_metric_name(metric_name):
    """Upper-case *metric_name* and replace every character outside A-Z / 0-9 with ``_``."""
    allowed = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    uppercased = metric_name.upper()
    # Work on the upper-cased text so the whitelist only needs capitals.
    return "".join(ch if ch in allowed else "_" for ch in uppercased)
311
320
0 commit comments