Skip to content

Commit fab46ca

Browse files
committed
server/bench:
- fix when prometheus is not started
- wait for the server to be ready before starting the bench
1 parent 1bf38cf commit fab46ca

File tree

1 file changed

+18
-9
lines changed

1 file changed

+18
-9
lines changed

examples/server/bench/bench.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -189,12 +189,12 @@ def main(args_in: list[str] | None = None) -> None:
189189
"pp": {
190190
"p95": round(data['metrics']["llamacpp_prompt_processing_second"]["p(95)"], 2),
191191
"avg": round(data['metrics']["llamacpp_prompt_processing_second"]["avg"], 2),
192-
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2),
192+
"0": round(mean(prometheus_metrics['prompt_tokens_seconds']), 2) if 'prompt_tokens_seconds' in prometheus_metrics else 0,
193193
},
194194
"tg": {
195195
"p95": round(data['metrics']["llamacpp_tokens_second"]["p(95)"], 2),
196196
"avg": round(data['metrics']["llamacpp_tokens_second"]["avg"], 2),
197-
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2),
197+
"0": round(mean(prometheus_metrics['predicted_tokens_seconds']), 2) if 'predicted_tokens_seconds' in prometheus_metrics else 0,
198198
},
199199
}
200200
with open("results.github.env", 'a') as github_env:
@@ -234,7 +234,7 @@ def start_server(args):
234234
server_process = start_server_background(args)
235235

236236
attempts = 0
237-
max_attempts = 20
237+
max_attempts = 600
238238
if 'GITHUB_ACTIONS' in os.environ:
239239
max_attempts *= 2
240240

@@ -245,7 +245,15 @@ def start_server(args):
245245
print(f"bench: waiting for server to start ...")
246246
time.sleep(0.5)
247247

248-
print("bench: server started.")
248+
attempts = 0
249+
while not is_server_ready(args.host, args.port):
250+
attempts += 1
251+
if attempts > max_attempts:
252+
assert False, "server not ready"
253+
print(f"bench: waiting for server to be ready ...")
254+
time.sleep(0.5)
255+
256+
print("bench: server started and ready.")
249257
return server_process
250258

251259

@@ -258,11 +266,6 @@ def start_server_background(args):
258266
'--host', args.host,
259267
'--port', args.port,
260268
]
261-
model_file = args.model_path_prefix + os.path.sep + args.hf_file
262-
model_dir = os.path.dirname(model_file)
263-
if not os.path.exists(model_dir):
264-
os.makedirs(model_dir)
265-
server_args.extend(['--model', model_file])
266269
server_args.extend(['--hf-repo', args.hf_repo])
267270
server_args.extend(['--hf-file', args.hf_file])
268271
server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
@@ -306,6 +309,12 @@ def is_server_listening(server_fqdn, server_port):
306309
return _is_server_listening
307310

308311

312+
def is_server_ready(server_fqdn, server_port, timeout=5):
    """Report whether the server's ``/health`` endpoint answers with HTTP 200.

    Args:
        server_fqdn: Host name or address used to build the health URL.
        server_port: Port used to build the health URL.
        timeout: Seconds to wait for the HTTP request before giving up on
            this probe.

    Returns:
        True if ``GET http://<server_fqdn>:<server_port>/health`` returned
        status 200; False for any other status, or when the request could
        not be completed (connection error or timeout).
    """
    url = f"http://{server_fqdn}:{server_port}/health"
    try:
        # Without a timeout, requests waits indefinitely for a response, so a
        # single stuck probe would defeat the caller's retry limit.
        response = requests.get(url, timeout=timeout)
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        # The function is polled in a retry loop: an unreachable or stalled
        # server simply means "not ready yet" rather than a fatal error.
        return False
    return response.status_code == 200
316+
317+
309318
def escape_metric_name(metric_name):
    """Return *metric_name* upper-cased, with every character other than
    ``A``-``Z`` and ``0``-``9`` replaced by an underscore."""
    upper_name = metric_name.upper()
    return re.sub('[^A-Z0-9]', '_', upper_name)
311320

0 commit comments

Comments
 (0)