From 485dc01214f266afff7004bc702498b491abc404 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 23 Dec 2024 12:02:44 +0100 Subject: [PATCH] server : add system_fingerprint to chat/completion (#10917) * server : add system_fingerprint to chat/completion * update README --- examples/server/README.md | 3 +- examples/server/server.cpp | 32 +++++++++++-------- .../server/tests/unit/test_chat_completion.py | 3 ++ examples/server/utils.hpp | 2 ++ 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 6d64656926250..5e3d6a6e643a6 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make }, "total_slots": 1, "model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", - "chat_template": "..." + "chat_template": "...", + "build_info": "b(build number)-(build commit hash)" } ``` diff --git a/examples/server/server.cpp b/examples/server/server.cpp index fa3682a920649..c571ed3c104d4 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -595,10 +595,11 @@ struct server_task_result_cmpl_final : server_task_result { std::time_t t = std::time(0); json res = json { - {"choices", json::array({choice})}, - {"created", t}, - {"model", oaicompat_model}, - {"object", "chat.completion"}, + {"choices", json::array({choice})}, + {"created", t}, + {"model", oaicompat_model}, + {"system_fingerprint", build_info}, + {"object", "chat.completion"}, {"usage", json { {"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -632,11 +633,12 @@ struct server_task_result_cmpl_final : server_task_result { }; json ret = json { - {"choices", json::array({choice})}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"object", "chat.completion.chunk"}, + {"choices", json::array({choice})}, + {"created", t}, + {"id", oaicompat_cmpl_id}, + {"model", oaicompat_model}, + {"system_fingerprint", build_info}, + {"object", "chat.completion.chunk"}, {"usage", json { {"completion_tokens", n_decoded}, {"prompt_tokens", n_prompt_tokens}, @@ -761,11 +763,12 @@ struct server_task_result_cmpl_partial : server_task_result { } json ret = json { - {"choices", choices}, - {"created", t}, - {"id", oaicompat_cmpl_id}, - {"model", oaicompat_model}, - {"object", "chat.completion.chunk"} + {"choices", choices}, + {"created", t}, + {"id", oaicompat_cmpl_id}, + {"model", oaicompat_model}, + {"system_fingerprint", build_info}, + {"object", "chat.completion.chunk"} }; if (timings.prompt_n >= 0) { @@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) { { "total_slots", ctx_server.params_base.n_parallel }, { "model_path", ctx_server.params_base.model }, { "chat_template", llama_get_chat_template(ctx_server.model) }, + { "build_info", build_info }, }; res_ok(res, data); diff --git a/examples/server/tests/unit/test_chat_completion.py b/examples/server/tests/unit/test_chat_completion.py index 0fa1a17c1f50a..88549708113e9 100644 --- a/examples/server/tests/unit/test_chat_completion.py +++ b/examples/server/tests/unit/test_chat_completion.py @@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte }) assert res.status_code == 200 assert "cmpl" in res.body["id"] # make sure the completion id has the expected format + assert res.body["system_fingerprint"].startswith("b") assert res.body["model"] == model if model is not None else server.model_alias assert res.body["usage"]["prompt_tokens"] == n_prompt assert res.body["usage"]["completion_tokens"] == n_predicted @@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte last_cmpl_id = None for data in res: choice = data["choices"][0] + assert data["system_fingerprint"].startswith("b") assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future if last_cmpl_id is None: last_cmpl_id = data["id"] @@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library(): seed=42, temperature=0.8, ) + assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b") assert res.choices[0].finish_reason == "length" assert res.choices[0].message.content is not None assert match_regex("(Suddenly)+", res.choices[0].message.content) diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp index 94bb285b6f2d1..1987acac89159 100644 --- a/examples/server/utils.hpp +++ b/examples/server/utils.hpp @@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & defaul } } +const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT); + // // tokenizer and input processing utils //