server : add system_fingerprint to chat/completion (#10917)
* server : add system_fingerprint to chat/completion

* update README
ngxson authored Dec 23, 2024
1 parent 86bf31c commit 485dc01
Showing 4 changed files with 25 additions and 15 deletions.
3 changes: 2 additions & 1 deletion examples/server/README.md
````diff
@@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
   },
   "total_slots": 1,
   "model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
-  "chat_template": "..."
+  "chat_template": "...",
+  "build_info": "b(build number)-(build commit hash)"
 }
 ```
````
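Once the server is rebuilt with this change, the new field can be checked directly against the public `/props` endpoint. A minimal sketch in Python using the `requests` library (the host and port assume the server's defaults; adjust to your setup):

```python
import requests

# Query the public /props endpoint; host and port assume llama-server defaults.
props = requests.get("http://localhost:8080/props").json()

# build_info is composed as "b<build number>-<build commit hash>".
print("server build:", props["build_info"])
```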
32 changes: 18 additions & 14 deletions examples/server/server.cpp
```diff
@@ -595,10 +595,11 @@ struct server_task_result_cmpl_final : server_task_result {
         std::time_t t = std::time(0);
 
         json res = json {
-            {"choices", json::array({choice})},
-            {"created", t},
-            {"model", oaicompat_model},
-            {"object", "chat.completion"},
+            {"choices",            json::array({choice})},
+            {"created",            t},
+            {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
+            {"object",             "chat.completion"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
                 {"prompt_tokens",     n_prompt_tokens},
@@ -632,11 +633,12 @@ struct server_task_result_cmpl_final : server_task_result {
         };
 
         json ret = json {
-            {"choices", json::array({choice})},
-            {"created", t},
-            {"id", oaicompat_cmpl_id},
-            {"model", oaicompat_model},
-            {"object", "chat.completion.chunk"},
+            {"choices",            json::array({choice})},
+            {"created",            t},
+            {"id",                 oaicompat_cmpl_id},
+            {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
+            {"object",             "chat.completion.chunk"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
                 {"prompt_tokens",     n_prompt_tokens},
@@ -761,11 +763,12 @@ struct server_task_result_cmpl_partial : server_task_result {
         }
 
         json ret = json {
-            {"choices", choices},
-            {"created", t},
-            {"id", oaicompat_cmpl_id},
-            {"model", oaicompat_model},
-            {"object", "chat.completion.chunk"}
+            {"choices",            choices},
+            {"created",            t},
+            {"id",                 oaicompat_cmpl_id},
+            {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
+            {"object",             "chat.completion.chunk"}
         };
 
         if (timings.prompt_n >= 0) {
@@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
             { "total_slots",   ctx_server.params_base.n_parallel },
             { "model_path",    ctx_server.params_base.model },
             { "chat_template", llama_get_chat_template(ctx_server.model) },
+            { "build_info",    build_info },
         };
 
         res_ok(res, data);
```
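With the three result types patched (final completion, final chunk, and partial chunk), every OAI-compatible response now carries the server build identifier. A hedged sketch of reading it from a non-streamed response, again with `requests` (URL and payload are illustrative):

```python
import requests

# Minimal OAI-compatible chat request; URL assumes llama-server defaults.
res = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 8,
    },
).json()

print(res["object"])              # "chat.completion"
print(res["system_fingerprint"])  # e.g. "b1234-abc1234" (hypothetical value)
```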
3 changes: 3 additions & 0 deletions examples/server/tests/unit/test_chat_completion.py
```diff
@@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_content
     })
     assert res.status_code == 200
     assert "cmpl" in res.body["id"]  # make sure the completion id has the expected format
+    assert res.body["system_fingerprint"].startswith("b")
     assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
@@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_content
     last_cmpl_id = None
     for data in res:
         choice = data["choices"][0]
+        assert data["system_fingerprint"].startswith("b")
         assert "gpt-3.5" in data["model"]  # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]
@@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
         seed=42,
         temperature=0.8,
     )
+    assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
     assert res.choices[0].finish_reason == "length"
     assert res.choices[0].message.content is not None
     assert match_regex("(Suddenly)+", res.choices[0].message.content)
```
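The same field is reachable through the official `openai` Python client, including on streamed chunks, mirroring what the streaming test asserts. A sketch assuming a local server with a dummy API key (base URL and model name are illustrative):

```python
from openai import OpenAI

# Point the official client at the local llama-server; the api_key is a dummy.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key")

stream = client.chat.completions.create(
    model="gpt-3.5-turbo",  # placeholder name; the server uses its loaded model
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=8,
    stream=True,
)

# Each ChatCompletionChunk exposes system_fingerprint, as the test above checks.
for chunk in stream:
    assert chunk.system_fingerprint.startswith("b")
```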
2 changes: 2 additions & 0 deletions examples/server/utils.hpp
```diff
@@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & default_value) {
     }
 }
 
+const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
+
 //
 // tokenizer and input processing utils
 //
```
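Because `build_info` is assembled at compile time from `LLAMA_BUILD_NUMBER` and `LLAMA_COMMIT`, a client can split the fingerprint back into its parts. A small parsing sketch; the regex is an assumption derived from the construction above (a commit string that is not plain lowercase hex would not match):

```python
import re

# Expected shape: "b<build number>-<commit hash>", e.g. "b1234-abc1234" (hypothetical).
FINGERPRINT_RE = re.compile(r"^b(\d+)-([0-9a-f]+)$")

def parse_fingerprint(fp: str) -> tuple[int, str]:
    """Split a system_fingerprint into (build number, commit hash)."""
    m = FINGERPRINT_RE.match(fp)
    if m is None:
        raise ValueError(f"unexpected fingerprint format: {fp!r}")
    return int(m.group(1)), m.group(2)
```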
