diff --git a/openai_api.py b/openai_api.py
index 7225562..a88e491 100644
--- a/openai_api.py
+++ b/openai_api.py
@@ -135,7 +135,7 @@ async def predict(query: str, history: List[List[str]], model_id: str):
             finish_reason=None
         )
         chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
-        yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
+        yield "{}".format(chunk.model_dump_json(exclude_unset=True,exclude_none=True))
 
         current_length = 0
 
@@ -152,7 +152,7 @@ async def predict(query: str, history: List[List[str]], model_id: str):
                 finish_reason=None
             )
             chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
-            yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
+            yield "{}".format(chunk.model_dump_json(exclude_unset=True,exclude_none=True))
 
 
     choice_data = ChatCompletionResponseStreamChoice(
@@ -161,14 +161,14 @@ async def predict(query: str, history: List[List[str]], model_id: str):
         finish_reason="stop"
     )
     chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
-    yield "{}".format(chunk.json(exclude_unset=True, ensure_ascii=False))
+    yield "{}".format(chunk.model_dump_json(exclude_unset=True,exclude_none=True))
     yield '[DONE]'
 
 
 if __name__ == "__main__":
-    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
-    model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).cuda()
+    tokenizer = AutoTokenizer.from_pretrained("chatglm2-6b", trust_remote_code=True)
+    model = AutoModel.from_pretrained("chatglm2-6b", trust_remote_code=True).cuda()
     # 多显卡支持,使用下面两行代替上面一行,将num_gpus改为你实际的显卡数量
     # from utils import load_model_on_gpus
     # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)