From 8fa9ad4c769ece0c611734e250298eb1f6a3b488 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Wed, 22 Jan 2025 18:23:36 +0800 Subject: [PATCH 01/14] UPDATE: create_embedding add field model_replica --- xinference/api/restful_api.py | 1 + xinference/model/embedding/core.py | 3 ++- xinference/types.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py index e718f842a9..70e11d5a43 100644 --- a/xinference/api/restful_api.py +++ b/xinference/api/restful_api.py @@ -1331,6 +1331,7 @@ async def create_embedding(self, request: Request) -> Response: try: embedding = await model.create_embedding(body.input, **kwargs) + embedding["model"] = model_uid # type: ignore return Response(embedding, media_type="application/json") except Exception as e: e = await self._get_model_last_error(model.uid, e) diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index acf3447350..6a4aad7b5c 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,7 +693,8 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - model=self._model_uid, + model="", + model_replica=self._model_uid, data=embedding_list, usage=usage, ) diff --git a/xinference/types.py b/xinference/types.py index 1e8ca71a02..e002988083 100644 --- a/xinference/types.py +++ b/xinference/types.py @@ -78,6 +78,7 @@ class EmbeddingData(TypedDict): class Embedding(TypedDict): object: Literal["list"] model: str + model_replica: str data: List[EmbeddingData] usage: EmbeddingUsage From db226def2e1780f4ba2fc7d29417317dd3495ccb Mon Sep 17 00:00:00 2001 From: codingl2k1 Date: Wed, 22 Jan 2025 14:15:47 +0100 Subject: [PATCH 02/14] Fix lint --- xinference/api/restful_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py index 70e11d5a43..b17a25c129 100644 --- a/xinference/api/restful_api.py +++ b/xinference/api/restful_api.py @@ -1331,7 +1331,7 @@ async def create_embedding(self, request: Request) -> Response: try: embedding = await model.create_embedding(body.input, **kwargs) - embedding["model"] = model_uid # type: ignore + embedding["model"] = model_uid # type: ignore return Response(embedding, media_type="application/json") except Exception as e: e = await self._get_model_last_error(model.uid, e) From 39747c0e6de88a6242d15eb42614bb7e223078b6 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Thu, 23 Jan 2025 10:34:45 +0800 Subject: [PATCH 03/14] BLD: fix create_embedding field model_replica --- xinference/model/embedding/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 6a4aad7b5c..a69158d57b 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,7 +693,7 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - model="", + # model="", model_replica=self._model_uid, data=embedding_list, usage=usage, From 7e456e1bf3f5505e222b18ef663ed2d75c2743d1 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Fri, 14 Feb 2025 11:19:30 +0800 Subject: [PATCH 04/14] out model='' --- xinference/model/embedding/core.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index a69158d57b..7fbbdaa960 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,7 +693,6 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - # model="", model_replica=self._model_uid, data=embedding_list, usage=usage, From 9146668bcc9ecee85480a20bb6c5e255a8ecb791 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Wed, 19 Feb 2025 11:44:30 +0800 Subject: [PATCH 05/14] embedding_bytes json load dict --- xinference/api/restful_api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py index b17a25c129..6ac948255f 100644 --- a/xinference/api/restful_api.py +++ b/xinference/api/restful_api.py @@ -1330,9 +1330,10 @@ async def create_embedding(self, request: Request) -> Response: raise HTTPException(status_code=500, detail=str(e)) try: - embedding = await model.create_embedding(body.input, **kwargs) + embedding_bytes = await model.create_embedding(body.input, **kwargs) + embedding = json.loads(embedding_bytes.decode("utf-8")) embedding["model"] = model_uid # type: ignore - return Response(embedding, media_type="application/json") + return Response(json.dumps(embedding), media_type="application/json") except Exception as e: e = await self._get_model_last_error(model.uid, e) logger.error(e, exc_info=True) From d6a45f86ced16c04e46906b30aacd7527d5ace1f Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Fri, 21 Feb 2025 16:56:04 +0800 Subject: [PATCH 06/14] FEAT: model_uid from core embedding function --- xinference/api/restful_api.py | 7 +++---- xinference/model/embedding/core.py | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py index 6ac948255f..d60ef392e4 100644 --- a/xinference/api/restful_api.py +++ b/xinference/api/restful_api.py @@ -1330,10 +1330,9 @@ async def create_embedding(self, request: Request) -> Response: raise HTTPException(status_code=500, detail=str(e)) try: - embedding_bytes = await model.create_embedding(body.input, **kwargs) - embedding = json.loads(embedding_bytes.decode("utf-8")) - embedding["model"] = model_uid # type: ignore - return Response(json.dumps(embedding), media_type="application/json") + kwargs["model_uid"] = model_uid + embedding = await model.create_embedding(body.input, **kwargs) + return Response(embedding, media_type="application/json") except Exception as e: e = await self._get_model_last_error(model.uid, e) logger.error(e, exc_info=True) diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 7fbbdaa960..75702be227 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,6 +693,7 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), + model=kwargs.get("model_uid"), model_replica=self._model_uid, data=embedding_list, usage=usage, From e5920e782fc3a2a0fd76096e0f2f53ea3dbac993 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Mon, 24 Feb 2025 18:02:21 +0800 Subject: [PATCH 07/14] add: test embedding field model_replica --- xinference/core/tests/test_restful_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xinference/core/tests/test_restful_api.py b/xinference/core/tests/test_restful_api.py index 510ddcd6d3..2b598020f4 100644 --- a/xinference/core/tests/test_restful_api.py +++ b/xinference/core/tests/test_restful_api.py @@ -356,6 +356,7 @@ def test_restful_api_for_embedding(setup): assert "embedding" in embedding_res["data"][0] assert len(embedding_res["data"][0]["embedding"]) == model_spec.dimensions + assert "model_replica" in embedding_res["data"][0] # test multiple payload = { From 9d449382b7dcefbf191757f1b953ae50524f218b Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Mon, 24 Feb 2025 18:23:43 +0800 Subject: [PATCH 08/14] add: test embedding field model_replica --- xinference/core/tests/test_restful_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/core/tests/test_restful_api.py b/xinference/core/tests/test_restful_api.py index 2b598020f4..50ad071d7b 100644 --- a/xinference/core/tests/test_restful_api.py +++ b/xinference/core/tests/test_restful_api.py @@ -356,7 +356,7 @@ def test_restful_api_for_embedding(setup): assert "embedding" in embedding_res["data"][0] assert len(embedding_res["data"][0]["embedding"]) == model_spec.dimensions - assert "model_replica" in embedding_res["data"][0] + assert "model_replica" in embedding_res.keys() # test multiple payload = { From bc390195962b7520a51b282d969e5b3bd018c704 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Tue, 25 Feb 2025 10:23:30 +0800 Subject: [PATCH 09/14] update: test case of /v1/embeddings --- xinference/core/tests/test_restful_api.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xinference/core/tests/test_restful_api.py b/xinference/core/tests/test_restful_api.py index 50ad071d7b..1cb557785e 100644 --- a/xinference/core/tests/test_restful_api.py +++ b/xinference/core/tests/test_restful_api.py @@ -357,6 +357,8 @@ def test_restful_api_for_embedding(setup): assert "embedding" in embedding_res["data"][0] assert len(embedding_res["data"][0]["embedding"]) == model_spec.dimensions assert "model_replica" in embedding_res.keys() + assert embedding_res["model_replica"] is not None + assert embedding_res["model"] == payload["model"] # test multiple payload = { From 3df83aadc95ad8f43b39fedca7fbdab258ff3258 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Wed, 26 Feb 2025 14:49:28 +0800 Subject: [PATCH 10/14] update: embedding model name return --- xinference/api/restful_api.py | 2 +- xinference/model/embedding/core.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py index d60ef392e4..7681d8dd33 100644 --- a/xinference/api/restful_api.py +++ b/xinference/api/restful_api.py @@ -1330,7 +1330,7 @@ async def create_embedding(self, request: Request) -> Response: raise HTTPException(status_code=500, detail=str(e)) try: - kwargs["model_uid"] = model_uid + # kwargs["model_uid"] = model_uid embedding = await model.create_embedding(body.input, **kwargs) return Response(embedding, media_type="application/json") except Exception as e: diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 75702be227..0f4a6db8aa 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,7 +693,8 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - model=kwargs.get("model_uid"), + # model=kwargs.get("model_uid"), + model="-".join(str(self._model_uid).split("-")[:-1]), model_replica=self._model_uid, data=embedding_list, usage=usage, From b19280403865a433dac24a82812ac6cb3eafc75e Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Thu, 27 Feb 2025 10:40:31 +0800 Subject: [PATCH 11/14] update: set default model uid is --- xinference/api/restful_api.py | 2 +- xinference/model/embedding/core.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py index 7681d8dd33..d60ef392e4 100644 --- a/xinference/api/restful_api.py +++ b/xinference/api/restful_api.py @@ -1330,7 +1330,7 @@ async def create_embedding(self, request: Request) -> Response: raise HTTPException(status_code=500, detail=str(e)) try: - # kwargs["model_uid"] = model_uid + kwargs["model_uid"] = model_uid embedding = await model.create_embedding(body.input, **kwargs) return Response(embedding, media_type="application/json") except Exception as e: diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 0f4a6db8aa..97ecce104d 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,8 +693,7 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - # model=kwargs.get("model_uid"), - model="-".join(str(self._model_uid).split("-")[:-1]), + model=self._kwargs.get("model_uid",""), model_replica=self._model_uid, data=embedding_list, usage=usage, From 1eb18fd2757290fe2c80a9428c45d9e5f5b0b70a Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Thu, 27 Feb 2025 18:31:37 +0800 Subject: [PATCH 12/14] update: set default model uid is --- xinference/model/embedding/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 97ecce104d..82273370da 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,7 +693,7 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - model=self._kwargs.get("model_uid",""), + model=self._kwargs.get("model_uid", ""), model_replica=self._model_uid, data=embedding_list, usage=usage, From ba7619c086ae554934fdb6ded0aa6eebc87ca948 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Fri, 28 Feb 2025 15:53:32 +0800 Subject: [PATCH 13/14] add: test_restful_api_for_embedding run passed --- xinference/core/tests/test_restful_api.py | 3 ++- xinference/model/embedding/core.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/xinference/core/tests/test_restful_api.py b/xinference/core/tests/test_restful_api.py index 1cb557785e..1e8295f7e5 100644 --- a/xinference/core/tests/test_restful_api.py +++ b/xinference/core/tests/test_restful_api.py @@ -353,10 +353,11 @@ def test_restful_api_for_embedding(setup): } response = requests.post(url, json=payload) embedding_res = response.json() + print("embedding_res: ", embedding_res) assert "embedding" in embedding_res["data"][0] assert len(embedding_res["data"][0]["embedding"]) == model_spec.dimensions - assert "model_replica" in embedding_res.keys() + assert "model_replica" in embedding_res assert embedding_res["model_replica"] is not None assert embedding_res["model"] == payload["model"] diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py index 82273370da..de689842a1 100644 --- a/xinference/model/embedding/core.py +++ b/xinference/model/embedding/core.py @@ -693,7 +693,7 @@ def base64_to_image(base64_str: str) -> Image.Image: if not is_bge_m3_flag_model and not kwargs.get("return_sparse") else "dict" ), - model=self._kwargs.get("model_uid", ""), + model=kwargs.get("model_uid"), # type: ignore model_replica=self._model_uid, data=embedding_list, usage=usage, From 3088b43504b2905abd6c7b033b9929a812c59f40 Mon Sep 17 00:00:00 2001 From: zhoudelong <1727964916@qq.com> Date: Fri, 28 Feb 2025 18:10:07 +0800 Subject: [PATCH 14/14] remove print code --- xinference/core/tests/test_restful_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xinference/core/tests/test_restful_api.py b/xinference/core/tests/test_restful_api.py index 1e8295f7e5..4270f23517 100644 --- a/xinference/core/tests/test_restful_api.py +++ b/xinference/core/tests/test_restful_api.py @@ -353,7 +353,6 @@ def test_restful_api_for_embedding(setup): } response = requests.post(url, json=payload) embedding_res = response.json() - print("embedding_res: ", embedding_res) assert "embedding" in embedding_res["data"][0] assert len(embedding_res["data"][0]["embedding"]) == model_spec.dimensions