ENH: allow rerank models to run with LLM models on the same device (#741)

xorbitsai · Dec 8, 2023 · b5a5f0a · b5a5f0a
1 parent 3b9b3a6
commit b5a5f0a
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/xinference/core/worker.py b/xinference/core/worker.py
@@ -195,7 +195,7 @@ async def _create_subpool(
             gpu_cnt = n_gpu if isinstance(n_gpu, int) else 1
             devices = (
                 [await self.allocate_devices_for_embedding(model_uid)]
-                if model_type == "embedding"
+                if model_type in ["embedding", "rerank"]
                 else self.allocate_devices(model_uid=model_uid, n_gpu=gpu_cnt)
             )
             env["CUDA_VISIBLE_DEVICES"] = ",".join([str(dev) for dev in devices])