Skip to content
25 changes: 24 additions & 1 deletion xinference/model/llm/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,30 @@ def _sanitize_generate_config(

@classmethod
def check_lib(cls) -> bool:
    """Report whether the vLLM engine can be used in this environment.

    Returns:
        ``True`` only when every probe succeeds: the ``vllm`` package can be
        located, it imports and exposes a non-empty ``__version__``, that
        version is at least 0.3.0, and ``torch.cuda.is_available()`` is
        true. ``False`` otherwise, including when any probe raises.
    """
    # Cheap probe first: locate the package before paying for the import.
    if importlib.util.find_spec("vllm") is None:
        return False

    try:
        import vllm

        installed_version = getattr(vllm, "__version__", None)
        if not installed_version:
            return False

        # Reject releases older than the minimum version checked here.
        from packaging import version

        if version.parse(installed_version) < version.parse("0.3.0"):
            return False

        # The engine is only reported usable when torch sees a CUDA device.
        import torch

        if not torch.cuda.is_available():
            return False

        return True
    except Exception:
        # Deliberate best-effort: a failing import or an unparsable version
        # string means "engine unavailable", not an error for the caller.
        return False

@classmethod
def match_json(
Expand Down
254 changes: 238 additions & 16 deletions xinference/model/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import asyncio
import functools
import importlib.util
import json
import logging
import os
Expand Down Expand Up @@ -429,44 +430,265 @@ def __exit__(self, exc_type, exc_val, exc_tb):

def get_engine_params_by_name(
model_type: Optional[str], model_name: str
) -> Optional[Dict[str, List[dict]]]:
) -> Optional[Dict[str, Union[List[Dict[str, Any]], str]]]:
engine_params: Dict[str, Any] = {}

if model_type == "LLM":
from .llm.llm_family import LLM_ENGINES
from .llm.llm_family import LLM_ENGINES, SUPPORTED_ENGINES

if model_name not in LLM_ENGINES:
return None

# filter llm_class
engine_params = deepcopy(LLM_ENGINES[model_name])
for engine, params in engine_params.items():
# Get all supported engines, not just currently available ones
all_supported_engines = list(SUPPORTED_ENGINES.keys())

# First add currently available engine parameters
available_engines = deepcopy(LLM_ENGINES[model_name])
for engine, params in available_engines.items():
for param in params:
del param["llm_class"]
# Remove previous available attribute as available engines don't need this flag
if "available" in param:
del param["available"]
engine_params[engine] = params

# Check unavailable engines
for engine_name in all_supported_engines:
if engine_name not in engine_params: # Engine not in available list
try:
llm_engine_classes = SUPPORTED_ENGINES[engine_name]
error_msg = None

# Try to find specific error reasons
for engine_class in llm_engine_classes:
try:
if hasattr(engine_class, "check_lib"):
lib_available: bool = engine_class.check_lib() # type: ignore[assignment]
if not lib_available:
error_msg = (
f"Engine {engine_name} library is not available"
)
break
else:
# If no check_lib method, try import check
module_name = engine_name.lower().replace(".", "")
if engine_name == "vLLM":
module_name = "vllm"
elif engine_name == "SGLang":
module_name = "sglang"
elif engine_name == "llama.cpp":
module_name = "llama_cpp"
elif engine_name == "MLX":
module_name = "mlx"
elif engine_name == "LMDEPLOY":
module_name = "lmdeploy"
elif engine_name == "Transformers":
module_name = "transformers"

importlib.import_module(module_name)
break
except ImportError as e:
error_msg = f"Engine {engine_name} library is not installed: {str(e)}"
except Exception as e:
error_msg = (
f"Engine {engine_name} is not available: {str(e)}"
)

if error_msg is None:
error_msg = f"Engine {engine_name} is not compatible with current model or environment"

# For unavailable engines, directly return error message string
engine_params[engine_name] = error_msg

except Exception as e:
# If exception occurs during checking, return error message string
engine_params[engine_name] = (
f"Error checking engine {engine_name}: {str(e)}"
)

# Filter out llm_class field
for engine, params in engine_params.items():
if isinstance(
params, list
): # Only process parameter lists of available engines
for param in params:
if "llm_class" in param:
del param["llm_class"]

return engine_params
elif model_type == "embedding":
from .embedding.embed_family import EMBEDDING_ENGINES
from .embedding.embed_family import (
EMBEDDING_ENGINES,
)
from .embedding.embed_family import (
SUPPORTED_ENGINES as EMBEDDING_SUPPORTED_ENGINES,
)

if model_name not in EMBEDDING_ENGINES:
return None

# filter embedding_class
engine_params = deepcopy(EMBEDDING_ENGINES[model_name])
for engine, params in engine_params.items():
# Get all supported engines, not just currently available ones
all_supported_engines = list(EMBEDDING_SUPPORTED_ENGINES.keys())

# First add currently available engine parameters
available_engines = deepcopy(EMBEDDING_ENGINES[model_name])
for engine, params in available_engines.items():
for param in params:
del param["embedding_class"]
# Remove previous available attribute as available engines don't need this flag
if "available" in param:
del param["available"]
engine_params[engine] = params

# Check unavailable engines
for engine_name in all_supported_engines:
if engine_name not in engine_params: # Engine not in available list
try:
embedding_engine_classes = EMBEDDING_SUPPORTED_ENGINES[engine_name]
error_msg = None

# Try to find specific error reasons
for embedding_engine_class in embedding_engine_classes:
try:
if hasattr(embedding_engine_class, "check_lib"):
embedding_lib_available: bool = embedding_engine_class.check_lib() # type: ignore[assignment]
if not embedding_lib_available:
error_msg = (
f"Engine {engine_name} library is not available"
)
break
else:
# If no check_lib method, try import check
module_name = engine_name.lower().replace(".", "")
if engine_name == "vLLM":
module_name = "vllm"
elif engine_name == "SGLang":
module_name = "sglang"
elif engine_name == "llama.cpp":
module_name = "llama_cpp"
elif engine_name == "MLX":
module_name = "mlx"
elif engine_name == "LMDEPLOY":
module_name = "lmdeploy"
elif engine_name == "Transformers":
module_name = "transformers"
elif engine_name == "SentenceTransformers":
module_name = "sentence_transformers"

importlib.import_module(module_name)
break
except ImportError as e:
error_msg = f"Engine {engine_name} library is not installed: {str(e)}"
except Exception as e:
error_msg = (
f"Engine {engine_name} is not available: {str(e)}"
)

if error_msg is None:
error_msg = f"Engine {engine_name} is not compatible with current model or environment"

# For unavailable engines, directly return error message string
engine_params[engine_name] = error_msg

except Exception as e:
# If exception occurs during checking, return error message string
engine_params[engine_name] = (
f"Error checking engine {engine_name}: {str(e)}"
)

# Filter out embedding_class field
for engine, params in engine_params.items():
if isinstance(
params, list
): # Only process parameter lists of available engines
for param in params:
if "embedding_class" in param:
del param["embedding_class"]

return engine_params
elif model_type == "rerank":
from .rerank.rerank_family import RERANK_ENGINES
from .rerank.rerank_family import (
RERANK_ENGINES,
)
from .rerank.rerank_family import SUPPORTED_ENGINES as RERANK_SUPPORTED_ENGINES

if model_name not in RERANK_ENGINES:
return None

# filter rerank_class
engine_params = deepcopy(RERANK_ENGINES[model_name])
for engine, params in engine_params.items():
# Get all supported engines, not just currently available ones
all_supported_engines = list(RERANK_SUPPORTED_ENGINES.keys())

# First add currently available engine parameters
available_engines = deepcopy(RERANK_ENGINES[model_name])
for engine, params in available_engines.items():
for param in params:
del param["rerank_class"]
# Remove previous available attribute as available engines don't need this flag
if "available" in param:
del param["available"]
engine_params[engine] = params

# Check unavailable engines
for engine_name in all_supported_engines:
if engine_name not in engine_params: # Engine not in available list
try:
rerank_engine_classes = RERANK_SUPPORTED_ENGINES[engine_name]
error_msg = None

# Try to find specific error reasons
for rerank_engine_class in rerank_engine_classes:
try:
if hasattr(rerank_engine_class, "check_lib"):
rerank_lib_available: bool = rerank_engine_class.check_lib() # type: ignore[assignment]
if not rerank_lib_available:
error_msg = (
f"Engine {engine_name} library is not available"
)
break
else:
# If no check_lib method, try import check
module_name = engine_name.lower().replace(".", "")
if engine_name == "vLLM":
module_name = "vllm"
elif engine_name == "SGLang":
module_name = "sglang"
elif engine_name == "llama.cpp":
module_name = "llama_cpp"
elif engine_name == "MLX":
module_name = "mlx"
elif engine_name == "LMDEPLOY":
module_name = "lmdeploy"
elif engine_name == "Transformers":
module_name = "transformers"
elif engine_name == "SentenceTransformers":
module_name = "sentence_transformers"

importlib.import_module(module_name)
break
except ImportError as e:
error_msg = f"Engine {engine_name} library is not installed: {str(e)}"
except Exception as e:
error_msg = (
f"Engine {engine_name} is not available: {str(e)}"
)

if error_msg is None:
error_msg = f"Engine {engine_name} is not compatible with current model or environment"

# For unavailable engines, directly return error message string
engine_params[engine_name] = error_msg

except Exception as e:
# If exception occurs during checking, return error message string
engine_params[engine_name] = (
f"Error checking engine {engine_name}: {str(e)}"
)

# Filter out rerank_class field
for engine, params in engine_params.items():
if isinstance(
params, list
): # Only process parameter lists of available engines
for param in params:
if "rerank_class" in param:
del param["rerank_class"]

return engine_params
else:
Expand Down
Loading
Loading