diff --git a/xinference/model/embedding/core.py b/xinference/model/embedding/core.py
index fffbc7633c..b68e5236ca 100644
--- a/xinference/model/embedding/core.py
+++ b/xinference/model/embedding/core.py
@@ -25,6 +25,7 @@
from ..core import VirtualEnvSettings
from ..utils import ModelInstanceInfoMixin
from .embed_family import match_embedding
+from .match_result import MatchResult
logger = logging.getLogger(__name__)
@@ -171,6 +172,46 @@ def match_json(
) -> bool:
pass
+ @classmethod
+ def match_with_reason(
+ cls,
+ model_family: EmbeddingModelFamilyV2,
+ model_spec: EmbeddingSpecV1,
+ quantization: str,
+ ) -> "MatchResult":
+ """
+        Check whether the engine can handle the given embedding model, returning detailed error information.
+
+        This method provides a detailed failure reason when an engine cannot
+        handle a specific model configuration. The default implementation falls
+        back to the boolean match_json method for backward compatibility.
+
+ Args:
+ model_family: The embedding model family information
+ model_spec: The model specification
+ quantization: The quantization method
+
+ Returns:
+            MatchResult: Detailed match result including the failure reason when the engine does not match
+ """
+ from .match_result import ErrorType, MatchResult
+
+ # Default implementation for backward compatibility
+ if cls.match_json(model_family, model_spec, quantization):
+ return MatchResult.success()
+ else:
+ # Get basic reason based on common failure patterns
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason=f"Required library for {cls.__name__} is not available",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ )
+ else:
+ return MatchResult.failure(
+ reason=f"Embedding model configuration is not compatible with {cls.__name__}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ )
+
@classmethod
def match(
cls,
diff --git a/xinference/model/embedding/llama_cpp/core.py b/xinference/model/embedding/llama_cpp/core.py
index fb8c4e45ca..d84434384f 100644
--- a/xinference/model/embedding/llama_cpp/core.py
+++ b/xinference/model/embedding/llama_cpp/core.py
@@ -26,6 +26,7 @@
from ....types import Embedding
from ..core import EmbeddingModel, EmbeddingModelFamilyV2, EmbeddingSpecV1
+from ..match_result import MatchResult
logger = logging.getLogger(__name__)
@@ -235,6 +236,63 @@ def match_json(
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
+
+ result = cls.match_with_reason(model_family, model_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls,
+ model_family: EmbeddingModelFamilyV2,
+ model_spec: EmbeddingSpecV1,
+ quantization: str,
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability
+ if not cls.check_lib():
+ return MatchResult.failure(
+                reason="llama.cpp library (xllamacpp) is not installed; it is required to run GGUF embedding models",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="xllamacpp package not found in Python environment",
+ )
+
+ # Check model format compatibility
if model_spec.model_format not in ["ggufv2"]:
- return False
- return True
+ return MatchResult.failure(
+ reason=f"llama.cpp embedding only supports GGUF v2 format, got: {model_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {model_spec.model_format}, required: ggufv2",
+ )
+
+ # Check embedding-specific requirements
+ if not hasattr(model_spec, "model_file_name_template"):
+ return MatchResult.failure(
+ reason="GGUF embedding model requires proper file configuration",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details="Missing model_file_name_template for GGUF embedding",
+ )
+
+ # Check model dimensions for llama.cpp compatibility
+ model_dimensions = model_family.dimensions
+ if model_dimensions > 4096: # llama.cpp may have limitations
+ return MatchResult.failure(
+ reason=f"Large embedding model may have compatibility issues with llama.cpp ({model_dimensions} dimensions)",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Large embedding dimensions: {model_dimensions}",
+ )
+
+ # Check platform-specific considerations
+ import platform
+
+ current_platform = platform.system()
+
+        # llama.cpp runs on Windows, but embedding performance there may be limited,
+        # so this engine is reported as unavailable on Windows
+ if current_platform == "Windows":
+ return MatchResult.failure(
+ reason="llama.cpp embedding may have limited performance on Windows",
+ error_type=ErrorType.OS_REQUIREMENT,
+ technical_details=f"Windows platform: {current_platform}",
+ )
+
+ return MatchResult.success()
diff --git a/xinference/model/embedding/match_result.py b/xinference/model/embedding/match_result.py
new file mode 100644
index 0000000000..3e33c268d4
--- /dev/null
+++ b/xinference/model/embedding/match_result.py
@@ -0,0 +1,76 @@
+"""
+Error handling result structures for embedding model engine matching.
+
+This module provides structured error handling for engine matching operations,
+allowing engines to report detailed failure reasons.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+
+@dataclass
+class MatchResult:
+ """
+    Result of an engine matching operation with detailed error information.
+
+    This class provides structured information about whether an engine can handle
+    a specific model configuration and, if not, why.
+ """
+
+ is_match: bool
+ reason: Optional[str] = None
+ error_type: Optional[str] = None
+ technical_details: Optional[str] = None
+
+ @classmethod
+ def success(cls) -> "MatchResult":
+ """Create a successful match result."""
+ return cls(is_match=True)
+
+ @classmethod
+ def failure(
+ cls,
+ reason: str,
+ error_type: Optional[str] = None,
+ technical_details: Optional[str] = None,
+ ) -> "MatchResult":
+ """Create a failed match result with optional details."""
+ return cls(
+ is_match=False,
+ reason=reason,
+ error_type=error_type,
+ technical_details=technical_details,
+ )
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert to dictionary for API responses."""
+ result: Dict[str, Any] = {"is_match": self.is_match}
+ if not self.is_match:
+ if self.reason:
+ result["reason"] = self.reason
+ if self.error_type:
+ result["error_type"] = self.error_type
+ if self.technical_details:
+ result["technical_details"] = self.technical_details
+ return result
+
+ def to_error_string(self) -> str:
+ """Convert to error string for backward compatibility."""
+ if self.is_match:
+ return "Available"
+ error_msg = self.reason or "Unknown error"
+ return error_msg
+
+
+# Error type constants for better categorization
+class ErrorType:
+ HARDWARE_REQUIREMENT = "hardware_requirement"
+ OS_REQUIREMENT = "os_requirement"
+ MODEL_FORMAT = "model_format"
+ DEPENDENCY_MISSING = "dependency_missing"
+ MODEL_COMPATIBILITY = "model_compatibility"
+ DIMENSION_MISMATCH = "dimension_mismatch"
+ VERSION_REQUIREMENT = "version_requirement"
+ CONFIGURATION_ERROR = "configuration_error"
+ ENGINE_UNAVAILABLE = "engine_unavailable"
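+
+
+# Illustrative usage sketch (an assumption added for documentation, not
+# exercised by the engines above): it shows how a caller might consume
+# MatchResult objects and turn them into user-facing output. The engine names
+# below are hypothetical placeholders.
+if __name__ == "__main__":
+    checks = {
+        "engine_a": MatchResult.success(),
+        "engine_b": MatchResult.failure(
+            reason="Required library is not available",
+            error_type=ErrorType.DEPENDENCY_MISSING,
+            technical_details="package not found in Python environment",
+        ),
+    }
+    for engine, result in checks.items():
+        # to_error_string() yields "Available" for a match and the reason otherwise
+        print(engine, "->", result.to_error_string())
+        # to_dict() produces the structured form intended for API responses
+        print(engine, "->", result.to_dict())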
diff --git a/xinference/model/embedding/sentence_transformers/core.py b/xinference/model/embedding/sentence_transformers/core.py
index 05f7753e8e..c1789f9912 100644
--- a/xinference/model/embedding/sentence_transformers/core.py
+++ b/xinference/model/embedding/sentence_transformers/core.py
@@ -22,6 +22,7 @@
from ....types import Embedding, EmbeddingData, EmbeddingUsage
from ...utils import is_flash_attn_available
from ..core import EmbeddingModel, EmbeddingModelFamilyV2, EmbeddingSpecV1
+from ..match_result import MatchResult
logger = logging.getLogger(__name__)
SENTENCE_TRANSFORMER_MODEL_LIST: List[str] = []
@@ -434,5 +435,77 @@ def match_json(
model_spec: EmbeddingSpecV1,
quantization: str,
) -> bool:
- # As default embedding engine, sentence-transformer support all models
- return model_spec.model_format in ["pytorch"]
+
+ result = cls.match_with_reason(model_family, model_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls,
+ model_family: EmbeddingModelFamilyV2,
+ model_spec: EmbeddingSpecV1,
+ quantization: str,
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="Sentence Transformers library is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="sentence_transformers package not found in Python environment",
+ )
+
+ # Check model format compatibility
+ if model_spec.model_format not in ["pytorch"]:
+ return MatchResult.failure(
+ reason=f"Sentence Transformers only supports pytorch format, got: {model_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {model_spec.model_format}, required: pytorch",
+ )
+
+ # Check model dimensions compatibility
+ model_dimensions = model_family.dimensions
+        if model_dimensions > 1536:  # conservative limit applied to this engine
+            return MatchResult.failure(
+                reason=f"Embedding dimension {model_dimensions} exceeds the 1536 limit applied to the Sentence Transformers engine",
+                error_type=ErrorType.MODEL_COMPATIBILITY,
+                technical_details=f"Embedding dimensions: {model_dimensions}, limit: 1536",
+            )
+
+ # Check token limits
+ max_tokens = model_family.max_tokens
+        if max_tokens > 8192:  # conservative limit applied to this engine
+            return MatchResult.failure(
+                reason=f"max_tokens {max_tokens} exceeds the 8192 limit applied to the Sentence Transformers engine",
+                error_type=ErrorType.CONFIGURATION_ERROR,
+                technical_details=f"max_tokens: {max_tokens}, limit: 8192",
+            )
+
+ # Check for special model requirements
+ model_name = model_family.model_name.lower()
+
+ # Check Qwen2 GTE models
+ if "gte" in model_name and "qwen2" in model_name:
+ # These models have specific requirements
+ if not hasattr(cls, "_check_qwen_gte_requirements"):
+ return MatchResult.failure(
+ reason="Qwen2 GTE models require special handling",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details="Qwen2 GTE model special requirements",
+ )
+
+        # Qwen3 embedding models have flash-attention related requirements, but
+        # those can only be verified at load time, so they are not checked here.
+
+ return MatchResult.success()
diff --git a/xinference/model/llm/core.py b/xinference/model/llm/core.py
index 18747c6742..94c5814a08 100644
--- a/xinference/model/llm/core.py
+++ b/xinference/model/llm/core.py
@@ -31,6 +31,7 @@
if TYPE_CHECKING:
from .llm_family import LLMFamilyV2, LLMSpecV1
+ from .match_result import MatchResult
logger = logging.getLogger(__name__)
@@ -157,6 +158,43 @@ def match_json(
) -> bool:
raise NotImplementedError
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ """
+        Check whether the engine can handle the given model, returning detailed error information.
+
+        This method provides a detailed failure reason when an engine cannot
+        handle a specific model configuration. The default implementation falls
+        back to the boolean match_json method for backward compatibility.
+
+ Args:
+ llm_family: The model family information
+ llm_spec: The model specification
+ quantization: The quantization method
+
+ Returns:
+            MatchResult: Detailed match result including the failure reason when the engine does not match
+ """
+ from .match_result import ErrorType, MatchResult
+
+ # Default implementation for backward compatibility
+ if cls.match_json(llm_family, llm_spec, quantization):
+ return MatchResult.success()
+ else:
+ # Get basic reason based on common failure patterns
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason=f"Required library for {cls.__name__} is not available",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ )
+ else:
+ return MatchResult.failure(
+ reason=f"Model configuration is not compatible with {cls.__name__}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ )
+
def prepare_parse_reasoning_content(
self, reasoning_content: bool, enable_thinking: bool = True
):
diff --git a/xinference/model/llm/llama_cpp/core.py b/xinference/model/llm/llama_cpp/core.py
index 8fee5a081c..d77c15b917 100644
--- a/xinference/model/llm/llama_cpp/core.py
+++ b/xinference/model/llm/llama_cpp/core.py
@@ -25,6 +25,7 @@
from ....types import ChatCompletion, ChatCompletionChunk, Completion, CompletionChunk
from ..core import LLM, chat_context_var
from ..llm_family import LLMFamilyV2, LLMSpecV1
+from ..match_result import MatchResult
from ..utils import ChatModelMixin
logger = logging.getLogger(__name__)
@@ -84,14 +85,66 @@ def check_lib(cls) -> bool:
def match_json(
cls, llm_family: LLMFamilyV2, llm_spec: LLMSpecV1, quantization: str
) -> bool:
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: LLMFamilyV2, llm_spec: LLMSpecV1, quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="llama.cpp library (xllamacpp) is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="xllamacpp package not found in Python environment",
+ )
+
+ # Check model format compatibility
if llm_spec.model_format not in ["ggufv2"]:
- return False
+ return MatchResult.failure(
+ reason=f"llama.cpp only supports GGUF v2 format, got: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {llm_spec.model_format}, required: ggufv2",
+ )
+
+ # Check model abilities - llama.cpp supports both chat and generation
if (
"chat" not in llm_family.model_ability
and "generate" not in llm_family.model_ability
):
- return False
- return True
+ return MatchResult.failure(
+ reason=f"llama.cpp requires 'chat' or 'generate' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # Check platform-specific issues
+ import platform
+
+ current_platform = platform.system()
+
+ # Check for ARM64 specific issues
+ if current_platform == "Darwin" and platform.machine() == "arm64":
+ # Apple Silicon specific checks could go here
+ pass
+ elif current_platform == "Windows":
+ # Windows specific checks could go here
+ pass
+
+ # Check memory requirements (basic heuristic)
+ model_size = float(str(llm_spec.model_size_in_billions))
+ if model_size > 70: # Very large models
+ return MatchResult.failure(
+ reason=f"llama.cpp may struggle with very large models ({model_size}B parameters)",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Large model size: {model_size}B parameters",
+ )
+
+ return MatchResult.success()
def load(self):
try:
diff --git a/xinference/model/llm/lmdeploy/core.py b/xinference/model/llm/lmdeploy/core.py
index c532d1709d..b91e6d0c6e 100644
--- a/xinference/model/llm/lmdeploy/core.py
+++ b/xinference/model/llm/lmdeploy/core.py
@@ -21,6 +21,7 @@
from ....types import ChatCompletion, ChatCompletionChunk, Completion, LoRA
from ..core import LLM
from ..llm_family import LLMFamilyV2, LLMSpecV1
+from ..match_result import MatchResult
from ..utils import ChatModelMixin, generate_chat_completion, generate_completion_chunk
logger = logging.getLogger(__name__)
@@ -119,7 +120,21 @@ def check_lib(cls) -> bool:
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ return MatchResult.failure(
+ reason="LMDeploy base model does not support direct inference",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details="LMDeploy base model class is not intended for direct use",
+ )
def generate(
self,
@@ -172,13 +187,51 @@ def load(self):
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability first
+ if not LMDEPLOY_INSTALLED:
+ return MatchResult.failure(
+ reason="LMDeploy library is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="lmdeploy package not found in Python environment",
+ )
+
+ # Check model format compatibility and quantization
if llm_spec.model_format == "awq":
- # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
+ # LMDeploy has specific AWQ quantization requirements
if "4" not in quantization:
- return False
+ return MatchResult.failure(
+ reason=f"LMDeploy AWQ format requires 4-bit quantization, got: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"AWQ + {quantization} not supported by LMDeploy",
+ )
+
+ # Check model compatibility
if llm_family.model_name not in LMDEPLOY_SUPPORTED_CHAT_MODELS:
- return False
- return LMDEPLOY_INSTALLED
+ return MatchResult.failure(
+ reason=f"Chat model not supported by LMDeploy: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported chat model: {llm_family.model_name}",
+ )
+
+ # Check model abilities - LMDeploy primarily supports chat models
+ if "chat" not in llm_family.model_ability:
+ return MatchResult.failure(
+ reason=f"LMDeploy Chat requires 'chat' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ return MatchResult.success()
async def async_chat(
self,
diff --git a/xinference/model/llm/match_result.py b/xinference/model/llm/match_result.py
new file mode 100644
index 0000000000..3ab90d2c37
--- /dev/null
+++ b/xinference/model/llm/match_result.py
@@ -0,0 +1,76 @@
+"""
+Error handling result structures for engine matching.
+
+This module provides structured error handling for engine matching operations,
+allowing engines to report detailed failure reasons.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+
+@dataclass
+class MatchResult:
+ """
+    Result of an engine matching operation with detailed error information.
+
+    This class provides structured information about whether an engine can handle
+    a specific model configuration and, if not, why.
+ """
+
+ is_match: bool
+ reason: Optional[str] = None
+ error_type: Optional[str] = None
+ technical_details: Optional[str] = None
+
+ @classmethod
+ def success(cls) -> "MatchResult":
+ """Create a successful match result."""
+ return cls(is_match=True)
+
+ @classmethod
+ def failure(
+ cls,
+ reason: str,
+ error_type: Optional[str] = None,
+ technical_details: Optional[str] = None,
+ ) -> "MatchResult":
+ """Create a failed match result with optional details."""
+ return cls(
+ is_match=False,
+ reason=reason,
+ error_type=error_type,
+ technical_details=technical_details,
+ )
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert to dictionary for API responses."""
+ result: Dict[str, Any] = {"is_match": self.is_match}
+ if not self.is_match:
+ if self.reason:
+ result["reason"] = self.reason
+ if self.error_type:
+ result["error_type"] = self.error_type
+ if self.technical_details:
+ result["technical_details"] = self.technical_details
+ return result
+
+ def to_error_string(self) -> str:
+ """Convert to error string for backward compatibility."""
+ if self.is_match:
+ return "Available"
+ error_msg = self.reason or "Unknown error"
+ return error_msg
+
+
+# Error type constants for better categorization
+class ErrorType:
+ HARDWARE_REQUIREMENT = "hardware_requirement"
+ OS_REQUIREMENT = "os_requirement"
+ MODEL_FORMAT = "model_format"
+ QUANTIZATION = "quantization"
+ DEPENDENCY_MISSING = "dependency_missing"
+ MODEL_COMPATIBILITY = "model_compatibility"
+ ABILITY_MISMATCH = "ability_mismatch"
+ VERSION_REQUIREMENT = "version_requirement"
+ CONFIGURATION_ERROR = "configuration_error"
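+
+
+# Illustrative sketch (assumed usage, not referenced by the engines above):
+# given per-engine MatchResult objects, pick the first available engine and
+# collect the failure reasons of the rest for error reporting. The engine
+# names below are hypothetical placeholders, not part of the public API.
+if __name__ == "__main__":
+    candidates = {
+        "vllm": MatchResult.failure(
+            reason="vLLM requires CUDA or MLU accelerator support",
+            error_type=ErrorType.HARDWARE_REQUIREMENT,
+        ),
+        "transformers": MatchResult.success(),
+    }
+    chosen = next(
+        (name for name, result in candidates.items() if result.is_match), None
+    )
+    rejected = {
+        name: result.to_error_string()
+        for name, result in candidates.items()
+        if not result.is_match
+    }
+    print("chosen:", chosen)  # -> transformers
+    print("rejected:", rejected)  # failure reasons keyed by engine name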
diff --git a/xinference/model/llm/mlx/core.py b/xinference/model/llm/mlx/core.py
index 32de59be0b..b1b6505952 100644
--- a/xinference/model/llm/mlx/core.py
+++ b/xinference/model/llm/mlx/core.py
@@ -51,6 +51,7 @@
)
from ..core import LLM, chat_context_var
from ..llm_family import LLMFamilyV2, LLMSpecV1
+from ..match_result import MatchResult
from ..utils import (
DEEPSEEK_TOOL_CALL_FAMILY,
QWEN_TOOL_CALL_FAMILY,
@@ -411,17 +412,66 @@ def check_lib(cls) -> bool:
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if llm_spec.model_format not in ["mlx"]:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check platform compatibility first - MLX only works on Apple Silicon
if sys.platform != "darwin" or platform.processor() != "arm":
- # only work for Mac M chips
- return False
+ return MatchResult.failure(
+ reason="MLX engine only works on Apple Silicon Macs (macOS with ARM processor)",
+ error_type=ErrorType.OS_REQUIREMENT,
+ technical_details=f"Current platform: {sys.platform}, processor: {platform.processor()}, required: darwin + arm",
+ )
+
+ # Check library availability (only if platform is compatible)
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="MLX library (mlx_lm) is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="mlx_lm package not found in Python environment",
+ )
+
+ # Check model format compatibility
+ if llm_spec.model_format not in ["mlx"]:
+ return MatchResult.failure(
+ reason=f"MLX engine only supports MLX format, got: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {llm_spec.model_format}, required: mlx",
+ )
+
+ # Check model abilities - MLX supports generation but not chat/vision in this base class
if "generate" not in llm_family.model_ability:
- return False
+ return MatchResult.failure(
+ reason=f"MLX engine requires 'generate' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # MLX base model doesn't support chat or vision
if "chat" in llm_family.model_ability or "vision" in llm_family.model_ability:
- # do not process chat or vision
- return False
- return True
+ return MatchResult.failure(
+ reason="MLX base model does not support chat or vision abilities",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Unsupported abilities for base MLX: {[a for a in llm_family.model_ability if a in ['chat', 'vision']]}",
+ )
+
+ # Check memory constraints for Apple Silicon
+ model_size = float(str(llm_spec.model_size_in_billions))
+ if model_size > 70: # Large models may be problematic
+ return MatchResult.failure(
+ reason=f"MLX may have memory limitations with very large models ({model_size}B parameters)",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Large model size: {model_size}B on Apple Silicon",
+ )
+
+ return MatchResult.success()
def _get_prompt_cache(
self, prompt, lora_name: Optional[str] = None, model: Any = None
@@ -720,17 +770,38 @@ def _sanitize_generate_config(
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if llm_spec.model_format not in ["mlx"]:
- return False
- if sys.platform != "darwin" or platform.processor() != "arm":
- # only work for Mac M chips
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Use base class validation first
+ base_result = super().match_with_reason(llm_family, llm_spec, quantization)
+ if not base_result.is_match:
+ return base_result
+
+ # Check chat ability
if "chat" not in llm_family.model_ability:
- return False
+ return MatchResult.failure(
+ reason=f"MLX Chat requires 'chat' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # MLX Chat doesn't support vision
if "vision" in llm_family.model_ability:
- # do not process vision
- return False
- return True
+ return MatchResult.failure(
+ reason="MLX Chat model does not support vision abilities",
+ error_type=ErrorType.ABILITY_MISMATCH,
+                technical_details="Vision ability not supported in MLXChatModel",
+ )
+
+ return MatchResult.success()
def chat(
self,
@@ -784,14 +855,52 @@ def check_lib(cls) -> bool:
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if llm_spec.model_format not in ["mlx"]:
- return False
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check platform compatibility first - MLX only works on Apple Silicon
if sys.platform != "darwin" or platform.processor() != "arm":
- # only work for Mac M chips
- return False
+ return MatchResult.failure(
+ reason="MLX Vision engine only works on Apple Silicon Macs (macOS with ARM processor)",
+ error_type=ErrorType.OS_REQUIREMENT,
+ technical_details=f"Current platform: {sys.platform}, processor: {platform.processor()}, required: darwin + arm",
+ )
+
+ # Check library availability (only if platform is compatible) - MLX Vision uses mlx_vlm
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="MLX Vision library (mlx_vlm) is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="mlx_vlm package not found in Python environment",
+ )
+
+ # Check model format compatibility
+ if llm_spec.model_format not in ["mlx"]:
+ return MatchResult.failure(
+ reason=f"MLX Vision engine only supports MLX format, got: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {llm_spec.model_format}, required: mlx",
+ )
+
+ # Check vision ability
if "vision" not in llm_family.model_ability:
- return False
- return True
+ return MatchResult.failure(
+ reason=f"MLX Vision requires 'vision' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # Check for distributed inference limitations
+ # MLX Vision models don't support distributed inference
+ # This could be checked here if needed
+
+ return MatchResult.success()
def _load_model(self, **kwargs):
try:
diff --git a/xinference/model/llm/sglang/core.py b/xinference/model/llm/sglang/core.py
index b6f28e86bf..70fe429481 100644
--- a/xinference/model/llm/sglang/core.py
+++ b/xinference/model/llm/sglang/core.py
@@ -15,6 +15,7 @@
import json
import logging
import multiprocessing
+import platform
import sys
import threading
import time
@@ -36,6 +37,7 @@
from .. import LLM, LLMFamilyV2, LLMSpecV1
from ..core import chat_context_var
from ..llm_family import CustomLLMFamilyV2
+from ..match_result import MatchResult
from ..utils import (
DEEPSEEK_TOOL_CALL_FAMILY,
QWEN_TOOL_CALL_FAMILY,
@@ -339,24 +341,103 @@ def check_lib(cls) -> bool:
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability first
+ if not SGLANG_INSTALLED:
+ return MatchResult.failure(
+ reason="SGLang library is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="sglang package not found in Python environment",
+ )
+
+ # Check hardware requirements - SGLang requires CUDA
if not cls._has_cuda_device():
- return False
+ return MatchResult.failure(
+ reason="SGLang requires CUDA GPU support",
+ error_type=ErrorType.HARDWARE_REQUIREMENT,
+ technical_details="No CUDA devices detected",
+ )
+
+ # Check OS requirements
if not cls._is_linux():
- return False
- if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
- return False
+ return MatchResult.failure(
+ reason="SGLang only supports Linux operating system",
+ error_type=ErrorType.OS_REQUIREMENT,
+ technical_details=f"Current OS: {platform.system()}, required: Linux",
+ )
+
+ # Check model format compatibility
+ supported_formats = ["pytorch", "gptq", "awq", "fp8", "bnb"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"SGLang does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {llm_spec.model_format}",
+ )
+
+ # Check quantization compatibility with format
if llm_spec.model_format == "pytorch":
- if quantization != "none" and not (quantization is None):
- return False
+ if quantization != "none" and quantization is not None:
+ return MatchResult.failure(
+ reason=f"SGLang pytorch format does not support quantization: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"pytorch + {quantization} combination not supported",
+ )
+
+ # Check model compatibility
if isinstance(llm_family, CustomLLMFamilyV2):
if llm_family.model_family not in SGLANG_SUPPORTED_MODELS:
- return False
+ return MatchResult.failure(
+ reason=f"Custom model family not supported by SGLang: {llm_family.model_family}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Custom family: {llm_family.model_family}",
+ )
else:
if llm_family.model_name not in SGLANG_SUPPORTED_MODELS:
- return False
- if "generate" not in llm_family.model_ability:
- return False
- return SGLANG_INSTALLED
+ return MatchResult.failure(
+ reason=f"Model not supported by SGLang: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported model: {llm_family.model_name}",
+ )
+
+ # Check model abilities with flexible logic
+ # SGLang can handle models with various text generation capabilities
+ has_text_capability = (
+ "generate" in llm_family.model_ability
+ or "chat" in llm_family.model_ability
+ or "reasoning" in llm_family.model_ability
+ or "tools" in llm_family.model_ability
+ )
+
+ if not has_text_capability:
+ return MatchResult.failure(
+ reason=f"SGLang requires text generation capabilities, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # SGLang is primarily designed for text models, not specialized models
+ specialized_abilities = ["embedding", "rerank", "audio", "vision"]
+ has_specialized = any(
+ ability in llm_family.model_ability for ability in specialized_abilities
+ )
+ if has_specialized:
+ return MatchResult.failure(
+                reason=f"SGLang is designed for text models; this model has specialized abilities: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Specialized abilities: {[a for a in llm_family.model_ability if a in specialized_abilities]}",
+ )
+
+ return MatchResult.success()
@staticmethod
def _convert_state_to_completion_chunk(
@@ -645,20 +726,64 @@ class SGLANGChatModel(SGLANGModel, ChatModelMixin):
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Use base class validation first
+ base_result = super().match_with_reason(llm_family, llm_spec, quantization)
+ if not base_result.is_match:
+ return base_result
+
+ # Check model format compatibility (same as base)
+ supported_formats = ["pytorch", "gptq", "awq", "fp8", "bnb"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"SGLang Chat does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Chat model unsupported format: {llm_spec.model_format}",
+ )
+
+ # Check quantization compatibility with format
if llm_spec.model_format == "pytorch":
- if quantization != "none" and not (quantization is None):
- return False
+ if quantization != "none" and quantization is not None:
+ return MatchResult.failure(
+ reason=f"SGLang Chat pytorch format does not support quantization: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"Chat pytorch + {quantization} not supported",
+ )
+
+ # Check chat model compatibility
if isinstance(llm_family, CustomLLMFamilyV2):
if llm_family.model_family not in SGLANG_SUPPORTED_CHAT_MODELS:
- return False
+ return MatchResult.failure(
+ reason=f"Custom chat model not supported by SGLang: {llm_family.model_family}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Custom chat family: {llm_family.model_family}",
+ )
else:
if llm_family.model_name not in SGLANG_SUPPORTED_CHAT_MODELS:
- return False
+ return MatchResult.failure(
+ reason=f"Chat model not supported by SGLang: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported chat model: {llm_family.model_name}",
+ )
+
+ # Check chat ability
if "chat" not in llm_family.model_ability:
- return False
- return SGLANG_INSTALLED
+ return MatchResult.failure(
+ reason=f"SGLang Chat requires 'chat' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ return MatchResult.success()
def _sanitize_chat_config(
self,
@@ -732,24 +857,64 @@ class SGLANGVisionModel(SGLANGModel, ChatModelMixin):
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if not cls._has_cuda_device():
- return False
- if not cls._is_linux():
- return False
- if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Use base class validation first
+ base_result = super().match_with_reason(llm_family, llm_spec, quantization)
+ if not base_result.is_match:
+ return base_result
+
+ # Vision models have the same format restrictions as base SGLANG
+ supported_formats = ["pytorch", "gptq", "awq", "fp8", "bnb"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"SGLang Vision does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Vision model unsupported format: {llm_spec.model_format}",
+ )
+
+ # Vision models typically work with specific quantization settings
if llm_spec.model_format == "pytorch":
- if quantization != "none" and not (quantization is None):
- return False
+ if quantization != "none" and quantization is not None:
+ return MatchResult.failure(
+ reason=f"SGLang Vision pytorch format does not support quantization: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"Vision pytorch + {quantization} not supported",
+ )
+
+ # Check vision model compatibility
if isinstance(llm_family, CustomLLMFamilyV2):
if llm_family.model_family not in SGLANG_SUPPORTED_VISION_MODEL_LIST:
- return False
+ return MatchResult.failure(
+ reason=f"Custom vision model not supported by SGLang: {llm_family.model_family}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Custom vision family: {llm_family.model_family}",
+ )
else:
if llm_family.model_name not in SGLANG_SUPPORTED_VISION_MODEL_LIST:
- return False
+ return MatchResult.failure(
+ reason=f"Vision model not supported by SGLang: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported vision model: {llm_family.model_name}",
+ )
+
+ # Check vision ability
if "vision" not in llm_family.model_ability:
- return False
- return SGLANG_INSTALLED
+ return MatchResult.failure(
+ reason=f"SGLang Vision requires 'vision' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ return MatchResult.success()
def _sanitize_chat_config(
self,
diff --git a/xinference/model/llm/transformers/core.py b/xinference/model/llm/transformers/core.py
index a102b14045..0ac1597164 100644
--- a/xinference/model/llm/transformers/core.py
+++ b/xinference/model/llm/transformers/core.py
@@ -40,6 +40,7 @@
from ...utils import select_device
from ..core import LLM, chat_context_var
from ..llm_family import LLMFamilyV2, LLMSpecV1
+from ..match_result import MatchResult
from ..utils import (
DEEPSEEK_TOOL_CALL_FAMILY,
LLAMA3_TOOL_CALL_FAMILY,
@@ -498,14 +499,71 @@ def check_lib(cls) -> bool:
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if llm_spec.model_format not in ["pytorch", "gptq", "awq", "bnb"]:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="Transformers library is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="transformers or torch package not found",
+ )
+
+ # Check model format compatibility
+ supported_formats = ["pytorch", "gptq", "awq", "bnb"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"Transformers does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Transformers unsupported format: {llm_spec.model_format}",
+ )
+
+ # Check for models that shouldn't use Transformers by default
model_family = llm_family.model_family or llm_family.model_name
if model_family in NON_DEFAULT_MODEL_LIST:
- return False
- if "generate" not in llm_family.model_ability:
- return False
- return True
+ return MatchResult.failure(
+ reason=f"Model {model_family} is not recommended for Transformers engine",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Model in NON_DEFAULT_MODEL_LIST: {model_family}",
+ )
+
+ # Check model abilities with flexible logic
+ # Transformers can handle models with various text processing capabilities
+ has_text_capability = (
+ "generate" in llm_family.model_ability
+ or "chat" in llm_family.model_ability
+ or "reasoning" in llm_family.model_ability
+ or "tools" in llm_family.model_ability
+ )
+
+ if not has_text_capability:
+ return MatchResult.failure(
+ reason=f"Transformers engine requires text processing capabilities, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+        # Models whose abilities are purely specialized (embedding, rerank, audio,
+        # vision) are already rejected above because they lack text capabilities,
+        # so no additional check is needed here.
+
+ return MatchResult.success()
def build_prefill_attention_mask(
self, batch_size: int, seq_length: int, reqs: List[InferenceRequest]
diff --git a/xinference/model/llm/transformers/multimodal/core.py b/xinference/model/llm/transformers/multimodal/core.py
index ae67e102b5..4d6451f42e 100644
--- a/xinference/model/llm/transformers/multimodal/core.py
+++ b/xinference/model/llm/transformers/multimodal/core.py
@@ -39,21 +39,18 @@ def decide_device(self):
"""
Update self._device
"""
- pass
@abstractmethod
def load_processor(self):
"""
Load self._processor and self._tokenizer
"""
- pass
@abstractmethod
def load_multimodal_model(self):
"""
Load self._model
"""
- pass
def load(self):
self.decide_device()
@@ -71,7 +68,6 @@ def build_inputs_from_messages(
actual parameters needed for inference,
e.g. input_ids, attention_masks, etc.
"""
- pass
@abstractmethod
def build_generate_kwargs(
@@ -82,7 +78,6 @@ def build_generate_kwargs(
Hyperparameters needed for generation,
e.g. temperature, max_new_tokens, etc.
"""
- pass
@abstractmethod
def build_streaming_iter(
@@ -95,7 +90,6 @@ def build_streaming_iter(
The length of prompt token usually comes from the input_ids.
In this interface you need to call the `build_inputs_from_messages` and `build_generate_kwargs`.
"""
- pass
def get_stop_strs(self) -> List[str]:
return []
diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
index 3a55314fb9..02e8871f4d 100644
--- a/xinference/model/llm/vllm/core.py
+++ b/xinference/model/llm/vllm/core.py
@@ -19,6 +19,7 @@
import logging
import multiprocessing
import os
+import platform
import sys
import threading
import time
@@ -55,6 +56,7 @@
from .. import BUILTIN_LLM_FAMILIES, LLM, LLMFamilyV2, LLMSpecV1
from ..core import chat_context_var
from ..llm_family import CustomLLMFamilyV2, cache_model_tokenizer_and_config
+from ..match_result import ErrorType, MatchResult
from ..utils import (
DEEPSEEK_TOOL_CALL_FAMILY,
QWEN_TOOL_CALL_FAMILY,
@@ -849,41 +851,206 @@ def _sanitize_generate_config(
@classmethod
def check_lib(cls) -> bool:
- return importlib.util.find_spec("vllm") is not None
+ if importlib.util.find_spec("vllm") is None:
+ return False
+
+ try:
+ import vllm
+
+ if not getattr(vllm, "__version__", None):
+ return False
+
+ # Check version
+ from packaging import version
+
+ if version.parse(vllm.__version__) < version.parse("0.3.0"):
+ return False
+
+ # Check CUDA
+ import torch
+
+ if not torch.cuda.is_available():
+ return False
+
+ return True
+ except Exception:
+ return False
@classmethod
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability first
+ if not VLLM_INSTALLED:
+ return MatchResult.failure(
+ reason="vLLM library is not installed",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="vllm package not found in Python environment",
+ )
+
+ # Check hardware requirements
if not cls._has_cuda_device() and not cls._has_mlu_device():
- return False
+ return MatchResult.failure(
+ reason="vLLM requires CUDA or MLU accelerator support",
+ error_type=ErrorType.HARDWARE_REQUIREMENT,
+ technical_details="No CUDA or MLU devices detected",
+ )
+
+ # Check OS requirements
if not cls._is_linux():
- return False
- if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
- return False
+ return MatchResult.failure(
+ reason="vLLM only supports Linux operating system",
+ error_type=ErrorType.OS_REQUIREMENT,
+ technical_details=f"Current OS: {platform.system()}, required: Linux",
+ )
+
+ # Check model format
+ supported_formats = ["pytorch", "gptq", "awq", "fp8", "bnb"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"vLLM does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {llm_spec.model_format}",
+ )
+
+ # Check quantization compatibility with format
if llm_spec.model_format == "pytorch":
if quantization != "none" and quantization is not None:
- return False
+ return MatchResult.failure(
+ reason=f"vLLM pytorch format does not support quantization: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"pytorch + {quantization} combination not supported",
+ )
+
if llm_spec.model_format == "awq":
- # Currently, only 4-bit weight quantization is supported for AWQ, but got 8 bits.
if "4" not in quantization:
- return False
+ return MatchResult.failure(
+ reason=f"vLLM AWQ format requires 4-bit quantization, got: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"AWQ + {quantization} not supported, only 4-bit",
+ )
+
if llm_spec.model_format == "gptq":
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
if not any(q in quantization for q in ("3", "4", "8")):
- return False
+ return MatchResult.failure(
+ reason=f"vLLM GPTQ format requires 3/4/8-bit quantization, got: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"GPTQ + {quantization} not supported with vLLM >= 0.3.3",
+ )
else:
if "4" not in quantization:
- return False
+ return MatchResult.failure(
+ reason=f"Older vLLM version only supports 4-bit GPTQ, got: {quantization}",
+ error_type=ErrorType.VERSION_REQUIREMENT,
+ technical_details=f"GPTQ + {quantization} requires vLLM >= 0.3.3",
+ )
+
+ # Check model compatibility with more flexible matching
+ def is_model_supported(model_name: str, supported_list: List[str]) -> bool:
+ """Check if model is supported with flexible matching."""
+ # Direct match
+ if model_name in supported_list:
+ return True
+
+ # Partial matching for models with variants (e.g., qwen3 variants)
+ for supported in supported_list:
+ if model_name.startswith(
+ supported.lower()
+ ) or supported.lower().startswith(model_name):
+ return True
+
+ # Family-based matching for common patterns
+ model_lower = model_name.lower()
+ if any(
+ family in model_lower
+ for family in [
+ "qwen3",
+ "llama",
+ "mistral",
+ "gemma",
+ "baichuan",
+ "deepseek",
+ ]
+ ):
+ # Check if there's a corresponding supported model with same family
+ for supported in supported_list:
+ if any(
+ family in supported.lower()
+ for family in [
+ "qwen3",
+ "llama",
+ "mistral",
+ "gemma",
+ "baichuan",
+ "deepseek",
+ ]
+ ):
+ return True
+
+ return False
+
if isinstance(llm_family, CustomLLMFamilyV2):
- if llm_family.model_family not in VLLM_SUPPORTED_MODELS:
- return False
+ if not llm_family.model_family or not is_model_supported(
+ llm_family.model_family.lower(), VLLM_SUPPORTED_MODELS
+ ):
+ return MatchResult.failure(
+ reason=f"Custom model family may not be fully supported by vLLM: {llm_family.model_family}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Custom family: {llm_family.model_family}",
+ )
else:
- if llm_family.model_name not in VLLM_SUPPORTED_MODELS:
- return False
- if "generate" not in llm_family.model_ability:
- return False
- return VLLM_INSTALLED
+ if not is_model_supported(
+ llm_family.model_name.lower(),
+ [s.lower() for s in VLLM_SUPPORTED_MODELS],
+ ):
+ return MatchResult.failure(
+ reason=f"Model may not be supported by vLLM: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported model: {llm_family.model_name}",
+ )
+
+ # Check model abilities with flexible logic
+ # vLLM can handle models that have text generation capabilities
+ # Models with 'chat' ability usually also support 'generate'
+ has_text_capability = (
+ "generate" in llm_family.model_ability
+ or "chat" in llm_family.model_ability
+ or "reasoning" in llm_family.model_ability
+ or "tools" in llm_family.model_ability
+ )
+
+ if not has_text_capability:
+ return MatchResult.failure(
+ reason=f"vLLM requires text generation capabilities, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # Additional check: ensure model doesn't have conflicting abilities
+ conflicting_abilities = ["embedding", "rerank"]
+ has_conflicting = any(
+ ability in llm_family.model_ability for ability in conflicting_abilities
+ )
+ if has_conflicting:
+ return MatchResult.failure(
+ reason=f"Model has conflicting abilities for vLLM: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Conflicting abilities detected: {[a for a in llm_family.model_ability if a in conflicting_abilities]}",
+ )
+
+ # All checks passed
+ return MatchResult.success()
@staticmethod
def _convert_request_output_to_completion_chunk(
@@ -1291,40 +1458,140 @@ class VLLMChatModel(VLLMModel, ChatModelMixin):
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if llm_spec.model_format not in [
- "pytorch",
- "gptq",
- "awq",
- "fp8",
- "bnb",
- "ggufv2",
- ]:
- return False
- if llm_spec.model_format == "pytorch":
- if quantization != "none" and quantization is not None:
- return False
- if llm_spec.model_format == "awq":
- if not any(q in quantization for q in ("4", "8")):
- return False
- if llm_spec.model_format == "gptq":
- if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
- if not any(q in quantization for q in ("3", "4", "8")):
- return False
- else:
- if "4" not in quantization:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Use base class validation first
+ base_result = super().match_with_reason(llm_family, llm_spec, quantization)
+ if not base_result.is_match:
+ return base_result
+
+ # Chat-specific format support (includes GGUFv2 for newer vLLM)
+ supported_formats = ["pytorch", "gptq", "awq", "fp8", "bnb", "ggufv2"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"vLLM Chat does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Chat model unsupported format: {llm_spec.model_format}",
+ )
+
+ # GGUFv2 requires newer vLLM version
if llm_spec.model_format == "ggufv2":
if not (VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.8.2")):
- return False
+ return MatchResult.failure(
+ reason="vLLM GGUF support requires version >= 0.8.2",
+ error_type=ErrorType.VERSION_REQUIREMENT,
+ technical_details=f"Current vLLM: {VLLM_VERSION}, required: >=0.8.2",
+ )
+
+ # AWQ chat models support more quantization levels
+ if llm_spec.model_format == "awq":
+ if not any(q in quantization for q in ("4", "8")):
+ return MatchResult.failure(
+ reason=f"vLLM Chat AWQ requires 4 or 8-bit quantization, got: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"Chat AWQ + {quantization} not supported",
+ )
+
+ # Check chat model compatibility with flexible matching
+ def is_chat_model_supported(model_name: str, supported_list: List[str]) -> bool:
+ """Check if chat model is supported with flexible matching."""
+ # Direct match
+ if model_name in supported_list:
+ return True
+
+ # Partial matching for models with variants
+ for supported in supported_list:
+ if model_name.startswith(
+ supported.lower()
+ ) or supported.lower().startswith(model_name):
+ return True
+
+ # Family-based matching for common chat model patterns
+ model_lower = model_name.lower()
+ if any(
+ family in model_lower
+ for family in [
+ "qwen3",
+ "llama",
+ "mistral",
+ "gemma",
+ "baichuan",
+ "deepseek",
+ "glm",
+ "chatglm",
+ ]
+ ):
+ # Check if there's a corresponding supported chat model with same family
+ for supported in supported_list:
+ if any(
+ family in supported.lower()
+ for family in [
+ "qwen3",
+ "llama",
+ "mistral",
+ "gemma",
+ "baichuan",
+ "deepseek",
+ "glm",
+ "chatglm",
+ ]
+ ):
+ return True
+
+ return False
+
if isinstance(llm_family, CustomLLMFamilyV2):
- if llm_family.model_family not in VLLM_SUPPORTED_CHAT_MODELS:
- return False
+ if not llm_family.model_family or not is_chat_model_supported(
+ llm_family.model_family.lower(), VLLM_SUPPORTED_CHAT_MODELS
+ ):
+ return MatchResult.failure(
+ reason=f"Custom chat model may not be fully supported by vLLM: {llm_family.model_family}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Custom chat family: {llm_family.model_family}",
+ )
else:
- if llm_family.model_name not in VLLM_SUPPORTED_CHAT_MODELS:
- return False
- if "chat" not in llm_family.model_ability:
- return False
- return VLLM_INSTALLED
+ if not is_chat_model_supported(
+ llm_family.model_name.lower(),
+ [s.lower() for s in VLLM_SUPPORTED_CHAT_MODELS],
+ ):
+ return MatchResult.failure(
+ reason=f"Chat model may not be supported by vLLM: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported chat model: {llm_family.model_name}",
+ )
+
+ # Check chat ability with flexible logic
+ # vLLM Chat should work with models that have conversation capabilities
+ has_chat_capability = (
+ "chat" in llm_family.model_ability
+ or "generate" in llm_family.model_ability
+ or "reasoning" in llm_family.model_ability
+ )
+
+ if not has_chat_capability:
+ return MatchResult.failure(
+ reason=f"vLLM Chat requires conversation capabilities, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ # Additional check: ensure model is not purely a tool model without conversation
+ if set(llm_family.model_ability) == {"tools"}:
+ return MatchResult.failure(
+ reason=f"Model only has 'tools' capability without conversation support: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+                technical_details="Tool-only model detected",
+ )
+
+ return MatchResult.success()
def _sanitize_chat_config(
self,
@@ -1469,38 +1736,107 @@ class VLLMMultiModel(VLLMModel, ChatModelMixin):
def match_json(
cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
) -> bool:
- if not cls._has_cuda_device() and not cls._has_mlu_device():
- return False
- if not cls._is_linux():
- return False
- if llm_spec.model_format not in ["pytorch", "gptq", "awq", "fp8", "bnb"]:
- return False
+
+ result = cls.match_with_reason(llm_family, llm_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls, llm_family: "LLMFamilyV2", llm_spec: "LLMSpecV1", quantization: str
+ ) -> "MatchResult":
+
+ # Use base class validation first
+ base_result = super().match_with_reason(llm_family, llm_spec, quantization)
+ if not base_result.is_match:
+ return base_result
+
+ # Vision models have the same format restrictions as base VLLM
+ supported_formats = ["pytorch", "gptq", "awq", "fp8", "bnb"]
+ if llm_spec.model_format not in supported_formats:
+ return MatchResult.failure(
+ reason=f"vLLM Vision does not support model format: {llm_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Vision model unsupported format: {llm_spec.model_format}",
+ )
+
+ # Vision models typically work with specific quantization settings
if llm_spec.model_format == "pytorch":
if quantization != "none" and quantization is not None:
- return False
+ return MatchResult.failure(
+ reason=f"vLLM Vision pytorch format does not support quantization: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"Vision pytorch + {quantization} not supported",
+ )
+
+ # AWQ vision models support more quantization levels than base
if llm_spec.model_format == "awq":
if not any(q in quantization for q in ("4", "8")):
- return False
- if llm_spec.model_format == "gptq":
- if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.3.3"):
- if not any(q in quantization for q in ("3", "4", "8")):
- return False
- else:
- if "4" not in quantization:
- return False
+ return MatchResult.failure(
+ reason=f"vLLM Vision AWQ requires 4 or 8-bit quantization, got: {quantization}",
+ error_type=ErrorType.QUANTIZATION,
+ technical_details=f"Vision AWQ + {quantization} not supported",
+ )
+
+ # Check vision model compatibility with flexible matching
+ def is_vision_model_supported(
+ model_name: str, supported_list: List[str]
+ ) -> bool:
+ """Check if vision model is supported with flexible matching."""
+ # Direct match (case-insensitive against the supported list)
+ if model_name in (s.lower() for s in supported_list):
+ return True
+
+ # Partial matching for models with variants
+ for supported in supported_list:
+ if model_name.startswith(
+ supported.lower()
+ ) or supported.lower().startswith(model_name):
+ return True
+
+ # Family-based matching for common vision model patterns
+ model_lower = model_name.lower()
+ if any(
+ family in model_lower
+ for family in ["llama", "qwen", "internvl", "glm", "phi"]
+ ):
+ # Check whether a supported vision model from the same family exists
+ for supported in supported_list:
+ if any(
+ family in supported.lower()
+ for family in ["llama", "qwen", "internvl", "glm", "phi"]
+ ):
+ return True
+
+ return False
+
if isinstance(llm_family, CustomLLMFamilyV2):
- if llm_family.model_family not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
- return False
+ if not llm_family.model_family or not is_vision_model_supported(
+ llm_family.model_family.lower(), VLLM_SUPPORTED_MULTI_MODEL_LIST
+ ):
+ return MatchResult.failure(
+ reason=f"Custom vision model may not be fully supported by vLLM: {llm_family.model_family}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Custom vision family: {llm_family.model_family}",
+ )
else:
- if llm_family.model_name not in VLLM_SUPPORTED_MULTI_MODEL_LIST:
- return False
- if (
- "vision" not in llm_family.model_ability
- and "audio" not in llm_family.model_ability
- and "omni" not in llm_family.model_ability
- ):
- return False
- return VLLM_INSTALLED
+ if not llm_family.model_name or not is_vision_model_supported(
+ llm_family.model_name.lower(), VLLM_SUPPORTED_MULTI_MODEL_LIST
+ ):
+ return MatchResult.failure(
+ reason=f"Vision model may not be supported by vLLM: {llm_family.model_name}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported vision model: {llm_family.model_name}",
+ )
+
+ # Check multimodal ability (vision, audio, or omni)
+ if not any(
+ ability in llm_family.model_ability
+ for ability in ("vision", "audio", "omni")
+ ):
+ return MatchResult.failure(
+ reason=f"vLLM multimodal engine requires 'vision', 'audio' or 'omni' ability, model has: {llm_family.model_ability}",
+ error_type=ErrorType.ABILITY_MISMATCH,
+ technical_details=f"Model abilities: {llm_family.model_ability}",
+ )
+
+ return MatchResult.success()
def _sanitize_model_config(
self, model_config: Optional[VLLMModelConfig]
diff --git a/xinference/model/rerank/core.py b/xinference/model/rerank/core.py
index ae27e7e85e..2d3edde1c2 100644
--- a/xinference/model/rerank/core.py
+++ b/xinference/model/rerank/core.py
@@ -21,6 +21,7 @@
from ...types import Rerank
from ..core import VirtualEnvSettings
from ..utils import ModelInstanceInfoMixin
+from .match_result import MatchResult
from .rerank_family import check_engine_by_model_name_and_engine, match_rerank
logger = logging.getLogger(__name__)
@@ -131,6 +132,46 @@ def match_json(
) -> bool:
pass
+ @classmethod
+ def match_with_reason(
+ cls,
+ model_family: RerankModelFamilyV2,
+ model_spec: RerankSpecV1,
+ quantization: str,
+ ) -> "MatchResult":
+ """
+ Check if the engine can handle the given rerank model with detailed error information.
+
+ This method provides detailed failure reasons and suggestions when an engine
+ cannot handle a specific model configuration. The default implementation
+ falls back to the boolean match_json method for backward compatibility.
+
+ Args:
+ model_family: The rerank model family information
+ model_spec: The model specification
+ quantization: The quantization method
+
+ Returns:
+ MatchResult: Detailed match result with reasons and suggestions
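+
+ Example (illustrative; engine class and variables are placeholders)::
+
+ result = SomeRerankEngine.match_with_reason(family, spec, "none")
+ if not result.is_match:
+ logger.debug("engine rejected: %s", result.reason)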
+ """
+ from .match_result import ErrorType, MatchResult
+
+ # Default implementation for backward compatibility
+ if cls.match_json(model_family, model_spec, quantization):
+ return MatchResult.success()
+ else:
+ # Get basic reason based on common failure patterns
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason=f"Required library for {cls.__name__} is not available",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ )
+ else:
+ return MatchResult.failure(
+ reason=f"Rerank model configuration is not compatible with {cls.__name__}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ )
+
@classmethod
def match(
cls,
diff --git a/xinference/model/rerank/match_result.py b/xinference/model/rerank/match_result.py
new file mode 100644
index 0000000000..1cd278aa5d
--- /dev/null
+++ b/xinference/model/rerank/match_result.py
@@ -0,0 +1,77 @@
+"""
+Error handling result structures for rerank model engine matching.
+
+This module provides structured error handling for engine matching operations,
+allowing engines to provide detailed failure reasons and suggestions.
+"""
+
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+
+@dataclass
+class MatchResult:
+ """
+ Result of engine matching operation with detailed error information.
+
+ This class provides structured information about whether an engine can handle
+ a specific model configuration, and if not, why and what alternatives exist.
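+
+ Example (illustrative usage)::
+
+ result = MatchResult.failure(
+ reason="pytorch format required",
+ error_type=ErrorType.MODEL_FORMAT,
+ )
+ result.to_dict()
+ # -> {"is_match": False, "reason": "pytorch format required",
+ # "error_type": "model_format"}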
+ """
+
+ is_match: bool
+ reason: Optional[str] = None
+ error_type: Optional[str] = None
+ technical_details: Optional[str] = None
+
+ @classmethod
+ def success(cls) -> "MatchResult":
+ """Create a successful match result."""
+ return cls(is_match=True)
+
+ @classmethod
+ def failure(
+ cls,
+ reason: str,
+ error_type: Optional[str] = None,
+ technical_details: Optional[str] = None,
+ ) -> "MatchResult":
+ """Create a failed match result with optional details."""
+ return cls(
+ is_match=False,
+ reason=reason,
+ error_type=error_type,
+ technical_details=technical_details,
+ )
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert to dictionary for API responses."""
+ result: Dict[str, Any] = {"is_match": self.is_match}
+ if not self.is_match:
+ if self.reason:
+ result["reason"] = self.reason
+ if self.error_type:
+ result["error_type"] = self.error_type
+ if self.technical_details:
+ result["technical_details"] = self.technical_details
+ return result
+
+ def to_error_string(self) -> str:
+ """Convert to error string for backward compatibility."""
+ if self.is_match:
+ return "Available"
+ error_msg = self.reason or "Unknown error"
+ return error_msg
+
+
+# Error type constants for better categorization
+class ErrorType:
+ HARDWARE_REQUIREMENT = "hardware_requirement"
+ OS_REQUIREMENT = "os_requirement"
+ MODEL_FORMAT = "model_format"
+ DEPENDENCY_MISSING = "dependency_missing"
+ MODEL_COMPATIBILITY = "model_compatibility"
+ DIMENSION_MISMATCH = "dimension_mismatch"
+ VERSION_REQUIREMENT = "version_requirement"
+ CONFIGURATION_ERROR = "configuration_error"
+ ENGINE_UNAVAILABLE = "engine_unavailable"
+ RERANK_SPECIFIC = "rerank_specific"
diff --git a/xinference/model/rerank/sentence_transformers/core.py b/xinference/model/rerank/sentence_transformers/core.py
index ee57b06602..41e7b6da7c 100644
--- a/xinference/model/rerank/sentence_transformers/core.py
+++ b/xinference/model/rerank/sentence_transformers/core.py
@@ -31,6 +31,7 @@
RerankModelFamilyV2,
RerankSpecV1,
)
+from ..match_result import MatchResult
from ..utils import preprocess_sentence
logger = logging.getLogger(__name__)
@@ -187,7 +188,7 @@ def compute_logits(inputs, **kwargs):
from FlagEmbedding import LayerWiseFlagLLMReranker as FlagReranker
else:
raise RuntimeError(
- f"Unsupported Rank model type: {self.model_family.type}"
+ f"Unsupported Rerank model type: {self.model_family.type}"
)
except ImportError:
error_message = "Failed to import module 'FlagEmbedding'"
@@ -333,5 +334,74 @@ def match_json(
model_spec: RerankSpecV1,
quantization: str,
) -> bool:
- # As default embedding engine, sentence-transformer support all models
- return model_spec.model_format in ["pytorch"]
+
+ result = cls.match_with_reason(model_family, model_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls,
+ model_family: RerankModelFamilyV2,
+ model_spec: RerankSpecV1,
+ quantization: str,
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="Sentence Transformers library is not installed for reranking",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="sentence_transformers package not found in Python environment",
+ )
+
+ # Check model format compatibility
+ if model_spec.model_format not in ["pytorch"]:
+ return MatchResult.failure(
+ reason=f"Sentence Transformers reranking only supports pytorch format, got: {model_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {model_spec.model_format}, required: pytorch",
+ )
+
+ # Check rerank-specific requirements
+ if not hasattr(model_family, "model_name"):
+ return MatchResult.failure(
+ reason="Rerank model family requires model name specification",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details="Missing model_name in rerank model family",
+ )
+
+ # Check model type compatibility
+ if model_family.type and model_family.type not in [
+ "rerank",
+ "unknown",
+ "cross-encoder",
+ "normal",
+ "LLM-based",
+ "LLM-based layerwise",
+ ]:
+ return MatchResult.failure(
+ reason=f"Model type '{model_family.type}' may not be compatible with reranking engines",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Model type: {model_family.type}",
+ )
+
+ # Check max tokens limit for reranking performance
+ max_tokens = model_family.max_tokens
+ if max_tokens and max_tokens > 8192: # High token limits for reranking
+ return MatchResult.failure(
+ reason=f"High max_tokens limit for reranking model: {max_tokens}",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details=f"High max_tokens for reranking: {max_tokens}",
+ )
+
+ # Check language compatibility
+ if not model_family.language or len(model_family.language) == 0:
+ return MatchResult.failure(
+ reason="Rerank model language information is missing",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details="Missing language information in rerank model",
+ )
+
+ return MatchResult.success()
diff --git a/xinference/model/rerank/vllm/core.py b/xinference/model/rerank/vllm/core.py
index eac173b40c..c2ee75cfef 100644
--- a/xinference/model/rerank/vllm/core.py
+++ b/xinference/model/rerank/vllm/core.py
@@ -5,6 +5,7 @@
from ....types import Document, DocumentObj, Meta, Rerank, RerankTokens
from ...utils import cache_clean
from ..core import RerankModel, RerankModelFamilyV2, RerankSpecV1
+from ..match_result import MatchResult
SUPPORTED_MODELS_PREFIXES = ["bge", "gte", "text2vec", "m3e", "gte", "Qwen3"]
@@ -149,8 +150,70 @@ def match_json(
model_spec: RerankSpecV1,
quantization: str,
) -> bool:
- if model_spec.model_format in ["pytorch"]:
- prefix = model_family.model_name.split("-", 1)[0]
- if prefix in SUPPORTED_MODELS_PREFIXES:
- return True
- return False
+
+ result = cls.match_with_reason(model_family, model_spec, quantization)
+ return result.is_match
+
+ @classmethod
+ def match_with_reason(
+ cls,
+ model_family: RerankModelFamilyV2,
+ model_spec: RerankSpecV1,
+ quantization: str,
+ ) -> "MatchResult":
+ from ..match_result import ErrorType, MatchResult
+
+ # Check library availability
+ if not cls.check_lib():
+ return MatchResult.failure(
+ reason="vLLM library is not installed for reranking",
+ error_type=ErrorType.DEPENDENCY_MISSING,
+ technical_details="vllm package not found in Python environment",
+ )
+
+ # Check model format compatibility
+ if model_spec.model_format not in ["pytorch"]:
+ return MatchResult.failure(
+ reason=f"vLLM reranking only supports pytorch format, got: {model_spec.model_format}",
+ error_type=ErrorType.MODEL_FORMAT,
+ technical_details=f"Unsupported format: {model_spec.model_format}, required: pytorch",
+ )
+
+ # Check model name prefix matching
+ if model_spec.model_format == "pytorch":
+ try:
+ prefix = model_family.model_name.split("-", 1)[0].lower()
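+ # e.g. a model name such as "bge-reranker-base" yields the prefix "bge"
+ # (illustrative name)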
+ # Support both prefix matching and special cases
+ if prefix not in [p.lower() for p in SUPPORTED_MODELS_PREFIXES]:
+ # Special handling for Qwen3 models
+ if "qwen3" not in model_family.model_name.lower():
+ return MatchResult.failure(
+ reason=f"Model family prefix not supported by vLLM reranking: {prefix}",
+ error_type=ErrorType.MODEL_COMPATIBILITY,
+ technical_details=f"Unsupported prefix: {prefix}",
+ )
+ except (IndexError, AttributeError):
+ return MatchResult.failure(
+ reason="Unable to parse model family name for vLLM compatibility check",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details=f"Model name parsing failed: {model_family.model_name}",
+ )
+
+ # Check rerank-specific requirements
+ if not hasattr(model_family, "model_name"):
+ return MatchResult.failure(
+ reason="Rerank model family requires model name specification for vLLM",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details="Missing model_name in vLLM rerank model family",
+ )
+
+ # Check max tokens limit for vLLM reranking performance
+ max_tokens = model_family.max_tokens
+ if max_tokens and max_tokens > 4096: # vLLM has stricter limits
+ return MatchResult.failure(
+ reason=f"High max_tokens limit for vLLM reranking model: {max_tokens}",
+ error_type=ErrorType.CONFIGURATION_ERROR,
+ technical_details=f"High max_tokens for vLLM reranking: {max_tokens}",
+ )
+
+ return MatchResult.success()
diff --git a/xinference/model/utils.py b/xinference/model/utils.py
index ea5dec74d5..ea7adb309e 100644
--- a/xinference/model/utils.py
+++ b/xinference/model/utils.py
@@ -472,44 +472,454 @@ def __exit__(self, exc_type, exc_val, exc_tb):
def get_engine_params_by_name(
model_type: Optional[str], model_name: str
-) -> Optional[Dict[str, List[dict]]]:
+) -> Optional[Dict[str, Union[List[Dict[str, Any]], str]]]:
+ engine_params: Dict[str, Union[List[Dict[str, Any]], str]] = {}
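+ # Resulting shape (illustrative values): available engines map to their
+ # parameter lists, e.g. {"vLLM": [{"model_format": "pytorch", ...}]},
+ # while unavailable engines map to a plain error string, e.g.
+ # {"SGLang": "Engine SGLang library is not installed: ..."}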
+
if model_type == "LLM":
- from .llm.llm_family import LLM_ENGINES
+ from .llm.llm_family import LLM_ENGINES, SUPPORTED_ENGINES
if model_name not in LLM_ENGINES:
return None
- # filter llm_class
- engine_params = deepcopy(LLM_ENGINES[model_name])
- for engine, params in engine_params.items():
+ # Get all supported engines, not just currently available ones
+ all_supported_engines = list(SUPPORTED_ENGINES.keys())
+
+ # First add currently available engine parameters
+ available_engines = deepcopy(LLM_ENGINES[model_name])
+ for engine, params in available_engines.items():
for param in params:
- del param["llm_class"]
+ # Drop any leftover "available" flag; engines listed here are available by definition
+ if "available" in param:
+ del param["available"]
+ engine_params[engine] = params
+
+ # Check unavailable engines with detailed error information
+ for engine_name in all_supported_engines:
+ if engine_name not in engine_params: # Engine not in available list
+ try:
+ llm_engine_classes = SUPPORTED_ENGINES[engine_name]
+
+ # Try to get detailed error information from engine's match_with_reason
+ detailed_error = None
+
+ # We need a sample model to test against; use the first available spec
+ if model_name in LLM_ENGINES and LLM_ENGINES[model_name]:
+ # Try to get model family for testing
+ try:
+ from .llm.llm_family import match_llm
+
+ llm_family = match_llm(model_name, None, None, None, None)
+ if llm_family and llm_family.model_specs:
+ llm_spec = llm_family.model_specs[0]
+ quantization = llm_spec.quantization or "none"
+
+ # Test each engine class for detailed error info
+ for engine_class in llm_engine_classes:
+ try:
+ if not hasattr(engine_class, "match_with_reason"):
+ continue
+
+ result = engine_class.match_with_reason(
+ llm_family, llm_spec, quantization
+ )
+ if not result.is_match:
+ detailed_error = {
+ "error": result.reason,
+ "error_type": result.error_type,
+ "technical_details": result.technical_details,
+ }
+ break
+ except Exception as e:
+ # Fall back to next engine class with clear error logging
+ logger.warning(
+ f"Engine class {engine_class.__name__} match_with_reason failed: {e}"
+ )
+ # Falling through to the next engine class is the expected fallback behavior
+ continue
+ except Exception as e:
+ # If we can't get model family, fail with clear error
+ logger.error(
+ f"Failed to get model family for {model_name} (LLM): {e}"
+ )
+ raise RuntimeError(
+ f"Unable to process LLM model {model_name}: {e}"
+ )
+
+ if detailed_error:
+ # Return only the error message without engine_name prefix (key already contains engine name)
+ engine_params[engine_name] = (
+ detailed_error.get("error") or "Unknown error"
+ )
+ else:
+ # Fallback to basic error checking for backward compatibility
+ for engine_class in llm_engine_classes:
+ try:
+ if hasattr(engine_class, "check_lib"):
+ lib_available: bool = engine_class.check_lib() # type: ignore[assignment]
+ if not lib_available:
+ break
+ else:
+ # If no check_lib method, try to use engine's match method for compatibility check
+ # This provides more detailed and accurate error information
+ try:
+ # Create a minimal test spec if we don't have real model specs
+ from .llm.llm_family import (
+ LLMFamilyV2,
+ PytorchLLMSpecV2,
+ )
+
+ # Create a minimal test case
+ test_family = LLMFamilyV2(
+ model_name="test",
+ model_family="test",
+ model_specs=[
+ PytorchLLMSpecV2(
+ model_format="pytorch",
+ quantization="none",
+ )
+ ],
+ )
+ test_spec = test_family.model_specs[0]
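+ # Note: this synthetic "test" family is only an availability probe
+ # for the engine; it does not correspond to any real model.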
+
+ # Use the engine's match method if available
+ if hasattr(engine_class, "match_with_reason"):
+ result = engine_class.match_with_reason(
+ test_family, test_spec, "none"
+ )
+ if result.is_match:
+ break # Engine is available
+ else:
+ # Return only the error message without engine_name prefix (key already contains engine name)
+ engine_params[engine_name] = (
+ result.reason
+ or "Unknown compatibility error"
+ )
+ break
+ elif hasattr(engine_class, "match_json"):
+ # Fallback to simple match method - use test data
+ if engine_class.match_json(
+ test_family, test_spec, "none"
+ ):
+ break
+ else:
+ break
+ else:
+ # Final fallback: generic import check
+ raise ImportError(
+ "No compatibility check method available"
+ )
+
+ except ImportError as e:
+ engine_params[engine_name] = (
+ f"Engine {engine_name} library is not installed: {str(e)}"
+ )
+ break
+ except Exception as e:
+ engine_params[engine_name] = (
+ f"Engine {engine_name} is not available: {str(e)}"
+ )
+ break
+ except ImportError as e:
+ engine_params[engine_name] = (
+ f"Engine {engine_name} library is not installed: {str(e)}"
+ )
+ break
+ except Exception as e:
+ engine_params[engine_name] = (
+ f"Engine {engine_name} is not available: {str(e)}"
+ )
+ break
+
+ # Only set default error if not already set by one of the exception handlers
+ if engine_name not in engine_params:
+ engine_params[engine_name] = (
+ f"Engine {engine_name} is not compatible with current model or environment"
+ )
+
+ except Exception as e:
+ # If exception occurs during checking, return simple string format
+ engine_params[engine_name] = (
+ f"Error checking engine {engine_name}: {str(e)}"
+ )
+
+ # Filter out llm_class field
+ for engine in engine_params.keys():
+ if isinstance(
+ engine_params[engine], list
+ ): # Only process parameter lists of available engines
+ for param in engine_params[engine]: # type: ignore
+ if isinstance(param, dict) and "llm_class" in param:
+ del param["llm_class"]
return engine_params
elif model_type == "embedding":
- from .embedding.embed_family import EMBEDDING_ENGINES
+ from .embedding.embed_family import (
+ EMBEDDING_ENGINES,
+ )
+ from .embedding.embed_family import (
+ SUPPORTED_ENGINES as EMBEDDING_SUPPORTED_ENGINES,
+ )
if model_name not in EMBEDDING_ENGINES:
return None
- # filter embedding_class
- engine_params = deepcopy(EMBEDDING_ENGINES[model_name])
- for engine, params in engine_params.items():
+ # Get all supported engines, not just currently available ones
+ all_supported_engines = list(EMBEDDING_SUPPORTED_ENGINES.keys())
+
+ # First add currently available engine parameters
+ available_engines = deepcopy(EMBEDDING_ENGINES[model_name])
+ for engine, params in available_engines.items():
for param in params:
- del param["embedding_class"]
+ # Drop any leftover "available" flag; engines listed here are available by definition
+ if "available" in param:
+ del param["available"]
+ engine_params[engine] = params
+
+ # Check unavailable engines
+ for engine_name in all_supported_engines:
+ if engine_name not in engine_params: # Engine not in available list
+ try:
+ embedding_engine_classes = EMBEDDING_SUPPORTED_ENGINES[engine_name]
+ embedding_error_details: Optional[Dict[str, str]] = None
+
+ # Try to find specific error reasons
+ for embedding_engine_class in embedding_engine_classes:
+ try:
+ if hasattr(embedding_engine_class, "check_lib"):
+ embedding_lib_available: bool = embedding_engine_class.check_lib() # type: ignore[assignment]
+ if not embedding_lib_available:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} library is not available",
+ "error_type": "dependency_missing",
+ "technical_details": f"The required library for {engine_name} engine is not installed or not accessible",
+ }
+ break
+ else:
+ # If no check_lib method, try to use engine's match method for compatibility check
+ try:
+ from .embedding.core import (
+ EmbeddingModelFamilyV2,
+ TransformersEmbeddingSpecV1,
+ )
+
+ # Use the engine's match method if available
+ if hasattr(embedding_engine_class, "match"):
+ # Create a minimal test case
+ test_family = EmbeddingModelFamilyV2(
+ model_name="test",
+ model_specs=[
+ TransformersEmbeddingSpecV1(
+ model_format="pytorch",
+ quantization="none",
+ )
+ ],
+ )
+ test_spec = test_family.model_specs[0]
+
+ # Use the engine's match method to check compatibility
+ if embedding_engine_class.match(
+ test_family, test_spec, "none"
+ ):
+ break # Engine is available
+ else:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} is not compatible with current model or environment",
+ "error_type": "model_compatibility",
+ "technical_details": f"The {engine_name} engine cannot handle the current embedding model configuration",
+ }
+ break
+ else:
+ # Final fallback: generic import check
+ raise ImportError(
+ "No compatibility check method available"
+ )
+
+ except ImportError as e:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} library is not installed: {str(e)}",
+ "error_type": "dependency_missing",
+ "technical_details": f"Missing required dependency for {engine_name} engine: {str(e)}",
+ }
+ except Exception as e:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} is not available: {str(e)}",
+ "error_type": "configuration_error",
+ "technical_details": f"Configuration or environment issue preventing {engine_name} engine from working: {str(e)}",
+ }
+ break
+ except ImportError as e:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} library is not installed: {str(e)}",
+ "error_type": "dependency_missing",
+ "technical_details": f"Missing required dependency for {engine_name} engine: {str(e)}",
+ }
+ except Exception as e:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} is not available: {str(e)}",
+ "error_type": "configuration_error",
+ "technical_details": f"Configuration or environment issue preventing {engine_name} engine from working: {str(e)}",
+ }
+
+ if embedding_error_details is None:
+ embedding_error_details = {
+ "error": f"Engine {engine_name} is not compatible with current model or environment",
+ "error_type": "model_compatibility",
+ "technical_details": f"The {engine_name} engine cannot handle the current embedding model configuration",
+ }
+
+ # For unavailable engines, return simple string format
+ engine_params[engine_name] = (
+ embedding_error_details.get("error") or "Unknown error"
+ )
+
+ except Exception as e:
+ # If exception occurs during checking, return simple string format
+ engine_params[engine_name] = (
+ f"Error checking engine {engine_name}: {str(e)}"
+ )
+
+ # Filter out embedding_class field
+ for engine in engine_params.keys():
+ if isinstance(
+ engine_params[engine], list
+ ): # Only process parameter lists of available engines
+ for param in engine_params[engine]: # type: ignore
+ if isinstance(param, dict) and "embedding_class" in param:
+ del param["embedding_class"]
return engine_params
elif model_type == "rerank":
- from .rerank.rerank_family import RERANK_ENGINES
+ from .rerank.rerank_family import (
+ RERANK_ENGINES,
+ )
+ from .rerank.rerank_family import SUPPORTED_ENGINES as RERANK_SUPPORTED_ENGINES
if model_name not in RERANK_ENGINES:
return None
- # filter rerank_class
- engine_params = deepcopy(RERANK_ENGINES[model_name])
- for engine, params in engine_params.items():
+ # Get all supported engines, not just currently available ones
+ all_supported_engines = list(RERANK_SUPPORTED_ENGINES.keys())
+
+ # First add currently available engine parameters
+ available_engines = deepcopy(RERANK_ENGINES[model_name])
+ for engine, params in available_engines.items():
for param in params:
- del param["rerank_class"]
+ # Drop any leftover "available" flag; engines listed here are available by definition
+ if "available" in param:
+ del param["available"]
+ engine_params[engine] = params
+
+ # Check unavailable engines
+ for engine_name in all_supported_engines:
+ if engine_name not in engine_params: # Engine not in available list
+ try:
+ rerank_engine_classes = RERANK_SUPPORTED_ENGINES[engine_name]
+ rerank_error_details: Optional[Dict[str, str]] = None
+
+ # Try to find specific error reasons
+ for rerank_engine_class in rerank_engine_classes:
+ try:
+ if hasattr(rerank_engine_class, "check_lib"):
+ rerank_lib_available: bool = rerank_engine_class.check_lib() # type: ignore[assignment]
+ if not rerank_lib_available:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} library is not available",
+ "error_type": "dependency_missing",
+ "technical_details": f"The required library for {engine_name} engine is not installed or not accessible",
+ }
+ break
+ else:
+ # If no check_lib method, try to use engine's match method for compatibility check
+ try:
+ from .rerank.core import (
+ RerankModelFamilyV2,
+ RerankSpecV1,
+ )
+
+ # Use the engine's match method if available
+ if hasattr(rerank_engine_class, "match"):
+ # Create a minimal test case
+ test_family = RerankModelFamilyV2(
+ model_name="test",
+ model_specs=[
+ RerankSpecV1(
+ model_format="pytorch",
+ quantization="none",
+ )
+ ],
+ )
+ test_spec = test_family.model_specs[0]
+
+ # Use the engine's match method to check compatibility
+ if rerank_engine_class.match(
+ test_family, test_spec, "none"
+ ):
+ break # Engine is available
+ else:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} is not compatible with current model or environment",
+ "error_type": "model_compatibility",
+ "technical_details": f"The {engine_name} engine cannot handle the current rerank model configuration",
+ }
+ break
+ else:
+ # Final fallback: generic import check
+ raise ImportError(
+ "No compatibility check method available"
+ )
+
+ except ImportError as e:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} library is not installed: {str(e)}",
+ "error_type": "dependency_missing",
+ "technical_details": f"Missing required dependency for {engine_name} engine: {str(e)}",
+ }
+ except Exception as e:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} is not available: {str(e)}",
+ "error_type": "configuration_error",
+ "technical_details": f"Configuration or environment issue preventing {engine_name} engine from working: {str(e)}",
+ }
+ break
+ except ImportError as e:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} library is not installed: {str(e)}",
+ "error_type": "dependency_missing",
+ "technical_details": f"Missing required dependency for {engine_name} engine: {str(e)}",
+ }
+ except Exception as e:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} is not available: {str(e)}",
+ "error_type": "configuration_error",
+ "technical_details": f"Configuration or environment issue preventing {engine_name} engine from working: {str(e)}",
+ }
+
+ if rerank_error_details is None:
+ rerank_error_details = {
+ "error": f"Engine {engine_name} is not compatible with current model or environment",
+ "error_type": "model_compatibility",
+ "technical_details": f"The {engine_name} engine cannot handle the current rerank model configuration",
+ }
+
+ # For unavailable engines, return simple string format
+ engine_params[engine_name] = (
+ rerank_error_details.get("error") or "Unknown error"
+ )
+
+ except Exception as e:
+ # If exception occurs during checking, return simple string format
+ engine_params[engine_name] = (
+ f"Error checking engine {engine_name}: {str(e)}"
+ )
+
+ # Filter out rerank_class field
+ for engine in engine_params.keys():
+ if isinstance(
+ engine_params[engine], list
+ ): # Only process parameter lists of available engines
+ for param in engine_params[engine]: # type: ignore
+ if isinstance(param, dict) and "rerank_class" in param:
+ del param["rerank_class"]
return engine_params
else:
diff --git a/xinference/ui/web/ui/src/scenes/launch_model/components/launchModelDrawer.js b/xinference/ui/web/ui/src/scenes/launch_model/components/launchModelDrawer.js
index 1169f06269..ccff202111 100644
--- a/xinference/ui/web/ui/src/scenes/launch_model/components/launchModelDrawer.js
+++ b/xinference/ui/web/ui/src/scenes/launch_model/components/launchModelDrawer.js
@@ -13,15 +13,11 @@ import {
CircularProgress,
Collapse,
Drawer,
- FormControl,
FormControlLabel,
- InputLabel,
ListItemButton,
ListItemText,
- MenuItem,
Radio,
RadioGroup,
- Select,
Switch,
TextField,
Tooltip,
@@ -39,45 +35,11 @@ import DynamicFieldList from './dynamicFieldList'
import getModelFormConfig from './modelFormConfig'
import PasteDialog from './pasteDialog'
import Progress from './progress'
+import SelectField from './selectField'
const enginesWithNWorker = ['SGLang', 'vLLM', 'MLX']
const modelEngineType = ['LLM', 'embedding', 'rerank']
-const SelectField = ({
- label,
- labelId,
- name,
- value,
- onChange,
- options = [],
- disabled = false,
- required = false,
-}) => (
-
- {label}
-
-
-)
-
const LaunchModelDrawer = ({
modelData,
modelType,
@@ -549,19 +511,32 @@ const LaunchModelDrawer = ({
const engineItems = useMemo(() => {
return engineOptions.map((engine) => {
- const modelFormats = Array.from(
- new Set(enginesObj[engine]?.map((item) => item.model_format))
- )
+ const engineData = enginesObj[engine]
+ let modelFormats = []
+ let label = engine
+ let disabled = false
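+ // enginesObj values are either parameter lists (usable engines) or plain
+ // error strings (unavailable engines), e.g. (illustrative):
+ // { vLLM: [{ model_format: 'pytorch' }], SGLang: 'Engine SGLang library is not installed' }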
+
+ if (Array.isArray(engineData)) {
+ modelFormats = Array.from(
+ new Set(engineData.map((item) => item.model_format))
+ )
- const relevantSpecs = modelData.model_specs.filter((spec) =>
- modelFormats.includes(spec.model_format)
- )
+ const relevantSpecs = modelData.model_specs.filter((spec) =>
+ modelFormats.includes(spec.model_format)
+ )
+
+ const cached = relevantSpecs.some((spec) => isCached(spec))
- const cached = relevantSpecs.some((spec) => isCached(spec))
+ label = cached ? `${engine} ${t('launchModel.cached')}` : engine
+ } else if (typeof engineData === 'string') {
+ label = `${engine} (${engineData})`
+ disabled = true
+ }
return {
value: engine,
- label: cached ? `${engine} ${t('launchModel.cached')}` : engine,
+ label,
+ disabled,
}
})
}, [engineOptions, enginesObj, modelData])
diff --git a/xinference/ui/web/ui/src/scenes/launch_model/components/selectField.js b/xinference/ui/web/ui/src/scenes/launch_model/components/selectField.js
new file mode 100644
index 0000000000..7e9a4af8ce
--- /dev/null
+++ b/xinference/ui/web/ui/src/scenes/launch_model/components/selectField.js
@@ -0,0 +1,42 @@
+import { FormControl, InputLabel, MenuItem, Select } from '@mui/material'
+
+const SelectField = ({
+ label,
+ labelId,
+ name,
+ value,
+ onChange,
+ options = [],
+ disabled = false,
+ required = false,
+}) => (
+
+ {label}
+
+
+)
+
+export default SelectField
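+
+// Example usage (illustrative values):
+// <SelectField
+//   labelId="engine-label"
+//   label="Engine"
+//   name="modelEngine"
+//   value={selectedEngine}
+//   onChange={handleEngineChange}
+//   options={[{ value: 'vLLM', label: 'vLLM (cached)', disabled: false }]}
+// />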