24 | 24 | logger = logging.getLogger(__name__) |
25 | 25 |
26 | 26 |
27 | | -def convert_embedding_model_format(model_json: Dict[str, Any]) -> Dict[str, Any]: |
28 | | - """ |
29 | | - Convert embedding model hub JSON format to Xinference expected format. |
30 | | - """ |
31 | | - logger.debug( |
32 | | - f"convert_embedding_model_format called for: {model_json.get('model_name', 'Unknown')}" |
33 | | - ) |
34 | | - |
35 | | - # Ensure required fields for embedding models |
36 | | - converted = model_json.copy() |
37 | | - |
38 | | - # Add missing required fields based on EmbeddingModelFamilyV2 requirements |
39 | | - if "version" not in converted: |
40 | | - converted["version"] = 2 |
41 | | - if "model_lang" not in converted: |
42 | | - converted["model_lang"] = ["en"] |
43 | | - |
44 | | - # Handle model_specs |
45 | | - if "model_specs" not in converted or not converted["model_specs"]: |
46 | | - converted["model_specs"] = [ |
47 | | - { |
48 | | - "model_format": "pytorch", |
49 | | - "model_size_in_billions": None, |
50 | | - "quantization": "none", |
51 | | - "model_hub": "huggingface", |
52 | | - } |
53 | | - ] |
54 | | - else: |
55 | | - # Ensure each spec has required fields |
56 | | - for spec in converted["model_specs"]: |
57 | | - if "quantization" not in spec: |
58 | | - spec["quantization"] = "none" |
59 | | - if "model_hub" not in spec: |
60 | | - spec["model_hub"] = "huggingface" |
61 | | - |
62 | | - return converted |
63 | | - |
64 | | - |
65 | 27 | from .core import ( |
66 | 28 | EMBEDDING_MODEL_DESCRIPTIONS, |
67 | 29 | EmbeddingModelFamilyV2, |
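The hunk above removes the ad-hoc convert_embedding_model_format normalizer; the hunk below replaces it with a converter that expands hub-style specs via flatten_quantizations from ..utils. As a rough illustration of what that flattening step is for, here is a minimal standalone sketch; the model_src layout, the key names, and the flatten_quantizations_sketch helper are assumptions for illustration only, not the actual xinference implementation.

# Minimal sketch (not the real ..utils helper): expands one hub-style spec
# into one flat spec per advertised quantization, which is roughly the job
# flatten_quantizations performs in the hunk below.
from typing import Any, Dict, List

def flatten_quantizations_sketch(spec: Dict[str, Any]) -> List[Dict[str, Any]]:
    # Hypothetical hub layout: a "model_src" block whose hubs each list quantizations.
    flattened: List[Dict[str, Any]] = []
    for hub, src in spec.get("model_src", {}).items():
        for quant in src.get("quantizations", ["none"]):
            flat = {k: v for k, v in spec.items() if k != "model_src"}
            flat["model_hub"] = hub
            flat["quantization"] = quant
            flattened.append(flat)
    return flattened

hub_spec = {
    "model_format": "pytorch",
    "model_src": {
        "huggingface": {"model_id": "BAAI/bge-m3", "quantizations": ["none", "4-bit"]}
    },
}
print(flatten_quantizations_sketch(hub_spec))
# Prints two flat specs, one per quantization, each tagged with its model_hub.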
@@ -108,17 +70,90 @@ def register_custom_model(): |
108 | 70 |
109 | 71 |
110 | 72 | def register_builtin_model(): |
111 | | - from ..utils import load_complete_builtin_models |
| 73 | + # Use unified loading function with flatten_quantizations for embedding models |
| 74 | + from ..utils import flatten_quantizations, load_complete_builtin_models |
112 | 75 | from .embed_family import BUILTIN_EMBEDDING_MODELS |
113 | 76 |
114 | | - # Use unified loading function |
| 77 | + def convert_embedding_with_quantizations(model_json): |
| 78 | + if "model_specs" not in model_json: |
| 79 | + return model_json |
| 80 | + |
| 81 | + # Process each model_spec with flatten_quantizations (like builtin embedding loading) |
| 82 | + result = model_json.copy() |
| 83 | + flattened_specs = [] |
| 84 | + for spec in result["model_specs"]: |
| 85 | + if "model_src" in spec: |
| 86 | + flattened_specs.extend(flatten_quantizations(spec)) |
| 87 | + else: |
| 88 | + flattened_specs.append(spec) |
| 89 | + result["model_specs"] = flattened_specs |
| 90 | + |
| 91 | + return result |
| 92 | + |
115 | 93 | loaded_count = load_complete_builtin_models( |
116 | 94 | model_type="embedding", |
117 | | - builtin_registry=BUILTIN_EMBEDDING_MODELS, |
118 | | - convert_format_func=convert_embedding_model_format, |
| 95 | + builtin_registry={},  # Temporarily use an empty dict; registration is handled manually below |
| 96 | + convert_format_func=convert_embedding_with_quantizations, |
119 | 97 | model_class=EmbeddingModelFamilyV2, |
120 | 98 | ) |
121 | 99 |
| 100 | + # Manually handle embedding's special registration logic |
| 101 | + if loaded_count > 0: |
| 102 | + from ...constants import XINFERENCE_MODEL_DIR |
| 103 | + from ..custom import RegistryManager |
| 104 | + |
| 105 | + registry = RegistryManager.get_registry("embedding") |
| 106 | + existing_model_names = { |
| 107 | + spec.model_name for spec in registry.get_custom_models() |
| 108 | + } |
| 109 | + |
| 110 | + builtin_embedding_dir = os.path.join( |
| 111 | + XINFERENCE_MODEL_DIR, "v2", "builtin", "embedding" |
| 112 | + ) |
| 113 | + complete_json_path = os.path.join( |
| 114 | + builtin_embedding_dir, "embedding_models.json" |
| 115 | + ) |
| 116 | + |
| 117 | + if os.path.exists(complete_json_path): |
| 118 | + with codecs.open(complete_json_path, encoding="utf-8") as fd: |
| 119 | + model_data = json.load(fd) |
| 120 | + |
| 121 | + models_to_register = [] |
| 122 | + if isinstance(model_data, list): |
| 123 | + models_to_register = model_data |
| 124 | + elif isinstance(model_data, dict): |
| 125 | + if "model_name" in model_data: |
| 126 | + models_to_register = [model_data] |
| 127 | + else: |
| 128 | + for key, value in model_data.items(): |
| 129 | + if isinstance(value, dict) and "model_name" in value: |
| 130 | + models_to_register.append(value) |
| 131 | + |
| 132 | + for model_data in models_to_register: |
| 133 | + try: |
| 134 | + from ..utils import flatten_quantizations |
| 135 | + |
| 136 | + converted_data = model_data.copy() |
| 137 | + if "model_specs" in converted_data: |
| 138 | + flattened_specs = [] |
| 139 | + for spec in converted_data["model_specs"]: |
| 140 | + if "model_src" in spec: |
| 141 | + flattened_specs.extend(flatten_quantizations(spec)) |
| 142 | + else: |
| 143 | + flattened_specs.append(spec) |
| 144 | + converted_data["model_specs"] = flattened_specs |
| 145 | + builtin_embedding_family = EmbeddingModelFamilyV2.parse_obj( |
| 146 | + converted_data |
| 147 | + ) |
| 148 | + |
| 149 | + if builtin_embedding_family.model_name not in existing_model_names: |
| 150 | + register_embedding(builtin_embedding_family, persist=False) |
| 151 | + existing_model_names.add(builtin_embedding_family.model_name) |
| 152 | + except Exception as e: |
| 153 | + warnings.warn( |
| 154 | + f"Error parsing model {model_data.get('model_name', 'Unknown')}: {e}" |
| 155 | + ) |
| 156 | + |
122 | 157 | logger.info( |
123 | 158 | f"Successfully loaded {loaded_count} embedding models from complete JSON" |
124 | 159 | ) |
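The registration loop above has to accept three possible shapes for embedding_models.json: a list of model dicts, a single model dict, or a mapping of model name to model dict. Below is a standalone sketch of that shape normalization, using inline sample data instead of the real file; the normalize_model_payload name is hypothetical.

# Sketch of the shape normalization performed in register_builtin_model above.
import json
from typing import Any, Dict, List

def normalize_model_payload(model_data: Any) -> List[Dict[str, Any]]:
    if isinstance(model_data, list):
        return model_data
    if isinstance(model_data, dict):
        if "model_name" in model_data:
            return [model_data]  # a single model dict
        # Otherwise: a mapping of model_name -> model dict.
        return [
            v for v in model_data.values()
            if isinstance(v, dict) and "model_name" in v
        ]
    return []

samples = [
    '[{"model_name": "a"}, {"model_name": "b"}]',             # list form
    '{"model_name": "a"}',                                    # single-model form
    '{"a": {"model_name": "a"}, "b": {"model_name": "b"}}',   # name-keyed form
]
for raw in samples:
    print(normalize_model_payload(json.loads(raw)))

Each normalized entry is then flattened, parsed with EmbeddingModelFamilyV2.parse_obj, and passed to register_embedding(..., persist=False) only if no custom model with the same name is already registered, which is the deduplication the existing_model_names set provides.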