Skip to content

Commit

Permalink
FEAT: Support internlm3 (#2789)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jun-Howie authored Jan 29, 2025
1 parent efff7f8 commit 0ee8d14
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 1 deletion.
67 changes: 67 additions & 0 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -9355,5 +9355,72 @@
"<|user|>",
"<|observation|>"
]
},
{
"version": 1,
"context_length": 32768,
"model_name": "internlm3-instruct",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"chat",
"tools"
],
"model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 8,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "internlm/internlm3-8b-instruct"
},
{
"model_format": "gptq",
"model_size_in_billions": 8,
"quantizations": [
"Int4"
],
"model_id": "internlm/internlm3-8b-instruct-gptq-int4"
},
{
"model_format": "awq",
"model_size_in_billions": 8,
"quantizations": [
"Int4"
],
"model_id": "internlm/internlm3-8b-instruct-awq"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 8,
"quantizations": [
"q2_k",
"q3_k_m",
"q4_0",
"q4_k_m",
"q5_0",
"q5_k_m",
"q6_k",
"q8_0"
],
"model_id": "internlm/internlm3-8b-instruct-gguf",
"model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf"
}
],
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"stop_token_ids": [
2,
128131
],
"stop": [
"</s>",
"<|im_end|>"
]
}
]
73 changes: 72 additions & 1 deletion xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -6963,7 +6963,7 @@
"<|endoftext|>"
]
},
    {
"version": 1,
"context_length": 32768,
"model_name": "marco-o1",
Expand Down Expand Up @@ -7061,5 +7061,76 @@
"<|user|>",
"<|observation|>"
]
},
{
"version": 1,
"context_length": 32768,
"model_name": "internlm3-instruct",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"chat",
"tools"
],
"model_description": "InternLM3 has open-sourced an 8-billion parameter instruction model, InternLM3-8B-Instruct, designed for general-purpose usage and advanced reasoning.",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": 8,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": 8,
"quantizations": [
"Int4"
],
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-gptq-int4",
"model_hub": "modelscope"
},
{
"model_format": "awq",
"model_size_in_billions": 8,
"quantizations": [
"Int4"
],
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-awq",
"model_hub": "modelscope"
},
{
"model_format": "ggufv2",
"model_size_in_billions": 8,
"quantizations": [
"q2_k",
"q3_k_m",
"q4_0",
"q4_k_m",
"q5_0",
"q5_k_m",
"q6_k",
"q8_0"
],
"model_id": "Shanghai_AI_Laboratory/internlm3-8b-instruct-gguf",
"model_file_name_template": "internlm3-8b-instruct-{quantization}.gguf",
"model_hub": "modelscope"
}
],
"chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"stop_token_ids": [
2,
128131
],
"stop": [
"</s>",
"<|im_end|>"
]
}
]
3 changes: 3 additions & 0 deletions xinference/model/llm/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ class VLLMGenerateConfig(TypedDict, total=False):
VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2-vl-instruct")
VLLM_SUPPORTED_VISION_MODEL_LIST.append("QvQ-72B-Preview")

def _parse_version(version: str) -> tuple:
    """Parse a dotted version string into a tuple of ints for numeric comparison.

    Plain string comparison is lexicographic, so ``"0.10.0" >= "0.7.0"`` is
    ``False`` — the check silently stops matching once a version component
    reaches two digits. Comparing integer tuples fixes that.

    Local build metadata (``"0.7.0+cu121"``) is stripped at the ``+``; any
    non-numeric suffix within a component (e.g. ``"0rc1"``) is truncated at
    the first non-digit character, defaulting to 0 if nothing remains.
    """
    parts = []
    for piece in version.split("+", 1)[0].split("."):
        digits = ""
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        parts.append(int(digits) if digits else 0)
    return tuple(parts)


# internlm3 support landed in vLLM 0.7.0; only advertise it on new enough builds.
if VLLM_INSTALLED and _parse_version(vllm.__version__) >= (0, 7, 0):
    VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")


class VLLMModel(LLM):
def __init__(
Expand Down

0 comments on commit 0ee8d14

Please sign in to comment.