Skip to content

Commit bbc4369

Browse files
authored
Support Qwen3-VL (#4093)
* support qwen3vl dense
* cleanups
* cleanups
* reuse input processor
* support qwen3vl moe, add docs
* format
* Revert "format" — this reverts commit c979730.
* fix docs
* fix
* improve config check conditions
* fix config
* some optimizations
* reuse qwen3, qwen3-moe
* fix mrope acc bug
* fix moe, optimize deepstack process
1 parent 136bb1b commit bbc4369

File tree

15 files changed

+1189
-7
lines changed

15 files changed

+1189
-7
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by
163163
<li>Qwen-VL (7B)</li>
164164
<li>Qwen2-VL (2B, 7B, 72B)</li>
165165
<li>Qwen2.5-VL (3B, 7B, 72B)</li>
166+
<li>Qwen3-VL (2B - 235B)</li>
166167
<li>DeepSeek-VL (7B)</li>
167168
<li>DeepSeek-VL2 (3B, 16B, 27B)</li>
168169
<li>InternVL-Chat (v1.1-v1.5)</li>

README_ja.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ LMDeploy TurboMindエンジンは卓越した推論能力を持ち、さまざ
149149
<li>Qwen-VL (7B)</li>
150150
<li>Qwen2-VL (2B, 7B, 72B)</li>
151151
<li>Qwen2.5-VL (3B, 7B, 72B)</li>
152+
<li>Qwen3-VL (2B - 235B)</li>
152153
<li>DeepSeek-VL (7B)</li>
153154
<li>DeepSeek-VL2 (3B, 16B, 27B)</li>
154155
<li>InternVL-Chat (v1.1-v1.5)</li>

README_zh-CN.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
164164
<li>Qwen-VL (7B)</li>
165165
<li>Qwen2-VL (2B, 7B, 72B)</li>
166166
<li>Qwen2.5-VL (3B, 7B, 72B)</li>
167+
<li>Qwen3-VL (2B - 235B)</li>
167168
<li>DeepSeek-VL (7B)</li>
168169
<li>DeepSeek-VL2 (3B, 16B, 27B)</li>
169170
<li>InternVL-Chat (v1.1-v1.5)</li>

docs/en/supported_models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
8888
| QWen3-Next | 80B | LLM | Yes | No | No | No | No |
8989
| QWen2-VL | 2B, 7B | MLLM | Yes | Yes | No | No | Yes |
9090
| QWen2.5-VL | 3B - 72B | MLLM | Yes | No | No | No | No |
91+
| QWen3-VL | 2B - 235B | MLLM | Yes | No | No | No | No |
9192
| DeepSeek-MoE | 16B | LLM | Yes | No | No | No | No |
9293
| DeepSeek-V2 | 16B, 236B | LLM | Yes | No | No | No | No |
9394
| DeepSeek-V2.5 | 236B | LLM | Yes | No | No | No | No |

docs/zh_cn/supported_models/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
| QWen3-Next | 80B | LLM | Yes | No | No | No | No |
8989
| QWen2-VL | 2B, 7B | MLLM | Yes | Yes | No | No | Yes |
9090
| QWen2.5-VL | 3B - 72B | MLLM | Yes | No | No | No | No |
91+
| QWen3-VL | 2B - 235B | MLLM | Yes | No | No | No | No |
9192
| DeepSeek-MoE | 16B | LLM | Yes | No | No | No | No |
9293
| DeepSeek-V2 | 16B, 236B | LLM | Yes | No | No | No | No |
9394
| DeepSeek-V2.5 | 236B | LLM | Yes | No | No | No | No |

lmdeploy/archs.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,9 @@ def check_vl_llm(config: dict) -> bool:
109109
'LlavaLlamaForCausalLM', 'LlavaMistralForCausalLM', 'CogVLMForCausalLM', 'InternLMXComposer2ForCausalLM',
110110
'InternVLChatModel', 'MiniCPMV', 'LlavaForConditionalGeneration', 'LlavaNextForConditionalGeneration',
111111
'Phi3VForCausalLM', 'Qwen2VLForConditionalGeneration', 'Qwen2_5_VLForConditionalGeneration',
112-
'MllamaForConditionalGeneration', 'MolmoForCausalLM', 'Gemma3ForConditionalGeneration',
113-
'Llama4ForConditionalGeneration', 'InternVLForConditionalGeneration', 'InternS1ForConditionalGeneration',
114-
'Glm4vForConditionalGeneration'
112+
'Qwen3VLForConditionalGeneration', 'Qwen3VLMoeForConditionalGeneration', 'MllamaForConditionalGeneration',
113+
'MolmoForCausalLM', 'Gemma3ForConditionalGeneration', 'Llama4ForConditionalGeneration',
114+
'InternVLForConditionalGeneration', 'InternS1ForConditionalGeneration', 'Glm4vForConditionalGeneration'
115115
])
116116
if arch == 'QWenLMHeadModel' and 'visual' in config:
117117
return True

lmdeploy/pytorch/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ def _update_torch_dtype(config: 'ModelConfig', dtype: str):
2828
config.dtype = torch.float16
2929
return config
3030

31-
torch_dtype = getattr(config.hf_config, 'dtype', None)
31+
torch_dtype = getattr(config.llm_config, 'dtype', None)
3232
if torch_dtype is None:
33-
torch_dtype = getattr(config.hf_config, 'torch_dtype', None)
33+
torch_dtype = getattr(config.llm_config, 'torch_dtype', None)
3434

3535
# deal with case when torch_dtype is not string but torch.dtype
3636
if isinstance(torch_dtype, torch.dtype):

lmdeploy/pytorch/configurations/default.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,16 @@ def condition(cls, hf_config):
1414
@classmethod
1515
def build(cls, hf_config, model_path: str = None, **kwargs):
1616
"""build."""
17+
18+
# for multi-modal models, get the language model config to build model config
19+
if hasattr(hf_config, 'text_config'):
20+
hf_config = hf_config.text_config
21+
elif hasattr(hf_config, 'llm_config'):
22+
hf_config = hf_config.llm_config
23+
1724
head_dim = getattr(hf_config, 'head_dim', None)
1825
head_dim = head_dim or hf_config.hidden_size // hf_config.num_attention_heads
26+
1927
# head_dim should not be None
2028
hf_config.head_dim = head_dim
2129
num_attention_heads = hf_config.num_attention_heads

lmdeploy/pytorch/models/module_map.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,18 @@
147147
f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration',
148148
})
149149

150+
# qwen3_vl
151+
MODULE_MAP.update({
152+
'Qwen3VLForConditionalGeneration':
153+
f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_vl.Qwen3VLForConditionalGeneration',
154+
})
155+
156+
# qwen3_vl_moe
157+
MODULE_MAP.update({
158+
'Qwen3VLMoeForConditionalGeneration':
159+
f'{LMDEPLOY_PYTORCH_MODEL_PATH}.qwen3_vl_moe.Qwen3VLMoeForConditionalGeneration',
160+
})
161+
150162
# starcoder2
151163
MODULE_MAP.update({
152164
'Starcoder2ForCausalLM': f'{LMDEPLOY_PYTORCH_MODEL_PATH}.starcoder2.Starcoder2ForCausalLM',

lmdeploy/pytorch/models/qwen3.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def __init__(self, config: PretrainedConfig, dtype: torch.dtype = None, device:
4747
head_dim,
4848
num_kv_heads=num_key_value_heads,
4949
v_head_size=head_dim,
50-
sliding_window=config.sliding_window,
50+
sliding_window=getattr(config, 'sliding_window', None),
5151
)
5252

5353
# o_proj

0 commit comments

Comments (0)