-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_minicpm_basic.py
More file actions
149 lines (115 loc) · 4.14 KB
/
test_minicpm_basic.py
File metadata and controls
149 lines (115 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
"""
Basic tests for the MiniCPM model
=================================
Tests loading of the MiniCPM model and basic inference functionality.
"""
# NOTE(review): ensure_sys_paths() runs before the remaining imports,
# presumably so that project packages (e.g. nanovllm) become importable --
# confirm against fastcache_paths.
from fastcache_paths import ensure_sys_paths, CKPT_DIR, DATASETS_DIR, RESULTS_DIR
ensure_sys_paths()
import torch
import sys
from transformers import AutoConfig, AutoTokenizer
import warnings
# Suppress all warnings process-wide (presumably to keep the printed test
# output uncluttered).
warnings.filterwarnings('ignore')
def test_minicpm_model_loading():
    """Smoke-test building a MiniCPM model and loading its weights (no inference).

    Steps, each reported on stdout:
      1. Load the Hugging Face config from the hard-coded checkpoint path.
      2. Instantiate ``MiniCPMForConditionalGeneration`` from that config.
      3. Load the language-model weights with ``load_model`` and print the
         shape and mean of the first layer's ``qkv_proj`` weight.
      4. Load the Hugging Face reference model and pass it to
         ``model.load_vision_modules``.

    The process-wide default dtype is restored when the test finishes, and
    the CUDA default device is applied through a context manager, so later
    tests in the same process do not inherit either setting.  Python and
    CUDA caches are released in a ``finally`` block even if a step fails.

    Raises:
        Nothing is caught here; any exception raised by the loaders
        propagates to the caller.
    """
    import gc

    print("=" * 60)
    print(" MiniCPM Model Loading Test")
    print("=" * 60)

    model_path = '/data/huggingface/MiniCPM-V-2_6'

    # 1. Load the config
    print("\n1. Loading config...")
    hf_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    print(f" model_type: {hf_config.model_type}")
    print(f" hidden_size: {hf_config.hidden_size}")
    print(f" num_layers: {hf_config.num_hidden_layers}")
    print(f" query_num: {getattr(hf_config, 'query_num', 'N/A')}")

    # Remember the global default dtype so it can be put back afterwards.
    previous_dtype = torch.get_default_dtype()
    model = None
    hf_model = None
    try:
        # 2. Create the model
        print("\n2. Creating MiniCPM model...")
        from nanovllm.models.minicpm import MiniCPMForConditionalGeneration
        torch.set_default_dtype(hf_config.torch_dtype)
        # Scoped replacement for torch.set_default_device('cuda'): the
        # default device reverts automatically when the block exits.
        with torch.device('cuda'):
            model = MiniCPMForConditionalGeneration(hf_config)
            print(f" Model created: {type(model).__name__}")
            print(f" image_token_len: {model.image_token_len}")

            # 3. Load the LLM weights
            print("\n3. Loading LLM weights...")
            from nanovllm.utils.loader import load_model
            load_model(model.llm, model_path, prefix='llm.')

            # Sanity-check the loaded weights
            param = model.llm.model.layers[0].self_attn.qkv_proj.weight
            print(f" qkv_proj shape: {param.shape}")
            print(f" weight mean: {param.mean().item():.6f}")

            # 4. Load the vision modules
            print("\n4. Loading vision modules...")
            from transformers import AutoModel
            hf_model = AutoModel.from_pretrained(
                model_path,
                torch_dtype=hf_config.torch_dtype,
                trust_remote_code=True,
                device_map='cuda',
            )
            model.load_vision_modules(hf_model)
            print(f" vpm type: {type(model.vpm).__name__}")
            print(f" resampler type: {type(model.resampler).__name__}")

            # Drop the HF copy of the language model; only its vision
            # parts were handed to `model` above.
            del hf_model.llm
            gc.collect()
            torch.cuda.empty_cache()

        print("\n" + "=" * 60)
        print(" Model loading test passed!")
        print("=" * 60)
    finally:
        # Undo the global dtype change and release everything this test
        # allocated, whether or not a step above raised.
        torch.set_default_dtype(previous_dtype)
        model = None
        hf_model = None
        gc.collect()
        torch.cuda.empty_cache()
def test_minicpm_engine():
    """Drive a short text-only generation with MiniCPM through the LlavaLLM engine.

    Builds the engine with compression disabled, submits one prompt, steps
    the engine until it reports completion, and prints the decoded output
    of every returned sequence.  Nothing is asserted; the test passes if
    no exception is raised.
    """
    banner = "=" * 60
    print("\n" + banner)
    print(" MiniCPM Engine Test")
    print(banner)

    from nanovllm.engine.llava_engine import LlavaLLM
    from nanovllm.sampling_params import SamplingParams
    from transformers import AutoTokenizer
    import gc

    model_path = '/data/huggingface/MiniCPM-V-2_6'

    print("\n1. Initializing LlavaLLM with MiniCPM...")
    engine_options = dict(
        enable_compression=False,
        enforce_eager=True,
        max_model_len=4096,
    )
    llm = LlavaLLM(model_path, **engine_options)
    print(f" is_multimodal: {llm.model_runner.is_multimodal}")
    print(f" model_type: {llm.model_runner.model_type}")
    print(f" image_token_len: {llm.model_runner.image_token_len}")

    # Tokenizer is only needed to turn generated token ids back into text.
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

    # 2. Text generation
    print("\n2. Testing text generation...")
    prompt = "What is artificial intelligence?"
    sampling = SamplingParams(max_tokens=64)
    llm.add_request(prompt, sampling)

    finished = []
    token_count = 0
    while not llm.is_finished():
        produced, step_tokens = llm.step(apply_compression=False)
        # The per-step count is accumulated by magnitude, as in the original.
        token_count += abs(step_tokens)
        finished.extend(produced)

    print(f" Prompt: {prompt}")
    print(f" Total tokens: {token_count}")
    for seq_id, token_ids in finished:
        # Decode the generated token ids
        generated_text = tokenizer.decode(token_ids, skip_special_tokens=True)
        print(f" Output [{seq_id}]: {generated_text}")

    print("\n" + banner)
    print(" Engine test passed!")
    print(banner)

    # Cleanup: drop the engine reference before collecting and emptying the CUDA cache.
    del llm
    gc.collect()
    torch.cuda.empty_cache()
if __name__ == '__main__':
    # Run the loading test, then the engine test, with a blank-line
    # separator printed between them.
    for position, run_test in enumerate((test_minicpm_model_loading, test_minicpm_engine)):
        if position:
            print("\n\n")
        run_test()