|
# Per-model root settings. Every entry is left as None here; presumably each
# one should be pointed at a local checkout of the corresponding project
# before that model is used -- confirm against the model wrappers.
VideoChatGPT_ROOT = None
PLLaVA_ROOT = None
RBDash_ROOT = None
# Passed as `root=` to the VITA model constructors registered in this module.
# Kept as None like its siblings: a machine-specific absolute path must not be
# committed into shared configuration, since it does not exist elsewhere.
VITA_ROOT = None
16 | 17 | LLAVA_V1_7B_MODEL_PTH = 'Please set your local path to LLaVA-7B-v1.1 here, the model weight is obtained by merging LLaVA delta weight based on vicuna-7b-v1.1 in https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md with vicuna-7b-v1.1. '
|
17 | 18 |
|
18 | 19 | video_models = {
|
|
172 | 173 | 'varco-vision-hf':partial(LLaVA_OneVision_HF, model_path='NCSOFT/VARCO-VISION-14B-HF'),
|
173 | 174 | }
|
174 | 175 |
|
# VITA registry: maps a model name to a partially-applied constructor with its
# checkpoint identifier and the VITA root setting already bound.
vita_series = {
    'vita': partial(
        VITA,
        model_path='VITA-MLLM/VITA',
        root=VITA_ROOT,
    ),
    'vita_qwen2': partial(
        VITAQwen2,
        model_path='VITA-MLLM/VITA-1.5',
        root=VITA_ROOT,
    ),
}
| 180 | + |
175 | 181 | internvl_series = {
|
176 | 182 | 'InternVL-Chat-V1-1': partial(InternVLChat, model_path='OpenGVLab/InternVL-Chat-V1-1', version='V1.1'),
|
177 | 183 | 'InternVL-Chat-V1-2': partial(InternVLChat, model_path='OpenGVLab/InternVL-Chat-V1-2', version='V1.2'),
|
|
326 | 332 | qwen2vl_series = {
|
327 | 333 | 'Qwen-VL-Max-0809': partial(Qwen2VLAPI, model='qwen-vl-max-0809', min_pixels=1280*28*28, max_pixels=16384*28*28),
|
328 | 334 | 'Qwen-VL-Plus-0809': partial(Qwen2VLAPI, model='qwen-vl-plus-0809', min_pixels=1280*28*28, max_pixels=16384*28*28),
|
| 335 | + 'QVQ-72B-Preview': partial(Qwen2VLChat, model_path='Qwen/QVQ-72B-Preview', min_pixels=1280*28*28, max_pixels=16384*28*28, system_prompt='You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.', max_new_tokens=8192, post_process=False), |
329 | 336 | 'Qwen2-VL-72B-Instruct': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-72B-Instruct', min_pixels=1280*28*28, max_pixels=16384*28*28),
|
330 | 337 | 'Qwen2-VL-7B-Instruct': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct', min_pixels=1280*28*28, max_pixels=16384*28*28),
|
331 | 338 | 'Qwen2-VL-7B-Instruct-AWQ': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct-AWQ', min_pixels=1280*28*28, max_pixels=16384*28*28),
|
|
416 | 423 | mantis_series, mmalaya_series, phi3_series, xgen_mm_series, qwen2vl_series,
|
417 | 424 | slime_series, eagle_series, moondream_series, llama_series, molmo_series,
|
418 | 425 | kosmos_series, points_series, nvlm_series, vintern_series, h2ovl_series, aria_series,
|
419 |
| - smolvlm_series, sail_series, valley_series |
| 426 | + smolvlm_series, sail_series, valley_series, vita_series |
420 | 427 | ]
|
421 | 428 |
|
422 | 429 | for grp in model_groups:
|
|
0 commit comments