diff --git a/vlmeval/config.py b/vlmeval/config.py
index 80bf14dde..43c299c1d 100644
--- a/vlmeval/config.py
+++ b/vlmeval/config.py
@@ -333,6 +333,7 @@
     'Qwen-VL-Max-0809': partial(Qwen2VLAPI, model='qwen-vl-max-0809', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen-VL-Plus-0809': partial(Qwen2VLAPI, model='qwen-vl-plus-0809', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen2-VL-72B-Instruct': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-72B-Instruct', min_pixels=1280*28*28, max_pixels=16384*28*28),
+    'QVQ-72B-Preview': partial(Qwen2VLChat, model_path='Qwen/QVQ-72B-Preview', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen2-VL-7B-Instruct': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen2-VL-7B-Instruct-AWQ': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct-AWQ', min_pixels=1280*28*28, max_pixels=16384*28*28),
     'Qwen2-VL-7B-Instruct-GPTQ-Int4': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct-GPTQ-Int4', min_pixels=1280*28*28, max_pixels=16384*28*28),
diff --git a/vlmeval/vlm/qwen2_vl/model.py b/vlmeval/vlm/qwen2_vl/model.py
index 65629b0a9..4c931d7e8 100644
--- a/vlmeval/vlm/qwen2_vl/model.py
+++ b/vlmeval/vlm/qwen2_vl/model.py
@@ -88,7 +88,13 @@ def __init__(
             temperature=temperature,
             repetition_penalty=repetition_penalty,
         )
+        # Fall back to a built-in system prompt for QVQ checkpoints, but only
+        # when the caller did not pass one; an explicit prompt always wins.
+        if system_prompt is None and 'qvq' in model_path.lower():
+            system_prompt = (
+                'You are a helpful and harmless assistant. '
+                'You are Qwen developed by Alibaba. You should think step-by-step.'
+            )
         self.system_prompt = system_prompt
         self.verbose = verbose
         self.fps = 2.0
         self.nframe = 64