Skip to content

Commit

Permalink
[Model] Support QVQ
Browse files — browse the repository at this point in the history
  • Loading branch information
kennymckormick committed Dec 30, 2024
1 parent 9bc5008 commit cb80ee0
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
1 change: 1 addition & 0 deletions vlmeval/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@
qwen2vl_series = {
'Qwen-VL-Max-0809': partial(Qwen2VLAPI, model='qwen-vl-max-0809', min_pixels=1280*28*28, max_pixels=16384*28*28),
'Qwen-VL-Plus-0809': partial(Qwen2VLAPI, model='qwen-vl-plus-0809', min_pixels=1280*28*28, max_pixels=16384*28*28),
'QVQ-72B-Preview': partial(Qwen2VLChat, model_path='Qwen/QVQ-72B-Preview', min_pixels=1280*28*28, max_pixels=16384*28*28, system_prompt='You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.', max_new_tokens=8192, post_process=False),
'Qwen2-VL-72B-Instruct': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-72B-Instruct', min_pixels=1280*28*28, max_pixels=16384*28*28),
'Qwen2-VL-7B-Instruct': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct', min_pixels=1280*28*28, max_pixels=16384*28*28),
'Qwen2-VL-7B-Instruct-AWQ': partial(Qwen2VLChat, model_path='Qwen/Qwen2-VL-7B-Instruct-AWQ', min_pixels=1280*28*28, max_pixels=16384*28*28),
Expand Down
20 changes: 20 additions & 0 deletions vlmeval/vlm/qwen2_vl/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def __init__(
repetition_penalty=1.0,
use_custom_prompt: bool = True,
system_prompt: str | None = None,
post_process: bool = False, # if True, will try to only extract stuff in the last \boxed{}.
verbose: bool = False,
):
super().__init__(use_custom_prompt=use_custom_prompt)
Expand All @@ -90,6 +91,7 @@ def __init__(
)
self.system_prompt = system_prompt
self.verbose = verbose
self.post_process = post_process
self.fps = 2.0
self.nframe = 64
self.FRAME_FACTOR = 2
Expand Down Expand Up @@ -195,6 +197,24 @@ def generate_inner(self, message, dataset=None):
generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
response = out[0]
if self.post_process:
resp = response.split('\\boxed{')[-1]
lt = len(resp)
counter, end = 1, None
for i in range(lt):
if resp[i] == '{':
counter += 1
elif resp[i] == '}':
counter -= 1
if counter == 0:
end = i
break
elif i == lt - 1:
end = lt
break
if end is not None:
response = resp[:end]

if self.verbose:
print(f'\033[32m{response}\033[0m')
return response
4 changes: 2 additions & 2 deletions vlmeval/vlm/vita.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ def __init__(self, model_path='VITA/vita', root=None, **kwargs):

model_name = get_model_name_from_path(model_path)
tokenizer, model, image_processor, _ = load_pretrained_model(
model_path, None, model_name, model_type='qwen2p5_instruct', device_map='auto'
model_path, None, model_name, model_type='qwen2p5_instruct', device_map='cuda'
)
# model.cuda().eval()
model = model.eval()
# model.tie_weights()

audio_encoder = model.get_audio_encoder()
Expand Down

0 comments on commit cb80ee0

Please sign in to comment.