Skip to content

Commit

Permalink
modify valley import (#700)
Browse files: browse the repository at this point in the history
  • Loading branch information
PhoenixZ810 authored Dec 29, 2024
1 parent be12656 commit 4bea02b
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions vlmeval/vlm/valley/valley_eagle_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,6 @@
from transformers import AutoTokenizer, AutoProcessor
import re

from .valley_eagle.model.language_model.valley_qwen2 import ValleyQwen2ForCausalLM
from .valley_eagle.util.mm_utils import process_anyres_image
from .valley_eagle import conversation as conversation_lib
from .valley_eagle.util.data_util import dynamic_preprocess, preprocess

IGNORE_INDEX = -100
IMAGE_TOKEN_INDEX = -200
Expand Down Expand Up @@ -124,6 +120,11 @@ class ValleyEagleChat(BaseModel):
def __init__(self,
model_path='liuhaotian/llava_v1.5_7b',
**kwargs):
from .valley_eagle.model.language_model.valley_qwen2 import ValleyQwen2ForCausalLM
from .valley_eagle.util.mm_utils import process_anyres_image
from .valley_eagle import conversation as conversation_lib
from .valley_eagle.util.data_util import dynamic_preprocess, preprocess

torch_dtype = torch.float16
padding_side = 'left'
use_fast = True
Expand All @@ -144,6 +145,8 @@ def __init__(self,
self.model_path = model_path
self.model = ValleyQwen2ForCausalLM.from_pretrained(model_path, torch_dtype=torch_dtype)
self.model = self.model.to(self.device).half()
self.process_anyres_image = process_anyres_image
self.preprocess = preprocess

# should check this code
self.model.config.min_tile_num = 1
Expand Down Expand Up @@ -192,7 +195,7 @@ def preprocess_images(
video_pad = []
for img in images:
if self.model.config.anyres:
image = process_anyres_image(img, self.image_processor, self.model.config.grid_pinpoints)
image = self.process_anyres_image(img, self.image_processor, self.model.config.grid_pinpoints)
else:
image = self.image_processor(img, return_tensors="pt")["pixel_values"][0]

Expand Down Expand Up @@ -269,7 +272,7 @@ def generate_inner(self, message, dataset=None):
img_length = len(video_images_tensor)
source = preprocess_multimodal(messages, img_length, self.model.config)

data_dict = preprocess(
data_dict = self.preprocess(
source,
self.tokenizer,
has_image=True,
Expand Down

0 comments on commit 4bea02b

Please sign in to comment.