@@ -148,7 +148,11 @@ class Ovis2Processor(ProcessorMixin):
148
148
def __init__(self, image_processor=None, tokenizer=None, chat_template=None, **kwargs):
    """Set up placeholder tokens and default image-preprocessing options.

    Args:
        image_processor: Forwarded unchanged to the parent initializer.
        tokenizer: Forwarded to the parent initializer. Its ``image_token``
            and ``video_token`` attributes, when present, replace the
            built-in placeholder strings.
        chat_template: Forwarded to the parent initializer as a keyword.
        **kwargs: May carry ``max_partition`` (default ``9``),
            ``covering_threshold`` (default ``0.9``), ``convert_to_rgb``
            (default ``True``) and ``return_tensors`` (default ``'pt'``).
            These four are stored on the instance and removed from
            ``kwargs``; whatever remains is forwarded to the parent
            initializer.
    """
    # Fall back to the built-in placeholders when the tokenizer (possibly
    # None) does not define its own.
    self.image_token = getattr(tokenizer, "image_token", "<|image_pad|>")
    self.video_token = getattr(tokenizer, "video_token", "<|video_pad|>")

    # Pop (rather than get) so these processor-specific options are not
    # forwarded to the parent initializer.
    # NOTE(review): the parent (ProcessorMixin) is understood to reject
    # keyword arguments it does not recognise -- confirm against the
    # transformers version in use.
    self.max_partition = kwargs.pop('max_partition', 9)
    self.covering_threshold = kwargs.pop('covering_threshold', 0.9)
    self.convert_to_rgb = kwargs.pop('convert_to_rgb', True)
    self.return_tensors = kwargs.pop('return_tensors', 'pt')

    super().__init__(image_processor, tokenizer, chat_template=chat_template, **kwargs)
152
156
153
157
def __call__ (
154
158
self ,
@@ -316,7 +320,12 @@ def construct_image_placeholders(self, grid):
316
320
image_placeholders .append (self .get_token_value ('image_end' ))
317
321
return image_placeholders
318
322
319
- def preprocess_image (self , image : PIL .Image .Image , max_partition , covering_threshold , convert_to_rgb , return_tensors ):
323
+ def preprocess_image (self , image : PIL .Image .Image , max_partition = None , covering_threshold = None ,
324
+ convert_to_rgb = None , return_tensors = None ):
325
+ max_partition = max_partition if max_partition is not None else self .max_partition
326
+ covering_threshold = covering_threshold if covering_threshold is not None else self .covering_threshold
327
+ convert_to_rgb = convert_to_rgb if convert_to_rgb is not None else self .convert_to_rgb
328
+ return_tensors = return_tensors if return_tensors is not None else self .return_tensors
320
329
def _preprocess (img : PIL .Image .Image , side ):
321
330
# first resize and preprocess
322
331
w , h = img .size
0 commit comments