@@ -1,7 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 import torch
+from transformers import AutoProcessor
 
 from lmdeploy.vl.model.base import VISION_MODELS, VisionModel
 
@@ -22,14 +23,17 @@ class Qwen3VLModel(VisionModel):
 
     def build_preprocessor(self):
         check_transformers()
-        from transformers import AutoProcessor
         self.processor = AutoProcessor.from_pretrained(self.model_path)
         tokenizer = self.processor.tokenizer
         self.image_token = self.processor.image_token
         self.image_token_id = tokenizer.encode(self.image_token)[-1]
+        self.mm_processor_kwargs = None
 
-    def preprocess(self, messages: List[Dict]) -> List[Dict]:
+    def preprocess(self, messages: List[Dict], mm_processor_kwargs: Optional[Dict[str, Any]] = None) -> List[Dict]:
         """Refer to `super().preprocess()` for spec."""
+        if mm_processor_kwargs is None:
+            mm_processor_kwargs = {}
+
         images = self.collect_images(messages)
         optional_keys = {'resized_height', 'resized_width', 'min_pixels', 'max_pixels'}
         outputs = []
@@ -38,7 +42,10 @@ def preprocess(self, messages: List[Dict]) -> List[Dict]:
 
             item = dict(type='image', image=image)
             item.update({key: params[key] for key in params.keys() if key in optional_keys})
-            result = self.processor.image_processor(images=image, videos=None, return_tensors='pt')
+            result = self.processor.image_processor(images=image,
+                                                    videos=None,
+                                                    return_tensors='pt',
+                                                    **mm_processor_kwargs)
             merge_length = self.processor.image_processor.merge_size**2
             image_tokens = result['image_grid_thw'].prod(dim=1) // merge_length
             result.update(dict(image_size=image.size, image_tokens=image_tokens, image_token_id=self.image_token_id))
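
For reviewers, a minimal sketch of the new knob in use. Everything here is hypothetical: `vl_model` stands for an already-built `Qwen3VLModel`, `messages` for the usual lmdeploy multimodal message list, and the pixel bounds are illustrative values that assume the underlying image processor accepts `min_pixels`/`max_pixels`, as the `optional_keys` set above suggests.

```python
# Hypothetical call site, not part of this diff. mm_processor_kwargs is
# forwarded verbatim to processor.image_processor(...), so any keyword the
# preprocessor accepts can now be tuned per request.
extra = {'min_pixels': 256 * 28 * 28, 'max_pixels': 1280 * 28 * 28}  # illustrative bounds
outputs = vl_model.preprocess(messages, mm_processor_kwargs=extra)

# Omitting the argument preserves the old behavior: preprocess() falls back
# to an empty dict and the image processor runs with its defaults.
outputs = vl_model.preprocess(messages)
```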