@@ -196,13 +196,15 @@ def __call__(self,
196196 gen_config : Optional [GenerationConfig ] = None ,
197197 stream_response : bool = True ,
198198 do_preprocess : bool = True ,
199- adapter_name : str = None ) -> Union [Response , Iterator [Response ]]:
199+ adapter_name : str = None ,
200+ ** kwargs ) -> Union [Response , Iterator [Response ]]:
200201 self ._engine .chat (prompt ,
201202 gen_config = gen_config or self ._gen_config ,
202203 stream_response = stream_response ,
203204 do_preprocess = do_preprocess ,
204205 session = self ,
205- adapter_name = adapter_name )
206+ adapter_name = adapter_name ,
207+ ** kwargs )
206208 if stream_response :
207209 return self .generator
208210 else :
@@ -691,7 +693,7 @@ async def _get_prompt_input(self,
691693 adapter_name : str ,
692694 tools : Optional [List [object ]] = None ,
693695 reasoning_effort : Optional [Literal ['low' , 'medium' , 'high' ]] = None ,
694- enable_thinking : Optional [bool ] = None ,
696+ chat_template_kwargs : Optional [Dict ] = None ,
695697 ** kwargs ):
696698 # Change multimodal data to openai text messages, i.e.,
697699 # [{'role': 'user', 'content': [{'type': 'text', 'text': 'hi'}]}] ->
@@ -706,12 +708,12 @@ async def _get_prompt_input(self,
706708 chat_template = MODELS .module_dict [adapter_name ]()
707709 else :
708710 chat_template = BaseChatTemplate ()
711+ chat_template_kwargs = chat_template_kwargs or {}
709712 prompt = chat_template .messages2prompt (prompt ,
710713 sequence_start ,
711714 tools = tools ,
712- enable_thinking = enable_thinking ,
713715 reasoning_effort = reasoning_effort ,
714- ** kwargs )
716+ ** chat_template_kwargs )
715717 if prompt is None :
716718 raise ValueError (
717719 f'You are using base template to handle chat task. Please specify a `--chat-template` name chosen from `lmdeploy list` if you want to use OpenAI messages input.' # noqa
@@ -768,7 +770,7 @@ async def generate(
768770 rewind_stop_tokens : bool = False ,
769771 input_ids : Optional [List ] = None ,
770772 enable_thinking : Optional [bool ] = None ,
771- add_vision_id : Optional [bool ] = False ,
773+ chat_template_kwargs : Optional [Dict ] = None ,
772774 mm_processor_kwargs : Optional [Dict [str , Any ]] = None ,
773775 ** kwargs ):
774776 """Generate responses.
@@ -811,6 +813,14 @@ async def generate(
811813 if gen_config .n > 1 :
812814 logger .warning (f'n({ gen_config .n } ) > 1 hasn\' t been supported yet. Fallback to 1' )
813815 gen_config .n = 1
816+ chat_template_kwargs = chat_template_kwargs or {}
817+ if enable_thinking is not None :
818+ logger .warning ('enable_thinking is deprecated, use chat_template_kwargs["enable_thinking"] instead' )
819+ if chat_template_kwargs .get ('enable_thinking' ) is None :
820+ chat_template_kwargs ['enable_thinking' ] = enable_thinking
821+ else :
822+ logger .warning ('chat_template_kwargs["enable_thinking"] is already set, '
823+ 'the value will not be overwritten by enable_thinking' )
814824 if messages :
815825 prompt = messages
816826 self .request_logger .log_prompt (session_id = session_id , prompt = prompt )
@@ -820,9 +830,8 @@ async def generate(
820830 adapter_name ,
821831 tools = tools ,
822832 reasoning_effort = reasoning_effort ,
823- enable_thinking = enable_thinking ,
824- add_vision_id = add_vision_id ,
825833 mm_processor_kwargs = mm_processor_kwargs ,
834+ chat_template_kwargs = chat_template_kwargs ,
826835 ** kwargs )
827836 prompt = prompt_input ['prompt' ]
828837 input_ids = prompt_input ['input_ids' ]
0 commit comments