@@ -38,16 +38,19 @@ def _create_proposer(
3838 num_speculative_tokens = num_speculative_tokens ,
3939 )
4040
41- vllm_config = VllmConfig (
42- model_config = model_config ,
43- cache_config = CacheConfig (block_size = 16 ),
44- speculative_config = speculative_config ,
45- device_config = DeviceConfig (device = "tpu" ),
46- parallel_config = ParallelConfig (pipeline_parallel_size = 1 ,
47- tensor_parallel_size = 1 ),
48- load_config = LoadConfig (),
49- scheduler_config = SchedulerConfig (max_num_batched_tokens = 8192 ,
50- max_num_seqs = 128 ))
41+ vllm_config = VllmConfig (model_config = model_config ,
42+ cache_config = CacheConfig (block_size = 16 ),
43+ speculative_config = speculative_config ,
44+ device_config = DeviceConfig (device = "tpu" ),
45+ parallel_config = ParallelConfig (
46+ pipeline_parallel_size = 1 ,
47+ tensor_parallel_size = 1 ),
48+ load_config = LoadConfig (),
49+ scheduler_config = SchedulerConfig (
50+ max_num_batched_tokens = 8192 ,
51+ max_num_seqs = 128 ,
52+ max_model_len = model_config .max_model_len ,
53+ is_encoder_decoder = False ))
5154
5255 # Mock the runner, as the proposer needs it for initialization
5356 mock_runner = mock .MagicMock ()
0 commit comments