Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions vllm/model_executor/models/aria.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,8 +499,6 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
model to perform tasks that involve both image and text inputs.
"""

merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/aya_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,6 @@ def _get_layer_index(feature_layer_index: int, num_hidden_layers: int) -> int:
dummy_inputs=AyaVisionDummyInputsBuilder,
)
class AyaVisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/blip2.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,8 +523,6 @@ def _get_prompt_updates(
class Blip2ForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant
):
merge_by_field_config = True

@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"):
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/chameleon.py
Original file line number Diff line number Diff line change
Expand Up @@ -918,8 +918,6 @@ def forward(
class ChameleonForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant
):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
Expand Down
1 change: 0 additions & 1 deletion vllm/model_executor/models/clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,6 @@ class CLIPEmbeddingModel(nn.Module, SupportsMultiModal, SupportsQuant):
is_pooling_model = True

packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
merge_by_field_config = True

@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/cohere2_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,6 @@ def get_replacement(item_idx: int):
dummy_inputs=Cohere2VisionDummyInputsBuilder,
)
class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"model.vision_tower.": "vision_tower.",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/deepseek_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,6 @@ def get_replacement_deepseek_vl2(item_idx: int):
dummy_inputs=DeepseekOCRDummyInputsBuilder,
)
class DeepseekOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# map prefix for language backbone
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/deepseek_vl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,6 @@ def _cached_apply_hf_processor(
dummy_inputs=DeepseekVL2DummyInputsBuilder,
)
class DeepseekVLV2ForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"language.": "language_model.",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/dots_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,8 +690,6 @@ def forward(
dummy_inputs=DotsOCRDummyInputsBuilder,
)
class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={
".attn.qkv_proj.": ".attn.qkv.",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/ernie45_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,8 +1254,6 @@ def get_dummy_mm_data(
class Ernie4_5_VLMoeForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, SupportsMRoPE
):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/fuyu.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,6 @@ def get_replacement_fuyu(item_idx: int):
dummy_inputs=FuyuDummyInputsBuilder,
)
class FuyuForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"model.vision_embed_tokens.": "vision_embed_tokens.",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/gemma3_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,6 @@ def forward(self, vision_outputs: torch.Tensor):
class Gemma3ForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA
):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
1 change: 0 additions & 1 deletion vllm/model_executor/models/gemma3n_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,6 @@ def forward(
class Gemma3nForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsTranscription
):
merge_by_field_config = True
supported_languages = ISO639_1_SUPPORTED_LANGS

packed_modules_mapping = {
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/glm4_1v.py
Original file line number Diff line number Diff line change
Expand Up @@ -1424,8 +1424,6 @@ def get_video_replacement_glm4v(item_idx: int):
class Glm4vForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, SupportsMRoPE
):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/glm4v.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,6 @@ def get_replacement(item_idx: int):
class GLM4VForCausalLM(
ChatGLMBaseModel, SupportsMultiModal, SupportsLoRA, SupportsPP, SupportsMRoPE
):
merge_by_field_config = True

packed_modules_mapping = {
"query_key_value": ["query_key_value"],
"dense_h_to_4h": ["dense_h_to_4h"],
Expand Down
1 change: 0 additions & 1 deletion vllm/model_executor/models/granite_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,6 @@ class GraniteSpeechForConditionalGeneration(
SupportsLoRA,
SupportsTranscription,
):
merge_by_field_config = True
supported_languages = ISO639_1_SUPPORTED_LANGS

packed_modules_mapping = {
Expand Down
1 change: 0 additions & 1 deletion vllm/model_executor/models/hunyuan_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,6 @@ class HunYuanVLForConditionalGeneration(
SupportsQuant,
SupportsXDRoPE,
):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw"}

# To ensure correct weight loading and mapping.
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/hyperclovax_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,8 +592,6 @@ def build_mlp(
dummy_inputs=HCXVisionDummyInputsBuilder,
)
class HCXVisionForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,8 +576,6 @@ def forward(
dummy_inputs=Idefics3DummyInputsBuilder,
)
class Idefics3ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsLoRA):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
25 changes: 22 additions & 3 deletions vllm/model_executor/models/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ class SupportsMultiModal(Protocol):
`multimodal_config.mm_encoder_tp_mode="data"`.
"""

merge_by_field_config: ClassVar[bool] = True
merge_by_field_config: ClassVar[bool | None] = None
"""
A flag that indicates which implementation of
[DEPRECATED] A flag that indicates which implementation of
`vllm.multimodal.utils.group_mm_kwargs_by_modality` to use.
"""

Expand Down Expand Up @@ -260,7 +260,26 @@ def supports_multimodal(model: object) -> TypeIs[SupportsMultiModal]: ...
def supports_multimodal(
model: type[object] | object,
) -> TypeIs[type[SupportsMultiModal]] | TypeIs[SupportsMultiModal]:
return getattr(model, "supports_multimodal", False)
res = getattr(model, "supports_multimodal", False)

if res:
# We can remove this starting from v0.14
merge_by_field_config = getattr(model, "merge_by_field_config", None)
if merge_by_field_config is False:
raise ValueError(
"`merge_by_field_config=False` is no longer effective, "
"please update your model to consider the new batching logic "
"in `group_mm_kwargs_by_modality` (refer to "
"https://github.com/vllm-project/vllm/issues/26149), "
"and then remove the override from your model."
)
if merge_by_field_config is True:
logger.warning_once(
"`merge_by_field_config=True` is redundant, "
"please remove the override from your model."
)

return res


def supports_multimodal_raw_input_only(model: type[object] | object) -> bool:
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/interns1.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,6 @@ def get_replacement_interns1_video(item_idx: int):
class InternS1ForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA
):
merge_by_field_config = True

# To ensure correct weight loading and mapping.
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/internvl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,8 +1074,6 @@ def get_video_replacement_internvl(item_idx: int):
dummy_inputs=InternVLDummyInputsBuilder,
)
class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
merge_by_field_config = True

supports_encoder_tp_data = True

@classmethod
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/keye.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,8 +1292,6 @@ def _get_mm_fields_config(


class BaseKeyeModule(nn.Module):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/kimi_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,6 @@ def get_replacement(item_idx: int):
dummy_inputs=KimiVLDummyInputsBuilder,
)
class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

supports_encoder_tp_data = True

@classmethod
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,6 @@ def init_vision_tower_for_llava(
dummy_inputs=LlavaDummyInputsBuilder,
)
class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llava_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,6 @@ def _get_mm_fields_config(
dummy_inputs=LlavaDummyInputsBuilder,
)
class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llava_next_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,6 @@ def forward(self, image_features: torch.Tensor) -> torch.Tensor:
dummy_inputs=LlavaNextVideoDummyInputsBuilder,
)
class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/llava_onevision.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,6 @@ def forward(self, image_features: torch.Tensor) -> torch.Tensor:
dummy_inputs=LlavaOnevisionDummyInputsBuilder,
)
class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/midashenglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,8 +683,6 @@ def get_replacement_midashenglm(item_idx: int):
dummy_inputs=MiDashengLMDummyInputsBuilder,
)
class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/minicpmv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,8 +1003,6 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
instantiated.
"""

merge_by_field_config = True

supports_encoder_tp_data = True

@classmethod
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/minimax_vl_01.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,6 @@ def _get_mm_fields_config(
dummy_inputs=MiniMaxVL01DummyInputsBuilder,
)
class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/mistral3.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,8 +423,6 @@ def init_vision_tower_for_llava(
class Mistral3ForConditionalGeneration(
nn.Module, SupportsLoRA, SupportsMultiModal, SupportsPP
):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/mllama4.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,8 +741,6 @@ class Llama4ForConditionalGeneration(
SupportsEagle3,
SupportsLoRA,
):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/molmo.py
Original file line number Diff line number Diff line change
Expand Up @@ -1354,8 +1354,6 @@ def get_insertion_molmo(item_idx: int):
class MolmoForCausalLM(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsQuant
):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={
# vision backbone mapping
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/nano_nemotron_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,8 +1116,6 @@ def get_dummy_mm_data(
class NemotronH_Nano_VL_V2(
nn.Module, HasInnerState, IsHybrid, SupportsMultiModal, SupportsMultiModalPruning
):
merge_by_field_config = True

@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"):
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/nemotron_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,6 @@ def get_image_processor(self, **kwargs: object):
dummy_inputs=BaseInternVLDummyInputsBuilder[NemotronVLProcessingInfo],
)
class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
merge_by_field_config = True

@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"):
Expand Down
1 change: 0 additions & 1 deletion vllm/model_executor/models/opencua.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
dummy_inputs=OpenCUADummyInputsBuilder,
)
class OpenCUAForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw"}

packed_modules_mapping = {
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/ovis.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,6 @@ def get_replacement_ovis(item_idx: int):
dummy_inputs=OvisDummyInputsBuilder,
)
class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"):
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/ovis2_5.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,8 +456,6 @@ def get_replacement_ovis(item_idx, modality: str):
dummy_inputs=Ovis2_5DummyInputsBuilder,
)
class Ovis2_5(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
config = vllm_config.model_config.hf_config
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/paddleocr_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,8 +1103,6 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
dummy_inputs=PaddleOCRVLDummyInputsBuilder,
)
class PaddleOCRVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsMRoPE):
merge_by_field_config = True

hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"model.": "language_model.model.",
Expand Down
2 changes: 0 additions & 2 deletions vllm/model_executor/models/paligemma.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,6 @@ def apply(
dummy_inputs=PaliGemmaDummyInputsBuilder,
)
class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True

packed_modules_mapping = {
"qkv_proj": [
"q_proj",
Expand Down
Loading