diff --git a/src/transformers/models/apertus/configuration_apertus.py b/src/transformers/models/apertus/configuration_apertus.py
index a4d9ca83693e..edaa1bc7c46e 100644
--- a/src/transformers/models/apertus/configuration_apertus.py
+++ b/src/transformers/models/apertus/configuration_apertus.py
@@ -23,7 +23,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="swiss-ai/Apertus-8B")
+@auto_docstring(checkpoint="swiss-ai/Apertus-8B-Instruct-2509")
 class ApertusConfig(PreTrainedConfig):
     r"""
     ```python
diff --git a/src/transformers/models/apertus/modeling_apertus.py b/src/transformers/models/apertus/modeling_apertus.py
index 213dcb97082d..a4f24b10190c 100644
--- a/src/transformers/models/apertus/modeling_apertus.py
+++ b/src/transformers/models/apertus/modeling_apertus.py
@@ -459,8 +459,8 @@ def forward(
         ```python
         >>> from transformers import AutoTokenizer, ApertusForCausalLM

-        >>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B")
-        >>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B")
+        >>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")
+        >>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")

         >>> prompt = "Hey, are you conscious? Can you talk to me?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")
diff --git a/src/transformers/models/apertus/modular_apertus.py b/src/transformers/models/apertus/modular_apertus.py
index 2c8c9a6f0745..58874db8ef22 100644
--- a/src/transformers/models/apertus/modular_apertus.py
+++ b/src/transformers/models/apertus/modular_apertus.py
@@ -42,7 +42,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="swiss-ai/Apertus-8B")
+@auto_docstring(checkpoint="swiss-ai/Apertus-8B-Instruct-2509")
 class ApertusConfig(PreTrainedConfig):
     r"""
     ```python
@@ -260,8 +260,8 @@ def forward(self, **super_kwargs):
         ```python
         >>> from transformers import AutoTokenizer, ApertusForCausalLM

-        >>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B")
-        >>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B")
+        >>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")
+        >>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")

         >>> prompt = "Hey, are you conscious? Can you talk to me?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")
diff --git a/src/transformers/models/chameleon/configuration_chameleon.py b/src/transformers/models/chameleon/configuration_chameleon.py
index 5e4e4dd6a2d5..05e27f73d8e5 100644
--- a/src/transformers/models/chameleon/configuration_chameleon.py
+++ b/src/transformers/models/chameleon/configuration_chameleon.py
@@ -21,7 +21,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="meta/chameleon-7B")
+@auto_docstring(checkpoint="facebook/chameleon-7b")
 class ChameleonVQVAEConfig(PreTrainedConfig):
     r"""
     base_channels (`int`, *optional*, defaults to 128):
@@ -76,7 +76,7 @@ def __init__(
         self.initializer_range = initializer_range


-@auto_docstring(checkpoint="meta/chameleon-7B")
+@auto_docstring(checkpoint="facebook/chameleon-7b")
 class ChameleonConfig(PreTrainedConfig):
     r"""
     model_parallel_size (`int`, *optional*, defaults to 1):
diff --git a/src/transformers/models/dab_detr/configuration_dab_detr.py b/src/transformers/models/dab_detr/configuration_dab_detr.py
index 44d4d7d93bb6..3ad7fee61efa 100644
--- a/src/transformers/models/dab_detr/configuration_dab_detr.py
+++ b/src/transformers/models/dab_detr/configuration_dab_detr.py
@@ -22,7 +22,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="IDEA-Research/dab_detr-base")
+@auto_docstring(checkpoint="IDEA-Research/dab-detr-resnet-50")
 class DabDetrConfig(PreTrainedConfig):
     r"""
     num_queries (`int`, *optional*, defaults to 300):
@@ -56,10 +56,10 @@ class DabDetrConfig(PreTrainedConfig):
     ```python
     >>> from transformers import DabDetrConfig, DabDetrModel

-    >>> # Initializing a DAB-DETR IDEA-Research/dab_detr-base style configuration
+    >>> # Initializing a DAB-DETR IDEA-Research/dab-detr-resnet-50 style configuration
     >>> configuration = DabDetrConfig()

-    >>> # Initializing a model (with random weights) from the IDEA-Research/dab_detr-base style configuration
+    >>> # Initializing a model (with random weights) from the IDEA-Research/dab-detr-resnet-50 style configuration
     >>> model = DabDetrModel(configuration)

     >>> # Accessing the model configuration
diff --git a/src/transformers/models/dab_detr/modeling_dab_detr.py b/src/transformers/models/dab_detr/modeling_dab_detr.py
index 421bdbce6b89..933b2175d2cf 100644
--- a/src/transformers/models/dab_detr/modeling_dab_detr.py
+++ b/src/transformers/models/dab_detr/modeling_dab_detr.py
@@ -1248,8 +1248,8 @@ def forward(
         >>> with httpx.stream("GET", url) as response:
         ...     image = Image.open(BytesIO(response.read()))

-        >>> image_processor = AutoImageProcessor.from_pretrained("IDEA-Research/dab_detr-base")
-        >>> model = AutoModel.from_pretrained("IDEA-Research/dab_detr-base")
+        >>> image_processor = AutoImageProcessor.from_pretrained("IDEA-Research/dab-detr-resnet-50")
+        >>> model = AutoModel.from_pretrained("IDEA-Research/dab-detr-resnet-50")

         >>> # prepare image for the model
         >>> inputs = image_processor(images=image, return_tensors="pt")
diff --git a/src/transformers/models/edgetam/configuration_edgetam.py b/src/transformers/models/edgetam/configuration_edgetam.py
index b962acf33aed..84811636abbc 100644
--- a/src/transformers/models/edgetam/configuration_edgetam.py
+++ b/src/transformers/models/edgetam/configuration_edgetam.py
@@ -22,7 +22,7 @@
 from ..auto import CONFIG_MAPPING, AutoConfig


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVisionConfig(PreTrainedConfig):
     r"""
     backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
@@ -98,7 +98,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamPromptEncoderConfig(PreTrainedConfig):
     r"""
     mask_input_channels (`int`, *optional*, defaults to 16):
@@ -134,7 +134,7 @@ def __init__(
         self.scale = scale


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamMaskDecoderConfig(PreTrainedConfig):
     r"""
     mlp_dim (`int`, *optional*, defaults to 2048):
@@ -192,7 +192,7 @@ def __init__(
         self.attention_downsample_rate = attention_downsample_rate


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamConfig(PreTrainedConfig):
     r"""
     prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
diff --git a/src/transformers/models/edgetam/modular_edgetam.py b/src/transformers/models/edgetam/modular_edgetam.py
index 6c707fad9230..242d872809d0 100644
--- a/src/transformers/models/edgetam/modular_edgetam.py
+++ b/src/transformers/models/edgetam/modular_edgetam.py
@@ -36,7 +36,7 @@
 )


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVisionConfig(PreTrainedConfig):
     r"""
     backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
@@ -112,17 +112,17 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamPromptEncoderConfig(Sam2PromptEncoderConfig):
     pass


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamMaskDecoderConfig(Sam2MaskDecoderConfig):
     pass


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamConfig(Sam2Config):
     r"""
     prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
diff --git a/src/transformers/models/edgetam_video/configuration_edgetam_video.py b/src/transformers/models/edgetam_video/configuration_edgetam_video.py
index fd10f5265290..5bb24a873fa4 100644
--- a/src/transformers/models/edgetam_video/configuration_edgetam_video.py
+++ b/src/transformers/models/edgetam_video/configuration_edgetam_video.py
@@ -23,7 +23,7 @@
 from ..auto import CONFIG_MAPPING, AutoConfig


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVideoPromptEncoderConfig(PreTrainedConfig):
     r"""
     mask_input_channels (`int`, *optional*, defaults to 16):
@@ -59,7 +59,7 @@ def __init__(
         self.scale = scale


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVideoMaskDecoderConfig(PreTrainedConfig):
     r"""
     mlp_dim (`int`, *optional*, defaults to 2048):
@@ -117,7 +117,7 @@ def __init__(
         self.attention_downsample_rate = attention_downsample_rate


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVideoConfig(PreTrainedConfig):
     r"""
     prompt_encoder_config (Union[`dict`, `EdgeTamVideoPromptEncoderConfig`], *optional*):
diff --git a/src/transformers/models/edgetam_video/modular_edgetam_video.py b/src/transformers/models/edgetam_video/modular_edgetam_video.py
index 92a6c5c800fb..2418783f4e29 100644
--- a/src/transformers/models/edgetam_video/modular_edgetam_video.py
+++ b/src/transformers/models/edgetam_video/modular_edgetam_video.py
@@ -57,17 +57,17 @@
 )


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVideoPromptEncoderConfig(Sam2VideoPromptEncoderConfig):
     pass


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVideoMaskDecoderConfig(Sam2VideoMaskDecoderConfig):
     pass


-@auto_docstring(checkpoint="facebook/EdgeTAM")
+@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
 class EdgeTamVideoConfig(Sam2VideoConfig):
     r"""
     prompt_encoder_config (Union[`dict`, `EdgeTamVideoPromptEncoderConfig`], *optional*):
diff --git a/src/transformers/models/falcon_h1/configuration_falcon_h1.py b/src/transformers/models/falcon_h1/configuration_falcon_h1.py
index e3fe837706d1..2dc8399d3a39 100644
--- a/src/transformers/models/falcon_h1/configuration_falcon_h1.py
+++ b/src/transformers/models/falcon_h1/configuration_falcon_h1.py
@@ -21,7 +21,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="ibm-fms/FalconH1-9.8b-2.2T-hf")
+@auto_docstring(checkpoint="tiiuae/Falcon-H1-1.5B-Deep-Instruct")
 class FalconH1Config(PreTrainedConfig):
     r"""
     num_logits_to_keep (`int` or `None`, *optional*, defaults to 1):
diff --git a/src/transformers/models/gemma3/configuration_gemma3.py b/src/transformers/models/gemma3/configuration_gemma3.py
index e92040af9ee9..6fc37c42c156 100644
--- a/src/transformers/models/gemma3/configuration_gemma3.py
+++ b/src/transformers/models/gemma3/configuration_gemma3.py
@@ -29,7 +29,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="google/gemma3_text-7b")
+@auto_docstring(checkpoint="google/gemma-3-4b-it")
 class Gemma3TextConfig(PreTrainedConfig):
     r"""
     final_logit_softcapping (`float`, *optional*):
@@ -174,7 +174,7 @@ def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation=None, **kwa
         return kwargs


-@auto_docstring(checkpoint="google/gemma3_text-7b")
+@auto_docstring(checkpoint="google/gemma-3-4b-it")
 class Gemma3Config(PreTrainedConfig):
     r"""
     mm_tokens_per_image (`int`, *optional*, defaults to 256):
diff --git a/src/transformers/models/gemma3/modular_gemma3.py b/src/transformers/models/gemma3/modular_gemma3.py
index a10144fe8101..ef2cc1a33fd8 100644
--- a/src/transformers/models/gemma3/modular_gemma3.py
+++ b/src/transformers/models/gemma3/modular_gemma3.py
@@ -59,7 +59,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="google/gemma3_text-7b")
+@auto_docstring(checkpoint="google/gemma-3-4b-it")
 class Gemma3TextConfig(Gemma2Config, PreTrainedConfig):
     r"""
     final_logit_softcapping (`float`, *optional*):
@@ -198,7 +198,7 @@ def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation=None, **kwa
         return kwargs


-@auto_docstring(checkpoint="google/gemma3_text-7b")
+@auto_docstring(checkpoint="google/gemma-3-4b-it")
 class Gemma3Config(PreTrainedConfig):
     r"""
     mm_tokens_per_image (`int`, *optional*, defaults to 256):
diff --git a/src/transformers/models/glm4_moe/configuration_glm4_moe.py b/src/transformers/models/glm4_moe/configuration_glm4_moe.py
index 1bd12f3ece43..27d869713053 100644
--- a/src/transformers/models/glm4_moe/configuration_glm4_moe.py
+++ b/src/transformers/models/glm4_moe/configuration_glm4_moe.py
@@ -23,7 +23,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
+@auto_docstring(checkpoint="zai-org/GLM-4.5")
 class Glm4MoeConfig(PreTrainedConfig):
     r"""
     n_group (`int`, *optional*, defaults to 1):
diff --git a/src/transformers/models/glm4_moe/modular_glm4_moe.py b/src/transformers/models/glm4_moe/modular_glm4_moe.py
index ea0282c37e83..26776dd46b9a 100644
--- a/src/transformers/models/glm4_moe/modular_glm4_moe.py
+++ b/src/transformers/models/glm4_moe/modular_glm4_moe.py
@@ -36,7 +36,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
+@auto_docstring(checkpoint="zai-org/GLM-4.5")
 class Glm4MoeConfig(PreTrainedConfig):
     r"""
     n_group (`int`, *optional*, defaults to 1):
diff --git a/src/transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py b/src/transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py
index 74f654b918af..591fb808e8f9 100644
--- a/src/transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py
+++ b/src/transformers/models/glm4_moe_lite/configuration_glm4_moe_lite.py
@@ -24,7 +24,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
+@auto_docstring(checkpoint="zai-org/GLM-4.5")
 class Glm4MoeLiteConfig(PreTrainedConfig):
     r"""
     rope_interleave (`bool`, *optional*, defaults to `True`):
diff --git a/src/transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py b/src/transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py
index 362261e75e20..504d11fb6435 100644
--- a/src/transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py
+++ b/src/transformers/models/glm4_moe_lite/modular_glm4_moe_lite.py
@@ -33,7 +33,7 @@
 )


-@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
+@auto_docstring(checkpoint="zai-org/GLM-4.5")
 class Glm4MoeLiteConfig(PreTrainedConfig):
     r"""
     rope_interleave (`bool`, *optional*, defaults to `True`):
diff --git a/src/transformers/models/helium/configuration_helium.py b/src/transformers/models/helium/configuration_helium.py
index 4187cf6bc349..c544b11e8797 100644
--- a/src/transformers/models/helium/configuration_helium.py
+++ b/src/transformers/models/helium/configuration_helium.py
@@ -19,7 +19,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="kyutai/helium-2b")
+@auto_docstring(checkpoint="kyutai/helium-1-preview")
 class HeliumConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/instructblip/configuration_instructblip.py b/src/transformers/models/instructblip/configuration_instructblip.py
index b1989c734a3e..e3e79b43df80 100644
--- a/src/transformers/models/instructblip/configuration_instructblip.py
+++ b/src/transformers/models/instructblip/configuration_instructblip.py
@@ -22,7 +22,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVisionConfig(PreTrainedConfig):
     r"""
     Example:
@@ -30,10 +30,10 @@ class InstructBlipVisionConfig(PreTrainedConfig):
     ```python
     >>> from transformers import InstructBlipVisionConfig, InstructBlipVisionModel

-    >>> # Initializing a InstructBlipVisionConfig with Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVisionConfig with Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipVisionConfig()

-    >>> # Initializing a InstructBlipVisionModel (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVisionModel (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipVisionModel(configuration)

     >>> # Accessing the model configuration
@@ -73,7 +73,7 @@ def __init__(
         self.qkv_bias = qkv_bias


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipQFormerConfig(PreTrainedConfig):
     r"""
     cross_attention_frequency (`int`, *optional*, defaults to 2):
@@ -86,10 +86,10 @@ class InstructBlipQFormerConfig(PreTrainedConfig):
     ```python
     >>> from transformers import InstructBlipQFormerConfig, InstructBlipQFormerModel

-    >>> # Initializing a InstructBLIP Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBLIP Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipQFormerConfig()

-    >>> # Initializing a model (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a model (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipQFormerModel(configuration)
     >>> # Accessing the model configuration
     >>> configuration = model.config
@@ -134,7 +134,7 @@ def __init__(
         self.encoder_hidden_size = encoder_hidden_size


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipConfig(PreTrainedConfig):
     r"""
     qformer_config (`dict`, *optional*):
@@ -153,10 +153,10 @@ class InstructBlipConfig(PreTrainedConfig):
     ...     InstructBlipForConditionalGeneration,
     ... )

-    >>> # Initializing a InstructBlipConfig with Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipConfig with Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipConfig()

-    >>> # Initializing a InstructBlipForConditionalGeneration (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipForConditionalGeneration (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipForConditionalGeneration(configuration)

     >>> # Accessing the model configuration
diff --git a/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py b/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py
index 0bff0e2ba922..1e8ec6756900 100644
--- a/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py
+++ b/src/transformers/models/instructblipvideo/configuration_instructblipvideo.py
@@ -28,7 +28,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVideoVisionConfig(PreTrainedConfig):
     r"""
     Example:
@@ -36,10 +36,10 @@ class InstructBlipVideoVisionConfig(PreTrainedConfig):
     ```python
     >>> from transformers import InstructBlipVideoVisionConfig, InstructBlipVideoVisionModel

-    >>> # Initializing a InstructBlipVideoVisionConfig with Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideoVisionConfig with Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipVideoVisionConfig()

-    >>> # Initializing a InstructBlipVideoVisionModel (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideoVisionModel (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipVideoVisionModel(configuration)

     >>> # Accessing the model configuration
@@ -79,7 +79,7 @@ def __init__(
         self.qkv_bias = qkv_bias


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVideoQFormerConfig(PreTrainedConfig):
     r"""
     cross_attention_frequency (`int`, *optional*, defaults to 2):
@@ -92,10 +92,10 @@ class InstructBlipVideoQFormerConfig(PreTrainedConfig):
     ```python
     >>> from transformers import InstructBlipVideoQFormerConfig, InstructBlipVideoQFormerModel

-    >>> # Initializing a InstructBlipVideo Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideo Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipVideoQFormerConfig()

-    >>> # Initializing a model (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a model (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipVideoQFormerModel(configuration)
     >>> # Accessing the model configuration
     >>> configuration = model.config
@@ -140,7 +140,7 @@ def __init__(
         self.encoder_hidden_size = encoder_hidden_size


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVideoConfig(PreTrainedConfig):
     r"""
     qformer_config (`dict`, *optional*):
@@ -159,10 +159,10 @@ class InstructBlipVideoConfig(PreTrainedConfig):
     ...     InstructBlipVideoForConditionalGeneration,
     ... )

-    >>> # Initializing a InstructBlipVideoConfig with Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideoConfig with Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipVideoConfig()

-    >>> # Initializing a InstructBlipVideoForConditionalGeneration (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideoForConditionalGeneration (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipVideoForConditionalGeneration(configuration)

     >>> # Accessing the model configuration
diff --git a/src/transformers/models/instructblipvideo/modular_instructblipvideo.py b/src/transformers/models/instructblipvideo/modular_instructblipvideo.py
index fa75c1d6f2e5..5cbc7d2c70b8 100644
--- a/src/transformers/models/instructblipvideo/modular_instructblipvideo.py
+++ b/src/transformers/models/instructblipvideo/modular_instructblipvideo.py
@@ -42,17 +42,49 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVideoVisionConfig(InstructBlipVisionConfig):
-    pass
+    r"""
+    Example:
+
+    ```python
+    >>> from transformers import InstructBlipVideoVisionConfig, InstructBlipVideoVisionModel
+
+    >>> # Initializing a InstructBlipVideoVisionConfig with Salesforce/instructblip-flan-t5-xl style configuration
+    >>> configuration = InstructBlipVideoVisionConfig()
+
+    >>> # Initializing a InstructBlipVideoVisionModel (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
+    >>> model = InstructBlipVideoVisionModel(configuration)
+
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVideoQFormerConfig(InstructBlipQFormerConfig):
-    pass
+    r"""
+    cross_attention_frequency (`int`, *optional*, defaults to 2):
+        The frequency of adding cross-attention to the Transformer layers.
+    encoder_hidden_size (`int`, *optional*, defaults to 1408):
+        The hidden size of the hidden states for cross-attention.
+
+    Examples:
+
+    ```python
+    >>> from transformers import InstructBlipVideoQFormerConfig, InstructBlipVideoQFormerModel
+
+    >>> # Initializing a InstructBlipVideo Salesforce/instructblip-flan-t5-xl style configuration
+    >>> configuration = InstructBlipVideoQFormerConfig()
+
+    >>> # Initializing a model (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
+    >>> model = InstructBlipVideoQFormerModel(configuration)
+    >>> # Accessing the model configuration
+    >>> configuration = model.config
+    ```"""


-@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
+@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
 class InstructBlipVideoConfig(PreTrainedConfig):
     r"""
     qformer_config (`dict`, *optional*):
@@ -71,10 +103,10 @@ class InstructBlipVideoConfig(PreTrainedConfig):
     ...     InstructBlipVideoForConditionalGeneration,
     ... )

-    >>> # Initializing a InstructBlipVideoConfig with Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideoConfig with Salesforce/instructblip-flan-t5-xl style configuration
     >>> configuration = InstructBlipVideoConfig()

-    >>> # Initializing a InstructBlipVideoForConditionalGeneration (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+    >>> # Initializing a InstructBlipVideoForConditionalGeneration (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
     >>> model = InstructBlipVideoForConditionalGeneration(configuration)

     >>> # Accessing the model configuration
diff --git a/src/transformers/models/lighton_ocr/configuration_lighton_ocr.py b/src/transformers/models/lighton_ocr/configuration_lighton_ocr.py
index ea1028d38881..ce7d633a6f8a 100644
--- a/src/transformers/models/lighton_ocr/configuration_lighton_ocr.py
+++ b/src/transformers/models/lighton_ocr/configuration_lighton_ocr.py
@@ -24,7 +24,7 @@
 from ..auto import CONFIG_MAPPING, AutoConfig


-@auto_docstring(checkpoint="lightonocr-hf/lightonocr-9b")
+@auto_docstring(checkpoint="lightonai/LightOnOCR-1B-1025")
 class LightOnOcrConfig(PretrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/lighton_ocr/modular_lighton_ocr.py b/src/transformers/models/lighton_ocr/modular_lighton_ocr.py
index d88e04afb374..c932f05c9728 100644
--- a/src/transformers/models/lighton_ocr/modular_lighton_ocr.py
+++ b/src/transformers/models/lighton_ocr/modular_lighton_ocr.py
@@ -41,7 +41,7 @@
 from ..pixtral.image_processing_pixtral import get_resize_output_image_size


-@auto_docstring(checkpoint="lightonocr-hf/lightonocr-9b")
+@auto_docstring(checkpoint="lightonai/LightOnOCR-1B-1025")
 class LightOnOcrConfig(PretrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/llava/configuration_llava.py b/src/transformers/models/llava/configuration_llava.py
index 03e3b0ea392a..e9b0466fef77 100644
--- a/src/transformers/models/llava/configuration_llava.py
+++ b/src/transformers/models/llava/configuration_llava.py
@@ -20,7 +20,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="llava-hf/llava-9b")
+@auto_docstring(checkpoint="llava-hf/llava-1.5-7b-hf")
 class LlavaConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/mixtral/configuration_mixtral.py b/src/transformers/models/mixtral/configuration_mixtral.py
index 51b2e00c4632..fe59aa3b721c 100644
--- a/src/transformers/models/mixtral/configuration_mixtral.py
+++ b/src/transformers/models/mixtral/configuration_mixtral.py
@@ -21,7 +21,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="mixtralai/Mixtral-8x7B")
+@auto_docstring(checkpoint="mistralai/Mixtral-8x7B-v0.1")
 class MixtralConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/nemotron/configuration_nemotron.py b/src/transformers/models/nemotron/configuration_nemotron.py
index c2c6df3fbb2d..bb0570446c87 100644
--- a/src/transformers/models/nemotron/configuration_nemotron.py
+++ b/src/transformers/models/nemotron/configuration_nemotron.py
@@ -22,7 +22,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="nvidia/nemotron-3-8b-base-4k-hf")
+@auto_docstring(checkpoint="thhaus/nemotron3-8b")
 class NemotronConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/nemotron/modeling_nemotron.py b/src/transformers/models/nemotron/modeling_nemotron.py
index 90986619b141..c8bf74ebcb38 100644
--- a/src/transformers/models/nemotron/modeling_nemotron.py
+++ b/src/transformers/models/nemotron/modeling_nemotron.py
@@ -757,8 +757,8 @@ def forward(
         ```python
         >>> from transformers import AutoTokenizer, NemotronForCausalLM

-        >>> model = NemotronForCausalLM.from_pretrained("nvidia/nemotron-3-8b-base-4k-hf")
-        >>> tokenizer = AutoTokenizer.from_pretrained("nvidia/nemotron-3-8b-base-4k-hf")
+        >>> model = NemotronForCausalLM.from_pretrained("thhaus/nemotron3-8b")
+        >>> tokenizer = AutoTokenizer.from_pretrained("thhaus/nemotron3-8b")

         >>> prompt = "Hey, are you conscious? Can you talk to me?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")
diff --git a/src/transformers/models/paligemma/configuration_paligemma.py b/src/transformers/models/paligemma/configuration_paligemma.py
index 7ac92ae35a72..e2035a2f08e4 100644
--- a/src/transformers/models/paligemma/configuration_paligemma.py
+++ b/src/transformers/models/paligemma/configuration_paligemma.py
@@ -20,7 +20,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="paligemma-hf/paligemma-2b")
+@auto_docstring(checkpoint="google/paligemma-3b-pt-224")
 class PaliGemmaConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/patchtst/configuration_patchtst.py b/src/transformers/models/patchtst/configuration_patchtst.py
index 619ac7cad23b..a07da696e739 100644
--- a/src/transformers/models/patchtst/configuration_patchtst.py
+++ b/src/transformers/models/patchtst/configuration_patchtst.py
@@ -20,7 +20,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="ibm/patchtst")
+@auto_docstring(checkpoint="ibm-granite/granite-timeseries-patchtst")
 class PatchTSTConfig(PreTrainedConfig):
     r"""
     context_length (`int`, *optional*, defaults to 32):
diff --git a/src/transformers/models/pixtral/configuration_pixtral.py b/src/transformers/models/pixtral/configuration_pixtral.py
index ad6eee14a5ce..3c1913a69ab9 100644
--- a/src/transformers/models/pixtral/configuration_pixtral.py
+++ b/src/transformers/models/pixtral/configuration_pixtral.py
@@ -20,7 +20,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="pixtral-hf/pixtral-9b")
+@auto_docstring(checkpoint="mistral-labs/pixtral-12b")
 class PixtralVisionConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/plbart/configuration_plbart.py b/src/transformers/models/plbart/configuration_plbart.py
index ca2300d185cf..6b7680055263 100644
--- a/src/transformers/models/plbart/configuration_plbart.py
+++ b/src/transformers/models/plbart/configuration_plbart.py
@@ -20,7 +20,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="pixtral-hf/pixtral-9b")
+@auto_docstring(checkpoint="uclanlp/plbart-base")
 class PLBartConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/qwen2/configuration_qwen2.py b/src/transformers/models/qwen2/configuration_qwen2.py
index 460b2de2edbc..f9726bf27a0c 100644
--- a/src/transformers/models/qwen2/configuration_qwen2.py
+++ b/src/transformers/models/qwen2/configuration_qwen2.py
@@ -21,7 +21,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Qwen/Qwen2-7B-beta")
+@auto_docstring(checkpoint="Qwen/Qwen2-7B")
 class Qwen2Config(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/qwen3_5/configuration_qwen3_5.py b/src/transformers/models/qwen3_5/configuration_qwen3_5.py
index e13e5701e49d..a2b739c6629d 100644
--- a/src/transformers/models/qwen3_5/configuration_qwen3_5.py
+++ b/src/transformers/models/qwen3_5/configuration_qwen3_5.py
@@ -22,7 +22,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-27B")
 class Qwen3_5TextConfig(PreTrainedConfig):
     r"""
     linear_conv_kernel_dim (`int`, *optional*, defaults to 4):
@@ -140,7 +140,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-27B")
 class Qwen3_5VisionConfig(PreTrainedConfig):
     r"""
     num_position_embeddings (`int`, *optional*, defaults to 2304):
@@ -186,7 +186,7 @@ def __init__(
         self.initializer_range = initializer_range


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-27B")
 class Qwen3_5Config(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/qwen3_5/modular_qwen3_5.py b/src/transformers/models/qwen3_5/modular_qwen3_5.py
index 7a09f0bd4a18..f679eafc322f 100644
--- a/src/transformers/models/qwen3_5/modular_qwen3_5.py
+++ b/src/transformers/models/qwen3_5/modular_qwen3_5.py
@@ -56,7 +56,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-27B")
 class Qwen3_5TextConfig(Qwen3NextConfig):
     r"""
     linear_conv_kernel_dim (`int`, *optional*, defaults to 4):
@@ -144,7 +144,7 @@ def __init__(
         del self.router_aux_loss_coef


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-27B")
 class Qwen3_5VisionConfig(Qwen3VLVisionConfig):
     model_type = "qwen3_5"
@@ -168,7 +168,7 @@ def __init__(
         del self.deepstack_visual_indexes


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-27B")
 class Qwen3_5Config(Qwen3VLConfig):
     r"""
     Example:
diff --git a/src/transformers/models/qwen3_5_moe/configuration_qwen3_5_moe.py b/src/transformers/models/qwen3_5_moe/configuration_qwen3_5_moe.py
index 56650e86182e..049673cc1ca2 100644
--- a/src/transformers/models/qwen3_5_moe/configuration_qwen3_5_moe.py
+++ b/src/transformers/models/qwen3_5_moe/configuration_qwen3_5_moe.py
@@ -22,7 +22,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-35B-A3B")
 class Qwen3_5MoeTextConfig(PreTrainedConfig):
     r"""
     linear_conv_kernel_dim (`int`, *optional*, defaults to 4):
@@ -153,7 +153,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-35B-A3B")
 class Qwen3_5MoeVisionConfig(PreTrainedConfig):
     r"""
     num_position_embeddings (`int`, *optional*, defaults to 2304):
@@ -199,7 +199,7 @@ def __init__(
         self.initializer_range = initializer_range


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-35B-A3B")
 class Qwen3_5MoeConfig(PreTrainedConfig):
     r"""
     Example:
diff --git a/src/transformers/models/qwen3_5_moe/modular_qwen3_5_moe.py b/src/transformers/models/qwen3_5_moe/modular_qwen3_5_moe.py
index b8f40bb1b58e..cef308d80a07 100644
--- a/src/transformers/models/qwen3_5_moe/modular_qwen3_5_moe.py
+++ b/src/transformers/models/qwen3_5_moe/modular_qwen3_5_moe.py
@@ -54,7 +54,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-35B-A3B")
 class Qwen3_5MoeTextConfig(Qwen3NextConfig):
     r"""
     linear_conv_kernel_dim (`int`, *optional*, defaults to 4):
@@ -145,12 +145,12 @@ def __init__(
         del self.mlp_only_layers


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-35B-A3B")
 class Qwen3_5MoeVisionConfig(Qwen3_5VisionConfig):
     pass


-@auto_docstring(checkpoint="Qwen/Qwen3.5-9B-Instruct")
+@auto_docstring(checkpoint="Qwen/Qwen3.5-35B-A3B")
 class Qwen3_5MoeConfig(Qwen3VLConfig):
     r"""
     Example:
diff --git a/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py b/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py
index fb5b0c4c3286..c422bfac9183 100644
--- a/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py
+++ b/src/transformers/models/qwen3_moe/configuration_qwen3_moe.py
@@ -21,7 +21,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3MoeConfig(PreTrainedConfig):
     r"""
     decoder_sparse_step (`int`, *optional*, defaults to 1):
diff --git a/src/transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py
index 4ff0f2137d44..3ef96837f664 100644
--- a/src/transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py
+++ b/src/transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py
@@ -82,7 +82,7 @@ def __init__(
         self.downsample_hidden_size = downsample_hidden_size


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeVisionEncoderConfig(PreTrainedConfig):
     r"""
     num_position_embeddings (`int`, *optional*, defaults to 2304):
@@ -130,7 +130,7 @@ def __init__(
         self.deepstack_visual_indexes = deepstack_visual_indexes


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeTextConfig(PreTrainedConfig):
     r"""
     decoder_sparse_step (`int`, *optional*, defaults to 1):
@@ -241,7 +241,7 @@ def __init__(
         )


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeThinkerConfig(PreTrainedConfig):
     r"""
     position_id_per_seconds (`int`, *optional*, defaults to 25):
@@ -420,7 +420,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeTalkerTextConfig(PreTrainedConfig):
     r"""
     decoder_sparse_step (`int`, *optional*, defaults to 1):
@@ -536,7 +536,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeTalkerConfig(PreTrainedConfig):
     r"""
     code_predictor_config (`dict`, *optional*):
@@ -651,7 +651,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeCode2WavConfig(PreTrainedConfig):
     r"""
     num_quantizers (`int`, *optional*, defaults to 16):
@@ -731,7 +731,7 @@ def layer_types(self):
         return ["sliding_attention"] * self.num_hidden_layers


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeConfig(PreTrainedConfig):
     r"""
     thinker_config (`dict`, *optional*): Configuration of the underlying thinker sub-model.
diff --git a/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py b/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py
index a58cf07e3788..8e548f703845 100644
--- a/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py
+++ b/src/transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py
@@ -177,12 +177,12 @@ def __init__(
         self.downsample_hidden_size = downsample_hidden_size


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeVisionEncoderConfig(Qwen3VLMoeVisionConfig):
     pass


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeTextConfig(PreTrainedConfig):
     r"""
     decoder_sparse_step (`int`, *optional*, defaults to 1):
@@ -293,7 +293,7 @@ def __init__(
         )


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeThinkerConfig(Qwen2_5OmniThinkerConfig):
     r"""
     position_id_per_seconds (`int`, *optional*, defaults to 25):
@@ -497,7 +497,7 @@ def __init__(
         self.sliding_window = sliding_window


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeTalkerConfig(PreTrainedConfig):
     r"""
     code_predictor_config (`dict`, *optional*):
@@ -612,7 +612,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeCode2WavConfig(PreTrainedConfig):
     r"""
     num_quantizers (`int`, *optional*, defaults to 16):
@@ -692,7 +692,7 @@ def layer_types(self):
         return ["sliding_attention"] * self.num_hidden_layers


-@auto_docstring(checkpoint="Qwen/Qwen3-15B-A2B")
+@auto_docstring(checkpoint="Qwen/Qwen3-30B-A3B-Base")
 class Qwen3OmniMoeConfig(PreTrainedConfig):
     r"""
     thinker_config (`dict`, *optional*): Configuration of the underlying thinker sub-model.
diff --git a/src/transformers/models/sam3_tracker/configuration_sam3_tracker.py b/src/transformers/models/sam3_tracker/configuration_sam3_tracker.py
index cde4d48b6bde..553cae852c0f 100644
--- a/src/transformers/models/sam3_tracker/configuration_sam3_tracker.py
+++ b/src/transformers/models/sam3_tracker/configuration_sam3_tracker.py
@@ -23,7 +23,7 @@
 from ..auto import CONFIG_MAPPING, AutoConfig


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerPromptEncoderConfig(PreTrainedConfig):
     r"""
     mask_input_channels (`int`, *optional*, defaults to 16):
@@ -59,7 +59,7 @@ def __init__(
         self.scale = scale


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerMaskDecoderConfig(PreTrainedConfig):
     r"""
     mlp_dim (`int`, *optional*, defaults to 2048):
@@ -117,7 +117,7 @@ def __init__(
         self.attention_downsample_rate = attention_downsample_rate


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerConfig(PreTrainedConfig):
     r"""
     prompt_encoder_config (Union[`dict`, `Sam3TrackerPromptEncoderConfig`], *optional*):
diff --git a/src/transformers/models/sam3_tracker/modular_sam3_tracker.py b/src/transformers/models/sam3_tracker/modular_sam3_tracker.py
index a34bc4372fe4..bab158d08a89 100644
--- a/src/transformers/models/sam3_tracker/modular_sam3_tracker.py
+++ b/src/transformers/models/sam3_tracker/modular_sam3_tracker.py
@@ -41,7 +41,7 @@
 from ..sam2.processing_sam2 import Sam2Processor


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerPromptEncoderConfig(Sam2PromptEncoderConfig):
     r"""
     mask_input_channels (`int`, *optional*, defaults to 16):
@@ -69,17 +69,17 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerProcessor(Sam2Processor):
     pass


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerMaskDecoderConfig(Sam2MaskDecoderConfig):
     pass


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerConfig(Sam2Config):
     r"""
     prompt_encoder_config (Union[`dict`, `Sam3TrackerPromptEncoderConfig`], *optional*):
diff --git a/src/transformers/models/sam3_tracker/processing_sam3_tracker.py b/src/transformers/models/sam3_tracker/processing_sam3_tracker.py
index 2c4164233cc4..b48728a805d0 100644
--- a/src/transformers/models/sam3_tracker/processing_sam3_tracker.py
+++ b/src/transformers/models/sam3_tracker/processing_sam3_tracker.py
@@ -29,7 +29,7 @@
 from ...utils import TensorType, auto_docstring


-@auto_docstring(checkpoint="facebook/sam3_tracker.1-hiera-tiny")
+@auto_docstring(checkpoint="facebook/sam3")
 class Sam3TrackerProcessor(ProcessorMixin):
     def __init__(self, image_processor, target_size: int | None = None, point_pad_value: int = -10, **kwargs):
         r"""
diff --git a/src/transformers/models/seed_oss/configuration_seed_oss.py b/src/transformers/models/seed_oss/configuration_seed_oss.py
index fc1745bb000e..f1d6771ac149 100644
--- a/src/transformers/models/seed_oss/configuration_seed_oss.py
+++ b/src/transformers/models/seed_oss/configuration_seed_oss.py
@@ -18,7 +18,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="ByteDance-Seed/SeedOss-36B")
+@auto_docstring(checkpoint="ByteDance-Seed/Seed-OSS-36B-Instruct")
 class SeedOssConfig(PreTrainedConfig):
     r"""
     attention_out_bias (`bool`, *optional*, defaults to `False`):
diff --git a/src/transformers/models/seed_oss/modeling_seed_oss.py b/src/transformers/models/seed_oss/modeling_seed_oss.py
index da2498bff21c..1ebc8f10a272 100644
--- a/src/transformers/models/seed_oss/modeling_seed_oss.py
+++ b/src/transformers/models/seed_oss/modeling_seed_oss.py
@@ -466,8 +466,8 @@ def forward(
         ```python
         >>> from transformers import AutoTokenizer, SeedOssForCausalLM

-        >>> model = SeedOssForCausalLM.from_pretrained("ByteDance-Seed/SeedOss-36B")
-        >>> tokenizer = AutoTokenizer.from_pretrained("ByteDance-Seed/SeedOss-36B")
+        >>> model = SeedOssForCausalLM.from_pretrained("ByteDance-Seed/Seed-OSS-36B-Instruct")
+        >>> tokenizer = AutoTokenizer.from_pretrained("ByteDance-Seed/Seed-OSS-36B-Instruct")

         >>> prompt = "Hey, are you conscious? Can you talk to me?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")
diff --git a/src/transformers/models/seed_oss/modular_seed_oss.py b/src/transformers/models/seed_oss/modular_seed_oss.py
index 5f122bbafc67..88e345fdcac1 100644
--- a/src/transformers/models/seed_oss/modular_seed_oss.py
+++ b/src/transformers/models/seed_oss/modular_seed_oss.py
@@ -41,7 +41,7 @@
 logger = logging.get_logger(__name__)


-_CHECKPOINT_FOR_DOC = "ByteDance-Seed/SeedOss-36B"
+_CHECKPOINT_FOR_DOC = "ByteDance-Seed/Seed-OSS-36B-Instruct"


 class SeedOssRMSNorm(LlamaRMSNorm):
@@ -165,8 +165,8 @@ def forward(
         ```python
         >>> from transformers import AutoTokenizer, SeedOssForCausalLM

-        >>> model = SeedOssForCausalLM.from_pretrained("ByteDance-Seed/SeedOss-36B")
-        >>> tokenizer = AutoTokenizer.from_pretrained("ByteDance-Seed/SeedOss-36B")
+        >>> model = SeedOssForCausalLM.from_pretrained("ByteDance-Seed/Seed-OSS-36B-Instruct")
+        >>> tokenizer = AutoTokenizer.from_pretrained("ByteDance-Seed/Seed-OSS-36B-Instruct")

         >>> prompt = "Hey, are you conscious? Can you talk to me?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")
diff --git a/src/transformers/models/segformer/configuration_segformer.py b/src/transformers/models/segformer/configuration_segformer.py
index ee8af653c540..fb921f59d365 100644
--- a/src/transformers/models/segformer/configuration_segformer.py
+++ b/src/transformers/models/segformer/configuration_segformer.py
@@ -20,7 +20,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="ByteDance-Seed/SeedOss-36B")
+@auto_docstring(checkpoint="ByteDance-Seed/Seed-OSS-36B-Instruct")
 class SegformerConfig(PreTrainedConfig):
     r"""
     num_encoder_blocks (`int`, *optional*, defaults to 4):
diff --git a/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py b/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py
index f95aa9209d95..8619f26aa144 100644
--- a/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py
+++ b/src/transformers/models/shieldgemma2/configuration_shieldgemma2.py
@@ -21,7 +21,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="google/gemma-3-4b")
+@auto_docstring(checkpoint="google/shieldgemma-2-4b-it")
 class ShieldGemma2Config(PreTrainedConfig):
     r"""
     mm_tokens_per_image (`int`, *optional*, defaults to 256):
diff --git a/src/transformers/models/t5gemma2/configuration_t5gemma2.py b/src/transformers/models/t5gemma2/configuration_t5gemma2.py
index 06fbda364e31..8b69de4f253a 100644
--- a/src/transformers/models/t5gemma2/configuration_t5gemma2.py
+++ b/src/transformers/models/t5gemma2/configuration_t5gemma2.py
@@ -29,7 +29,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2TextConfig(PreTrainedConfig):
     r"""
     query_pre_attn_scalar (`float`, *optional*, defaults to 256):
@@ -154,7 +154,7 @@ def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation=None, **kwa
         return kwargs


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2EncoderConfig(PreTrainedConfig):
     r"""
     mm_tokens_per_image (`int`, *optional*, defaults to 256):
@@ -233,7 +233,7 @@ def __init__(
         super().__init__(**kwargs)


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2DecoderConfig(PreTrainedConfig):
     r"""
     query_pre_attn_scalar (`float`, *optional*, defaults to 256):
@@ -358,7 +358,7 @@ def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation=None, **kwa
         return kwargs


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2Config(PreTrainedConfig):
     r"""
     encoder (`Union[T5Gemma2EncoderConfig, dict]`, optional, *optional*):
diff --git a/src/transformers/models/t5gemma2/modular_t5gemma2.py b/src/transformers/models/t5gemma2/modular_t5gemma2.py
index 44785233097f..d0f0bf4e4ea6 100644
--- a/src/transformers/models/t5gemma2/modular_t5gemma2.py
+++ b/src/transformers/models/t5gemma2/modular_t5gemma2.py
@@ -72,7 +72,7 @@
 logger = logging.get_logger(__name__)


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2TextConfig(Gemma3TextConfig, PreTrainedConfig):
     r"""
     query_pre_attn_scalar (`float`, *optional*, defaults to 256):
@@ -149,7 +149,7 @@ def __init__(
         PreTrainedConfig.__init__(**kwargs)


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2EncoderConfig(Gemma3Config):
     model_type = "t5gemma2_encoder"
@@ -159,7 +159,7 @@ class T5Gemma2EncoderConfig(Gemma3Config):
     }


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2DecoderConfig(Gemma3TextConfig, PreTrainedConfig):
     r"""
     query_pre_attn_scalar (`float`, *optional*, defaults to 256):
@@ -236,7 +236,7 @@ def __init__(
         PreTrainedConfig.__init__(**kwargs)


-@auto_docstring(checkpoint="google/t5gemma2_text-7b")
+@auto_docstring(checkpoint="google/t5gemma-2-270m-270m")
 class T5Gemma2Config(PreTrainedConfig):
     r"""
     encoder (`Union[T5Gemma2EncoderConfig, dict]`, optional, *optional*):
diff --git a/src/transformers/models/vaultgemma/configuration_vaultgemma.py b/src/transformers/models/vaultgemma/configuration_vaultgemma.py
index 9c0244bd1aa2..379c1edf4449 100644
--- a/src/transformers/models/vaultgemma/configuration_vaultgemma.py
+++ b/src/transformers/models/vaultgemma/configuration_vaultgemma.py
@@ -24,7 +24,7 @@
 from ...utils import auto_docstring


-@auto_docstring(checkpoint="google/vaultgemma-7b")
+@auto_docstring(checkpoint="google/vaultgemma-1b")
 class VaultGemmaConfig(PreTrainedConfig):
     r"""
     query_pre_attn_scalar (`float`, *optional*, defaults to 256):
diff --git a/src/transformers/models/vaultgemma/modular_vaultgemma.py b/src/transformers/models/vaultgemma/modular_vaultgemma.py
index 46d0a62511a8..150f1b45ff31 100644
--- a/src/transformers/models/vaultgemma/modular_vaultgemma.py
+++ b/src/transformers/models/vaultgemma/modular_vaultgemma.py
@@ -22,7 +22,7 @@
 from ..gemma2.modeling_gemma2 import Gemma2Attention, Gemma2DecoderLayer, Gemma2ForCausalLM, Gemma2MLP, Gemma2RMSNorm


-@auto_docstring(checkpoint="google/vaultgemma-7b")
+@auto_docstring(checkpoint="google/vaultgemma-1b")
 class VaultGemmaConfig(Gemma2Config):
     def __init__(
         self,