2 changes: 1 addition & 1 deletion src/transformers/models/apertus/configuration_apertus.py
@@ -23,7 +23,7 @@
from ...utils import auto_docstring


@auto_docstring(checkpoint="swiss-ai/Apertus-8B")
@auto_docstring(checkpoint="swiss-ai/Apertus-8B-Instruct-2509")
class ApertusConfig(PreTrainedConfig):
r"""
```python
4 changes: 2 additions & 2 deletions src/transformers/models/apertus/modeling_apertus.py
@@ -459,8 +459,8 @@ def forward(
```python
>>> from transformers import AutoTokenizer, ApertusForCausalLM

->>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B")
->>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B")
+>>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")
+>>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")

>>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="pt")
6 changes: 3 additions & 3 deletions src/transformers/models/apertus/modular_apertus.py
@@ -42,7 +42,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="swiss-ai/Apertus-8B")
@auto_docstring(checkpoint="swiss-ai/Apertus-8B-Instruct-2509")
class ApertusConfig(PreTrainedConfig):
r"""
```python
@@ -260,8 +260,8 @@ def forward(self, **super_kwargs):
```python
>>> from transformers import AutoTokenizer, ApertusForCausalLM

->>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B")
->>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B")
+>>> model = ApertusForCausalLM.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")
+>>> tokenizer = AutoTokenizer.from_pretrained("swiss-ai/Apertus-8B-Instruct-2509")

>>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="pt")
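Every hunk in this PR makes the same one-line change: the checkpoint ID passed to `@auto_docstring` is repointed from a repo name that does not (or no longer) resolve on the Hugging Face Hub to one that does. A quick way to spot-check an ID is to query the Hub directly. The sketch below is illustrative only (the `checkpoint_exists` helper is ours, not part of the PR) and assumes `huggingface_hub` is installed and the network is reachable:

```python
# Hypothetical spot-check, not part of this PR: does a checkpoint ID resolve on the Hub?
from huggingface_hub import model_info
from huggingface_hub.utils import RepositoryNotFoundError


def checkpoint_exists(repo_id: str) -> bool:
    """Return True if repo_id names a reachable Hub repo (gated repos may still need a token)."""
    try:
        model_info(repo_id)
        return True
    except RepositoryNotFoundError:
        return False


print(checkpoint_exists("swiss-ai/Apertus-8B-Instruct-2509"))  # corrected ID used above
print(checkpoint_exists("swiss-ai/Apertus-8B"))                # old ID this PR replaces
```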
4 changes: 2 additions & 2 deletions src/transformers/models/chameleon/configuration_chameleon.py
@@ -21,7 +21,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="meta/chameleon-7B")
@auto_docstring(checkpoint="facebook/chameleon-7b")
class ChameleonVQVAEConfig(PreTrainedConfig):
r"""
base_channels (`int`, *optional*, defaults to 128):
@@ -76,7 +76,7 @@ def __init__(
self.initializer_range = initializer_range


@auto_docstring(checkpoint="meta/chameleon-7B")
@auto_docstring(checkpoint="facebook/chameleon-7b")
class ChameleonConfig(PreTrainedConfig):
r"""
model_parallel_size (`int`, *optional*, defaults to 1):
6 changes: 3 additions & 3 deletions src/transformers/models/dab_detr/configuration_dab_detr.py
@@ -22,7 +22,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="IDEA-Research/dab_detr-base")
@auto_docstring(checkpoint="IDEA-Research/dab-detr-resnet-50")
class DabDetrConfig(PreTrainedConfig):
r"""
num_queries (`int`, *optional*, defaults to 300):
@@ -56,10 +56,10 @@ class DabDetrConfig(PreTrainedConfig):
```python
>>> from transformers import DabDetrConfig, DabDetrModel

->>> # Initializing a DAB-DETR IDEA-Research/dab_detr-base style configuration
+>>> # Initializing a DAB-DETR IDEA-Research/dab-detr-resnet-50 style configuration
>>> configuration = DabDetrConfig()

->>> # Initializing a model (with random weights) from the IDEA-Research/dab_detr-base style configuration
+>>> # Initializing a model (with random weights) from the IDEA-Research/dab-detr-resnet-50 style configuration
>>> model = DabDetrModel(configuration)

>>> # Accessing the model configuration
4 changes: 2 additions & 2 deletions src/transformers/models/dab_detr/modeling_dab_detr.py
@@ -1248,8 +1248,8 @@ def forward(
>>> with httpx.stream("GET", url) as response:
... image = Image.open(BytesIO(response.read()))
->>> image_processor = AutoImageProcessor.from_pretrained("IDEA-Research/dab_detr-base")
->>> model = AutoModel.from_pretrained("IDEA-Research/dab_detr-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("IDEA-Research/dab-detr-resnet-50")
+>>> model = AutoModel.from_pretrained("IDEA-Research/dab-detr-resnet-50")
>>> # prepare image for the model
>>> inputs = image_processor(images=image, return_tensors="pt")
8 changes: 4 additions & 4 deletions src/transformers/models/edgetam/configuration_edgetam.py
@@ -22,7 +22,7 @@
from ..auto import CONFIG_MAPPING, AutoConfig


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVisionConfig(PreTrainedConfig):
r"""
backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
@@ -98,7 +98,7 @@ def __init__(
super().__init__(**kwargs)


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamPromptEncoderConfig(PreTrainedConfig):
r"""
mask_input_channels (`int`, *optional*, defaults to 16):
@@ -134,7 +134,7 @@ def __init__(
self.scale = scale


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamMaskDecoderConfig(PreTrainedConfig):
r"""
mlp_dim (`int`, *optional*, defaults to 2048):
@@ -192,7 +192,7 @@ def __init__(
self.attention_downsample_rate = attention_downsample_rate


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamConfig(PreTrainedConfig):
r"""
prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
8 changes: 4 additions & 4 deletions src/transformers/models/edgetam/modular_edgetam.py
@@ -36,7 +36,7 @@
)


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVisionConfig(PreTrainedConfig):
r"""
backbone_channel_list (`List[int]`, *optional*, defaults to `[384, 192, 96, 48]`):
@@ -112,17 +112,17 @@ def __init__(
super().__init__(**kwargs)


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamPromptEncoderConfig(Sam2PromptEncoderConfig):
pass


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamMaskDecoderConfig(Sam2MaskDecoderConfig):
pass


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamConfig(Sam2Config):
r"""
prompt_encoder_config (Union[`dict`, `EdgeTamPromptEncoderConfig`], *optional*):
@@ -23,7 +23,7 @@
from ..auto import CONFIG_MAPPING, AutoConfig


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVideoPromptEncoderConfig(PreTrainedConfig):
r"""
mask_input_channels (`int`, *optional*, defaults to 16):
@@ -59,7 +59,7 @@ def __init__(
self.scale = scale


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVideoMaskDecoderConfig(PreTrainedConfig):
r"""
mlp_dim (`int`, *optional*, defaults to 2048):
@@ -117,7 +117,7 @@ def __init__(
self.attention_downsample_rate = attention_downsample_rate


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVideoConfig(PreTrainedConfig):
r"""
prompt_encoder_config (Union[`dict`, `EdgeTamVideoPromptEncoderConfig`], *optional*):
@@ -57,17 +57,17 @@
)


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVideoPromptEncoderConfig(Sam2VideoPromptEncoderConfig):
pass


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVideoMaskDecoderConfig(Sam2VideoMaskDecoderConfig):
pass


@auto_docstring(checkpoint="facebook/EdgeTAM")
@auto_docstring(checkpoint="yonigozlan/EdgeTAM-hf")
class EdgeTamVideoConfig(Sam2VideoConfig):
r"""
prompt_encoder_config (Union[`dict`, `EdgeTamVideoPromptEncoderConfig`], *optional*):
@@ -21,7 +21,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="ibm-fms/FalconH1-9.8b-2.2T-hf")
@auto_docstring(checkpoint="tiiuae/Falcon-H1-1.5B-Deep-Instruct")
class FalconH1Config(PreTrainedConfig):
r"""
num_logits_to_keep (`int` or `None`, *optional*, defaults to 1):
4 changes: 2 additions & 2 deletions src/transformers/models/gemma3/configuration_gemma3.py
@@ -29,7 +29,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="google/gemma3_text-7b")
@auto_docstring(checkpoint="google/gemma-3-4b-it")
class Gemma3TextConfig(PreTrainedConfig):
r"""
final_logit_softcapping (`float`, *optional*):
@@ -174,7 +174,7 @@ def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation=None, **kwa
return kwargs


@auto_docstring(checkpoint="google/gemma3_text-7b")
@auto_docstring(checkpoint="google/gemma-3-4b-it")
class Gemma3Config(PreTrainedConfig):
r"""
mm_tokens_per_image (`int`, *optional*, defaults to 256):
4 changes: 2 additions & 2 deletions src/transformers/models/gemma3/modular_gemma3.py
@@ -59,7 +59,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="google/gemma3_text-7b")
@auto_docstring(checkpoint="google/gemma-3-4b-it")
class Gemma3TextConfig(Gemma2Config, PreTrainedConfig):
r"""
final_logit_softcapping (`float`, *optional*):
@@ -198,7 +198,7 @@ def convert_rope_params_to_dict(self, ignore_keys_at_rope_validation=None, **kwa
return kwargs


@auto_docstring(checkpoint="google/gemma3_text-7b")
@auto_docstring(checkpoint="google/gemma-3-4b-it")
class Gemma3Config(PreTrainedConfig):
r"""
mm_tokens_per_image (`int`, *optional*, defaults to 256):
2 changes: 1 addition & 1 deletion src/transformers/models/glm4_moe/configuration_glm4_moe.py
@@ -23,7 +23,7 @@
from ...utils import auto_docstring


@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
@auto_docstring(checkpoint="zai-org/GLM-4.5")
class Glm4MoeConfig(PreTrainedConfig):
r"""
n_group (`int`, *optional*, defaults to 1):
2 changes: 1 addition & 1 deletion src/transformers/models/glm4_moe/modular_glm4_moe.py
@@ -36,7 +36,7 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
@auto_docstring(checkpoint="zai-org/GLM-4.5")
class Glm4MoeConfig(PreTrainedConfig):
r"""
n_group (`int`, *optional*, defaults to 1):
@@ -24,7 +24,7 @@
from ...utils import auto_docstring


@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
@auto_docstring(checkpoint="zai-org/GLM-4.5")
class Glm4MoeLiteConfig(PreTrainedConfig):
r"""
rope_interleave (`bool`, *optional*, defaults to `True`):
@@ -33,7 +33,7 @@
)


@auto_docstring(checkpoint="THUDM/GLM-4-100B-A10B")
@auto_docstring(checkpoint="zai-org/GLM-4.5")
class Glm4MoeLiteConfig(PreTrainedConfig):
r"""
rope_interleave (`bool`, *optional*, defaults to `True`):
2 changes: 1 addition & 1 deletion src/transformers/models/helium/configuration_helium.py
@@ -19,7 +19,7 @@
from ...utils import auto_docstring


@auto_docstring(checkpoint="kyutai/helium-2b")
@auto_docstring(checkpoint="kyutai/helium-1-preview")
class HeliumConfig(PreTrainedConfig):
r"""
Example:
@@ -22,18 +22,18 @@
logger = logging.get_logger(__name__)


@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
class InstructBlipVisionConfig(PreTrainedConfig):
r"""
Example:

```python
>>> from transformers import InstructBlipVisionConfig, InstructBlipVisionModel

->>> # Initializing a InstructBlipVisionConfig with Salesforce/instruct-blip-flan-t5 style configuration
+>>> # Initializing a InstructBlipVisionConfig with Salesforce/instructblip-flan-t5-xl style configuration
>>> configuration = InstructBlipVisionConfig()

->>> # Initializing a InstructBlipVisionModel (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+>>> # Initializing a InstructBlipVisionModel (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
>>> model = InstructBlipVisionModel(configuration)

>>> # Accessing the model configuration
@@ -73,7 +73,7 @@ def __init__(
self.qkv_bias = qkv_bias


@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
class InstructBlipQFormerConfig(PreTrainedConfig):
r"""
cross_attention_frequency (`int`, *optional*, defaults to 2):
@@ -86,10 +86,10 @@ class InstructBlipQFormerConfig(PreTrainedConfig):
```python
>>> from transformers import InstructBlipQFormerConfig, InstructBlipQFormerModel

->>> # Initializing a InstructBLIP Salesforce/instruct-blip-flan-t5 style configuration
+>>> # Initializing a InstructBLIP Salesforce/instructblip-flan-t5-xl style configuration
>>> configuration = InstructBlipQFormerConfig()

->>> # Initializing a model (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+>>> # Initializing a model (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
>>> model = InstructBlipQFormerModel(configuration)
>>> # Accessing the model configuration
>>> configuration = model.config
@@ -134,7 +134,7 @@ def __init__(
self.encoder_hidden_size = encoder_hidden_size


@auto_docstring(checkpoint="Salesforce/instruct-blip-flan-t5")
@auto_docstring(checkpoint="Salesforce/instructblip-flan-t5-xl")
class InstructBlipConfig(PreTrainedConfig):
r"""
qformer_config (`dict`, *optional*):
@@ -153,10 +153,10 @@ class InstructBlipConfig(PreTrainedConfig):
... InstructBlipForConditionalGeneration,
... )

->>> # Initializing a InstructBlipConfig with Salesforce/instruct-blip-flan-t5 style configuration
+>>> # Initializing a InstructBlipConfig with Salesforce/instructblip-flan-t5-xl style configuration
>>> configuration = InstructBlipConfig()

->>> # Initializing a InstructBlipForConditionalGeneration (with random weights) from the Salesforce/instruct-blip-flan-t5 style configuration
+>>> # Initializing a InstructBlipForConditionalGeneration (with random weights) from the Salesforce/instructblip-flan-t5-xl style configuration
>>> model = InstructBlipForConditionalGeneration(configuration)

>>> # Accessing the model configuration
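Because the same class of fix recurs across many model files, a repo-wide sweep is a natural follow-up to catch any IDs this PR missed. The script below is a hypothetical helper, not part of the PR: it matches only the simple `auto_docstring(checkpoint="...")` form seen in these diffs, treats gated repos as existing rather than broken, and assumes it is run from the repository root with `huggingface_hub` installed:

```python
# Hypothetical repo-wide sweep, not part of this PR: flag @auto_docstring
# checkpoint IDs that do not resolve to a repo on the Hugging Face Hub.
import re
from pathlib import Path

from huggingface_hub import model_info
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

# Matches the single-keyword form used throughout these diffs.
CHECKPOINT_RE = re.compile(r'auto_docstring\(checkpoint="([^"]+)"\)')


def broken_checkpoints(root: str = "src/transformers/models") -> list[tuple[Path, str]]:
    """Return (file, checkpoint_id) pairs whose ID is not a Hub repo."""
    broken = []
    for path in sorted(Path(root).rglob("*.py")):
        for repo_id in CHECKPOINT_RE.findall(path.read_text(encoding="utf-8")):
            try:
                model_info(repo_id)
            except GatedRepoError:
                # Repo exists but requires accepting a license; the ID is still valid.
                # (GatedRepoError subclasses RepositoryNotFoundError, so catch it first.)
                continue
            except RepositoryNotFoundError:
                broken.append((path, repo_id))
    return broken


if __name__ == "__main__":
    for path, repo_id in broken_checkpoints():
        print(f"{path}: {repo_id}")
```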