diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py index 08c760dcbf57..711c0adb3b73 100644 --- a/src/transformers/modeling_utils.py +++ b/src/transformers/modeling_utils.py @@ -2368,6 +2368,7 @@ def _initialize_weights(self, module, is_remote_code: bool = False): # otherwise if ( is_remote_code + and module is not self # some remote code models do special things in _init_weights for top-most module and all(getattr(param, "_is_hf_initialized", False) for param in module.parameters(recurse=False)) and all( getattr(buffer, "_is_hf_initialized", False)