Commit 20f0e55

Merge branch 'main' into issue-1927-modernize-transformers
2 parents: 1a8222a + ce01712

16 files changed: +252, -227 lines changed

setup.py

Lines changed: 1 addition & 2 deletions

@@ -144,8 +144,7 @@ def localversion_func(version: ScmVersion) -> str:
             if BUILD_TYPE == "release"
             else "compressed-tensors>=0.12.3a2"
         ),
-        # TODO: replace it with the release version
-        ("auto_round @ git+https://github.com/intel/auto-round.git@llmc"),
+        ("auto-round==0.9.1"),
     ],
     extras_require={
         "dev": [

src/llmcompressor/entrypoints/utils.py

Lines changed: 1 addition & 1 deletion

@@ -29,12 +29,12 @@
 from llmcompressor.pytorch.model_load.helpers import parse_dtype
 from llmcompressor.transformers.compression.compressed_tensors_utils import (
     modify_save_pretrained,
-    untie_word_embeddings,
 )
 from llmcompressor.transformers.utils.helpers import (
     is_model_ct_quantized_from_path,
 )
 from llmcompressor.typing import Processor
+from llmcompressor.utils import untie_word_embeddings
 from llmcompressor.utils.fsdp.helpers import is_fsdp_model

src/llmcompressor/modifiers/autoround/base.py

Lines changed: 8 additions & 11 deletions

@@ -20,10 +20,8 @@
 from llmcompressor.modifiers import Modifier
 from llmcompressor.modifiers.quantization.calibration import apply_calibration_status
 from llmcompressor.modifiers.quantization.quantization import QuantizationMixin
-from llmcompressor.transformers.compression.compressed_tensors_utils import (
-    untie_if_target_shared_embedding,
-)
-from llmcompressor.utils.pytorch.module import get_no_split_params
+from llmcompressor.utils import targets_embeddings, untie_word_embeddings
+from llmcompressor.utils.pytorch import get_no_split_params

 __all__ = ["AutoRoundModifier"]

@@ -107,9 +105,9 @@ class AutoRoundModifier(Modifier, QuantizationMixin):
     # AutoRound modifier arguments
     iters: int = 200
     enable_torch_compile: bool = True
+    batch_size: int = 8

     # private variables
-    _module_names: Dict[torch.nn.Module, str] = PrivateAttr(default_factory=dict)
     _all_module_input: Dict[str, List[Tuple]] = PrivateAttr(default_factory=dict)
     _q_input: Optional[torch.Tensor] = PrivateAttr(default=None)

@@ -124,10 +122,6 @@ def on_initialize(self, state: State, **kwargs) -> bool:
         QuantizationMixin.initialize_quantization(self, state.model)

         # prepare module names
-        self._module_names = {
-            m: name
-            for name, m in match_named_modules(state.model, self.targets, self.ignore)
-        }
         self._add_temporary_names(state.model)
         # freeze all model parameters
         for _, param in state.model.named_parameters():

@@ -142,7 +136,9 @@ def start_calibration(self, model: torch.nn.Module):

         :param model: model to prepare for calibration
         """
-        untie_if_target_shared_embedding(model, self._module_names.values())
+        targets = match_named_modules(model, self.targets, self.ignore)
+        if targets_embeddings(model, targets):
+            untie_word_embeddings(model)

         for _, module in match_named_modules(model, self.targets, self.ignore):
             # Note: No need to register observers for auto-round

@@ -223,6 +219,7 @@ def apply_autoround(self, state, subgraph):
             scheme=ar_quant_scheme,
             iters=self.iters,
             enable_torch_compile=self.enable_torch_compile,
+            batch_size=self.batch_size,
         )
         # TODO: configure layer-wise config based on self.resolved_config
         ar.configure_layer_config(enable_gguf_official_mixed=False)

@@ -236,7 +233,7 @@ def apply_autoround(self, state, subgraph):
             block=decoding_layer,
             inputs=cur_inputs,
             q_input=self._q_input,
-            device=device,
+            device=str(device),
             # Leave offload for LLMC
             auto_offload=False,
         )

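The pattern introduced here replaces the old untie_if_target_shared_embedding helper: match the configured targets, ask whether the shared input/output embedding is among them, and only then untie. A minimal usage sketch of the relocated helpers (the checkpoint, target list, and the match_named_modules import path are illustrative assumptions, not taken from this commit):

    # Sketch only: the model id and target list are illustrative, and
    # match_named_modules is assumed importable from compressed_tensors.utils,
    # mirroring how the modifiers call it.
    from compressed_tensors.utils import match_named_modules
    from transformers import AutoModelForCausalLM

    from llmcompressor.utils import targets_embeddings, untie_word_embeddings

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")

    # Same shape as AutoRoundModifier.start_calibration after this change.
    targets = match_named_modules(model, ["Linear"], [])
    if targets_embeddings(model, targets):
        # lm_head is matched and (for this model) shares its weight with the
        # input embedding, so the weights are untied before calibration.
        untie_word_embeddings(model)
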
src/llmcompressor/modifiers/awq/mappings.py

Lines changed: 1 addition & 0 deletions

@@ -166,6 +166,7 @@ class AWQMapping:
     "Llama4ForConditionalGeneration": _default_mappings,
     "Mistral3ForConditionalGeneration": _default_mappings,
     "MistralForCausalLM": _default_mappings,
+    "Olmo3ForCausalLM": _exaone4_mappings,
     "Phi3ForCausalLM": _phi_mappings,
     "Phi3VForCausalLM": _phi_mappings,
     "Qwen2ForCausalLM": _default_mappings,

src/llmcompressor/modifiers/quantization/quantization/mixin.py

Lines changed: 4 additions & 8 deletions

@@ -34,9 +34,7 @@
     reset_quantization_status,
 )
 from llmcompressor.modifiers.utils.hooks import HooksMixin
-from llmcompressor.transformers.compression.compressed_tensors_utils import (
-    untie_if_target_shared_embedding,
-)
+from llmcompressor.utils import targets_embeddings, untie_word_embeddings

 __all__ = ["QuantizationMixin"]

@@ -182,11 +180,9 @@ def start_calibration(self, model: torch.nn.Module):

         :param model: model to prepare for calibration
         """
-
-        matched_module_generator = (
-            x[1] for x in match_named_modules(model, self.resolved_targets, self.ignore)
-        )
-        untie_if_target_shared_embedding(model, matched_module_generator)
+        targets = match_named_modules(model, self.resolved_targets, self.ignore)
+        if targets_embeddings(model, targets):
+            untie_word_embeddings(model)

         for _, module in match_named_modules(model, self.resolved_targets, self.ignore):
             self._initialize_observers(module)

src/llmcompressor/modifiers/transform/quip/base.py

Lines changed: 17 additions & 12 deletions

@@ -12,9 +12,8 @@

 from llmcompressor.core import Event, EventType, State
 from llmcompressor.modifiers import Modifier
-from llmcompressor.transformers.compression.compressed_tensors_utils import (
-    untie_if_target_shared_embedding,
-)
+from llmcompressor.typing import NamedModules
+from llmcompressor.utils import targets_embeddings, untie_word_embeddings

 __all__ = ["QuIPModifier"]

@@ -102,18 +101,13 @@ def on_initialize(self, state: State, **kwargs) -> bool:

     def on_start(self, state: State, event: Event, **kwargs):
         self.started_ = True
-
-        def matched_module_generator():
-            for scheme in self.transform_config.config_groups.values():
-                for arg in scheme.apply:
-                    gen = match_named_modules(state.model, arg.targets, arg.ignore)
-                    for _, module in gen:
-                        yield module
+        model = state.model

         # Untie embeddings if they will be targeted by transforms
-        untie_if_target_shared_embedding(state.model, matched_module_generator())
+        if targets_embeddings(model, self._get_targets(model)):
+            untie_word_embeddings(model)

-        apply_transform_config(state.model, self.transform_config)
+        apply_transform_config(model, self.transform_config)

     def on_event(self, state: State, event: Event, **kwargs):
         if event.type_ == EventType.CALIBRATION_EPOCH_START:

@@ -136,6 +130,17 @@ def on_finalize(self, state: State, **kwargs) -> bool:

         return True

+    def _get_targets(self, model: torch.nn.Module) -> NamedModules:
+        if not self.initialized_:
+            raise ValueError("Cannot get targets before modifier has been initialized")
+
+        return [
+            (name, module)
+            for scheme in self.transform_config.config_groups.values()
+            for arg in scheme.apply
+            for name, module in match_named_modules(model, arg.targets, arg.ignore)
+        ]
+
     def _create_config(self) -> TransformConfig:
         config_groups = dict()
         if "v" in self.rotations:

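The new _get_targets helper flattens every transform scheme's apply args into (name, module) pairs and feeds them to targets_embeddings. Neither the NamedModules alias nor the helper's body appears in this commit; a hedged sketch of what they plausibly look like, based on the untie_if_target_shared_embedding logic removed further down:

    # Assumed shapes only -- the real definitions live in llmcompressor.typing
    # and llmcompressor.utils and are not shown in this diff.
    from typing import Iterable, Tuple

    import torch

    NamedModules = Iterable[Tuple[str, torch.nn.Module]]

    def targets_embeddings_sketch(model: torch.nn.Module, targets: NamedModules) -> bool:
        # Mirrors the removed check: only report True when the model's shared
        # input/output embedding actually appears among the matched targets.
        input_embed = model.get_input_embeddings()
        output_embed = model.get_output_embeddings()
        if input_embed.weight is not output_embed.weight:
            return False  # not tied, nothing needs untying
        return any(module in (input_embed, output_embed) for _, module in targets)
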
src/llmcompressor/modifiers/transform/spinquant/base.py

Lines changed: 20 additions & 8 deletions

@@ -16,9 +16,8 @@
 from llmcompressor.core import Event, EventType, State
 from llmcompressor.modeling import center_embeddings, fuse_norm_linears
 from llmcompressor.modifiers import Modifier
-from llmcompressor.transformers.compression.compressed_tensors_utils import (
-    untie_word_embeddings,
-)
+from llmcompressor.typing import NamedModules
+from llmcompressor.utils import untie_word_embeddings

 from .mappings import SpinQuantMapping, infer_mapping_from_model
 from .norm_mappings import NormMapping, infer_norm_mapping_from_model

@@ -151,14 +150,16 @@ def on_initialize(self, state: State, **kwargs) -> bool:
     @torch.no_grad()
     def on_start(self, state: State, event: Event, **kwargs):
         self.started_ = True
+        model = state.model
+
+        # untie embeddings to avoid unintended effects of `_center_embeddings`
+        untie_word_embeddings(model)

-        # needed any time embeddings/lm_head is modified
-        untie_word_embeddings(state.model)
         # needs to happen after the model has been hooked to execute on the GPU
         # otherwise we're applying weight transforms on CPU
-        self._center_embeddings(state.model)
-        self._fuse_norms(state.model)
-        apply_transform_config(state.model, self.transform_config)
+        self._center_embeddings(model)
+        self._fuse_norms(model)
+        apply_transform_config(model, self.transform_config)

     def on_event(self, state: State, event: Event, **kwargs):
         if event.type_ == EventType.CALIBRATION_EPOCH_START:

@@ -181,6 +182,17 @@ def on_finalize(self, state: State, **kwargs) -> bool:

         return True

+    def _get_targets(self, model: torch.nn.Module) -> NamedModules:
+        if not self.initialized_:
+            raise ValueError("Cannot get targets before modifier has been initialized")
+
+        return [
+            (name, module)
+            for scheme in self.transform_config.config_groups.values()
+            for arg in scheme.apply
+            for name, module in match_named_modules(model, arg.targets, arg.ignore)
+        ]
+
     def _center_embeddings(self, model: PreTrainedModel):
         for _, embedding in match_named_modules(
             model, [self.mappings.embedding], warn_on_fail=True

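SpinQuant still unties unconditionally in on_start (the comment now explains it as protecting _center_embeddings), so recipes that rotate the embedding/lm_head path need no extra configuration. A hedged end-to-end sketch in the style of the repository's transform examples; the modifier arguments follow those examples and should be treated as assumptions:

    # Sketch based on the public transform examples; argument names are assumed.
    from transformers import AutoModelForCausalLM

    from llmcompressor import oneshot
    from llmcompressor.modifiers.quantization import QuantizationModifier
    from llmcompressor.modifiers.transform import SpinQuantModifier

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")

    recipe = [
        # on_start unties the word embeddings before centering/fusing/rotating
        SpinQuantModifier(rotations=["R1", "R2"], transform_type="hadamard"),
        QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
    ]

    oneshot(model=model, recipe=recipe)
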
src/llmcompressor/pytorch/model_load/helpers.py

Lines changed: 1 addition & 1 deletion

@@ -144,7 +144,7 @@ def load_safetensors_state_dict(file_path: str) -> Dict[str, torch.Tensor]:
 def copy_python_files_from_model_cache(model, save_path: str):
     config = model.config
     cache_path = None
-    if hasattr(config, "_name_or_path"):
+    if hasattr(config, "_name_or_path") and len(config._name_or_path.strip()) > 0:
         import os
         import shutil

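The added length check matters because configs constructed in code (rather than loaded with from_pretrained) default _name_or_path to an empty string, which previously fell through into the cache-lookup branch. A small illustration (gpt2's config class is used purely as an example):

    # Configs built directly default _name_or_path to "", so the stricter check
    # now skips copy_python_files_from_model_cache's cache lookup for them.
    from transformers import GPT2Config, GPT2LMHeadModel

    model = GPT2LMHeadModel(GPT2Config())
    print(repr(model.config._name_or_path))  # ''
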
src/llmcompressor/transformers/compression/compressed_tensors_utils.py

Lines changed: 1 addition & 118 deletions

@@ -1,16 +1,12 @@
 import os
 import weakref
-from collections.abc import Generator
 from functools import wraps

 import torch
 from accelerate.accelerator import get_state_dict_offloaded_model
 from compressed_tensors import (
     ModelCompressor,
     SparsityCompressionConfig,
-    delete_offload_parameter,
-    has_offloaded_params,
-    register_offload_parameter,
 )
 from compressed_tensors.config import CompressionFormat
 from loguru import logger

@@ -24,7 +20,7 @@
 from llmcompressor.transformers.utils import RECIPE_FILE_NAME
 from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path

-__all__ = ["modify_save_pretrained", "untie_word_embeddings"]
+__all__ = ["modify_save_pretrained"]


 def modify_save_pretrained(model: PreTrainedModel):

@@ -117,119 +113,6 @@ def save_pretrained_wrapper(
     model.save_pretrained = save_pretrained_compressed(model.save_pretrained)


-def untie_word_embeddings(model: PreTrainedModel):
-    """
-    Patches bug where HF transformers will fail to untie weights under specific
-    circumstances (https://github.com/huggingface/transformers/issues/33689).
-
-    This function detects those cases and unties the tensors if applicable
-
-    :param model: model to fix
-    """
-    try:
-        input_embed = model.get_input_embeddings()
-        output_embed = model.get_output_embeddings()
-    except NotImplementedError as e:
-        logger.warning(
-            f"cannot untie model of type {model.__class__} which doesn't have "
-            f"get_input_embeddings and get_output_embeddings implmented\n{e}"
-        )
-        return
-
-    for module in (input_embed, output_embed):
-        if module is None or not hasattr(module, "weight"):
-            logger.warning(f"Cannot untie {module} which does not have weight param")
-            continue
-
-        # this could be replaced by a `get_offloaded_parameter` util
-        if not has_offloaded_params(module):
-            untied_data = module.weight.data.clone()
-        else:
-            untied_data = module._hf_hook.weights_map["weight"].clone()
-
-        requires_grad = module.weight.requires_grad
-        new_parameter = torch.nn.Parameter(untied_data, requires_grad=requires_grad)
-        delete_offload_parameter(module, "weight")
-        register_offload_parameter(module, "weight", new_parameter)
-
-    if hasattr(model.config, "tie_word_embeddings"):
-        model.config.tie_word_embeddings = False
-
-
-def _get_embeddings_or_warn(
-    model: torch.nn.Module,
-) -> tuple[torch.nn.Module | None, torch.nn.Module | None]:
-    if not (
-        hasattr(model, "get_input_embeddings")
-        and hasattr(model, "get_output_embeddings")
-    ):
-        logger.warning(
-            f"{model.__class__} doesn't have attribute get_input_embeddings and"
-            " get_output_embeddings implemented."
-            "\nThis can cause"
-            " problems when quantizing layers with shared weights"
-        )
-        return None, None
-
-    try:
-        input_embeddings, output_embeddings = (
-            model.get_input_embeddings(),
-            model.get_output_embeddings(),
-        )
-    except NotImplementedError as e:
-        logger.warning(
-            f"{model.__class__} doesn't have get_input_embeddings and "
-            "get_output_embeddings implemented."
-            "\nThis can cause"
-            " problems when quantizing layers with shared weights"
-            f"\n{e}"
-        )
-        return None, None
-
-    if not (
-        isinstance(input_embeddings, torch.nn.Module)
-        and isinstance(output_embeddings, torch.nn.Module)
-    ):
-        logger.warning(
-            f"expected modules from {model.__class__} get_input_embeddings and"
-            f" get_output_embeddings but got {type(input_embeddings)}"
-            f" and {type(output_embeddings)}."
-            "\nThis can cause"
-            " problems when quantizing layers with shared weights"
-        )
-        return None, None
-    return input_embeddings, output_embeddings
-
-
-def untie_if_target_shared_embedding(
-    model: torch.nn.Module, matched_module_generator: Generator[torch.nn.Module]
-):
-    """
-    Helper method that checks for shared input/output embedding and unties them
-    if either shows up in the matched_module_generator
-
-    :param model: model to untie if embeddings are shared and targeted by
-        matched_module_generator
-    :param matched_module_generator: Generator of all modules (not names) which
-        will be modified by quantization or transformation
-    """
-    input_embeddings, output_embeddings = _get_embeddings_or_warn(model)
-
-    if None in (input_embeddings, output_embeddings):  # if couldn't find embeddings
-        return
-
-    if (
-        input_embeddings.weight is not output_embeddings.weight
-    ):  # if not shared, can ignore
-        return
-
-    # if shared, check if either is targeted
-    for module in matched_module_generator:
-        if module in (input_embeddings, output_embeddings):
-            untie_word_embeddings(model)
-            return
-
-
 def get_model_compressor(
     model: torch.nn.Module,
     sparsity_config: SparsityCompressionConfig | None = None,

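untie_word_embeddings itself now lives in llmcompressor.utils (that is how every other file in this commit imports it). Assuming the relocated helper keeps the behavior of the body removed above, its observable effect is sketched below; gpt2 is just a convenient tied-weights model:

    # Behavioral sketch of the relocated helper, based on the removed body above.
    from transformers import AutoModelForCausalLM

    from llmcompressor.utils import untie_word_embeddings

    model = AutoModelForCausalLM.from_pretrained("gpt2")  # ties wte and lm_head
    assert model.get_input_embeddings().weight is model.get_output_embeddings().weight

    untie_word_embeddings(model)

    # Each embedding now owns an independent copy of the weight, and the config
    # flag is cleared so the tie is not re-applied on save/reload.
    assert model.get_input_embeddings().weight is not model.get_output_embeddings().weight
    assert model.config.tie_word_embeddings is False
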
src/llmcompressor/transformers/utils/helpers.py

Lines changed: 6 additions & 1 deletion

@@ -57,7 +57,12 @@ def infer_recipe_from_model_path(model_path: str | Path) -> str | None:
         - Hugging face model ID
     :return: The path to the recipe file if found, None otherwise.
     """
-    model_path = model_path.as_posix() if isinstance(model_path, Path) else model_path
+    model_path = (
+        model_path.as_posix() if isinstance(model_path, Path) else model_path.strip()
+    )
+    if model_path == "":
+        logger.debug("got path_or_name=<empty string>" "unable to find recipe")
+        return None

     if os.path.isdir(model_path) or os.path.isfile(model_path):
         # Model path is a local path to the model directory or file

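With the new guard, a blank or whitespace-only model path is logged and returns None immediately instead of being probed as a local directory or a Hub repo id. Sketch of the new behavior:

    # New early return for blank paths (behavior per the hunk above).
    from llmcompressor.transformers.utils.helpers import infer_recipe_from_model_path

    assert infer_recipe_from_model_path("") is None
    assert infer_recipe_from_model_path("   ") is None
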