src/llmcompressor/args/dataset_arguments.py (13 additions, 4 deletions)
@@ -223,10 +223,19 @@ class DatasetArguments(CustomDatasetArguments):
     quantization_aware_calibration: bool = field(
         default=True,
         metadata={
-            "help": "Whether to enable quantization-aware calibration in the pipeline. "
-            "When True, quantization is applied during forward pass in calibration. "
-            "When False, quantization is disabled during forward pass in calibration. "
-            "Default is set to True."
+            "help": "Only relevant for the sequential pipeline. "
+            "If True, quantization is applied during forward pass in calibration. "
+            "If False, quantization is disabled during forward pass in calibration. "
+            "Default is True."
         },
     )
+    propagate_error: bool = field(
+        default=True,
+        metadata={
+            "help": "Only relevant for the sequential pipeline. If True, use quantized "
+            "layer outputs as the inputs to the next sequential layer. If False, use "
+            "unquantized layer outputs as the inputs to the next sequential layer. "
+            "Default is True"
+        },
+    )
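A minimal sketch of how the two flags above might be set, assuming DatasetArguments is importable from the module touched in this diff and that its remaining fields have defaults; the values shown are illustrative only.

```python
# Illustrative sketch only: assumes the import path below matches
# src/llmcompressor/args/dataset_arguments.py and that all other fields default.
from llmcompressor.args.dataset_arguments import DatasetArguments

dataset_args = DatasetArguments(
    quantization_aware_calibration=True,  # quantize during calibration forward passes
    propagate_error=False,  # next sequential layer receives unquantized outputs
)
print(dataset_args.quantization_aware_calibration, dataset_args.propagate_error)
```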

src/llmcompressor/pipelines/sequential/pipeline.py (8 additions, 4 deletions)
@@ -97,7 +97,11 @@ def __call__(
         # do a preliminary pass to trigger modifier hooks
         for batch_idx in tqdm(range(len(dataloader)), desc=calib_desc):
             inputs = activations.fetch(batch_idx, subgraph.input_names)
-            subgraph.forward(model, **inputs)
+            outputs = subgraph.forward(model, **inputs)
+
+            if not dataset_args.propagate_error:
+                activations.update(batch_idx, outputs)
+                activations.delete(batch_idx, subgraph.consumed_names)

         LifecycleCallbacks.sequential_epoch_end(subgraph)

@@ -106,10 +110,10 @@ def __call__(
         with HooksMixin.disable_hooks():
             for batch_idx in tqdm(range(len(dataloader)), desc=prop_desc):
                 inputs = activations.fetch(batch_idx, subgraph.input_names)
-                output = subgraph.forward(model, **inputs)
+                outputs = subgraph.forward(model, **inputs)

-                if subgraph_index < num_subgraphs - 1:
-                    activations.update(batch_idx, output)
+                if dataset_args.propagate_error:
+                    activations.update(batch_idx, outputs)
                     activations.delete(batch_idx, subgraph.consumed_names)

     # redundant, finish any remaining compression
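To make the behavioral difference concrete, below is a small self-contained sketch of the control flow that propagate_error selects; run_layer and quantize are toy stand-ins, not llmcompressor APIs.

```python
# Toy illustration of propagate_error; run_layer and quantize are stand-ins,
# not llmcompressor APIs.
def run_layer(x: float) -> float:
    return 2.0 * x


def quantize(x: float) -> float:
    return float(round(x))  # crude stand-in for quantization error


def sequential_pass(x: float, num_layers: int, propagate_error: bool) -> float:
    for _ in range(num_layers):
        quantized_out = quantize(run_layer(x))
        unquantized_out = run_layer(x)
        # propagate_error=True: the next layer sees the quantized output,
        # so quantization error compounds as it would at inference time.
        # propagate_error=False: the next layer sees the unquantized output.
        x = quantized_out if propagate_error else unquantized_out
    return x


print(sequential_pass(0.3, num_layers=3, propagate_error=True))   # error compounds
print(sequential_pass(0.3, num_layers=3, propagate_error=False))  # clean inputs per layer
```

Propagating quantized outputs matches what the compressed model will see at inference time, while propagating unquantized outputs calibrates each layer on inputs free of upstream quantization error.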