We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fafccf3 · commit 2aead0e (Copy full SHA for 2aead0e)
src/compressed_tensors/quantization/lifecycle/apply.py
@@ -137,7 +137,7 @@ def apply_quantization_config(
         # because attention quantization is a superset of kv cache quantization
         if config.kv_cache_scheme is not None:
             scheme = QuantizationScheme(
-                targets=".*self_attn$", input_activations=config.kv_cache_scheme
+                targets=[".*self_attn$"], input_activations=config.kv_cache_scheme
             )
             for submodule in model.modules():
                 if is_attention_module(submodule):
0 commit comments