@@ -313,74 +313,43 @@ def _set_resolved_mappings(self, model: Module) -> None:
313313 into ResolvedMapping objects, resolving regular expressions.
314314 Result is stored in _resolved_mappings.
315315
316- Uses match_modules_set to find coherent sets of (smooth_layer, *balance_layers)
317- that belong together in the model architecture.
316+ For each activation in the mapping list, we find the corresponding weight to
317+ balance by searching for the longest substring. For instance, if our balance
318+ weight is "re:.*q_proj" and the activation is "re:.*self_attn_layer_norm" we
319+ would match model.layer.0.q_proj to model.layer.0.self_attn_layer_norm and
320+ repeat for model.layer.1 and so on
318321 """
319- # Build a module-to-name mapping for efficient lookups
320- module_to_name = {module : name for name , module in model .named_modules ()}
321-
322322 resolved_mappings : list [ResolvedMapping ] = []
323+ module_to_name = {module : name for name , module in model .named_modules ()}
323324 for mapping_idx , mapping in enumerate (self .mappings ):
324325 num_skipped_mappings = 0
325326
326327 # Use match_modules_set to find coherent sets of modules
327328 target_patterns = (mapping .smooth_layer , * mapping .balance_layers )
328329
329- for modules_set in (
330+ for smooth_layer , * balance_layers in (
330331 pbar := tqdm (match_modules_set (model , target_patterns , self .ignore ))
331332 ):
332333 pbar .set_description (
333334 f"Resolving mapping { mapping_idx + 1 } /{ len (self .mappings )} "
334335 f" ({ num_skipped_mappings } skipped)"
335336 )
336337
337- # Unpack the matched set: first is smooth_layer, rest are balance_layers
338- smooth_layer = modules_set [0 ]
339- all_balance_layers = list (modules_set [1 :])
340-
341- # Get names using the pre-built mapping
342338 smooth_name = module_to_name .get (smooth_layer )
343- if smooth_name is None :
344- continue
339+ balance_names = [
340+ module_to_name .get (balance_layer )
341+ for balance_layer in balance_layers
342+ ]
345343
346- # Filter balance layers, skipping incompatible ones
347- balance_layers = []
348- balance_names = []
349-
350- for balance_layer in all_balance_layers :
351- balance_name = module_to_name .get (balance_layer )
352- if balance_name is None :
353- continue
354-
355- # exclude v_proj->o_proj mappings whose shapes are incompatible
356- # https://github.com/mit-han-lab/llm-awq/pull/67#issuecomment-1681632777
357- if (
358- isinstance (smooth_layer , torch .nn .Linear )
359- and isinstance (balance_layer , torch .nn .Linear )
360- and balance_name .endswith (".o_proj" )
361- and (
362- (
363- smooth_name .endswith (".v_proj" )
364- and smooth_layer .out_features
365- != balance_layer .in_features
366- )
367- or (
368- smooth_name .endswith (".qkv_proj" )
369- and smooth_layer .out_features
370- != 3 * balance_layer .in_features
371- )
372- )
373- ):
374- num_skipped_mappings += 1
375- continue
376-
377- balance_layers .append (balance_layer )
378- balance_names .append (balance_name )
344+ all_compatible = _check_layers_are_compatible (
345+ smooth_layer , smooth_name , balance_layers , balance_names
346+ )
379347
380- if len (balance_layers ) == 0 :
348+ # skip mapping if any of the balance layers are incompatible
349+ if not all_compatible or len (balance_layers ) == 0 :
350+ num_skipped_mappings += 1
381351 continue
382-
383- if len (balance_layers ) == 1 :
352+ elif len (balance_layers ) == 1 :
384353 # for single balance layer, parent is the balance layer
385354 parent_name , parent = balance_names [0 ], balance_layers [0 ]
386355 else :
@@ -730,6 +699,35 @@ def _assert_all_activations_consumed(self):
730699 raise RuntimeError ("Some cached activations were not used" )
731700
732701
702+ def _check_layers_are_compatible (
703+ smooth_layer , smooth_name , balance_layers , balance_names
704+ ):
705+ """
706+ returns True if they are all compatible
707+ returns False if any smooth & balance layers are incompatible
708+ """
709+ for balance_layer , balance_name in zip (balance_layers , balance_names ):
710+ # exclude v_proj->o_proj mappings whose shapes are incompatible
711+ # https://github.com/mit-han-lab/llm-awq/pull/67#issuecomment-1681632777
712+ if (
713+ isinstance (smooth_layer , torch .nn .Linear )
714+ and isinstance (balance_layer , torch .nn .Linear )
715+ and balance_name .endswith (".o_proj" )
716+ and (
717+ (
718+ smooth_name .endswith (".v_proj" )
719+ and smooth_layer .out_features != balance_layer .in_features
720+ )
721+ or (
722+ smooth_name .endswith (".qkv_proj" )
723+ and smooth_layer .out_features != 3 * balance_layer .in_features
724+ )
725+ )
726+ ):
727+ return False
728+ return True
729+
730+
733731def _pseudo_quantize_tensor (
734732 w : torch .Tensor , symmetric : bool = False , bit_width : int = 8 , group_size : int = - 1
735733):
0 commit comments