Fix mask-from-threshold logic to mask exactly the target number of elements (#678)

bfineran · web-flow · commit a612e7b510b1 · 2022-04-08T15:21:20.000-04:00
* fix mask-from-threshold logic to mask exactly the target number of elements

* fix off-by-one error found in review
diff --git a/src/sparseml/pytorch/sparsification/pruning/mask_creator.py b/src/sparseml/pytorch/sparsification/pruning/mask_creator.py
@@ -130,9 +130,24 @@ def create_sparsity_masks(
                 masks.append(tensor.new_ones(tensor.shape))
                 continue
 
+            num_elem = tensor.numel()
+            target_num_mask = round(num_elem * sparsity_target)
             min_val = tensor.min().item()
+
             if threshold.item() > min_val:
-                masks.append((tensor > threshold).type(tensor.type()))
+                threshold_mask = (tensor > threshold).type(tensor.type())
+
+                num_masked = num_elem - torch.sum(threshold_mask).item()
+                if num_masked != target_num_mask:
+                    # attempt to reconcile expected number of masked weights
+                    # may occur if multiple values have the threshold weight
+                    num_to_flip = abs(num_masked - target_num_mask)
+                    over_masked = num_masked > target_num_mask
+                    threshold_mask = self._flip_threshold_mask_vals(
+                        threshold_mask, tensor, threshold, num_to_flip, over_masked
+                    )
+
+                masks.append(threshold_mask)
                 continue
 
             # too many zeros so will go over the already given sparsity
@@ -141,9 +156,7 @@ def create_sparsity_masks(
             rand_indices = list(range(zero_indices.shape[0]))
             local_rng = random.Random(42)
             local_rng.shuffle(rand_indices)
-            num_elem = tensor.numel()
-            num_mask = round(num_elem * sparsity_target)
-            rand_indices = rand_indices[:num_mask]
+            rand_indices = rand_indices[:target_num_mask]
             rand_indices = tensor.new_tensor(rand_indices, dtype=torch.int64)
             zero_indices = zero_indices[rand_indices, :]
             mask = tensor.new_ones(tensor.shape).type(tensor.type())
@@ -173,7 +186,7 @@ def _threshold_from_sparsity(self, tensor: Tensor, sparsity: float) -> Tensor:
             return tensor.new_tensor([])
 
         sorted_vals, _ = torch.sort(tensor.view(-1))
-        lookup_index = round(sparsity * (tensor.numel() - 1))
+        lookup_index = round(sparsity * tensor.numel()) - 1
 
         if lookup_index < 0:
             lookup_index = 0
@@ -218,6 +231,35 @@ def _unstack_flattened_tensors(
 
         return unstacked_tensors
 
+    def _flip_threshold_mask_vals(
+        self,
+        mask: Tensor,
+        tensor: Tensor,
+        threshold: Tensor,
+        max_flip: int,
+        over_masked: bool,
+    ) -> Tensor:
+        # flip mask values where tensor == threshold until mask has desired
+        # number of 0s/1s
+        threshold_idxs = torch.nonzero(tensor == threshold, as_tuple=False)
+        num_flipped = 0
+        for threshold_elem_idx in threshold_idxs:
+            # make tensor returned by nonzero() indexable
+            threshold_elem_idx = threshold_elem_idx.split(1)
+            threshold_mask_elem = mask[threshold_elem_idx]
+
+            # flip mask val at threshold index if necessary
+            if over_masked and threshold_mask_elem == 0:
+                mask[threshold_elem_idx] = 1
+                num_flipped += 1
+            elif not over_masked and threshold_mask_elem == 1:
+                mask[threshold_elem_idx] = 0
+                num_flipped += 1
+
+            if num_flipped >= max_flip:
+                break
+        return mask
+
 
 class GroupedPruningMaskCreator(UnstructuredPruningMaskCreator):
     """
diff --git a/tests/sparseml/pytorch/sparsification/pruning/helpers.py b/tests/sparseml/pytorch/sparsification/pruning/helpers.py
@@ -126,18 +126,35 @@ def sparsity_mask_creator_test(tensor_shapes, mask_creator, sparsity_val, device
     for update_mask, target_sparsity in zip(update_masks, sparsity_val):
         assert abs(tensor_sparsity(update_mask) - target_sparsity) < 1e-2
 
+        if not isinstance(mask_creator, GroupedPruningMaskCreator):
+            _test_num_masked(update_mask, target_sparsity)
+
     if isinstance(mask_creator, GroupedPruningMaskCreator):
-        grouped_masks_test(update_masks, mask_creator)
+        grouped_masks_test(update_masks, mask_creator, sparsity_val)
 
     return update_masks
 
 
-def grouped_masks_test(masks, mask_creator):
+def grouped_masks_test(masks, mask_creator, sparsity_val=None):
     # Check that every value in the mask_creator grouping
     # is the same within the mask.  Assumes grouping applies
-    # an absolte mean to each grouping
-    for mask in masks:
+    # an absolute mean to each grouping
+    # also checks that the grouped mask matches the target sparsity exactly
+
+    if sparsity_val is None:
+        sparsity_val = [sparsity_val] * len(masks)
+
+    for mask, target_sparsity in zip(masks, sparsity_val):
         grouped_mask = mask_creator.group_tensor(mask)
         grouped_mask /= max(torch.max(grouped_mask).item(), 1.0)
         mask_vals_are_grouped = torch.all((grouped_mask == 0.0) | (grouped_mask == 1.0))
         assert mask_vals_are_grouped
+
+        if target_sparsity is not None:
+            _test_num_masked(grouped_mask, target_sparsity)
+
+
+def _test_num_masked(mask, target_sparsity):
+    # tests that the number of masked values is exactly the number expected
+    expected_num_masked = round(target_sparsity * mask.numel())
+    assert torch.sum(1 - mask).item() == expected_num_masked