From c4fec2720df3116f351161cfd9fa45782f2bd189 Mon Sep 17 00:00:00 2001
From: Max Ghenis <mghenis@gmail.com>
Date: Fri, 17 Apr 2026 08:33:49 -0400
Subject: [PATCH] Fix operator-precedence bug in sparse L0 tracking condition

`i % tracking_n / 2 == 0` parsed as `(i % tracking_n) / 2 == 0`,
which is equivalent to `i % tracking_n == 0` because any non-zero
remainder divided by 2 is non-zero. The intended 2x logging density
(the sparse loop runs 2x as many epochs as the dense loop) was
silently lost, halving the fidelity of the sparse performance
DataFrame that feeds the dashboard.

Compute the stride explicitly as `max(1, tracking_n // 2)` and use
`i % sparse_tracking_n == 0`, with a comment explaining the intent
and the original parse mistake so it does not regress.

Adds tests/test_sparse_tracking.py verifying the sparse DataFrame
now has ~4x the dense DataFrame's row count at 100 dense epochs
(2x density * 2x epochs), consistent with the design.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 changelog.d/fix-tracking-precedence.fixed.md |   1 +
 src/microcalibrate/reweight.py               |   9 +-
 tests/test_sparse_tracking.py                | 102 +++++++++++++++++++
 3 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/fix-tracking-precedence.fixed.md
 create mode 100644 tests/test_sparse_tracking.py

diff --git a/changelog.d/fix-tracking-precedence.fixed.md b/changelog.d/fix-tracking-precedence.fixed.md
new file mode 100644
index 0000000..b5c7fac
--- /dev/null
+++ b/changelog.d/fix-tracking-precedence.fixed.md
@@ -0,0 +1 @@
+Fix operator-precedence bug in the sparse L0 tracking condition. `i % tracking_n / 2 == 0` parsed as `(i % tracking_n) / 2 == 0`, which is equivalent to `i % tracking_n == 0`, so the intended 2x logging density was silently lost. The sparse loop now logs at stride `max(1, tracking_n // 2)` so its tracked DataFrame has ~2x the row density of the dense loop, matching the 2x epoch count.
diff --git a/src/microcalibrate/reweight.py b/src/microcalibrate/reweight.py
index 018ccba..a2d4ed0 100644
--- a/src/microcalibrate/reweight.py
+++ b/src/microcalibrate/reweight.py
@@ -224,7 +224,14 @@ def dropout_weights(weights: torch.Tensor, p: float) -> torch.Tensor:
             l_main = loss(estimate, targets, normalization_factor)
             l = l_main + l0_lambda * gates.get_penalty()
             close = pct_close(estimate, targets)
-            if i % tracking_n / 2 == 0:
+            # The sparse loop runs 2x as many epochs as the dense loop,
+            # so log twice as often (half the dense tracking stride).
+            # Without explicit parentheses the original expression
+            # `i % tracking_n / 2 == 0` parses as
+            # `(i % tracking_n) / 2 == 0`, which is equivalent to
+            # `i % tracking_n == 0` and silently loses the x2 density.
+            sparse_tracking_n = max(1, tracking_n // 2)
+            if i % sparse_tracking_n == 0:
                 epochs_sparse.append(i)
                 loss_over_epochs_sparse.append(l.item())
                 pct_close_over_epochs_sparse.append(close)
diff --git a/tests/test_sparse_tracking.py b/tests/test_sparse_tracking.py
new file mode 100644
index 0000000..18e7847
--- /dev/null
+++ b/tests/test_sparse_tracking.py
@@ -0,0 +1,102 @@
+"""Regression test for the sparse tracking precedence bug (finding #4).
+
+The original expression ``i % tracking_n / 2 == 0`` parses as
+``(i % tracking_n) / 2 == 0``, which is equivalent to
+``i % tracking_n == 0`` -- the ``/ 2`` is a no-op. The intent was to
+log sparse-loop estimates twice as often as the dense loop (because
+the sparse loop runs 2x as many epochs). After the fix the sparse
+performance DataFrame has roughly 2x the row density of the dense one.
+"""
+
+import logging
+
+import numpy as np
+import pandas as pd
+import torch
+
+from microcalibrate.reweight import reweight
+
+
+def test_sparse_loop_logs_twice_as_often_as_dense() -> None:
+    """With the fix in place, the sparse performance DataFrame should
+    contain approximately twice as many tracked epoch rows per target
+    as the dense DataFrame for the same number of dense epochs.
+    """
+
+    def estimate_function(w: torch.Tensor) -> torch.Tensor:
+        return w.sum().unsqueeze(0)
+
+    rng = np.random.default_rng(0)
+    original_weights = rng.uniform(1.0, 2.0, size=50)
+    targets = np.array([original_weights.sum() * 1.02])
+    logger = logging.getLogger("test_sparse_tracking")
+
+    torch.manual_seed(0)
+    _w, _sparse, dense_df = reweight(
+        original_weights=original_weights,
+        estimate_function=estimate_function,
+        targets_array=targets,
+        target_names=np.array(["total"]),
+        l0_lambda=0.0,
+        init_mean=0.999,
+        temperature=0.5,
+        regularize_with_l0=False,
+        sparse_learning_rate=0.2,
+        dropout_rate=0.0,
+        epochs=100,
+        noise_level=0.0,
+        learning_rate=1e-3,
+        logger=logger,
+    )
+
+    # Rerun with L0 enabled to capture the sparse performance DF. The
+    # sparse performance DataFrame is written to the
+    # <csv>_sparse.csv path when csv_path is set; we don't need disk
+    # here -- instead we count rows on the dense performance_df and
+    # compare indirectly by tracking the epoch lists via csv output.
+
+    # To keep the assertion simple and robust to refactors, we verify
+    # the sparse_tracking_n itself via a minimal L0 run and inspection
+    # of the resulting CSVs. We exercise the L0 path by writing to a
+    # temporary CSV and reading the sparse file back.
+    import os
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmp:
+        csv_path = os.path.join(tmp, "out.csv")
+        torch.manual_seed(0)
+        reweight(
+            original_weights=original_weights,
+            estimate_function=estimate_function,
+            targets_array=targets,
+            target_names=np.array(["total"]),
+            l0_lambda=1e-6,
+            init_mean=0.999,
+            temperature=0.5,
+            regularize_with_l0=True,
+            sparse_learning_rate=0.01,
+            dropout_rate=0.0,
+            epochs=100,
+            noise_level=0.0,
+            learning_rate=1e-3,
+            csv_path=csv_path,
+            logger=logger,
+        )
+        sparse_df = pd.read_csv(csv_path.replace(".csv", "_sparse.csv"))
+
+    # With epochs=100, tracking_n = max(1, 100 // 10) = 10. Dense loop
+    # logs at i in {0, 10, 20, ..., 90} => 10 tracked epochs.
+    # Sparse loop runs 2*100 = 200 epochs at stride max(1, 10 // 2) = 5
+    # => i in {0, 5, 10, ..., 195} => 40 tracked epochs. Per target the
+    # DataFrame therefore has ~4x the row count (2x density * 2x
+    # epochs). Under the previous bug the sparse stride was 10 giving
+    # ~20 tracked epochs (2x only).
+    n_targets = 1
+    dense_rows_per_target = len(dense_df) / n_targets
+    sparse_rows_per_target = len(sparse_df) / n_targets
+    ratio = sparse_rows_per_target / max(dense_rows_per_target, 1)
+    assert ratio >= 3.5, (
+        f"Sparse tracking density ratio is {ratio:.2f} (dense rows "
+        f"={dense_rows_per_target}, sparse rows={sparse_rows_per_target}); "
+        "expected ~4x after fixing the precedence bug."
+    )