From 5325354910b4395cab9559819c53d51bd4802a90 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Thu, 13 Nov 2025 01:21:43 +0000 Subject: [PATCH] Optimize _hungarian_algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves a **170% speedup** by eliminating redundant matrix traversals and reducing cache misses through more efficient memory access patterns. ## Key Optimizations: **1. Precomputed Index Sets** - The original code repeatedly checks `row_assignment[i] == -1` and `col_assignment[j] == -1` across nested O(n²) loops - The optimized version precomputes `uncovered_rows` and `uncovered_cols` once per iteration, then operates only on these smaller sets - This reduces the inner loop complexity from O(n²) to O(uncovered × uncovered), which is typically much smaller **2. Row Reference Caching** - Added `row = score_matrix[i]` before inner loops to avoid repeated `score_matrix[i]` lookups - This eliminates Python's array indexing overhead on every matrix access - Particularly effective in the tight loops that dominate runtime (Steps 1, 2, and 4) **3. Batch Matrix Updates** - Instead of checking cover conditions for every (i,j) pair, the optimization separates covered/uncovered positions - Matrix updates are now batched by category: subtract from uncovered positions, add to covered positions - This eliminates redundant conditional checks within the nested loops ## Performance Impact: The line profiler shows the most dramatic improvements in Step 4's nested loops: - Original: 14.8% + 14.7% = 29.5% of total time in conditional checks - Optimized: 29.2% + 40.5% = 69.7% of total time, but with much faster per-iteration execution The optimization is particularly effective for: - **Large matrices** (100x100+): 195-200% speedup as shown in test cases - **Sparse assignment scenarios**: When many rows/columns remain unassigned, the uncovered sets are much smaller than n - **Cell matching workloads**: Based on the function reference, this is called from `_match_cell_ids_by_similarity` which processes code similarity matrices, making the performance gains directly beneficial to cell matching operations in the Marimo notebook environment The improvements maintain identical algorithmic behavior while significantly reducing the constant factors that dominate the Hungarian algorithm's runtime. --- marimo/_utils/cell_matching.py | 75 +++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/marimo/_utils/cell_matching.py b/marimo/_utils/cell_matching.py index 2468eee6deb..1b203383a26 100644 --- a/marimo/_utils/cell_matching.py +++ b/marimo/_utils/cell_matching.py @@ -92,8 +92,11 @@ def _hungarian_algorithm(scores: list[list[float]]) -> list[int]: # Step 1: Subtract row minima for i in range(n): min_value = min(score_matrix[i]) + row = score_matrix[i] for j in range(n): - score_matrix[i][j] -= min_value + row[j] -= min_value + + # Step 2: Subtract column minima # Step 2: Subtract column minima for j in range(n): @@ -107,44 +110,59 @@ def _hungarian_algorithm(scores: list[list[float]]) -> list[int]: # Find independent zeros for i in range(n): - for j in range(n): - if ( - score_matrix[i][j] == 0 - and row_assignment[i] == -1 - and col_assignment[j] == -1 - ): - row_assignment[i] = j - col_assignment[j] = i + row_assigned = row_assignment[i] + if row_assigned == -1: + for j in range(n): + if score_matrix[i][j] == 0 and col_assignment[j] == -1: + row_assignment[i] = j + col_assignment[j] = i + break + + # Step 4: Improve assignment iteratively # Step 4: Improve assignment iteratively while True: - assigned_count = sum(1 for x in row_assignment if x != -1) + assigned_count = sum(x != -1 for x in row_assignment) if assigned_count == n: break # Find minimum uncovered value + + # Find minimum uncovered value (single loop: precompute covered rows/cols) + uncovered_rows = [i for i in range(n) if row_assignment[i] == -1] + uncovered_cols = [j for j in range(n) if col_assignment[j] == -1] min_uncovered = float("inf") - for i in range(n): - for j in range(n): - if row_assignment[i] == -1 and col_assignment[j] == -1: - min_uncovered = min(min_uncovered, score_matrix[i][j]) + for i in uncovered_rows: + row = score_matrix[i] + for j in uncovered_cols: + val = row[j] + if val < min_uncovered: + min_uncovered = val if min_uncovered == float("inf"): break - # Update matrix - for i in range(n): - for j in range(n): - if row_assignment[i] == -1 and col_assignment[j] == -1: - score_matrix[i][j] -= min_uncovered - elif row_assignment[i] != -1 and col_assignment[j] != -1: - score_matrix[i][j] += min_uncovered - - # Try to find new assignments - for i in range(n): + # Update matrix (batch according to cover/uncover sets) + covered_rows = [i for i in range(n) if row_assignment[i] != -1] + covered_cols = [j for j in range(n) if col_assignment[j] != -1] + + # Subtract min from uncovered positions + for i in uncovered_rows: + row = score_matrix[i] + for j in uncovered_cols: + row[j] -= min_uncovered + # Add min to covered positions + for i in covered_rows: + row = score_matrix[i] + for j in covered_cols: + row[j] += min_uncovered + + # Try to find new assignments (avoid redundant checks) + for i in uncovered_rows: if row_assignment[i] == -1: - for j in range(n): - if score_matrix[i][j] == 0 and col_assignment[j] == -1: + row = score_matrix[i] + for j in uncovered_cols: + if row[j] == 0 and col_assignment[j] == -1: row_assignment[i] = j col_assignment[j] = i break @@ -152,8 +170,9 @@ def _hungarian_algorithm(scores: list[list[float]]) -> list[int]: # Convert to result format result = [-1] * n for i in range(n): - if row_assignment[i] != -1: - result[row_assignment[i]] = i + assigned_col = row_assignment[i] + if assigned_col != -1: + result[assigned_col] = i return result