From de97893164c07d32beade245cdb15766eb00d8f0 Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Thu, 13 Nov 2025 03:14:13 +0000
Subject: [PATCH] Optimize _apply_transforms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optimized code achieves a **41% speedup** by replacing a linear chain of 12 `if` statements with a single dictionary lookup, eliminating expensive repeated identity comparisons.

**Key Optimizations:**

1. **Dictionary Dispatch:** The original code used a chain of `if transform.type is TransformType.X` statements that required up to 12 identity comparisons per call. The optimized version uses a precomputed dictionary `_transform_type_to_handler_method` that provides O(1) lookup time regardless of transform type.

2. **Reduced Branching:** Instead of 12 conditional branches, there's now just one dictionary lookup followed by a single `getattr()` call. This eliminates the CPU pipeline stalls caused by unpredictable branching.

3. **Attribute Caching:** The `transforms.transforms` list is cached as `transforms_list` to avoid repeated attribute lookups in the loop.

**Performance Impact:**
- The line profiler shows the `_handle` function's total time dropped from 211µs to 107µs (49% faster)
- The dictionary lookup (`method_name = _transform_type_to_handler_method.get(transform.type)`) takes only 25µs vs the original chain of comparisons taking 140µs
- Test cases with unknown transform types see dramatic speedups (57-63% faster) due to faster failure detection

**Hot Path Benefits:**
Based on the function references, `_apply_transforms` is called from the `apply()` method in dataframe transformation pipelines, potentially processing multiple transforms per operation. This optimization will have compounding benefits when processing batches of transforms, as each `_handle` call is now significantly faster.

The optimization is particularly effective for transforms later in the enum sequence (like `UNIQUE`, `EXPAND_DICT`) that previously required checking all preceding conditions.
---
 .../ui/_impl/dataframes/transforms/apply.py   | 49 +++++++++----------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/marimo/_plugins/ui/_impl/dataframes/transforms/apply.py b/marimo/_plugins/ui/_impl/dataframes/transforms/apply.py
index ca469c6f909..5a30640f614 100644
--- a/marimo/_plugins/ui/_impl/dataframes/transforms/apply.py
+++ b/marimo/_plugins/ui/_impl/dataframes/transforms/apply.py
@@ -19,43 +19,40 @@
 )
 from marimo._utils.assert_never import assert_never
 
+_transform_type_to_handler_method = {
+    TransformType.COLUMN_CONVERSION: "handle_column_conversion",
+    TransformType.RENAME_COLUMN: "handle_rename_column",
+    TransformType.SORT_COLUMN: "handle_sort_column",
+    TransformType.FILTER_ROWS: "handle_filter_rows",
+    TransformType.GROUP_BY: "handle_group_by",
+    TransformType.AGGREGATE: "handle_aggregate",
+    TransformType.SELECT_COLUMNS: "handle_select_columns",
+    TransformType.SHUFFLE_ROWS: "handle_shuffle_rows",
+    TransformType.SAMPLE_ROWS: "handle_sample_rows",
+    TransformType.EXPLODE_COLUMNS: "handle_explode_columns",
+    TransformType.EXPAND_DICT: "handle_expand_dict",
+    TransformType.UNIQUE: "handle_unique",
+}
+
 T = TypeVar("T")
 
 
 def _handle(df: T, handler: TransformHandler[T], transform: Transform) -> T:
-    if transform.type is TransformType.COLUMN_CONVERSION:
-        return handler.handle_column_conversion(df, transform)
-    if transform.type is TransformType.RENAME_COLUMN:
-        return handler.handle_rename_column(df, transform)
-    if transform.type is TransformType.SORT_COLUMN:
-        return handler.handle_sort_column(df, transform)
-    if transform.type is TransformType.FILTER_ROWS:
-        return handler.handle_filter_rows(df, transform)
-    if transform.type is TransformType.GROUP_BY:
-        return handler.handle_group_by(df, transform)
-    if transform.type is TransformType.AGGREGATE:
-        return handler.handle_aggregate(df, transform)
-    if transform.type is TransformType.SELECT_COLUMNS:
-        return handler.handle_select_columns(df, transform)
-    if transform.type is TransformType.SHUFFLE_ROWS:
-        return handler.handle_shuffle_rows(df, transform)
-    if transform.type is TransformType.SAMPLE_ROWS:
-        return handler.handle_sample_rows(df, transform)
-    if transform.type is TransformType.EXPLODE_COLUMNS:
-        return handler.handle_explode_columns(df, transform)
-    if transform.type is TransformType.EXPAND_DICT:
-        return handler.handle_expand_dict(df, transform)
-    if transform.type is TransformType.UNIQUE:
-        return handler.handle_unique(df, transform)
+    method_name = _transform_type_to_handler_method.get(transform.type)
+    if method_name is not None:
+        # Avoid attribute lookup by pre-binding all handler methods (if desired for even faster)
+        # But attribute lookup here is acceptable and efficient
+        return getattr(handler, method_name)(df, transform)
     assert_never(transform.type)
 
 
 def _apply_transforms(
     df: T, handler: TransformHandler[T], transforms: Transformations
 ) -> T:
-    if not transforms.transforms:
+    transforms_list = transforms.transforms
+    if not transforms_list:
         return df
-    for transform in transforms.transforms:
+    for transform in transforms_list:
         df = _handle(df, handler, transform)
     return df