Skip to content

Commit 3d33f8b

Browse files
committed
Merge branch 'refs/heads/main' into init_caching
2 parents ee8c94e + 1fd7e6a commit 3d33f8b

File tree

4 files changed

+94
-44
lines changed

4 files changed

+94
-44
lines changed

codeflash/either.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from __future__ import annotations
1+
from __future__ import annotations
22

33
from typing import Generic, TypeVar
44

codeflash/optimization/optimizer.py

Lines changed: 47 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -572,10 +572,6 @@ def determine_best_candidate(
572572
)
573573
speedup_ratios[candidate.optimization_id] = perf_gain
574574

575-
speedup_stats = compare_function_runtime_distributions(
576-
original_code_runtime_distribution, candidate_runtime_distribution
577-
)
578-
579575
tree = Tree(f"Candidate #{candidate_index} - Sum of Minimum Runtimes")
580576
if speedup_critic(
581577
candidate_result, original_code_baseline.runtime, best_runtime_until_now
@@ -609,28 +605,33 @@ def determine_best_candidate(
609605
console.print(tree)
610606
console.rule()
611607

612-
tree = Tree(f"Candidate #{candidate_index} - Bayesian Bootstrapping Nonparametric Analysis")
613-
tree.add(
614-
f"Expected candidate runtime (95% Credible Interval) = ["
615-
f"{humanize_runtime(candidate_runtime_statistics['credible_interval_lower_bound'])}, "
616-
f"{humanize_runtime(candidate_runtime_statistics['credible_interval_upper_bound'])}], "
617-
f"\nmedian = {humanize_runtime(candidate_runtime_statistics['median'])}"
618-
f"\nSpeedup ratio of candidate vs original:"
619-
f"\n95% Credible Interval = [{speedup_stats['credible_interval_lower_bound']:.3f}X, "
620-
f"{speedup_stats['credible_interval_upper_bound']:.3f}X]"
621-
f"\nmedian = {speedup_stats['median']:.3f}X"
622-
)
623-
if speedup_stats["credible_interval_lower_bound"] > 1.0:
624-
tree.add("The candidate is faster than the original code with a 95% probability.")
625-
if speedup_stats["median"] > best_speedup_ratio_until_now:
626-
best_speedup_ratio_until_now = speedup_stats["median"]
627-
tree.add("This candidate is the best candidate so far.")
608+
if candidate_runtime_distribution.any() and candidate_runtime_statistics:
609+
speedup_stats = compare_function_runtime_distributions(
610+
original_code_runtime_distribution, candidate_runtime_distribution
611+
)
612+
tree = Tree(f"Candidate #{candidate_index} - Bayesian Bootstrapping Nonparametric Analysis")
613+
tree.add(
614+
f"Expected candidate summed runtime (95% Credible Interval) = ["
615+
f"{humanize_runtime(round(candidate_runtime_statistics['credible_interval_lower_bound']))}"
616+
f", "
617+
f"{humanize_runtime(round(candidate_runtime_statistics['credible_interval_upper_bound']))}]"
618+
f"\nMedian = {humanize_runtime(round(candidate_runtime_statistics['median']))}"
619+
f"\nSpeedup ratio of candidate vs original:"
620+
f"\n95% Credible Interval = [{speedup_stats['credible_interval_lower_bound']:.3f}X, "
621+
f"{speedup_stats['credible_interval_upper_bound']:.3f}X]"
622+
f"\nmedian = {speedup_stats['median']:.3f}X"
623+
)
624+
if speedup_stats["credible_interval_lower_bound"] > 1.0:
625+
tree.add("The candidate is faster than the original code with a 95% probability.")
626+
if speedup_stats["median"] > best_speedup_ratio_until_now:
627+
best_speedup_ratio_until_now = float(speedup_stats["median"])
628+
tree.add("This candidate is the best candidate so far.")
629+
else:
630+
tree.add("This candidate is not faster than the current fastest candidate.")
628631
else:
629-
tree.add("This candidate is not faster than the current fastest candidate.")
630-
else:
631-
tree.add("It is inconclusive whether the candidate is faster than the original code.")
632-
console.print(tree)
633-
console.rule()
632+
tree.add("It is inconclusive whether the candidate is faster than the original code.")
633+
console.print(tree)
634+
console.rule()
634635

635636
self.write_code_and_helpers(original_code, original_helper_code, function_to_optimize.file_path)
636637
except KeyboardInterrupt as e:
@@ -1087,9 +1088,6 @@ def establish_original_code_baseline(
10871088
console.rule()
10881089

10891090
total_timing = benchmarking_results.total_passed_runtime() # caution: doesn't handle the loop index
1090-
runtime_distribution, runtime_statistics = benchmarking_results.bayesian_nonparametric_bootstrap_analysis(
1091-
100_000
1092-
)
10931091
functions_to_remove = [
10941092
result.id.test_function_name
10951093
for result in behavioral_results
@@ -1123,9 +1121,12 @@ def establish_original_code_baseline(
11231121
console.rule()
11241122
logger.debug(f"Total original code summed runtime (ns): {total_timing}")
11251123
console.rule()
1124+
runtime_distribution, runtime_statistics = benchmarking_results.bayesian_nonparametric_bootstrap_analysis(
1125+
100_000
1126+
)
11261127
logger.info(
11271128
f"Bayesian Bootstrapping Nonparametric Analysis"
1128-
f"\nExpected original code runtime (95% Credible Interval) = ["
1129+
f"\nExpected original code summed runtime (95% Credible Interval) = ["
11291130
f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
11301131
f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], "
11311132
f"\nmedian: {humanize_runtime(round(runtime_statistics['median']))}"
@@ -1245,18 +1246,23 @@ def run_optimized_candidate(
12451246
if (total_candidate_timing := candidate_benchmarking_results.total_passed_runtime()) == 0:
12461247
logger.warning("The overall test runtime of the optimized function is 0, couldn't run tests.")
12471248
console.rule()
1248-
runtime_distribution, runtime_statistics = (
1249-
candidate_benchmarking_results.bayesian_nonparametric_bootstrap_analysis(100_000)
1250-
)
1251-
1252-
logger.debug(f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}")
1253-
console.rule()
1254-
logger.debug(
1255-
f"Overall code runtime (95% Credible Interval) = ["
1256-
f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
1257-
f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], median: "
1258-
f"{humanize_runtime(round(runtime_statistics['median']))}"
1259-
)
1249+
runtime_distribution: npt.NDArray[np.float64] = np.array([])
1250+
runtime_statistics: dict[str, np.float64] = {}
1251+
else:
1252+
logger.debug(
1253+
f"Total optimized code {optimization_candidate_index} runtime (ns): {total_candidate_timing}"
1254+
)
1255+
console.rule()
1256+
runtime_distribution, runtime_statistics = (
1257+
candidate_benchmarking_results.bayesian_nonparametric_bootstrap_analysis(100_000)
1258+
)
1259+
logger.debug(
1260+
f"Overall code summed runtime (95% Credible Interval) = ["
1261+
f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
1262+
f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], median: "
1263+
f"{humanize_runtime(round(runtime_statistics['median']))}"
1264+
)
1265+
console.rule()
12601266
return Success(
12611267
(
12621268
OptimizedCandidateResult(

codeflash/verification/bayesian_analysis.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,36 @@ def bootstrap_combined_function_input_runtime_means(
8484
return draws
8585

8686

87+
@nb.njit(parallel=True, fastmath=True, cache=True)
def bootstrap_combined_function_input_runtime_sums(
    posterior_means: list[npt.NDArray[np.float64]], rngs: tuple[np.random.Generator, ...], bootstrap_size: int
) -> npt.NDArray[np.float64]:
    """Bootstrap the distribution of a function's total (summed) runtime across its inputs.

    For each bootstrap draw, one posterior-mean sample is picked at random from every
    input's distribution and the samples are summed — unlike the ``*_means`` sibling,
    which averages them. (The previous docstring, copied from that sibling, wrongly
    said "arithmetic mean".)

    Args:
        posterior_means: Per-input arrays of posterior mean draws. Indexing uses the
            maximum array length, so all arrays are assumed equally long — a shorter
            one would be indexed out of range. TODO confirm with callers.
        rngs: One independent random generator per computation thread.
        bootstrap_size: Number of bootstrap draws to produce.

    Returns:
        Array of shape (bootstrap_size,) with the summed-runtime draws.
    """
    num_input_means = max([len(posterior_mean) for posterior_mean in posterior_means])
    draws = np.empty(bootstrap_size, dtype=np.float64)

    # Split the draws as evenly as possible across threads; the first
    # `bootstrap_size % num_threads` threads each take one extra draw.
    num_threads = len(rngs)
    thread_remainder = bootstrap_size % num_threads
    num_bootstraps_per_thread = np.array([bootstrap_size // num_threads] * num_threads) + np.array(
        [1] * thread_remainder + [0] * (num_threads - thread_remainder)
    )
    # Prefix sums give each thread its contiguous slice boundaries within `draws`.
    thread_idx = [0, *list(np.cumsum(num_bootstraps_per_thread))]

    for thread_id in nb.prange(num_threads):
        thread_draws = draws[thread_idx[thread_id] : thread_idx[thread_id + 1]]
        for bootstrap_id in range(num_bootstraps_per_thread[thread_id]):
            # One random sample per input, summed into a single total-runtime draw.
            thread_draws[bootstrap_id] = sum(
                [input_means[rngs[thread_id].integers(0, num_input_means)] for input_means in posterior_means]
            )
    return draws
115+
116+
87117
def compute_statistics(distribution: npt.NDArray[np.float64], gamma: float = 0.95) -> dict[str, np.float64]:
88118
lower_p = (1.0 - gamma) / 2 * 100
89119
return {
@@ -105,6 +135,18 @@ def analyze_function_runtime_data(
105135
return function_runtime_distribution, compute_statistics(function_runtime_distribution)
106136

107137

138+
def analyze_function_runtime_sums_data(
    function_runtime_data: list[list[int]], bootstrap_size: int
) -> tuple[npt.NDArray[np.float64], dict[str, np.float64]]:
    """Bootstrap the summed-runtime distribution for a function and summarize it.

    Returns the bootstrap distribution of summed runtimes together with its
    summary statistics (as produced by ``compute_statistics``).
    """
    posterior_means = compute_function_runtime_posterior_means(function_runtime_data, bootstrap_size)
    # Spawn one independent child generator per numba thread.
    thread_rngs = tuple(np.random.default_rng().spawn(nb.get_num_threads()))
    distribution = bootstrap_combined_function_input_runtime_sums(posterior_means, thread_rngs, bootstrap_size)
    statistics = compute_statistics(distribution)
    return distribution, statistics
148+
149+
108150
def compare_function_runtime_distributions(
109151
function1_runtime_distribution: npt.NDArray[np.float64], function2_runtime_distribution: npt.NDArray[np.float64]
110152
) -> dict[str, np.float64]:

codeflash/verification/test_results.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from rich.tree import Tree
1919

2020
from codeflash.cli_cmds.console import DEBUG_MODE, logger
21-
from codeflash.verification.bayesian_analysis import analyze_function_runtime_data
21+
from codeflash.verification.bayesian_analysis import analyze_function_runtime_sums_data
2222
from codeflash.verification.comparator import comparator
2323

2424

@@ -207,7 +207,9 @@ def total_passed_runtime(self) -> int:
207207
def bayesian_nonparametric_bootstrap_analysis(
    self, bootstrap_size: int
) -> tuple[npt.NDArray[np.float64], dict[str, np.float64]]:
    """Run the Bayesian nonparametric bootstrap over this result set's usable runtimes.

    Delegates to ``analyze_function_runtime_sums_data`` and returns the
    (distribution, statistics) pair it produces.
    """
    runtime_data = list(self.usable_runtime_data_by_test_case().values())
    return analyze_function_runtime_sums_data(runtime_data, bootstrap_size)
211213

212214
def __iter__(self) -> Iterator[FunctionTestInvocation]:
213215
return iter(self.test_results)

0 commit comments

Comments
 (0)