From 6a51bd6fb403a7acf02910de06bad12a2515ec5e Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 8 Dec 2025 04:41:48 -0600 Subject: [PATCH 1/6] first pass split --- codeflash/optimization/function_optimizer.py | 161 ++++++++++--------- 1 file changed, 86 insertions(+), 75 deletions(-) diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 64b4ae75b..d833113b6 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -238,9 +238,6 @@ def __init__( self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {} self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {} self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None - self.generate_and_instrument_tests_results: ( - tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None - ) = None n_tests = N_TESTS_TO_GENERATE_EFFECTIVE self.executor = concurrent.futures.ThreadPoolExecutor( max_workers=n_tests + 3 if self.experiment_id is None else n_tests + 4 @@ -309,29 +306,37 @@ def generate_and_instrument_tests( transient=True, revert_to_print=bool(get_pr_number()), ): - generated_results = self.generate_tests_and_optimizations( + console.rule() + # Submit both test generation and optimization generation in parallel + future_tests = self.executor.submit( + self.generate_tests, testgen_context=code_context.testgen_context, - read_writable_code=code_context.read_writable_code, - read_only_context_code=code_context.read_only_context_code, helper_functions=code_context.helper_functions, generated_test_paths=generated_test_paths, generated_perf_test_paths=generated_perf_test_paths, + ) + future_optimizations = self.executor.submit( + self.generate_optimizations, + read_writable_code=code_context.read_writable_code, + read_only_context_code=code_context.read_only_context_code, run_experiment=should_run_experiment, ) - if not is_successful(generated_results): - return Failure(generated_results.failure()) + # Wait for both to complete + concurrent.futures.wait([future_tests, future_optimizations]) - generated_tests: GeneratedTestsList - optimizations_set: OptimizationSet - ( - count_tests, - generated_tests, - function_to_concolic_tests, - concolic_test_str, - optimizations_set, - function_references, - ) = generated_results.unwrap() + test_results = future_tests.result() + optimization_results = future_optimizations.result() + console.rule() + + if not is_successful(test_results): + return Failure(test_results.failure()) + + if not is_successful(optimization_results): + return Failure(optimization_results.failure()) + + count_tests, generated_tests, function_to_concolic_tests, concolic_test_str = test_results.unwrap() + optimizations_set, function_references = optimization_results.unwrap() for i, generated_test in enumerate(generated_tests.generated_tests): with generated_test.behavior_file_path.open("w", encoding="utf8") as f: @@ -1109,20 +1114,18 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio console.rule() return unique_instrumented_test_files - def generate_tests_and_optimizations( + def generate_tests( self, testgen_context: CodeStringsMarkdown, - read_writable_code: CodeStringsMarkdown, - read_only_context_code: str, helper_functions: list[FunctionSource], generated_test_paths: list[Path], generated_perf_test_paths: list[Path], - run_experiment: bool = False, # noqa: FBT001, FBT002 - ) 
-> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str, str]: + ) -> Result[tuple[int, GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str], str]: + """Generate unit tests and concolic tests for the function.""" n_tests = N_TESTS_TO_GENERATE_EFFECTIVE assert len(generated_test_paths) == n_tests - console.rule() - # Submit the test generation task as future + + # Submit test generation tasks future_tests = self.submit_test_generation_tasks( self.executor, testgen_context.markdown, @@ -1130,7 +1133,59 @@ def generate_tests_and_optimizations( generated_test_paths, generated_perf_test_paths, ) + + future_concolic_tests = self.executor.submit( + generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast + ) + + # Wait for test futures to complete + concurrent.futures.wait([*future_tests, future_concolic_tests]) + + # Process test generation results + tests: list[GeneratedTests] = [] + for future in future_tests: + res = future.result() + if res: + ( + generated_test_source, + instrumented_behavior_test_source, + instrumented_perf_test_source, + test_behavior_path, + test_perf_path, + ) = res + tests.append( + GeneratedTests( + generated_original_test_source=generated_test_source, + instrumented_behavior_test_source=instrumented_behavior_test_source, + instrumented_perf_test_source=instrumented_perf_test_source, + behavior_file_path=test_behavior_path, + perf_file_path=test_perf_path, + ) + ) + + if not tests: + logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}") + return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}") + + function_to_concolic_tests, concolic_test_str = future_concolic_tests.result() + count_tests = len(tests) + if concolic_test_str: + count_tests += 1 + + logger.info(f"Generated '{count_tests}' tests for '{self.function_to_optimize.function_name}'") + + generated_tests = GeneratedTestsList(generated_tests=tests) + return Success((count_tests, generated_tests, function_to_concolic_tests, concolic_test_str)) + + def generate_optimizations( + self, + read_writable_code: CodeStringsMarkdown, + read_only_context_code: str, + run_experiment: bool = False, # noqa: FBT001, FBT002 + ) -> Result[tuple[OptimizationSet, str], str]: + """Generate optimization candidates for the function.""" n_candidates = N_CANDIDATES_EFFECTIVE + future_optimization_candidates = self.executor.submit( self.aiservice_client.optimize_python_code, read_writable_code.markdown, @@ -1140,11 +1195,7 @@ def generate_tests_and_optimizations( ExperimentMetadata(id=self.experiment_id, group="control") if run_experiment else None, is_async=self.function_to_optimize.is_async, ) - future_candidates_exp = None - future_concolic_tests = self.executor.submit( - generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast - ) future_references = self.executor.submit( get_opt_review_metrics, self.function_to_optimize_source_code, @@ -1153,7 +1204,10 @@ def generate_tests_and_optimizations( self.project_root, self.test_cfg.tests_root, ) - futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_references] + + futures = [future_optimization_candidates, future_references] + future_candidates_exp = None + if run_experiment: future_candidates_exp = self.executor.submit( self.local_aiservice_client.optimize_python_code, @@ -1166,63 +1220,20 @@ def 
generate_tests_and_optimizations( ) futures.append(future_candidates_exp) - # Wait for all futures to complete + # Wait for optimization futures to complete concurrent.futures.wait(futures) # Retrieve results candidates: list[OptimizedCandidate] = future_optimization_candidates.result() logger.info(f"lsp|Generated '{len(candidates)}' candidate optimizations.") - console.rule() if not candidates: return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}") candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None - - # Process test generation results - - tests: list[GeneratedTests] = [] - for future in future_tests: - res = future.result() - if res: - ( - generated_test_source, - instrumented_behavior_test_source, - instrumented_perf_test_source, - test_behavior_path, - test_perf_path, - ) = res - tests.append( - GeneratedTests( - generated_original_test_source=generated_test_source, - instrumented_behavior_test_source=instrumented_behavior_test_source, - instrumented_perf_test_source=instrumented_perf_test_source, - behavior_file_path=test_behavior_path, - perf_file_path=test_perf_path, - ) - ) - if not tests: - logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}") - return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}") - function_to_concolic_tests, concolic_test_str = future_concolic_tests.result() function_references = future_references.result() - count_tests = len(tests) - if concolic_test_str: - count_tests += 1 - logger.info(f"Generated '{count_tests}' tests for '{self.function_to_optimize.function_name}'") - console.rule() - generated_tests = GeneratedTestsList(generated_tests=tests) - result = ( - count_tests, - generated_tests, - function_to_concolic_tests, - concolic_test_str, - OptimizationSet(control=candidates, experiment=candidates_experiment), - function_references, - ) - self.generate_and_instrument_tests_results = result - return Success(result) + return Success((OptimizationSet(control=candidates, experiment=candidates_experiment), function_references)) def setup_and_establish_baseline( self, From 03dc330e88fbff96ecf1e8150c11e27ee169f0bd Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 8 Dec 2025 04:51:29 -0600 Subject: [PATCH 2/6] split unit test generation and optimizations --- .../lsp/features/perform_optimization.py | 26 +++++- codeflash/optimization/function_optimizer.py | 79 +++++++++---------- 2 files changed, 58 insertions(+), 47 deletions(-) diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py index 5dfd0daef..75cb37c95 100644 --- a/codeflash/lsp/features/perform_optimization.py +++ b/codeflash/lsp/features/perform_optimization.py @@ -1,5 +1,6 @@ from __future__ import annotations +import concurrent.futures import contextlib import os from typing import TYPE_CHECKING @@ -44,24 +45,41 @@ def sync_perform_optimization(server: CodeflashLanguageServer, cancel_event: thr function_optimizer.function_to_tests = function_to_tests abort_if_cancelled(cancel_event) - test_setup_result = function_optimizer.generate_and_instrument_tests( - code_context, should_run_experiment=should_run_experiment + + # Generate tests and optimizations in parallel + future_tests = function_optimizer.executor.submit( + function_optimizer.generate_and_instrument_tests, code_context + ) + future_optimizations = function_optimizer.executor.submit( + 
function_optimizer.generate_optimizations, + read_writable_code=code_context.read_writable_code, + read_only_context_code=code_context.read_only_context_code, + run_experiment=should_run_experiment, ) + + concurrent.futures.wait([future_tests, future_optimizations]) + + test_setup_result = future_tests.result() + optimization_result = future_optimizations.result() + abort_if_cancelled(cancel_event) if not is_successful(test_setup_result): return {"functionName": params.functionName, "status": "error", "message": test_setup_result.failure()} + if not is_successful(optimization_result): + return {"functionName": params.functionName, "status": "error", "message": optimization_result.failure()} + ( generated_tests, function_to_concolic_tests, concolic_test_str, - optimizations_set, generated_test_paths, generated_perf_test_paths, instrumented_unittests_created_for_function, original_conftest_content, - function_references, ) = test_setup_result.unwrap() + optimizations_set, function_references = optimization_result.unwrap() + baseline_setup_result = function_optimizer.setup_and_establish_baseline( code_context=code_context, original_helper_code=original_helper_code, diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index d833113b6..1e9abfc58 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -272,21 +272,20 @@ def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[P return Success((should_run_experiment, code_context, original_helper_code)) def generate_and_instrument_tests( - self, code_context: CodeOptimizationContext, *, should_run_experiment: bool + self, code_context: CodeOptimizationContext ) -> Result[ tuple[ GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str, - OptimizationSet, list[Path], list[Path], set[Path], dict | None, - str, - ] + ], + str, ]: - """Generate and instrument tests, returning all necessary data for optimization.""" + """Generate and instrument tests for the function.""" n_tests = N_TESTS_TO_GENERATE_EFFECTIVE generated_test_paths = [ get_test_file_path( @@ -301,42 +300,17 @@ def generate_and_instrument_tests( for test_index in range(n_tests) ] - with progress_bar( - f"Generating new tests and optimizations for function '{self.function_to_optimize.function_name}'", - transient=True, - revert_to_print=bool(get_pr_number()), - ): - console.rule() - # Submit both test generation and optimization generation in parallel - future_tests = self.executor.submit( - self.generate_tests, - testgen_context=code_context.testgen_context, - helper_functions=code_context.helper_functions, - generated_test_paths=generated_test_paths, - generated_perf_test_paths=generated_perf_test_paths, - ) - future_optimizations = self.executor.submit( - self.generate_optimizations, - read_writable_code=code_context.read_writable_code, - read_only_context_code=code_context.read_only_context_code, - run_experiment=should_run_experiment, - ) - - # Wait for both to complete - concurrent.futures.wait([future_tests, future_optimizations]) - - test_results = future_tests.result() - optimization_results = future_optimizations.result() - console.rule() + test_results = self.generate_tests( + testgen_context=code_context.testgen_context, + helper_functions=code_context.helper_functions, + generated_test_paths=generated_test_paths, + generated_perf_test_paths=generated_perf_test_paths, + ) if not is_successful(test_results): return 
Failure(test_results.failure()) - if not is_successful(optimization_results): - return Failure(optimization_results.failure()) - count_tests, generated_tests, function_to_concolic_tests, concolic_test_str = test_results.unwrap() - optimizations_set, function_references = optimization_results.unwrap() for i, generated_test in enumerate(generated_tests.generated_tests): with generated_test.behavior_file_path.open("w", encoding="utf8") as f: @@ -377,12 +351,10 @@ def generate_and_instrument_tests( generated_tests, function_to_concolic_tests, concolic_test_str, - optimizations_set, generated_test_paths, generated_perf_test_paths, instrumented_unittests_created_for_function, original_conftest_content, - function_references, ) ) @@ -400,24 +372,45 @@ def optimize_function(self) -> Result[BestOptimization, str]: function_name=self.function_to_optimize.function_name, ) - test_setup_result = self.generate_and_instrument_tests( # also generates optimizations - code_context, should_run_experiment=should_run_experiment - ) + with progress_bar( + f"Generating new tests and optimizations for function '{self.function_to_optimize.function_name}'", + transient=True, + revert_to_print=bool(get_pr_number()), + ): + console.rule() + # Generate tests and optimizations in parallel + future_tests = self.executor.submit(self.generate_and_instrument_tests, code_context) + future_optimizations = self.executor.submit( + self.generate_optimizations, + read_writable_code=code_context.read_writable_code, + read_only_context_code=code_context.read_only_context_code, + run_experiment=should_run_experiment, + ) + + concurrent.futures.wait([future_tests, future_optimizations]) + + test_setup_result = future_tests.result() + optimization_result = future_optimizations.result() + console.rule() + if not is_successful(test_setup_result): return Failure(test_setup_result.failure()) + if not is_successful(optimization_result): + return Failure(optimization_result.failure()) + ( generated_tests, function_to_concolic_tests, concolic_test_str, - optimizations_set, generated_test_paths, generated_perf_test_paths, instrumented_unittests_created_for_function, original_conftest_content, - function_references, ) = test_setup_result.unwrap() + optimizations_set, function_references = optimization_result.unwrap() + baseline_setup_result = self.setup_and_establish_baseline( code_context=code_context, original_helper_code=original_helper_code, From c752fb8b80d337858a7fbc3bb483b9b1f927f81a Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 8 Dec 2025 05:14:52 -0600 Subject: [PATCH 3/6] formatting --- codeflash/lsp/features/perform_optimization.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py index 75cb37c95..0b1d80ebc 100644 --- a/codeflash/lsp/features/perform_optimization.py +++ b/codeflash/lsp/features/perform_optimization.py @@ -47,9 +47,7 @@ def sync_perform_optimization(server: CodeflashLanguageServer, cancel_event: thr abort_if_cancelled(cancel_event) # Generate tests and optimizations in parallel - future_tests = function_optimizer.executor.submit( - function_optimizer.generate_and_instrument_tests, code_context - ) + future_tests = function_optimizer.executor.submit(function_optimizer.generate_and_instrument_tests, code_context) future_optimizations = function_optimizer.executor.submit( function_optimizer.generate_optimizations, read_writable_code=code_context.read_writable_code, From 
44b0ea1133d239617ed3162d07ca12bbad22f5a6 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 8 Dec 2025 16:16:56 -0600 Subject: [PATCH 4/6] copy contextvars --- codeflash/lsp/features/perform_optimization.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py index 0b1d80ebc..272f1e8a3 100644 --- a/codeflash/lsp/features/perform_optimization.py +++ b/codeflash/lsp/features/perform_optimization.py @@ -2,6 +2,7 @@ import concurrent.futures import contextlib +import contextvars import os from typing import TYPE_CHECKING @@ -46,9 +47,14 @@ def sync_perform_optimization(server: CodeflashLanguageServer, cancel_event: thr abort_if_cancelled(cancel_event) + ctx = contextvars.copy_context() + # Generate tests and optimizations in parallel - future_tests = function_optimizer.executor.submit(function_optimizer.generate_and_instrument_tests, code_context) + future_tests = function_optimizer.executor.submit( + ctx.run, function_optimizer.generate_and_instrument_tests, code_context + ) future_optimizations = function_optimizer.executor.submit( + ctx.run, function_optimizer.generate_optimizations, read_writable_code=code_context.read_writable_code, read_only_context_code=code_context.read_only_context_code, From eab8cdc050f4ebf5fe31be369c47333aa3e152b1 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Mon, 8 Dec 2025 16:56:35 -0600 Subject: [PATCH 5/6] bugfix for VSC --- .../lsp/features/perform_optimization.py | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py index 272f1e8a3..bdaed640f 100644 --- a/codeflash/lsp/features/perform_optimization.py +++ b/codeflash/lsp/features/perform_optimization.py @@ -6,7 +6,7 @@ import os from typing import TYPE_CHECKING -from codeflash.cli_cmds.console import code_print +from codeflash.cli_cmds.console import code_print, logger from codeflash.code_utils.git_worktree_utils import create_diff_patch_from_worktree from codeflash.either import is_successful @@ -47,20 +47,23 @@ def sync_perform_optimization(server: CodeflashLanguageServer, cancel_event: thr abort_if_cancelled(cancel_event) - ctx = contextvars.copy_context() + ctx_tests = contextvars.copy_context() + ctx_opts = contextvars.copy_context() - # Generate tests and optimizations in parallel - future_tests = function_optimizer.executor.submit( - ctx.run, function_optimizer.generate_and_instrument_tests, code_context - ) - future_optimizations = function_optimizer.executor.submit( - ctx.run, - function_optimizer.generate_optimizations, - read_writable_code=code_context.read_writable_code, - read_only_context_code=code_context.read_only_context_code, - run_experiment=should_run_experiment, - ) + def run_generate_tests(): # noqa: ANN202 + return function_optimizer.generate_and_instrument_tests(code_context) + + def run_generate_optimizations(): # noqa: ANN202 + return function_optimizer.generate_optimizations( + read_writable_code=code_context.read_writable_code, + read_only_context_code=code_context.read_only_context_code, + run_experiment=should_run_experiment, + ) + + future_tests = function_optimizer.executor.submit(ctx_tests.run, run_generate_tests) + future_optimizations = function_optimizer.executor.submit(ctx_opts.run, run_generate_optimizations) + logger.info("loading|Generating optimizations and tests...") concurrent.futures.wait([future_tests, 
future_optimizations]) test_setup_result = future_tests.result() From 632b8266270a6461967eef93d1bf8386c69b8188 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 9 Dec 2025 01:36:30 +0200 Subject: [PATCH 6/6] extension logging --- codeflash/api/aiservice.py | 1 - codeflash/lsp/features/perform_optimization.py | 3 ++- codeflash/optimization/function_optimizer.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 20c478eb4..8743ab2ac 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -153,7 +153,6 @@ def optimize_python_code( # noqa: D417 if response.status_code == 200: optimizations_json = response.json()["optimizations"] - logger.info(f"!lsp|Generated {len(optimizations_json)} candidate optimizations.") console.rule() end_time = time.perf_counter() logger.debug(f"!lsp|Generating possible optimizations took {end_time - start_time:.2f} seconds.") diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py index bdaed640f..96552f1a8 100644 --- a/codeflash/lsp/features/perform_optimization.py +++ b/codeflash/lsp/features/perform_optimization.py @@ -63,7 +63,7 @@ def run_generate_optimizations(): # noqa: ANN202 future_tests = function_optimizer.executor.submit(ctx_tests.run, run_generate_tests) future_optimizations = function_optimizer.executor.submit(ctx_opts.run, run_generate_optimizations) - logger.info("loading|Generating optimizations and tests...") + logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'...") concurrent.futures.wait([future_tests, future_optimizations]) test_setup_result = future_tests.result() @@ -87,6 +87,7 @@ def run_generate_optimizations(): # noqa: ANN202 optimizations_set, function_references = optimization_result.unwrap() + logger.info(f"Generated '{len(optimizations_set.control)}' candidate optimizations.") baseline_setup_result = function_optimizer.setup_and_establish_baseline( code_context=code_context, original_helper_code=original_helper_code, diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 1e9abfc58..058c84dfc 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1165,7 +1165,7 @@ def generate_tests( if concolic_test_str: count_tests += 1 - logger.info(f"Generated '{count_tests}' tests for '{self.function_to_optimize.function_name}'") + logger.info(f"!lsp|Generated '{count_tests}' tests for '{self.function_to_optimize.function_name}'") generated_tests = GeneratedTestsList(generated_tests=tests) return Success((count_tests, generated_tests, function_to_concolic_tests, concolic_test_str)) @@ -1218,7 +1218,7 @@ def generate_optimizations( # Retrieve results candidates: list[OptimizedCandidate] = future_optimization_candidates.result() - logger.info(f"lsp|Generated '{len(candidates)}' candidate optimizations.") + logger.info(f"!lsp|Generated '{len(candidates)}' candidate optimizations.") if not candidates: return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}")
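
A note on the contextvars change in PATCH 4 and PATCH 5: CPython's Context.run() raises a RuntimeError when the same Context object is entered from more than one OS thread at the same time, so the single shared ctx introduced in PATCH 4 could fail once both executor tasks overlapped; splitting it into ctx_tests and ctx_opts in PATCH 5 is presumably the "bugfix for VSC". Below is a minimal, self-contained sketch of the pattern the series converges on (one copied context per submitted task, wait on both futures, then surface either failure). The names request_id, generate_tests_task, generate_optimizations_task, and run_parallel are illustrative placeholders, not codeflash APIs.

    import concurrent.futures
    import contextvars

    # A context variable standing in for whatever per-request state the LSP
    # server carries; purely illustrative.
    request_id = contextvars.ContextVar("request_id", default=None)


    def generate_tests_task():
        # Runs inside the copied context, so it sees the value that was set
        # in the submitting thread at copy time.
        return f"tests for request {request_id.get()}"


    def generate_optimizations_task():
        return f"optimizations for request {request_id.get()}"


    def run_parallel():
        request_id.set("req-42")
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)

        # One copied context per task: a single Context cannot be entered by
        # two threads concurrently (Context.run raises RuntimeError), which is
        # the likely reason the shared ctx from PATCH 4 is split in PATCH 5.
        ctx_tests = contextvars.copy_context()
        ctx_opts = contextvars.copy_context()

        future_tests = executor.submit(ctx_tests.run, generate_tests_task)
        future_opts = executor.submit(ctx_opts.run, generate_optimizations_task)

        # Wait for both, then collect results, mirroring the structure of
        # optimize_function and sync_perform_optimization in the patches above.
        concurrent.futures.wait([future_tests, future_opts])
        return future_tests.result(), future_opts.result()


    if __name__ == "__main__":
        print(run_parallel())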