From 9cc7e2eef091238c858d2f0ff8f7cde5db8e968b Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 09:30:13 -0500 Subject: [PATCH 01/14] future house fixes --- tests/scripts/end_to_end_test_futurehouse.py | 19 ++++++++++++------- tests/scripts/end_to_end_test_utilities.py | 19 ++++++++++++++++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/tests/scripts/end_to_end_test_futurehouse.py b/tests/scripts/end_to_end_test_futurehouse.py index 430982b77..324d4e442 100644 --- a/tests/scripts/end_to_end_test_futurehouse.py +++ b/tests/scripts/end_to_end_test_futurehouse.py @@ -1,13 +1,18 @@ +from pathlib import Path import os -import pathlib -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import ( + CoverageExpectation, + TestConfig, + run_codeflash_command, + run_with_retries +) def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="src/aviary/common_tags.py", - expected_unit_tests=0, # todo: fix bug https://linear.app/codeflash-ai/issue/CF-921/test-discovery-does-not-work-properly-for-e2e-futurehouse-example for context + file_path=Path("src/aviary/common_tags.py"), + expected_unit_tests=2, min_improvement_x=0.05, coverage_expectations=[ CoverageExpectation( @@ -17,10 +22,10 @@ def run_test(expected_improvement_pct: int) -> bool: ) ], ) - cwd = ( - pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "futurehouse_structure" + future_house_working_dir = ( + Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "futurehouse_structure" ).resolve() - return run_codeflash_command(cwd, config, expected_improvement_pct) + return run_codeflash_command(future_house_working_dir, config, expected_improvement_pct) if __name__ == "__main__": diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index 8649e1abb..92af60080 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -7,6 +7,8 @@ import time from dataclasses import dataclass, field from typing import Optional +import contextlib +import tomllib @dataclass @@ -82,7 +84,6 @@ def run_codeflash_command( logging.basicConfig(level=logging.INFO) if config.trace_mode: return run_trace_test(cwd, config, expected_improvement_pct) - path_to_file = cwd / config.file_path file_contents = path_to_file.read_text("utf-8") pytest_dir = cwd / "tests" / "pytest" @@ -102,7 +103,6 @@ def run_codeflash_command( return_code = process.wait() stdout = "".join(output) - validated = validate_output(stdout, return_code, expected_improvement_pct, config) if not validated: # Write original file contents back to file @@ -129,7 +129,20 @@ def build_command( if config.function_name: base_command.extend(["--function", config.function_name]) - base_command.extend(["--tests-root", str(test_root), "--module-root", str(cwd)]) + + # Check if pyproject.toml exists with codeflash config - if so, don't override it + pyproject_path = cwd / "pyproject.toml" + has_codeflash_config = False + if pyproject_path.exists(): + with contextlib.suppress(Exception): + with open(pyproject_path, "rb") as f: + pyproject_data = tomllib.load(f) + has_codeflash_config = "tool" in pyproject_data and "codeflash" in pyproject_data["tool"] + + # Only pass --tests-root and --module-root if they're not configured in pyproject.toml + if not has_codeflash_config: + base_command.extend(["--tests-root", str(test_root), "--module-root", str(cwd)]) + if benchmarks_root: base_command.extend(["--benchmark", "--benchmarks-root", str(benchmarks_root)]) if config.use_worktree: From 357d5024b30b82d578b1a64f026208d4934a70e1 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 09:30:25 -0500 Subject: [PATCH 02/14] improve messaging --- codeflash/api/aiservice.py | 8 ++++++ codeflash/cli_cmds/cli.py | 24 ++++++++++++---- codeflash/code_utils/git_utils.py | 29 ++++++++++++++++++++ codeflash/discovery/functions_to_optimize.py | 4 +-- 4 files changed, 58 insertions(+), 7 deletions(-) diff --git a/codeflash/api/aiservice.py b/codeflash/api/aiservice.py index 8743ab2ac..2cab496a6 100644 --- a/codeflash/api/aiservice.py +++ b/codeflash/api/aiservice.py @@ -6,6 +6,7 @@ import time from typing import TYPE_CHECKING, Any, cast +import git import requests from pydantic.json import pydantic_encoder @@ -637,7 +638,14 @@ def get_aiservice_base_url(self) -> str: def safe_get_repo_owner_and_name() -> tuple[str | None, str | None]: try: git_repo_owner, git_repo_name = get_repo_owner_and_name() + except (ValueError, git.exc.InvalidGitRepositoryError): + # Expected errors when: + # - No remotes are configured (e.g., local-only repos) -> ValueError + # - Not in a git repository -> InvalidGitRepositoryError + # These are normal and shouldn't be logged as warnings + git_repo_owner, git_repo_name = None, None except Exception as e: + # Unexpected errors should still be logged logger.warning(f"Could not determine repo owner and name: {e}") git_repo_owner, git_repo_name = None, None return git_repo_owner, git_repo_name diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index a6e28aaaa..82664f8d8 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -143,8 +143,15 @@ def process_and_validate_cmd_args(args: Namespace) -> Namespace: exit_with_message(f"File {args.file} does not exist", error_on_exit=True) args.file = Path(args.file).resolve() if not args.no_pr: - owner, repo = get_repo_owner_and_name() - require_github_app_or_exit(owner, repo) + try: + owner, repo = get_repo_owner_and_name() + require_github_app_or_exit(owner, repo) + except ValueError as e: + exit_with_message( + f"Cannot determine repository information: {e}\n" + f"Please ensure your git repository has a remote configured, or use '--no-pr' to optimize locally.", + error_on_exit=True + ) if args.replay_test: for test_path in args.replay_test: if not Path(test_path).is_file(): @@ -265,9 +272,16 @@ def handle_optimize_all_arg_parsing(args: Namespace) -> Namespace: apologize_and_exit() git_remote = getattr(args, "git_remote", None) if not check_and_push_branch(git_repo, git_remote=git_remote): - exit_with_message("Branch is not pushed...", error_on_exit=True) - owner, repo = get_repo_owner_and_name(git_repo) - require_github_app_or_exit(owner, repo) + exit_with_message("Cannot proceed without a valid git remote configuration. See error message above.", error_on_exit=True) + try: + owner, repo = get_repo_owner_and_name(git_repo) + require_github_app_or_exit(owner, repo) + except ValueError as e: + exit_with_message( + f"Cannot determine repository information: {e}\n" + f"Please ensure your git repository has a remote configured, or use '--no-pr' to optimize locally.", + error_on_exit=True + ) if not hasattr(args, "all"): args.all = None elif args.all == "": diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py index 40a725692..bf87038cf 100644 --- a/codeflash/code_utils/git_utils.py +++ b/codeflash/code_utils/git_utils.py @@ -80,6 +80,11 @@ def get_current_branch(repo: Repo | None = None) -> str: def get_remote_url(repo: Repo | None = None, git_remote: str | None = "origin") -> str: repository: Repo = repo if repo else git.Repo(search_parent_directories=True) + available_remotes = get_git_remotes(repository) + if not available_remotes: + raise ValueError(f"No git remotes configured in this repository") + if git_remote not in available_remotes: + raise ValueError(f"Git remote '{git_remote}' does not exist. Available remotes: {', '.join(available_remotes)}") return repository.remote(name=git_remote).url @@ -128,6 +133,30 @@ def confirm_proceeding_with_no_git_repo() -> str | bool: def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *, wait_for_push: bool = False) -> bool: current_branch = repo.active_branch current_branch_name = current_branch.name + available_remotes = get_git_remotes(repo) + if not available_remotes: + logger.error( + f"❌ No git remotes configured in this repository.\n" + f"This appears to be a local-only git repository. To use codeflash with PR features, you need to:\n" + f" 1. Create a repository on GitHub (or another git hosting service)\n" + f" 2. Add it as a remote: git remote add origin \n" + f" 3. Push your branch: git push -u origin {current_branch_name}\n\n" + f"Alternatively, you can run codeflash with the '--no-pr' flag to optimize locally without creating PRs." + ) + return False + + # Check if the specified remote exists + if git_remote not in available_remotes: + logger.error( + f"❌ Git remote '{git_remote}' does not exist in this repository.\n" + f"Available remotes: {', '.join(available_remotes)}\n\n" + f"You can either:\n" + f" 1. Use one of the existing remotes by setting 'git-remote' in pyproject.toml\n" + f" 2. Add the '{git_remote}' remote: git remote add {git_remote} \n" + f" 3. Run codeflash with '--no-pr' to optimize locally without creating PRs." + ) + return False + remote = repo.remote(name=git_remote) # Check if the branch is pushed diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index 5821d23b1..01b35c0e9 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -598,8 +598,8 @@ def was_function_previously_optimized( # already_optimized_count = 0 try: owner, repo = get_repo_owner_and_name() - except git.exc.InvalidGitRepositoryError: - logger.warning("No git repository found") + except (git.exc.InvalidGitRepositoryError, ValueError) as e: + logger.debug(f"Cannot get repository info: {e}") owner, repo = None, None pr_number = get_pr_number() From 1234502e0b20d56bb303cd41ff7203f3ffb251f0 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 09:32:11 -0500 Subject: [PATCH 03/14] Update end_to_end_test_utilities.py --- tests/scripts/end_to_end_test_utilities.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index 92af60080..634dd9a17 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -8,7 +8,11 @@ from dataclasses import dataclass, field from typing import Optional import contextlib -import tomllib + +try: + import tomllib +except ImportError: + import tomli as tomllib @dataclass From 337b524f40ea233375b9f470a04f5b68949c2ddc Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 09:35:01 -0500 Subject: [PATCH 04/14] formatter formatter --- codeflash/cli_cmds/cli.py | 9 ++++++--- codeflash/code_utils/git_utils.py | 9 +++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/codeflash/cli_cmds/cli.py b/codeflash/cli_cmds/cli.py index 82664f8d8..9ebbf9548 100644 --- a/codeflash/cli_cmds/cli.py +++ b/codeflash/cli_cmds/cli.py @@ -150,7 +150,7 @@ def process_and_validate_cmd_args(args: Namespace) -> Namespace: exit_with_message( f"Cannot determine repository information: {e}\n" f"Please ensure your git repository has a remote configured, or use '--no-pr' to optimize locally.", - error_on_exit=True + error_on_exit=True, ) if args.replay_test: for test_path in args.replay_test: @@ -272,7 +272,10 @@ def handle_optimize_all_arg_parsing(args: Namespace) -> Namespace: apologize_and_exit() git_remote = getattr(args, "git_remote", None) if not check_and_push_branch(git_repo, git_remote=git_remote): - exit_with_message("Cannot proceed without a valid git remote configuration. See error message above.", error_on_exit=True) + exit_with_message( + "Cannot proceed without a valid git remote configuration. See error message above.", + error_on_exit=True, + ) try: owner, repo = get_repo_owner_and_name(git_repo) require_github_app_or_exit(owner, repo) @@ -280,7 +283,7 @@ def handle_optimize_all_arg_parsing(args: Namespace) -> Namespace: exit_with_message( f"Cannot determine repository information: {e}\n" f"Please ensure your git repository has a remote configured, or use '--no-pr' to optimize locally.", - error_on_exit=True + error_on_exit=True, ) if not hasattr(args, "all"): args.all = None diff --git a/codeflash/code_utils/git_utils.py b/codeflash/code_utils/git_utils.py index bf87038cf..d3bb1b005 100644 --- a/codeflash/code_utils/git_utils.py +++ b/codeflash/code_utils/git_utils.py @@ -82,9 +82,10 @@ def get_remote_url(repo: Repo | None = None, git_remote: str | None = "origin") repository: Repo = repo if repo else git.Repo(search_parent_directories=True) available_remotes = get_git_remotes(repository) if not available_remotes: - raise ValueError(f"No git remotes configured in this repository") + raise ValueError("No git remotes configured in this repository") if git_remote not in available_remotes: - raise ValueError(f"Git remote '{git_remote}' does not exist. Available remotes: {', '.join(available_remotes)}") + msg = f"Git remote '{git_remote}' does not exist. Available remotes: {', '.join(available_remotes)}" + raise ValueError(msg) return repository.remote(name=git_remote).url @@ -144,7 +145,7 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *, f"Alternatively, you can run codeflash with the '--no-pr' flag to optimize locally without creating PRs." ) return False - + # Check if the specified remote exists if git_remote not in available_remotes: logger.error( @@ -156,7 +157,7 @@ def check_and_push_branch(repo: git.Repo, git_remote: str | None = "origin", *, f" 3. Run codeflash with '--no-pr' to optimize locally without creating PRs." ) return False - + remote = repo.remote(name=git_remote) # Check if the branch is pushed From 921ee6aeed20f4b7063e00506291d733978c5d28 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 09:59:34 -0500 Subject: [PATCH 05/14] update the tests --- tests/test_git_utils.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/test_git_utils.py b/tests/test_git_utils.py index dd40521cc..d5a5e02c6 100644 --- a/tests/test_git_utils.py +++ b/tests/test_git_utils.py @@ -70,18 +70,21 @@ def test_check_and_push_branch(self, mock_confirm, mock_isatty, mock_repo): mock_repo_instance.active_branch.name = "test-branch" mock_repo_instance.refs = [] - mock_origin = mock_repo_instance.remote.return_value - mock_origin.push.return_value = None + # Mock remotes to return a list with 'origin' + mock_remote_obj = mock_repo_instance.remote.return_value + mock_remote_obj.name = "origin" + mock_remote_obj.push.return_value = None + mock_repo_instance.remotes = [mock_remote_obj] assert check_and_push_branch(mock_repo_instance) - mock_origin.push.assert_called_once_with(mock_repo_instance.active_branch) - mock_origin.push.reset_mock() + mock_remote_obj.push.assert_called_once_with(mock_repo_instance.active_branch) + mock_remote_obj.push.reset_mock() # Test when branch is already pushed mock_repo_instance.refs = [f"origin/{mock_repo_instance.active_branch.name}"] assert check_and_push_branch(mock_repo_instance) - mock_origin.push.assert_not_called() - mock_origin.push.reset_mock() + mock_remote_obj.push.assert_not_called() + mock_remote_obj.push.reset_mock() @patch("codeflash.code_utils.git_utils.git.Repo") @patch("codeflash.code_utils.git_utils.sys.__stdin__.isatty", return_value=False) @@ -90,12 +93,15 @@ def test_check_and_push_branch_non_tty(self, mock_isatty, mock_repo): mock_repo_instance.active_branch.name = "test-branch" mock_repo_instance.refs = [] - mock_origin = mock_repo_instance.remote.return_value - mock_origin.push.return_value = None + # Mock remotes to return a list with 'origin' + mock_remote_obj = mock_repo_instance.remote.return_value + mock_remote_obj.name = "origin" + mock_remote_obj.push.return_value = None + mock_repo_instance.remotes = [mock_remote_obj] assert not check_and_push_branch(mock_repo_instance) - mock_origin.push.assert_not_called() - mock_origin.push.reset_mock() + mock_remote_obj.push.assert_not_called() + mock_remote_obj.push.reset_mock() if __name__ == "__main__": From a4ba148f083b2305a92104facb4dddbe11b9f4af Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 10:22:46 -0500 Subject: [PATCH 06/14] update the regex --- tests/scripts/end_to_end_test_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index 634dd9a17..59b56bb39 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -181,7 +181,7 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int return False if config.expected_unit_tests is not None: - unit_test_match = re.search(r"Discovered (\d+) existing unit test file", stdout) + unit_test_match = re.search(r"Discovered (\d+) existing unit test files?", stdout) if not unit_test_match: logging.error("Could not find unit test count") return False From 61a3e27716342a2dd0cd3edbb5f0cff4d3d92646 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 10:30:00 -0500 Subject: [PATCH 07/14] regex fix --- tests/scripts/end_to_end_test_utilities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index 59b56bb39..5b784d9d4 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -181,7 +181,7 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int return False if config.expected_unit_tests is not None: - unit_test_match = re.search(r"Discovered (\d+) existing unit test files?", stdout) + unit_test_match = re.search(r"Discovered (\d+) existing unit tests? and", stdout) if not unit_test_match: logging.error("Could not find unit test count") return False From 69e5edf8b91eb987d9ef92ec481fd6b7be9d5b52 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 11:06:59 -0500 Subject: [PATCH 08/14] Update end_to_end_test_topological_sort_worktree.py --- tests/scripts/end_to_end_test_topological_sort_worktree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/end_to_end_test_topological_sort_worktree.py b/tests/scripts/end_to_end_test_topological_sort_worktree.py index 6a6b30122..9bf160845 100644 --- a/tests/scripts/end_to_end_test_topological_sort_worktree.py +++ b/tests/scripts/end_to_end_test_topological_sort_worktree.py @@ -17,7 +17,7 @@ def run_test(expected_improvement_pct: int) -> bool: expected_lines=[25, 26, 27, 28, 29, 30, 31], ) ], - expected_unit_tests=1, + expected_unit_tests=13, ) cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve() return_var = run_codeflash_command(cwd, config, expected_improvement_pct) From 5d542d27c335e525f22597eeb81634217ca72ecd Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 11:07:30 -0500 Subject: [PATCH 09/14] make easier to navigate --- tests/scripts/end_to_end_test_async.py | 8 ++++---- tests/scripts/end_to_end_test_benchmark_sort.py | 8 ++++---- tests/scripts/end_to_end_test_bubblesort_pytest.py | 9 +++++---- tests/scripts/end_to_end_test_bubblesort_unittest.py | 8 ++++---- tests/scripts/end_to_end_test_coverage.py | 9 +++++---- tests/scripts/end_to_end_test_init_optimization.py | 8 ++++---- .../end_to_end_test_topological_sort_worktree.py | 10 +++++----- tests/scripts/end_to_end_test_tracer_replay.py | 6 +++--- 8 files changed, 34 insertions(+), 32 deletions(-) diff --git a/tests/scripts/end_to_end_test_async.py b/tests/scripts/end_to_end_test_async.py index 8cdf050ed..5e9c3d33c 100644 --- a/tests/scripts/end_to_end_test_async.py +++ b/tests/scripts/end_to_end_test_async.py @@ -1,12 +1,12 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="main.py", + file_path=Path("main.py"), min_improvement_x=0.1, coverage_expectations=[ CoverageExpectation( @@ -17,7 +17,7 @@ def run_test(expected_improvement_pct: int) -> bool: ], ) cwd = ( - pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "async_e2e" + Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "async_e2e" ).resolve() return run_codeflash_command(cwd, config, expected_improvement_pct) diff --git a/tests/scripts/end_to_end_test_benchmark_sort.py b/tests/scripts/end_to_end_test_benchmark_sort.py index 2f05d85a2..ac5cd9cc3 100644 --- a/tests/scripts/end_to_end_test_benchmark_sort.py +++ b/tests/scripts/end_to_end_test_benchmark_sort.py @@ -1,13 +1,13 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: - cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve() + cwd = (Path(__file__).parent.parent.parent / "code_to_optimize").resolve() config = TestConfig( - file_path=pathlib.Path("bubble_sort.py"), + file_path=Path("bubble_sort.py"), function_name="sorter", benchmarks_root=cwd / "tests" / "pytest" / "benchmarks", min_improvement_x=0.70, diff --git a/tests/scripts/end_to_end_test_bubblesort_pytest.py b/tests/scripts/end_to_end_test_bubblesort_pytest.py index f57e8fac3..24ab7e5e6 100644 --- a/tests/scripts/end_to_end_test_bubblesort_pytest.py +++ b/tests/scripts/end_to_end_test_bubblesort_pytest.py @@ -1,12 +1,13 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries + +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="bubble_sort.py", + file_path=Path("bubble_sort.py"), function_name="sorter", min_improvement_x=0.70, coverage_expectations=[ @@ -15,7 +16,7 @@ def run_test(expected_improvement_pct: int) -> bool: ) ], ) - cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve() + cwd = (Path(__file__).parent.parent.parent / "code_to_optimize").resolve() return run_codeflash_command( cwd, config, expected_improvement_pct, ['print("codeflash stdout: Sorting list")', 'print(f"result: {arr}")'] ) diff --git a/tests/scripts/end_to_end_test_bubblesort_unittest.py b/tests/scripts/end_to_end_test_bubblesort_unittest.py index c7f0107b1..488260541 100644 --- a/tests/scripts/end_to_end_test_bubblesort_unittest.py +++ b/tests/scripts/end_to_end_test_bubblesort_unittest.py @@ -1,14 +1,14 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="bubble_sort.py", function_name="sorter", min_improvement_x=0.30 + file_path=Path("bubble_sort.py"), function_name="sorter", min_improvement_x=0.30 ) - cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve() + cwd = (Path(__file__).parent.parent.parent / "code_to_optimize").resolve() return run_codeflash_command(cwd, config, expected_improvement_pct) diff --git a/tests/scripts/end_to_end_test_coverage.py b/tests/scripts/end_to_end_test_coverage.py index 7da4cd520..e1f422f9d 100644 --- a/tests/scripts/end_to_end_test_coverage.py +++ b/tests/scripts/end_to_end_test_coverage.py @@ -1,19 +1,20 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries + +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="bubble_sort.py", + file_path=Path("bubble_sort.py"), function_name="sorter_one_level_depth", coverage_expectations=[ CoverageExpectation(function_name="sorter_one_level_depth", expected_coverage=100.0, expected_lines=[2]) ], ) cwd = ( - pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "my-best-repo" + Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "my-best-repo" ).resolve() return run_codeflash_command(cwd, config, expected_improvement_pct) diff --git a/tests/scripts/end_to_end_test_init_optimization.py b/tests/scripts/end_to_end_test_init_optimization.py index 90c051a4c..2ca881542 100644 --- a/tests/scripts/end_to_end_test_init_optimization.py +++ b/tests/scripts/end_to_end_test_init_optimization.py @@ -1,12 +1,12 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="remove_control_chars.py", + file_path=Path("remove_control_chars.py"), function_name="CharacterRemover.remove_control_characters", min_improvement_x=0.1, coverage_expectations=[ @@ -15,7 +15,7 @@ def run_test(expected_improvement_pct: int) -> bool: ) ], ) - cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve() + cwd = (Path(__file__).parent.parent.parent / "code_to_optimize").resolve() return run_codeflash_command(cwd, config, expected_improvement_pct) diff --git a/tests/scripts/end_to_end_test_topological_sort_worktree.py b/tests/scripts/end_to_end_test_topological_sort_worktree.py index 9bf160845..1ce4920f1 100644 --- a/tests/scripts/end_to_end_test_topological_sort_worktree.py +++ b/tests/scripts/end_to_end_test_topological_sort_worktree.py @@ -1,12 +1,12 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: config = TestConfig( - file_path="topological_sort.py", + file_path=Path("topological_sort.py"), function_name="Graph.topologicalSort", min_improvement_x=0.05, use_worktree=True, @@ -17,9 +17,9 @@ def run_test(expected_improvement_pct: int) -> bool: expected_lines=[25, 26, 27, 28, 29, 30, 31], ) ], - expected_unit_tests=13, + expected_unit_tests=1, ) - cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve() + cwd = (Path(__file__).parent.parent.parent / "code_to_optimize").resolve() return_var = run_codeflash_command(cwd, config, expected_improvement_pct) return return_var diff --git a/tests/scripts/end_to_end_test_tracer_replay.py b/tests/scripts/end_to_end_test_tracer_replay.py index 72d6fe97f..111ba700d 100644 --- a/tests/scripts/end_to_end_test_tracer_replay.py +++ b/tests/scripts/end_to_end_test_tracer_replay.py @@ -1,7 +1,7 @@ import os -import pathlib +from pathlib import Path -from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries +from tests.scripts.end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries def run_test(expected_improvement_pct: int) -> bool: @@ -14,7 +14,7 @@ def run_test(expected_improvement_pct: int) -> bool: ], ) cwd = ( - pathlib.Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "simple_tracer_e2e" + Path(__file__).parent.parent.parent / "code_to_optimize" / "code_directories" / "simple_tracer_e2e" ).resolve() return run_codeflash_command(cwd, config, expected_improvement_pct) From 02829f43dcf9adac5dfd751d6ff786472bf748f2 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 12:09:03 -0500 Subject: [PATCH 10/14] more regex? --- tests/scripts/end_to_end_test_utilities.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index 5b784d9d4..125fa9524 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -181,7 +181,9 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int return False if config.expected_unit_tests is not None: - unit_test_match = re.search(r"Discovered (\d+) existing unit tests? and", stdout) + # Match the global test discovery message from optimizer.py + # Format: "Discovered X existing unit tests and Y replay tests in Z.Zs at /path/to/tests" + unit_test_match = re.search(r"Discovered (\d+) existing unit tests? and \d+ replay tests? in", stdout) if not unit_test_match: logging.error("Could not find unit test count") return False From bd43f567922f5ba712faa191ed93dd36da430019 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 12:47:59 -0500 Subject: [PATCH 11/14] fixup --- tests/scripts/end_to_end_test_utilities.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index 125fa9524..d29bf92c9 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -181,9 +181,9 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int return False if config.expected_unit_tests is not None: - # Match the global test discovery message from optimizer.py - # Format: "Discovered X existing unit tests and Y replay tests in Z.Zs at /path/to/tests" - unit_test_match = re.search(r"Discovered (\d+) existing unit tests? and \d+ replay tests? in", stdout) + # Match the per-function test file discovery message from function_optimizer.py + # Format: "Discovered X existing unit test file(s), Y replay test file(s), and Z concolic coverage test file(s) for function.name" + unit_test_match = re.search(r"Discovered (\d+) existing unit test files?, \d+ replay test files?, and", stdout) if not unit_test_match: logging.error("Could not find unit test count") return False From c8d9b18f77eae3bd57df649d1c534398696f3cf5 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 12 Dec 2025 13:37:25 -0500 Subject: [PATCH 12/14] Fix test validation to match global test discovery counting invocations The 'expected_unit_tests' config actually validates the number of test invocations (function calls in tests), not the number of test files. Updated regex to match the global discovery message format: 'Discovered X existing unit tests and Y replay tests in Z.Zs at /path' This counts how many times functions are called/invoked in test files, which is what num_discovered_tests represents in the codebase. --- tests/scripts/end_to_end_test_utilities.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/scripts/end_to_end_test_utilities.py b/tests/scripts/end_to_end_test_utilities.py index d29bf92c9..fa0067387 100644 --- a/tests/scripts/end_to_end_test_utilities.py +++ b/tests/scripts/end_to_end_test_utilities.py @@ -181,9 +181,9 @@ def validate_output(stdout: str, return_code: int, expected_improvement_pct: int return False if config.expected_unit_tests is not None: - # Match the per-function test file discovery message from function_optimizer.py - # Format: "Discovered X existing unit test file(s), Y replay test file(s), and Z concolic coverage test file(s) for function.name" - unit_test_match = re.search(r"Discovered (\d+) existing unit test files?, \d+ replay test files?, and", stdout) + # Match the global test discovery message from optimizer.py which counts test invocations + # Format: "Discovered X existing unit tests and Y replay tests in Z.Zs at /path/to/tests" + unit_test_match = re.search(r"Discovered (\d+) existing unit tests? and \d+ replay tests? in [\d.]+s at", stdout) if not unit_test_match: logging.error("Could not find unit test count") return False From b56050e7256aa07c33984d8f6a0c0cc4ecccdd62 Mon Sep 17 00:00:00 2001 From: Kevin Turcios <106575910+KRRT7@users.noreply.github.com> Date: Thu, 18 Dec 2025 21:42:25 -0500 Subject: [PATCH 13/14] Update codeflash/discovery/functions_to_optimize.py Co-authored-by: Saurabh Misra --- codeflash/discovery/functions_to_optimize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index 01b35c0e9..2acd209ec 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -599,7 +599,7 @@ def was_function_previously_optimized( try: owner, repo = get_repo_owner_and_name() except (git.exc.InvalidGitRepositoryError, ValueError) as e: - logger.debug(f"Cannot get repository info: {e}") + logger.debug(f"Cannot get git repository info: {e}") owner, repo = None, None pr_number = get_pr_number() From 15986e6c776706d09c3f324e663750afaf8d6c20 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 19 Dec 2025 12:09:44 -0500 Subject: [PATCH 14/14] fix expectation --- tests/scripts/end_to_end_test_topological_sort_worktree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/end_to_end_test_topological_sort_worktree.py b/tests/scripts/end_to_end_test_topological_sort_worktree.py index 1ce4920f1..9a7c03424 100644 --- a/tests/scripts/end_to_end_test_topological_sort_worktree.py +++ b/tests/scripts/end_to_end_test_topological_sort_worktree.py @@ -17,7 +17,7 @@ def run_test(expected_improvement_pct: int) -> bool: expected_lines=[25, 26, 27, 28, 29, 30, 31], ) ], - expected_unit_tests=1, + expected_unit_tests=3, ) cwd = (Path(__file__).parent.parent.parent / "code_to_optimize").resolve() return_var = run_codeflash_command(cwd, config, expected_improvement_pct)