diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py index e59b06656..276a61948 100644 --- a/codeflash/benchmarking/trace_benchmarks.py +++ b/codeflash/benchmarking/trace_benchmarks.py @@ -1,8 +1,10 @@ from __future__ import annotations +import contextlib import os import re import subprocess +import sys from pathlib import Path from codeflash.cli_cmds.console import logger @@ -17,21 +19,42 @@ def trace_benchmarks_pytest( benchmark_env["PYTHONPATH"] = str(project_root) else: benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root) - result = subprocess.run( - [ - SAFE_SYS_EXECUTABLE, - Path(__file__).parent / "pytest_new_process_trace_benchmarks.py", - benchmarks_root, - tests_root, - trace_file, - ], - cwd=project_root, - check=False, - capture_output=True, - text=True, - env=benchmark_env, - timeout=timeout, - ) + + is_windows = sys.platform == "win32" + cmd_list = [ + SAFE_SYS_EXECUTABLE, + Path(__file__).parent / "pytest_new_process_trace_benchmarks.py", + benchmarks_root, + tests_root, + trace_file, + ] + + if is_windows: + # Use Windows-safe subprocess handling to avoid file locking issues + creationflags = subprocess.CREATE_NEW_PROCESS_GROUP + process = subprocess.Popen( + cmd_list, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + stdin=subprocess.DEVNULL, + cwd=project_root, + env=benchmark_env, + text=True, + creationflags=creationflags, + ) + try: + stdout_content, stderr_content = process.communicate(timeout=timeout) + returncode = process.returncode + except subprocess.TimeoutExpired: + with contextlib.suppress(OSError): + process.kill() + stdout_content, stderr_content = process.communicate(timeout=5) + raise subprocess.TimeoutExpired(cmd_list, timeout, output=stdout_content, stderr=stderr_content) from None + result = subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content) + else: + result = subprocess.run( + cmd_list, cwd=project_root, check=False, 
capture_output=True, text=True, env=benchmark_env, timeout=timeout + ) if result.returncode != 0: if "ERROR collecting" in result.stdout: # Pattern matches "===== ERRORS =====" (any number of =) and captures everything after diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py index ecf7d3e6d..9c7faeb1f 100644 --- a/codeflash/cli_cmds/console.py +++ b/codeflash/cli_cmds/console.py @@ -141,6 +141,15 @@ def __init__(self) -> None: @contextmanager def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]: """Progress bar for test files.""" + if is_LSP_enabled(): + + class DummyProgress: + def advance(self, *args: object, **kwargs: object) -> None: + pass + + yield DummyProgress(), 0 # type: ignore[return-value] + return + with Progress( SpinnerColumn(next(spinners)), TextColumn("[progress.description]{task.description}"), diff --git a/codeflash/code_utils/coverage_utils.py b/codeflash/code_utils/coverage_utils.py index ed3d277a4..a4cd1cd3a 100644 --- a/codeflash/code_utils/coverage_utils.py +++ b/codeflash/code_utils/coverage_utils.py @@ -68,7 +68,10 @@ def generate_candidates(source_code_path: Path) -> set[str]: def prepare_coverage_files() -> tuple[Path, Path]: - """Prepare coverage configuration and output files.""" + """Prepare coverage configuration and output files. + + Returns tuple of (coverage_database_file, coverage_config_file). 
+ """ coverage_database_file = get_run_tmp_file(Path(".coverage")) coveragercfile = get_run_tmp_file(Path(".coveragerc")) coveragerc_content = f"[run]\n branch = True\ndata_file={coverage_database_file}\n" diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py index 9a071b741..613379693 100644 --- a/codeflash/code_utils/git_worktree_utils.py +++ b/codeflash/code_utils/git_worktree_utils.py @@ -1,15 +1,20 @@ from __future__ import annotations import configparser +import contextlib +import shutil import subprocess +import sys import tempfile import time from pathlib import Path -from typing import Optional +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from collections.abc import Callable import git -from codeflash.cli_cmds.console import logger from codeflash.code_utils.compat import codeflash_cache_dir from codeflash.code_utils.git_utils import check_running_in_git_repo, git_root_dir @@ -53,7 +58,6 @@ def create_worktree_snapshot_commit(worktree_dir: Path, commit_message: str) -> def create_detached_worktree(module_root: Path) -> Optional[Path]: if not check_running_in_git_repo(module_root): - logger.warning("Module is not in a git repository. 
Skipping worktree creation.") return None git_root = git_root_dir() current_time_str = time.strftime("%Y%m%d-%H%M%S") @@ -71,7 +75,6 @@ def create_detached_worktree(module_root: Path) -> Optional[Path]: ) if not uni_diff_text.strip(): - logger.info("!lsp|No uncommitted changes to copy to worktree.") return worktree_dir # Write the diff to a temporary file @@ -89,18 +92,190 @@ def create_detached_worktree(module_root: Path) -> Optional[Path]: check=True, ) create_worktree_snapshot_commit(worktree_dir, "Initial Snapshot") - except subprocess.CalledProcessError as e: - logger.error(f"Failed to apply patch to worktree: {e}") + except subprocess.CalledProcessError: + pass return worktree_dir def remove_worktree(worktree_dir: Path) -> None: + """Remove a git worktree with robust error handling for Windows file locking issues. + + This function handles Windows-specific issues where files may be locked by processes, + causing 'Permission denied' errors. It implements retry logic with exponential backoff + and falls back to manual directory removal if git worktree remove fails. 
+ + Args: + worktree_dir: Path to the worktree directory to remove + + """ + if not worktree_dir or not worktree_dir.exists(): + return + + is_windows = sys.platform == "win32" + max_retries = 3 if is_windows else 1 + retry_delay = 0.5 # Start with 500ms delay + + # Try to get the repository and git root for worktree removal try: repository = git.Repo(worktree_dir, search_parent_directories=True) - repository.git.worktree("remove", "--force", worktree_dir) except Exception: - logger.exception(f"Failed to remove worktree: {worktree_dir}") + # If we can't access the repository, try manual cleanup + _manual_cleanup_worktree_directory(worktree_dir) + return + + # Attempt to remove worktree using git command with retries + for attempt in range(max_retries): + try: + repository.git.worktree("remove", "--force", str(worktree_dir)) + return # noqa: TRY300 + except git.exc.GitCommandError as e: + error_msg = str(e).lower() + is_permission_error = "permission denied" in error_msg or "access is denied" in error_msg + + if is_permission_error and attempt < max_retries - 1: + # On Windows, file locks may be temporary - retry with exponential backoff + time.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + else: + # Last attempt failed or non-permission error + break + except Exception: + break + + # Fallback: Try to remove worktree entry from git, then manually delete directory + with contextlib.suppress(Exception): + # Try to prune the worktree entry from git (this doesn't delete the directory) + # Use git worktree prune to remove stale entries + repository.git.worktree("prune") + + # Manually remove the directory (always attempt, even if prune failed) + with contextlib.suppress(Exception): + _manual_cleanup_worktree_directory(worktree_dir) + + +def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None: + """Manually remove a worktree directory, handling Windows file locking issues. + + This is a fallback method when git worktree remove fails. 
It uses shutil.rmtree + with custom error handling for Windows-specific issues. + + SAFETY: This function includes multiple safeguards to prevent accidental deletion: + - Only deletes directories under the worktree_dirs cache location + - Verifies the path is a worktree directory (not the original repo) + - Uses resolve() to normalize paths and prevent path traversal attacks + + Args: + worktree_dir: Path to the worktree directory to remove + + """ + if not worktree_dir or not worktree_dir.exists(): + return + + # Validate paths for safety + if not _validate_worktree_path_safety(worktree_dir): + return + + # Attempt removal with retries on Windows + is_windows = sys.platform == "win32" + max_retries = 3 if is_windows else 1 + retry_delay = 0.5 + + for attempt in range(max_retries): + attempt_num = attempt + 1 + + if attempt_num > 1: + time.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + + # On Windows, use custom error handler to remove read-only attributes on the fly + # This is more efficient than pre-scanning the entire directory tree + error_handler = _create_windows_rmtree_error_handler() if is_windows else None + + try: + if is_windows and error_handler: + shutil.rmtree(worktree_dir, onerror=error_handler) + else: + shutil.rmtree(worktree_dir, ignore_errors=True) + + # Brief wait on Windows to allow file handles to be released + if is_windows: + wait_time = 0.3 if attempt_num < max_retries else 0.1 + time.sleep(wait_time) + + # Check if removal was successful + if not worktree_dir.exists(): + return + + except Exception: # noqa: S110 + pass + + +def _validate_worktree_path_safety(worktree_dir: Path) -> bool: + """Validate that a path is safe to delete (must be under worktree_dirs). 
+ + Args: + worktree_dir: Path to validate + + Returns: + True if the path is safe to delete, False otherwise + + """ + # SAFETY CHECK 1: Resolve paths to absolute, normalized paths + try: + worktree_dir_resolved = worktree_dir.resolve() + worktree_dirs_resolved = worktree_dirs.resolve() + except (OSError, ValueError): + return False + + # SAFETY CHECK 2: Ensure worktree_dir is a subdirectory of worktree_dirs + try: + # Use relative_to to check if path is under worktree_dirs + worktree_dir_resolved.relative_to(worktree_dirs_resolved) + except ValueError: + return False + + # SAFETY CHECK 3: Ensure it's not the worktree_dirs root itself + return worktree_dir_resolved != worktree_dirs_resolved + + +def _create_windows_rmtree_error_handler() -> Callable[ + [Callable[[str], None], str, tuple[type[BaseException], BaseException, Any]], None +]: + """Create an error handler for shutil.rmtree that handles Windows-specific issues. + + This handler attempts to remove read-only attributes when encountering permission errors. + + Returns: + A callable error handler for shutil.rmtree's onerror parameter + + """ + + def handle_remove_error( + func: Callable[[str], None], path: str, exc_info: tuple[type[BaseException], BaseException, Any] + ) -> None: + """Error handler for shutil.rmtree on Windows. + + Attempts to remove read-only attributes and retry the operation. 
+ """ + # Get the exception type + _exc_type, exc_value, _exc_traceback = exc_info + + # Only handle permission errors + if not isinstance(exc_value, (PermissionError, OSError)): + return + + try: + # Try to change file permissions to make it writable + # Using permissive mask (0o777) is intentional for Windows file cleanup + Path(path).chmod(0o777) + # Retry the failed operation + func(path) + except Exception: # noqa: S110 + # If it still fails, silently ignore (file is truly locked) + pass + + return handle_remove_error def create_diff_patch_from_worktree( @@ -110,7 +285,6 @@ def create_diff_patch_from_worktree( uni_diff_text = repository.git.diff(None, "HEAD", *files, ignore_blank_lines=True, ignore_space_at_eol=True) if not uni_diff_text: - logger.warning("No changes found in worktree.") return None if not uni_diff_text.endswith("\n"): diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index bc0e2fd67..e6aedcf06 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -331,7 +331,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None: # Be conservative except when an alias is used (which requires exact method matching) for target_func in fnames: if "." 
in target_func: - class_name, method_name = target_func.split(".", 1) + class_name, _method_name = target_func.split(".", 1) if aname == class_name and not alias.asname: self.found_any_target_function = True self.found_qualified_name = target_func @@ -584,26 +584,49 @@ def discover_tests_pytest( project_root = cfg.project_root_path tmp_pickle_path = get_run_tmp_file("collected_tests.pkl") + discovery_script = Path(__file__).parent / "pytest_new_process_discovery.py" + + run_kwargs = { + "cwd": project_root, + "capture_output": True, + "text": True, + "stdin": subprocess.DEVNULL, + "timeout": 600, + } + if os.name == "nt": + # Prevent console window spawning on Windows which can cause hangs in LSP + run_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW + with custom_addopts(): - result = subprocess.run( - [ - SAFE_SYS_EXECUTABLE, - Path(__file__).parent / "pytest_new_process_discovery.py", - str(project_root), - str(tests_root), - str(tmp_pickle_path), - ], - cwd=project_root, - check=False, - capture_output=True, - text=True, - ) + try: + result = subprocess.run( + [SAFE_SYS_EXECUTABLE, discovery_script, str(project_root), str(tests_root), str(tmp_pickle_path)], + check=False, + **run_kwargs, + ) + except subprocess.TimeoutExpired: + result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr="Timeout") + except Exception as e: + result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr=str(e)) + try: - with tmp_pickle_path.open(mode="rb") as f: - exitcode, tests, pytest_rootdir = pickle.load(f) + # Check if pickle file exists before trying to read it + if not tmp_pickle_path.exists(): + tests, pytest_rootdir = [], None + logger.warning( + f"Test discovery pickle file not found. 
" + f"Subprocess return code: {result.returncode}, stdout: {result.stdout}, stderr: {result.stderr}" + ) + exitcode = result.returncode if result.returncode != 0 else -1 + else: + with tmp_pickle_path.open(mode="rb") as f: + exitcode, tests, pytest_rootdir = pickle.load(f) except Exception as e: tests, pytest_rootdir = [], None - logger.exception(f"Failed to discover tests: {e}") + logger.exception( + f"Failed to discover tests: {e}. " + f"Subprocess return code: {result.returncode}, stdout: {result.stdout}, stderr: {result.stderr}" + ) exitcode = -1 finally: tmp_pickle_path.unlink(missing_ok=True) @@ -750,7 +773,6 @@ def process_test_files( jedi_project = jedi.Project(path=project_root_path, sys_path=jedi_sys_path) tests_cache = TestsCache(project_root_path) - logger.info("!lsp|Discovering tests and processing unit tests") with test_files_progress_bar(total=len(file_to_test_map), description="Processing test files") as ( progress, task_id, diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py index 3958f40cf..c0a4d6687 100644 --- a/codeflash/discovery/functions_to_optimize.py +++ b/codeflash/discovery/functions_to_optimize.py @@ -1,7 +1,6 @@ from __future__ import annotations import ast -import os import random import warnings from _ast import AsyncFunctionDef, ClassDef, FunctionDef @@ -665,34 +664,93 @@ def filter_functions( submodule_ignored_paths_count: int = 0 blocklist_funcs_removed_count: int = 0 previous_checkpoint_functions_removed_count: int = 0 - tests_root_str = str(tests_root) - module_root_str = str(module_root) + + def _resolve_path(path: Path | str) -> Path: + # Use strict=False so we don't fail on paths that don't exist yet (e.g. 
worktree paths) + return Path(path).resolve(strict=False) + + def _resolve_path_consistent(path: Path | str) -> Path: + """Resolve path consistently: use strict resolution if path exists, otherwise non-strict.""" + path_obj = Path(path) + if path_obj.exists(): + try: + return path_obj.resolve() + except (OSError, RuntimeError): + return _resolve_path(path) + return _resolve_path(path) + + # Resolve all root paths to absolute paths for consistent comparison + # Use consistent resolution: strict for existing paths, non-strict for non-existent + tests_root_resolved = _resolve_path_consistent(tests_root) + module_root_resolved = _resolve_path_consistent(module_root) + + # Resolve ignore paths and submodule paths + ignore_paths_resolved = [_resolve_path_consistent(p) for p in ignore_paths] + submodule_paths_resolved = [_resolve_path_consistent(p) for p in submodule_paths] # We desperately need Python 3.10+ only support to make this code readable with structural pattern matching for file_path_path, functions in modified_functions.items(): _functions = functions - file_path = str(file_path_path) - if file_path.startswith(tests_root_str + os.sep): + # Resolve file path to absolute path + # Convert to Path if it's a string (e.g., from get_functions_within_git_diff) + file_path_obj = Path(file_path_path) + # Use consistent resolution: try strict first (for existing files), fall back to non-strict + if file_path_obj.exists(): + try: + file_path_resolved = file_path_obj.resolve() + except (OSError, RuntimeError): + file_path_resolved = _resolve_path(file_path_path) + else: + file_path_resolved = _resolve_path(file_path_path) + + file_path = str(file_path_obj) + + # Check if file is in tests root using resolved paths + try: + file_path_resolved.relative_to(tests_root_resolved) test_functions_removed_count += len(_functions) continue - if file_path in ignore_paths or any( - file_path.startswith(str(ignore_path) + os.sep) for ignore_path in ignore_paths - ): + except ValueError: 
+ pass # File is not in tests root, continue checking + + # Check if file is in ignore paths using resolved paths + is_ignored = False + for ignore_path_resolved in ignore_paths_resolved: + try: + file_path_resolved.relative_to(ignore_path_resolved) + is_ignored = True + break + except ValueError: + pass + if is_ignored: ignore_paths_removed_count += 1 continue - if file_path in submodule_paths or any( - file_path.startswith(str(submodule_path) + os.sep) for submodule_path in submodule_paths - ): + + # Check if file is in submodule paths using resolved paths + is_in_submodule = False + for submodule_path_resolved in submodule_paths_resolved: + try: + file_path_resolved.relative_to(submodule_path_resolved) + is_in_submodule = True + break + except ValueError: + pass + if is_in_submodule: submodule_ignored_paths_count += 1 continue - if path_belongs_to_site_packages(Path(file_path)): + + if path_belongs_to_site_packages(file_path_resolved): site_packages_removed_count += len(_functions) continue - if not file_path.startswith(module_root_str + os.sep): + + # Check if file is in module root using resolved paths + try: + file_path_resolved.relative_to(module_root_resolved) + except ValueError: non_modules_removed_count += len(_functions) continue try: - ast.parse(f"import {module_name_from_file_path(Path(file_path), project_root)}") + ast.parse(f"import {module_name_from_file_path(file_path_resolved, project_root)}") except SyntaxError: malformed_paths_count += 1 continue diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index 8be2c3b03..bd1eb2be1 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -514,10 +514,13 @@ def initialize_function_optimization(params: FunctionOptimizationInitParams) -> server.current_optimization_init_result = initialization_result.unwrap() server.show_message_log(f"Successfully initialized optimization for {params.functionName}", "Info") - files = [document.path] + # Use the worktree file path (which is normalized) 
instead of document.path + # document.path might be malformed on Windows (missing drive letter) + worktree_file_path = str(server.optimizer.args.file.resolve()) + files = [worktree_file_path] _, _, original_helpers = server.current_optimization_init_result - files.extend([str(helper_path) for helper_path in original_helpers]) + files.extend([str(helper_path.resolve()) for helper_path in original_helpers]) return {"functionName": params.functionName, "status": "success", "files_inside_context": files} diff --git a/codeflash/lsp/lsp_message.py b/codeflash/lsp/lsp_message.py index 36b2f3dcc..9e6c487fb 100644 --- a/codeflash/lsp/lsp_message.py +++ b/codeflash/lsp/lsp_message.py @@ -11,8 +11,8 @@ json_primitive_types = (str, float, int, bool) max_code_lines_before_collapse = 45 -# \u241F is the message delimiter becuase it can be more than one message sent over the same message, so we need something to separate each message -message_delimiter = "\u241f" +# \u241F is the message delimiter because it can be more than one message sent over the same message, so we need something to separate each message +message_delimiter = "\u241f" # allow the client to know which message it is receiving diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 416bdc8df..1cb2881b8 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -1791,6 +1791,7 @@ def establish_original_code_baseline( instrument_codeflash_capture( self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root ) + total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE behavioral_results, coverage_results = self.run_and_parse_tests( testing_type=TestingMode.BEHAVIOR, diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index ac757e6a9..1b08f27a1 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -649,7 +649,49 @@ def
mirror_paths_for_worktree_mode(self, worktree_dir: Path) -> None: def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path: - relative_path = path.relative_to(src_root) + # Resolve both paths to absolute paths to handle Windows path normalization issues + # This ensures paths with/without drive letters are handled correctly + try: + path_resolved = path.resolve() + except (OSError, RuntimeError): + # If resolve fails (e.g., path doesn't exist or is malformed), try absolute() + path_resolved = path.absolute() if not path.is_absolute() else path + + try: + src_root_resolved = src_root.resolve() + except (OSError, RuntimeError): + src_root_resolved = src_root.absolute() if not src_root.is_absolute() else src_root + + # Normalize paths using os.path.normpath and normcase for cross-platform compatibility + path_str = os.path.normpath(str(path_resolved)) + src_root_str = os.path.normpath(str(src_root_resolved)) + + # Convert to lowercase for case-insensitive comparison on Windows + path_normalized = os.path.normcase(path_str) + src_root_normalized = os.path.normcase(src_root_str) + + # Try using Path.relative_to with resolved paths first + try: + relative_path = path_resolved.relative_to(src_root_resolved) + except ValueError as err: + # If relative_to fails, manually extract the relative path using normalized strings + if path_normalized.startswith(src_root_normalized): + # Extract relative path manually + # Use the original path_str to preserve proper path format + if path_str.startswith(src_root_str): + relative_str = path_str[len(src_root_str) :].lstrip(os.sep) + relative_path = Path(relative_str) + else: + # Fallback: use normalized paths + relative_str = path_normalized[len(src_root_normalized) :].lstrip(os.sep) + relative_path = Path(relative_str) + else: + error_msg = ( + f"Path {path_resolved} (normalized: {path_normalized}) is not relative to " + f"{src_root_resolved} (normalized: {src_root_normalized})" + ) + raise ValueError(error_msg) from err + 
return dest_root / relative_path diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py index 1860e0321..9efb0cb93 100644 --- a/codeflash/verification/test_runner.py +++ b/codeflash/verification/test_runner.py @@ -1,11 +1,12 @@ from __future__ import annotations +import contextlib import shlex import subprocess +import sys from pathlib import Path from typing import TYPE_CHECKING -from codeflash.cli_cmds.console import logger from codeflash.code_utils.code_utils import custom_addopts, get_run_tmp_file from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE @@ -22,9 +23,65 @@ def execute_test_subprocess( cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600 ) -> subprocess.CompletedProcess: - """Execute a subprocess with the given command list, working directory, environment variables, and timeout.""" + """Execute a subprocess with the given command list, working directory, environment variables, and timeout. + + On Windows, uses Popen with communicate() and process groups for proper cleanup. + On other platforms, uses subprocess.run with capture_output. 
+ """ + is_windows = sys.platform == "win32" + with custom_addopts(): - logger.debug(f"executing test run with command: {' '.join(cmd_list)}") + if is_windows: + # WINDOWS SUBPROCESS FIX: + # On Windows, running pytest with coverage can hang indefinitely due to multiple issues: + # + # Problem 1: Pipe buffer deadlocks + # - subprocess.run() with file handles can deadlock when the child process + # produces output faster than the parent can read it + # - Solution: Use Popen.communicate() which properly drains both stdout/stderr + # concurrently using threads internally + # + # Problem 2: Child process waiting for stdin + # - Some Windows processes (especially pytest) may wait for console input + # - Solution: Use stdin=subprocess.DEVNULL to explicitly close stdin + # + # Problem 3: Orphaned child processes after timeout + # - When killing a process on Windows, child processes may not be terminated + # - Solution: Use CREATE_NEW_PROCESS_GROUP to allow proper process tree termination + # + # See: https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate + + # CREATE_NEW_PROCESS_GROUP: Creates process in new group for proper termination + creationflags = subprocess.CREATE_NEW_PROCESS_GROUP + + process = subprocess.Popen( + cmd_list, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + stdin=subprocess.DEVNULL, + cwd=cwd, + env=env, + text=True, + creationflags=creationflags, + ) + + try: + stdout_content, stderr_content = process.communicate(timeout=timeout) + returncode = process.returncode + except subprocess.TimeoutExpired: + # On Windows, terminate the entire process tree + with contextlib.suppress(OSError): + process.kill() + + # Drain remaining output after killing + stdout_content, stderr_content = process.communicate(timeout=5) + raise subprocess.TimeoutExpired( + cmd_list, timeout, output=stdout_content, stderr=stderr_content + ) from None + + return subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content) + + # 
On Linux/Mac, use subprocess.run (works fine there) return subprocess.run(cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False) @@ -39,6 +96,12 @@ def run_behavioral_tests( pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE, enable_coverage: bool = False, ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]: + """Run behavioral tests with optional coverage. + + On Windows, uses --capture=no to avoid subprocess output deadlocks. + """ + is_windows = sys.platform == "win32" + if test_framework in {"pytest", "unittest"}: test_files: list[str] = [] for file in test_paths.test_files: @@ -53,14 +116,20 @@ def run_behavioral_tests( ) else: test_files.append(str(file.instrumented_behavior_file_path)) + pytest_cmd_list = ( shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX) if pytest_cmd == "pytest" else [SAFE_SYS_EXECUTABLE, "-m", *shlex.split(pytest_cmd, posix=IS_POSIX)] ) test_files = list(set(test_files)) # remove multiple calls in the same test function + + # On Windows, previously used --capture=no to avoid deadlocks with subprocess.run + # Now using Popen.communicate() which handles buffering correctly, so we can use capture=tee-sys + capture_mode = "--capture=tee-sys" + common_pytest_args = [ - "--capture=tee-sys", + capture_mode, "-q", "--codeflash_loops_scope=session", "--codeflash_min_loops=1", @@ -79,11 +148,17 @@ def run_behavioral_tests( if enable_coverage: coverage_database_file, coverage_config_file = prepare_coverage_files() - cov_erase = execute_test_subprocess( - shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env - ) # this cleanup is necessary to avoid coverage data from previous runs, if there are any, - # then the current run will be appended to the previous data, which skews the results - logger.debug(cov_erase) + # On Windows, delete coverage database file directly instead of using 'coverage erase' + # to avoid file locking issues + 
if is_windows: + if coverage_database_file.exists(): + with contextlib.suppress(PermissionError, OSError): + coverage_database_file.unlink() + else: + execute_test_subprocess( + shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30 + ) + coverage_cmd = [ SAFE_SYS_EXECUTABLE, "-m", @@ -100,26 +175,17 @@ blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"] - results = execute_test_subprocess( - coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files, - cwd=cwd, - env=pytest_test_env, - timeout=600, - ) - logger.debug( - f"Result return code: {results.returncode}, " - f"{'Result stderr:' + str(results.stderr) if results.stderr else ''}" - ) + final_cmd = coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files + results = execute_test_subprocess(final_cmd, cwd=cwd, env=pytest_test_env, timeout=600) else: blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS] + + final_cmd = pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files results = execute_test_subprocess( - pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files, + final_cmd, cwd=cwd, env=pytest_test_env, - timeout=600, # TODO: Make this dynamic - ) - logger.debug( - f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ""}""" + timeout=600, # TODO: Make this dynamic ) else: msg = f"Unsupported test framework: {test_framework}" @@ -175,7 +241,7 @@ def run_line_profile_tests( pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files, cwd=cwd, env=pytest_test_env, - timeout=600, # TODO: Make this dynamic + timeout=600, # TODO: Make this dynamic ) else: msg = f"Unsupported test framework: {test_framework}" diff --git a/tests/test_function_discovery.py b/tests/test_function_discovery.py index 49a67ba9b..76b3445a1
100644 --- a/tests/test_function_discovery.py +++ b/tests/test_function_discovery.py @@ -411,13 +411,17 @@ def not_in_checkpoint_function(): discovered = find_all_functions_in_file(test_file_path) modified_functions = {test_file_path: discovered[test_file_path]} - filtered, count = filter_functions( - modified_functions, - tests_root=Path("tests"), - ignore_paths=[], - project_root=temp_dir, - module_root=temp_dir, - ) + # Use an absolute path for tests_root that won't match the temp directory + # This avoids path resolution issues in CI where the working directory might differ + tests_root_absolute = (temp_dir.parent / "nonexistent_tests_dir").resolve() + with unittest.mock.patch("codeflash.discovery.functions_to_optimize.get_blocklisted_functions", return_value={}): + filtered, count = filter_functions( + modified_functions, + tests_root=tests_root_absolute, + ignore_paths=[], + project_root=temp_dir, + module_root=temp_dir, + ) function_names = [fn.function_name for fn in filtered.get(test_file_path, [])] assert "propagate_attributes" in function_names assert count == 3