From c53f2f6c26fbd25b6aae3970b5c63933c7266746 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 28 Nov 2025 04:58:35 +0200
Subject: [PATCH 01/26] windows fix in vsc correct function choosing from the
 file

---
 codeflash/discovery/functions_to_optimize.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index b8cf895e1..5adb8fbda 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -671,28 +671,32 @@ def filter_functions(
     previous_checkpoint_functions_removed_count: int = 0
     tests_root_str = str(tests_root)
     module_root_str = str(module_root)
+    # Normalize paths for case-insensitive comparison on Windows
+    tests_root_normalized = os.path.normcase(tests_root_str)
+    module_root_normalized = os.path.normcase(module_root_str)
 
     # We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
     for file_path_path, functions in modified_functions.items():
         _functions = functions
         file_path = str(file_path_path)
-        if file_path.startswith(tests_root_str + os.sep):
+        file_path_normalized = os.path.normcase(file_path)
+        if file_path_normalized.startswith(tests_root_normalized + os.sep):
             test_functions_removed_count += len(_functions)
             continue
         if file_path in ignore_paths or any(
-            file_path.startswith(str(ignore_path) + os.sep) for ignore_path in ignore_paths
+            file_path_normalized.startswith(os.path.normcase(str(ignore_path)) + os.sep) for ignore_path in ignore_paths
         ):
             ignore_paths_removed_count += 1
             continue
         if file_path in submodule_paths or any(
-            file_path.startswith(str(submodule_path) + os.sep) for submodule_path in submodule_paths
+            file_path_normalized.startswith(os.path.normcase(str(submodule_path)) + os.sep) for submodule_path in submodule_paths
         ):
             submodule_ignored_paths_count += 1
             continue
         if path_belongs_to_site_packages(Path(file_path)):
             site_packages_removed_count += len(_functions)
             continue
-        if not file_path.startswith(module_root_str + os.sep):
+        if not file_path_normalized.startswith(module_root_normalized + os.sep):
             non_modules_removed_count += len(_functions)
             continue
         try:

From f869b61b9a868dabaa704850e828f7f5ee9b0f70 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 28 Nov 2025 05:37:35 +0200
Subject: [PATCH 02/26] ensure the optimization starts for the function

---
 codeflash/lsp/beta.py               |  7 +++--
 codeflash/optimization/optimizer.py | 43 ++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py
index 14d43a1a5..8135333e5 100644
--- a/codeflash/lsp/beta.py
+++ b/codeflash/lsp/beta.py
@@ -479,10 +479,13 @@ def initialize_function_optimization(params: FunctionOptimizationInitParams) ->
         server.current_optimization_init_result = initialization_result.unwrap()
         server.show_message_log(f"Successfully initialized optimization for {params.functionName}", "Info")
 
-        files = [document.path]
+        # Use the worktree file path (which is normalized) instead of document.path
+        # document.path might be malformed on Windows (missing drive letter)
+        worktree_file_path = str(server.optimizer.args.file.resolve())
+        files = [worktree_file_path]
 
         _, _, original_helpers = server.current_optimization_init_result
-        files.extend([str(helper_path) for helper_path in original_helpers])
+        files.extend([str(helper_path.resolve()) for helper_path in original_helpers])
 
         return {"functionName": params.functionName, "status": "success", "files_inside_context": files}
 
diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index ddbb894d1..fd0b221ca 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -485,7 +485,48 @@ def mirror_paths_for_worktree_mode(self, worktree_dir: Path) -> None:
 
 
 def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
-    relative_path = path.relative_to(src_root)
+    # Resolve both paths to absolute paths to handle Windows path normalization issues
+    # This ensures paths with/without drive letters are handled correctly
+    try:
+        path_resolved = path.resolve()
+    except (OSError, RuntimeError):
+        # If resolve fails (e.g., path doesn't exist or is malformed), try absolute()
+        path_resolved = path.absolute() if not path.is_absolute() else path
+    
+    try:
+        src_root_resolved = src_root.resolve()
+    except (OSError, RuntimeError):
+        src_root_resolved = src_root.absolute() if not src_root.is_absolute() else src_root
+    
+    # Normalize paths using os.path.normpath and normcase for cross-platform compatibility
+    path_str = os.path.normpath(str(path_resolved))
+    src_root_str = os.path.normpath(str(src_root_resolved))
+    
+    # Convert to lowercase for case-insensitive comparison on Windows
+    path_normalized = os.path.normcase(path_str)
+    src_root_normalized = os.path.normcase(src_root_str)
+    
+    # Try using Path.relative_to with resolved paths first
+    try:
+        relative_path = path_resolved.relative_to(src_root_resolved)
+    except ValueError:
+        # If relative_to fails, manually extract the relative path using normalized strings
+        if path_normalized.startswith(src_root_normalized):
+            # Extract relative path manually
+            # Use the original path_str to preserve proper path format
+            if path_str.startswith(src_root_str):
+                relative_str = path_str[len(src_root_str) :].lstrip(os.sep)
+                relative_path = Path(relative_str)
+            else:
+                # Fallback: use normalized paths
+                relative_str = path_normalized[len(src_root_normalized) :].lstrip(os.sep)
+                relative_path = Path(relative_str)
+        else:
+            raise ValueError(
+                f"Path {path_resolved} (normalized: {path_normalized}) is not relative to "
+                f"{src_root_resolved} (normalized: {src_root_normalized})"
+            )
+    
     return dest_root / relative_path
 
 

From da1fca0998cfb596a68fe7c214f90c617bab39cf Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 28 Nov 2025 06:01:22 +0200
Subject: [PATCH 03/26] using pathlib

---
 codeflash/discovery/functions_to_optimize.py | 68 +++++++++++++++-----
 1 file changed, 53 insertions(+), 15 deletions(-)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index 5adb8fbda..5dbcb4678 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -669,34 +669,72 @@ def filter_functions(
     submodule_ignored_paths_count: int = 0
     blocklist_funcs_removed_count: int = 0
     previous_checkpoint_functions_removed_count: int = 0
-    tests_root_str = str(tests_root)
-    module_root_str = str(module_root)
-    # Normalize paths for case-insensitive comparison on Windows
-    tests_root_normalized = os.path.normcase(tests_root_str)
-    module_root_normalized = os.path.normcase(module_root_str)
+    
+    def _resolve_path(path: Path | str) -> Path:
+        # Use strict=False so we don't fail on paths that don't exist yet (e.g. worktree paths)
+        return Path(path).resolve(strict=False)
+
+    # Resolve all root paths to absolute paths for consistent comparison
+    tests_root_resolved = _resolve_path(tests_root)
+    module_root_resolved = _resolve_path(module_root)
+
+    # Resolve ignore paths and submodule paths
+    ignore_paths_resolved = [_resolve_path(p) for p in ignore_paths]
+    submodule_paths_resolved = [_resolve_path(p) for p in submodule_paths]
 
     # We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
     for file_path_path, functions in modified_functions.items():
         _functions = functions
+        # Resolve file path to absolute path
+        try:
+            file_path_resolved = file_path_path.resolve()
+        except (OSError, RuntimeError):
+            file_path_resolved = file_path_path.absolute() if not file_path_path.is_absolute() else file_path_path
+        
         file_path = str(file_path_path)
-        file_path_normalized = os.path.normcase(file_path)
-        if file_path_normalized.startswith(tests_root_normalized + os.sep):
+        
+        # Check if file is in tests root using resolved paths
+        try:
+            file_path_resolved.relative_to(tests_root_resolved)
             test_functions_removed_count += len(_functions)
             continue
-        if file_path in ignore_paths or any(
-            file_path_normalized.startswith(os.path.normcase(str(ignore_path)) + os.sep) for ignore_path in ignore_paths
-        ):
+        except ValueError:
+            pass  # File is not in tests root, continue checking
+        
+        # Check if file is in ignore paths using resolved paths
+        is_ignored = False
+        for ignore_path_resolved in ignore_paths_resolved:
+            try:
+                file_path_resolved.relative_to(ignore_path_resolved)
+                is_ignored = True
+                break
+            except ValueError:
+                pass
+        if is_ignored:
             ignore_paths_removed_count += 1
             continue
-        if file_path in submodule_paths or any(
-            file_path_normalized.startswith(os.path.normcase(str(submodule_path)) + os.sep) for submodule_path in submodule_paths
-        ):
+        
+        # Check if file is in submodule paths using resolved paths
+        is_in_submodule = False
+        for submodule_path_resolved in submodule_paths_resolved:
+            try:
+                file_path_resolved.relative_to(submodule_path_resolved)
+                is_in_submodule = True
+                break
+            except ValueError:
+                pass
+        if is_in_submodule:
             submodule_ignored_paths_count += 1
             continue
-        if path_belongs_to_site_packages(Path(file_path)):
+        
+        if path_belongs_to_site_packages(file_path_resolved):
             site_packages_removed_count += len(_functions)
             continue
-        if not file_path_normalized.startswith(module_root_normalized + os.sep):
+        
+        # Check if file is in module root using resolved paths
+        try:
+            file_path_resolved.relative_to(module_root_resolved)
+        except ValueError:
             non_modules_removed_count += len(_functions)
             continue
         try:

From f0b465726f8af9484b2a5c36cb38554e8e10497c Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 28 Nov 2025 06:02:18 +0200
Subject: [PATCH 04/26] formatting and linting fixes

---
 codeflash/discovery/functions_to_optimize.py | 15 +++++++--------
 codeflash/optimization/optimizer.py          | 10 +++++-----
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index 5dbcb4678..f4f2d84b3 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 import ast
-import os
 import random
 import warnings
 from _ast import AsyncFunctionDef, ClassDef, FunctionDef
@@ -669,7 +668,7 @@ def filter_functions(
     submodule_ignored_paths_count: int = 0
     blocklist_funcs_removed_count: int = 0
     previous_checkpoint_functions_removed_count: int = 0
-    
+
     def _resolve_path(path: Path | str) -> Path:
         # Use strict=False so we don't fail on paths that don't exist yet (e.g. worktree paths)
         return Path(path).resolve(strict=False)
@@ -690,9 +689,9 @@ def _resolve_path(path: Path | str) -> Path:
             file_path_resolved = file_path_path.resolve()
         except (OSError, RuntimeError):
             file_path_resolved = file_path_path.absolute() if not file_path_path.is_absolute() else file_path_path
-        
+
         file_path = str(file_path_path)
-        
+
         # Check if file is in tests root using resolved paths
         try:
             file_path_resolved.relative_to(tests_root_resolved)
@@ -700,7 +699,7 @@ def _resolve_path(path: Path | str) -> Path:
             continue
         except ValueError:
             pass  # File is not in tests root, continue checking
-        
+
         # Check if file is in ignore paths using resolved paths
         is_ignored = False
         for ignore_path_resolved in ignore_paths_resolved:
@@ -713,7 +712,7 @@ def _resolve_path(path: Path | str) -> Path:
         if is_ignored:
             ignore_paths_removed_count += 1
             continue
-        
+
         # Check if file is in submodule paths using resolved paths
         is_in_submodule = False
         for submodule_path_resolved in submodule_paths_resolved:
@@ -726,11 +725,11 @@ def _resolve_path(path: Path | str) -> Path:
         if is_in_submodule:
             submodule_ignored_paths_count += 1
             continue
-        
+
         if path_belongs_to_site_packages(file_path_resolved):
             site_packages_removed_count += len(_functions)
             continue
-        
+
         # Check if file is in module root using resolved paths
         try:
             file_path_resolved.relative_to(module_root_resolved)
diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index fd0b221ca..9a20c471b 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -492,20 +492,20 @@ def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
     except (OSError, RuntimeError):
         # If resolve fails (e.g., path doesn't exist or is malformed), try absolute()
         path_resolved = path.absolute() if not path.is_absolute() else path
-    
+
     try:
         src_root_resolved = src_root.resolve()
     except (OSError, RuntimeError):
         src_root_resolved = src_root.absolute() if not src_root.is_absolute() else src_root
-    
+
     # Normalize paths using os.path.normpath and normcase for cross-platform compatibility
     path_str = os.path.normpath(str(path_resolved))
     src_root_str = os.path.normpath(str(src_root_resolved))
-    
+
     # Convert to lowercase for case-insensitive comparison on Windows
     path_normalized = os.path.normcase(path_str)
     src_root_normalized = os.path.normcase(src_root_str)
-    
+
     # Try using Path.relative_to with resolved paths first
     try:
         relative_path = path_resolved.relative_to(src_root_resolved)
@@ -526,7 +526,7 @@ def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
                 f"Path {path_resolved} (normalized: {path_normalized}) is not relative to "
                 f"{src_root_resolved} (normalized: {src_root_normalized})"
             )
-    
+
     return dest_root / relative_path
 
 

From d62462d3a8e494284f973a618cbb920ec5aff4a3 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Fri, 28 Nov 2025 06:10:23 +0200
Subject: [PATCH 05/26] fix linting errors

---
 codeflash/optimization/optimizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
index 9a20c471b..0f88b66ec 100644
--- a/codeflash/optimization/optimizer.py
+++ b/codeflash/optimization/optimizer.py
@@ -509,7 +509,7 @@ def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
     # Try using Path.relative_to with resolved paths first
     try:
         relative_path = path_resolved.relative_to(src_root_resolved)
-    except ValueError:
+    except ValueError as err:
         # If relative_to fails, manually extract the relative path using normalized strings
         if path_normalized.startswith(src_root_normalized):
             # Extract relative path manually
@@ -522,10 +522,11 @@ def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
                 relative_str = path_normalized[len(src_root_normalized) :].lstrip(os.sep)
                 relative_path = Path(relative_str)
         else:
-            raise ValueError(
+            error_msg = (
                 f"Path {path_resolved} (normalized: {path_normalized}) is not relative to "
                 f"{src_root_resolved} (normalized: {src_root_normalized})"
             )
+            raise ValueError(error_msg) from err
 
     return dest_root / relative_path
 

From 8b4a5f57e6f86d7275896e292d3eec013adccaee Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Mon, 1 Dec 2025 18:58:13 +0200
Subject: [PATCH 06/26] FIX FAILING TEST

---
 codeflash/discovery/functions_to_optimize.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index f4f2d84b3..104b86beb 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -685,6 +685,8 @@ def _resolve_path(path: Path | str) -> Path:
     for file_path_path, functions in modified_functions.items():
         _functions = functions
         # Resolve file path to absolute path
+        # Convert to Path if it's a string (e.g., from get_functions_within_git_diff)
+        file_path_path = _resolve_path(file_path_path)
         try:
             file_path_resolved = file_path_path.resolve()
         except (OSError, RuntimeError):

From d2a2e96a12173939bd1364c3988ec8cc32b4f4e4 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Mon, 1 Dec 2025 19:00:08 +0200
Subject: [PATCH 07/26] fix linting error

---
 codeflash/discovery/functions_to_optimize.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index 104b86beb..f29cf1325 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -686,13 +686,13 @@ def _resolve_path(path: Path | str) -> Path:
         _functions = functions
         # Resolve file path to absolute path
         # Convert to Path if it's a string (e.g., from get_functions_within_git_diff)
-        file_path_path = _resolve_path(file_path_path)
+        file_path_obj = _resolve_path(file_path_path)
         try:
-            file_path_resolved = file_path_path.resolve()
+            file_path_resolved = file_path_obj.resolve()
         except (OSError, RuntimeError):
-            file_path_resolved = file_path_path.absolute() if not file_path_path.is_absolute() else file_path_path
+            file_path_resolved = file_path_obj.absolute() if not file_path_obj.is_absolute() else file_path_obj
 
-        file_path = str(file_path_path)
+        file_path = str(file_path_obj)
 
         # Check if file is in tests root using resolved paths
         try:

From 3694c805ef931e078bed3f27ea3606a4e1e97182 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 2 Dec 2025 08:23:40 +0200
Subject: [PATCH 08/26] fixing one test after windows changes

---
 tests/test_function_discovery.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/test_function_discovery.py b/tests/test_function_discovery.py
index 49a67ba9b..76b3445a1 100644
--- a/tests/test_function_discovery.py
+++ b/tests/test_function_discovery.py
@@ -411,13 +411,17 @@ def not_in_checkpoint_function():
 
         discovered = find_all_functions_in_file(test_file_path)
         modified_functions = {test_file_path: discovered[test_file_path]}
-        filtered, count = filter_functions(
-            modified_functions,
-            tests_root=Path("tests"),
-            ignore_paths=[],
-            project_root=temp_dir,
-            module_root=temp_dir,
-        )
+        # Use an absolute path for tests_root that won't match the temp directory
+        # This avoids path resolution issues in CI where the working directory might differ
+        tests_root_absolute = (temp_dir.parent / "nonexistent_tests_dir").resolve()
+        with unittest.mock.patch("codeflash.discovery.functions_to_optimize.get_blocklisted_functions", return_value={}):
+            filtered, count = filter_functions(
+                modified_functions,
+                tests_root=tests_root_absolute,
+                ignore_paths=[],
+                project_root=temp_dir,
+                module_root=temp_dir,
+            )
         function_names = [fn.function_name for fn in filtered.get(test_file_path, [])]
         assert "propagate_attributes" in function_names
         assert count == 3

From 5987a57f73cb906539a5644a704770f704072bad Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 4 Dec 2025 04:47:53 +0200
Subject: [PATCH 09/26] update the unicode delimiter to be windows compatible

---
 codeflash/lsp/lsp_message.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codeflash/lsp/lsp_message.py b/codeflash/lsp/lsp_message.py
index 36b2f3dcc..9e6c487fb 100644
--- a/codeflash/lsp/lsp_message.py
+++ b/codeflash/lsp/lsp_message.py
@@ -11,8 +11,8 @@
 json_primitive_types = (str, float, int, bool)
 max_code_lines_before_collapse = 45
 
-# \u241F is the message delimiter becuase it can be more than one message sent over the same message, so we need something to separate each message
-message_delimiter = "\u241f"
+# \\u241F is the message delimiter becuase it can be more than one message sent over the same message, so we need something to separate each message
+message_delimiter = "\\u241F"
 
 
 # allow the client to know which message it is receiving

From 4344374ca8414ef9526065150f2c101e4a990b18 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Thu, 4 Dec 2025 06:42:51 +0200
Subject: [PATCH 10/26] fix tests discovery in windows for vsc

---
 codeflash/cli_cmds/console.py              | 10 +++++
 codeflash/discovery/discover_unit_tests.py | 48 ++++++++++++++++------
 2 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index ecf7d3e6d..34585985f 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -141,6 +141,16 @@ def __init__(self) -> None:
 @contextmanager
 def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
     """Progress bar for test files."""
+    if is_LSP_enabled():
+        lsp_log(LspTextMessage(text=description, takes_time=True))
+
+        class DummyProgress:
+            def advance(self, *args, **kwargs) -> None:
+                pass
+
+        yield DummyProgress(), 0  # type: ignore
+        return
+
     with Progress(
         SpinnerColumn(next(spinners)),
         TextColumn("[progress.description]{task.description}"),
diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index bc0e2fd67..a42866c3f 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -584,20 +584,42 @@ def discover_tests_pytest(
     project_root = cfg.project_root_path
 
     tmp_pickle_path = get_run_tmp_file("collected_tests.pkl")
+    
+    logger.info(f"!lsp|Starting pytest discovery. Cwd: {project_root}")
+    discovery_script = Path(__file__).parent / "pytest_new_process_discovery.py"
+
+    run_kwargs = {
+        "cwd": project_root,
+        "check": False,
+        "capture_output": True,
+        "text": True,
+        "stdin": subprocess.DEVNULL,
+    }
+    if os.name == "nt":
+        # Prevent console window spawning on Windows which can cause hangs in LSP
+        run_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
+
     with custom_addopts():
-        result = subprocess.run(
-            [
-                SAFE_SYS_EXECUTABLE,
-                Path(__file__).parent / "pytest_new_process_discovery.py",
-                str(project_root),
-                str(tests_root),
-                str(tmp_pickle_path),
-            ],
-            cwd=project_root,
-            check=False,
-            capture_output=True,
-            text=True,
-        )
+        try:
+            result = subprocess.run(
+                [
+                    SAFE_SYS_EXECUTABLE,
+                    discovery_script,
+                    str(project_root),
+                    str(tests_root),
+                    str(tmp_pickle_path),
+                ],
+                **run_kwargs,
+                timeout=60,
+            )
+            logger.info(f"!lsp|Pytest discovery finished with code {result.returncode}")
+        except subprocess.TimeoutExpired:
+            logger.error("!lsp|Pytest discovery timed out after 60s")
+            result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr="Timeout")
+        except Exception as e:
+            logger.error(f"!lsp|Pytest discovery failed: {e}")
+            result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr=str(e))
+
     try:
         with tmp_pickle_path.open(mode="rb") as f:
             exitcode, tests, pytest_rootdir = pickle.load(f)

From 9b78d88660fb43eff64622403a3627031a0f99ba Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Mon, 8 Dec 2025 14:21:53 +0200
Subject: [PATCH 11/26] fix Starting baseline establishment in windows

---
 codeflash/code_utils/coverage_utils.py       |  34 +++-
 codeflash/optimization/function_optimizer.py |  62 ++++++-
 codeflash/verification/test_runner.py        | 163 ++++++++++++++++---
 3 files changed, 232 insertions(+), 27 deletions(-)

diff --git a/codeflash/code_utils/coverage_utils.py b/codeflash/code_utils/coverage_utils.py
index ed3d277a4..371f82d4c 100644
--- a/codeflash/code_utils/coverage_utils.py
+++ b/codeflash/code_utils/coverage_utils.py
@@ -68,9 +68,39 @@ def generate_candidates(source_code_path: Path) -> set[str]:
 
 
 def prepare_coverage_files() -> tuple[Path, Path]:
-    """Prepare coverage configuration and output files."""
+    """
+    Prepare coverage configuration and output files.
+    
+    Returns tuple of (coverage_database_file, coverage_config_file).
+    """
+    from codeflash.cli_cmds.console import logger
+    
+    logger.info("!lsp|coverage_utils.prepare_coverage_files: Starting coverage file preparation")
+    
+    logger.info("!lsp|coverage_utils.prepare_coverage_files: Getting coverage database file path")
     coverage_database_file = get_run_tmp_file(Path(".coverage"))
+    logger.info(f"!lsp|coverage_utils.prepare_coverage_files: Coverage database file: {coverage_database_file}")
+    
+    logger.info("!lsp|coverage_utils.prepare_coverage_files: Getting coverage config file path")
     coveragercfile = get_run_tmp_file(Path(".coveragerc"))
+    logger.info(f"!lsp|coverage_utils.prepare_coverage_files: Coverage config file: {coveragercfile}")
+    
     coveragerc_content = f"[run]\n branch = True\ndata_file={coverage_database_file}\n"
-    coveragercfile.write_text(coveragerc_content)
+    logger.info("!lsp|coverage_utils.prepare_coverage_files: Writing coverage config content")
+    logger.debug(f"coverage_utils.prepare_coverage_files: Config content:\n{coveragerc_content}")
+    
+    try:
+        coveragercfile.write_text(coveragerc_content)
+        logger.info("!lsp|coverage_utils.prepare_coverage_files: Coverage config file written successfully")
+    except Exception as e:
+        logger.error(
+            f"!lsp|coverage_utils.prepare_coverage_files: Failed to write coverage config file: "
+            f"{type(e).__name__}: {e}"
+        )
+        raise
+    
+    logger.info(
+        f"!lsp|coverage_utils.prepare_coverage_files: Coverage files prepared successfully. "
+        f"database={coverage_database_file}, config={coveragercfile}"
+    )
     return coverage_database_file, coveragercfile
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 2eef51f0f..1ccf81f11 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1597,22 +1597,56 @@ def establish_original_code_baseline(
         assert (test_framework := self.args.test_framework) in {"pytest", "unittest"}  # noqa: RUF018
         success = True
 
+        logger.info(
+            f"!lsp|function_optimizer.establish_baseline: Starting baseline establishment. "
+            f"function={self.function_to_optimize.function_name}, "
+            f"test_files_count={len(self.test_files.test_files)}, "
+            f"test_framework={test_framework}"
+        )
+        
         test_env = self.get_test_env(codeflash_loop_index=0, codeflash_test_iteration=0, codeflash_tracer_disable=1)
+        logger.info(
+            f"!lsp|function_optimizer.establish_baseline: Test environment created. "
+            f"CODEFLASH_TEST_ITERATION={test_env.get('CODEFLASH_TEST_ITERATION')}, "
+            f"CODEFLASH_LOOP_INDEX={test_env.get('CODEFLASH_LOOP_INDEX')}, "
+            f"PYTHONPATH={test_env.get('PYTHONPATH', 'not set')[:100]}..."
+        )
 
         if self.function_to_optimize.is_async:
+            logger.info("!lsp|function_optimizer.establish_baseline: Function is async, adding async decorator")
             from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function
 
             success = add_async_decorator_to_function(
                 self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR
             )
+            logger.info(f"!lsp|function_optimizer.establish_baseline: Async decorator added. success={success}")
 
         # Instrument codeflash capture
+        logger.info(
+            f"!lsp|function_optimizer.establish_baseline: About to enter progress_bar context. "
+            f"test_files={len(self.test_files.test_files)}, test_framework={test_framework}, "
+            f"function_name={self.function_to_optimize.function_name}"
+        )
         with progress_bar("Running tests to establish original code behavior..."):
             try:
+                logger.info("!lsp|function_optimizer.establish_baseline: Entered progress_bar context")
+                logger.info(
+                    f"!lsp|function_optimizer.establish_baseline: Calling instrument_codeflash_capture. "
+                    f"function={self.function_to_optimize.function_name}, "
+                    f"helper_classes={len(file_path_to_helper_classes)}"
+                )
                 instrument_codeflash_capture(
                     self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
                 )
+                logger.info("!lsp|function_optimizer.establish_baseline: instrument_codeflash_capture completed")
+                
                 total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE
+                logger.info(
+                    f"!lsp|function_optimizer.establish_baseline: About to call run_and_parse_tests. "
+                    f"testing_type=BEHAVIOR, enable_coverage={test_framework == 'pytest'}, "
+                    f"test_files_count={len(self.test_files.test_files)}, "
+                    f"testing_time={total_looping_time}"
+                )
                 behavioral_results, coverage_results = self.run_and_parse_tests(
                     testing_type=TestingMode.BEHAVIOR,
                     test_env=test_env,
@@ -1622,11 +1656,18 @@ def establish_original_code_baseline(
                     enable_coverage=test_framework == "pytest",
                     code_context=code_context,
                 )
+                logger.info(
+                    f"!lsp|function_optimizer.establish_baseline: run_and_parse_tests completed. "
+                    f"behavioral_results_count={len(behavioral_results) if behavioral_results else 0}, "
+                    f"coverage_results={'present' if coverage_results else 'None'}"
+                )
             finally:
+                logger.info("!lsp|function_optimizer.establish_baseline: Cleaning up - removing codeflash capture")
                 # Remove codeflash capture
                 self.write_code_and_helpers(
                     self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
                 )
+                logger.info("!lsp|function_optimizer.establish_baseline: Cleanup completed")
         if not behavioral_results:
             logger.warning(
                 f"force_lsp|Couldn't run any tests for original function {self.function_to_optimize.function_name}. Skipping optimization."
@@ -1923,10 +1964,21 @@ def run_and_parse_tests(
         unittest_loop_index: int | None = None,
         line_profiler_output_file: Path | None = None,
     ) -> tuple[TestResults | dict, CoverageData | None]:
+        logger.info(
+            f"!lsp|function_optimizer.run_and_parse_tests: Starting test execution. "
+            f"testing_type={testing_type}, optimization_iteration={optimization_iteration}, "
+            f"enable_coverage={enable_coverage}, test_files_count={len(test_files.test_files)}, "
+            f"testing_time={testing_time}"
+        )
         coverage_database_file = None
         coverage_config_file = None
         try:
             if testing_type == TestingMode.BEHAVIOR:
+                logger.info(
+                    f"!lsp|function_optimizer.run_and_parse_tests: Calling run_behavioral_tests. "
+                    f"test_framework={self.test_cfg.test_framework}, cwd={self.project_root}, "
+                    f"pytest_timeout={INDIVIDUAL_TESTCASE_TIMEOUT}"
+                )
                 result_file_path, run_result, coverage_database_file, coverage_config_file = run_behavioral_tests(
                     test_files,
                     test_framework=self.test_cfg.test_framework,
@@ -1964,9 +2016,15 @@ def run_and_parse_tests(
             else:
                 msg = f"Unexpected testing type: {testing_type}"
                 raise ValueError(msg)
-        except subprocess.TimeoutExpired:
+        except subprocess.TimeoutExpired as e:
+            test_files_str = ', '.join(str(f) for f in test_files.test_files)
+            logger.error(
+                f"!lsp|function_optimizer.run_and_parse_tests: Test execution timed out. "
+                f"testing_type={testing_type}, test_files=[{test_files_str}], "
+                f"timeout={e.timeout if hasattr(e, 'timeout') else 'unknown'}"
+            )
             logger.exception(
-                f"Error running tests in {', '.join(str(f) for f in test_files.test_files)}.\nTimeout Error"
+                f"Error running tests in {test_files_str}.\nTimeout Error"
             )
             return TestResults(), None
         if run_result.returncode != 0 and testing_type == TestingMode.BEHAVIOR:
diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index 5c839b8fb..1b96e26b8 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -2,6 +2,7 @@
 
 import shlex
 import subprocess
+import sys
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -22,10 +23,105 @@
 def execute_test_subprocess(
     cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
 ) -> subprocess.CompletedProcess:
-    """Execute a subprocess with the given command list, working directory, environment variables, and timeout."""
+    """
+    Execute a subprocess with the given command list, working directory, environment variables, and timeout.
+
+    On Windows, uses Popen with communicate() and process groups for proper cleanup.
+    On other platforms, uses subprocess.run with capture_output.
+    """
+    is_windows = sys.platform == "win32"
+
+    logger.debug(f"execute_test_subprocess: Starting. platform={sys.platform}, timeout={timeout}s, cwd={cwd}")
+
     with custom_addopts():
-        logger.debug(f"executing test run with command: {' '.join(cmd_list)}")
-        return subprocess.run(cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False)
+        try:
+            if is_windows:
+                # WINDOWS SUBPROCESS FIX:
+                # On Windows, running pytest with coverage can hang indefinitely due to multiple issues:
+                #
+                # Problem 1: Pipe buffer deadlocks
+                #   - subprocess.run() with file handles can deadlock when the child process
+                #     produces output faster than the parent can read it
+                #   - Solution: Use Popen.communicate() which properly drains both stdout/stderr
+                #     concurrently using threads internally
+                #
+                # Problem 2: Child process waiting for stdin
+                #   - Some Windows processes (especially pytest) may wait for console input
+                #   - Solution: Use stdin=subprocess.DEVNULL to explicitly close stdin
+                #
+                # Problem 3: Orphaned child processes after timeout
+                #   - When killing a process on Windows, child processes may not be terminated
+                #   - Solution: Use CREATE_NEW_PROCESS_GROUP to allow proper process tree termination
+                #
+                # See: https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate
+
+                # CREATE_NEW_PROCESS_GROUP: Creates process in new group for proper termination
+                creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
+
+                process = subprocess.Popen(
+                    cmd_list,
+                    stdout=subprocess.PIPE,  # Capture stdout
+                    stderr=subprocess.PIPE,  # Capture stderr
+                    stdin=subprocess.DEVNULL,  # CRITICAL: Prevents child from waiting for input
+                    cwd=cwd,
+                    env=env,
+                    text=True,  # Return strings instead of bytes
+                    creationflags=creationflags,  # Windows-specific process group handling
+                )
+
+                try:
+                    # communicate() properly drains stdout/stderr avoiding deadlocks
+                    stdout_content, stderr_content = process.communicate(timeout=timeout)
+                    returncode = process.returncode
+                except subprocess.TimeoutExpired:
+                    logger.warning(f"execute_test_subprocess: Process timed out after {timeout}s, terminating...")
+                    # On Windows, terminate the entire process tree
+                    try:
+                        process.kill()
+                    except OSError:
+                        pass
+                    # Drain remaining output after killing
+                    stdout_content, stderr_content = process.communicate(timeout=5)
+                    raise subprocess.TimeoutExpired(
+                        cmd_list, timeout, output=stdout_content, stderr=stderr_content
+                    )
+
+                logger.debug(
+                    f"execute_test_subprocess: Completed. returncode={returncode}, "
+                    f"stdout_len={len(stdout_content) if stdout_content else 0}, "
+                    f"stderr_len={len(stderr_content) if stderr_content else 0}"
+                )
+
+                # Log output for debugging
+                if returncode != 0:
+                    logger.warning(f"execute_test_subprocess: Non-zero return code: {returncode}")
+                    if stderr_content:
+                        logger.warning(f"execute_test_subprocess: stderr: {stderr_content[:2000]}")
+                    if stdout_content:
+                        logger.info(f"execute_test_subprocess: stdout: {stdout_content[:2000]}")
+                elif stdout_content or stderr_content:
+                    # Log a brief summary even on success for debugging
+                    if stderr_content:
+                        logger.debug(f"execute_test_subprocess: stderr preview: {stderr_content[:500]}")
+
+                return subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
+            else:
+                # On Linux/Mac, use subprocess.run (works fine there)
+                result = subprocess.run(
+                    cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False
+                )
+                logger.debug(
+                    f"execute_test_subprocess: Completed. returncode={result.returncode}, "
+                    f"stdout_len={len(result.stdout) if result.stdout else 0}, "
+                    f"stderr_len={len(result.stderr) if result.stderr else 0}"
+                )
+                return result
+        except subprocess.TimeoutExpired:
+            logger.warning(f"execute_test_subprocess: TimeoutExpired after {timeout}s")
+            raise
+        except Exception as e:
+            logger.error(f"execute_test_subprocess: Unexpected exception: {type(e).__name__}: {e}")
+            raise
 
 
 def run_behavioral_tests(
@@ -40,6 +136,15 @@ def run_behavioral_tests(
     pytest_target_runtime_seconds: int = TOTAL_LOOPING_TIME_EFFECTIVE,
     enable_coverage: bool = False,
 ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]:
+    """
+    Run behavioral tests with optional coverage.
+
+    On Windows, uses --capture=no to avoid subprocess output deadlocks.
+    """
+    is_windows = sys.platform == "win32"
+
+    logger.debug(f"run_behavioral_tests: framework={test_framework}, enable_coverage={enable_coverage}")
+
     if test_framework == "pytest":
         test_files: list[str] = []
         for file in test_paths.test_files:
@@ -53,14 +158,20 @@ def run_behavioral_tests(
                 )
             else:
                 test_files.append(str(file.instrumented_behavior_file_path))
+
         pytest_cmd_list = (
             shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX)
             if pytest_cmd == "pytest"
             else [SAFE_SYS_EXECUTABLE, "-m", *shlex.split(pytest_cmd, posix=IS_POSIX)]
         )
         test_files = list(set(test_files))  # remove multiple calls in the same test function
+
+        # On Windows, use --capture=no to avoid subprocess output deadlocks
+        # On other platforms, use --capture=tee-sys to both capture and display output
+        capture_mode = "--capture=no" if is_windows else "--capture=tee-sys"
+
         common_pytest_args = [
-            "--capture=tee-sys",
+            capture_mode,
             f"--timeout={pytest_timeout}",
             "-q",
             "--codeflash_loops_scope=session",
@@ -78,11 +189,21 @@ def run_behavioral_tests(
         if enable_coverage:
             coverage_database_file, coverage_config_file = prepare_coverage_files()
 
-            cov_erase = execute_test_subprocess(
-                shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env
-            )  # this cleanup is necessary to avoid coverage data from previous runs, if there are any,
-            # then the current run will be appended to the previous data, which skews the results
-            logger.debug(cov_erase)
+            # On Windows, delete coverage database file directly instead of using 'coverage erase'
+            # to avoid file locking issues
+            if is_windows:
+                try:
+                    if coverage_database_file.exists():
+                        coverage_database_file.unlink()
+                except PermissionError as e:
+                    logger.warning(f"run_behavioral_tests: PermissionError deleting coverage database: {e}")
+                except Exception as e:
+                    logger.warning(f"run_behavioral_tests: Exception deleting coverage database: {e}")
+            else:
+                cov_erase = execute_test_subprocess(
+                    shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30
+                )
+
             coverage_cmd = [
                 SAFE_SYS_EXECUTABLE,
                 "-m",
@@ -99,26 +220,22 @@ def run_behavioral_tests(
 
             blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"]
 
+            final_cmd = coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files
             results = execute_test_subprocess(
-                coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files,
+                final_cmd,
                 cwd=cwd,
                 env=pytest_test_env,
-                timeout=600,
-            )
-            logger.debug(
-                f"Result return code: {results.returncode}, "
-                f"{'Result stderr:' + str(results.stderr) if results.stderr else ''}"
+                timeout=60,
             )
         else:
             blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS]
+
+            final_cmd = pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files
             results = execute_test_subprocess(
-                pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files,
+                final_cmd,
                 cwd=cwd,
                 env=pytest_test_env,
-                timeout=600,  # TODO: Make this dynamic
-            )
-            logger.debug(
-                f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ""}"""
+                timeout=60,  # TODO: Make this dynamic
             )
     elif test_framework == "unittest":
         if enable_coverage:
@@ -198,7 +315,7 @@ def run_line_profile_tests(
             pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files,
             cwd=cwd,
             env=pytest_test_env,
-            timeout=600,  # TODO: Make this dynamic
+            timeout=60,  # TODO: Make this dynamic
         )
     elif test_framework == "unittest":
         test_env["CODEFLASH_LOOP_INDEX"] = "1"
@@ -282,7 +399,7 @@ def run_benchmarking_tests(
             pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files,
             cwd=cwd,
             env=pytest_test_env,
-            timeout=600,  # TODO: Make this dynamic
+            timeout=60,  # TODO: Make this dynamic
         )
     elif test_framework == "unittest":
         test_files = [file.benchmarking_file_path for file in test_paths.test_files]
@@ -304,6 +421,6 @@ def run_unittest_tests(
     files = [str(file) for file in test_file_paths]
     output_file = ["--output-file", str(result_file_path)]
     results = execute_test_subprocess(
-        unittest_cmd_list + log_level + files + output_file, cwd=cwd, env=test_env, timeout=600
+        unittest_cmd_list + log_level + files + output_file, cwd=cwd, env=test_env, timeout=60
     )
     return result_file_path, results

From 784378c395fd3ecb47aa79cc51121c16e2ec9790 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Mon, 8 Dec 2025 19:16:17 +0200
Subject: [PATCH 12/26] adding _manual_cleanup_worktree_directory

---
 codeflash/code_utils/git_worktree_utils.py | 124 ++++++++++++++++++++-
 1 file changed, 121 insertions(+), 3 deletions(-)

diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index 9a071b741..060d768c8 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
 import configparser
+import os
+import shutil
 import subprocess
+import sys
 import tempfile
 import time
 from pathlib import Path
@@ -96,11 +99,126 @@ def create_detached_worktree(module_root: Path) -> Optional[Path]:
 
 
 def remove_worktree(worktree_dir: Path) -> None:
+    """
+    Remove a git worktree with robust error handling for Windows file locking issues.
+    
+    This function handles Windows-specific issues where files may be locked by processes,
+    causing 'Permission denied' errors. It implements retry logic with exponential backoff
+    and falls back to manual directory removal if git worktree remove fails.
+    
+    Args:
+        worktree_dir: Path to the worktree directory to remove
+    """
+    if not worktree_dir or not worktree_dir.exists():
+        logger.info(f"remove_worktree: Worktree does not exist, skipping removal. worktree_dir={worktree_dir}")
+        return
+
+    is_windows = sys.platform == "win32"
+    max_retries = 3 if is_windows else 1
+    retry_delay = 0.5  # Start with 500ms delay
+    
+    logger.info(f"remove_worktree: Starting worktree removal. worktree_dir={worktree_dir}, platform={sys.platform}, max_retries={max_retries}")
+    
+    # Try to get the repository and git root for worktree removal
     try:
         repository = git.Repo(worktree_dir, search_parent_directories=True)
-        repository.git.worktree("remove", "--force", worktree_dir)
-    except Exception:
-        logger.exception(f"Failed to remove worktree: {worktree_dir}")
+        git_root = repository.working_dir or repository.git_dir
+        logger.info(f"remove_worktree: Found repository. git_root={git_root}")
+    except Exception as e:
+        logger.warning(f"remove_worktree: Could not access repository, attempting manual cleanup. worktree_dir={worktree_dir}, error={e}")
+        # If we can't access the repository, try manual cleanup
+        _manual_cleanup_worktree_directory(worktree_dir)
+        return
+
+    # Attempt to remove worktree using git command with retries
+    for attempt in range(max_retries):
+        try:
+            attempt_num = attempt + 1
+            logger.info(f"remove_worktree: Attempting git worktree remove. attempt={attempt_num}, max_retries={max_retries}, worktree_dir={worktree_dir}")
+            repository.git.worktree("remove", "--force", str(worktree_dir))
+            logger.info(f"remove_worktree: Successfully removed worktree via git command. worktree_dir={worktree_dir}")
+            return
+        except git.exc.GitCommandError as e:
+            error_msg = str(e).lower()
+            is_permission_error = "permission denied" in error_msg or "access is denied" in error_msg
+            
+            if is_permission_error and attempt < max_retries - 1:
+                # On Windows, file locks may be temporary - retry with exponential backoff
+                logger.info(
+                    f"remove_worktree: Permission denied, retrying. attempt={attempt_num}, "
+                    f"retry_delay={retry_delay}, worktree_dir={worktree_dir}, error={e}"
+                )
+                time.sleep(retry_delay)
+                retry_delay *= 2  # Exponential backoff
+            else:
+                # Last attempt failed or non-permission error
+                logger.warning(
+                    f"remove_worktree: Git worktree remove failed, attempting fallback cleanup. "
+                    f"attempts={attempt_num}, worktree_dir={worktree_dir}, error={e}"
+                )
+                break
+        except Exception as e:
+            logger.warning(
+                f"remove_worktree: Unexpected error during git worktree remove, attempting fallback cleanup. "
+                f"worktree_dir={worktree_dir}, error={e}"
+            )
+            break
+
+    # Fallback: Try to remove worktree entry from git, then manually delete directory
+    try:
+        logger.info(f"remove_worktree: Attempting fallback cleanup. worktree_dir={worktree_dir}")
+        
+        # Try to prune the worktree entry from git (this doesn't delete the directory)
+        try:
+            # Use git worktree prune to remove stale entries
+            repository.git.worktree("prune")
+            logger.info(f"remove_worktree: Successfully pruned worktree entries")
+        except Exception as prune_error:
+            logger.info(f"remove_worktree: Could not prune worktree entries. error={prune_error}")
+        
+        # Manually remove the directory
+        _manual_cleanup_worktree_directory(worktree_dir)
+        
+    except Exception as e:
+        logger.error(
+            f"remove_worktree: Failed to cleanup worktree directory after all attempts. "
+            f"worktree_dir={worktree_dir}, error={e}"
+        )
+
+
+def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
+    """
+    Manually remove a worktree directory, handling Windows file locking issues.
+    
+    This is a fallback method when git worktree remove fails. It uses shutil.rmtree
+    with error handling similar to cleanup_paths.
+    
+    Args:
+        worktree_dir: Path to the worktree directory to remove
+    """
+    if not worktree_dir or not worktree_dir.exists():
+        logger.info(f"_manual_cleanup_worktree_directory: Directory does not exist, skipping. worktree_dir={worktree_dir}")
+        return
+
+    logger.info(f"_manual_cleanup_worktree_directory: Starting manual directory removal. worktree_dir={worktree_dir}")
+    
+    try:
+        # On Windows, files may be locked - use ignore_errors similar to cleanup_paths
+        shutil.rmtree(worktree_dir, ignore_errors=True)
+        
+        # Verify removal
+        if worktree_dir.exists():
+            logger.warning(
+                f"_manual_cleanup_worktree_directory: Directory still exists after removal attempt. "
+                f"worktree_dir={worktree_dir}"
+            )
+        else:
+            logger.info(f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}")
+    except Exception as e:
+        logger.error(
+            f"_manual_cleanup_worktree_directory: Failed to remove directory. "
+            f"worktree_dir={worktree_dir}, error={e}"
+        )
 
 
 def create_diff_patch_from_worktree(

From 194bad5814c11f52b6fd17808189e56e8f244864 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Mon, 8 Dec 2025 19:38:39 +0200
Subject: [PATCH 13/26] Improved the manual cleanup function to handle Windows
 file locking and added safty check

---
 codeflash/code_utils/git_worktree_utils.py | 158 +++++++++++++++++++--
 1 file changed, 146 insertions(+), 12 deletions(-)

diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index 060d768c8..f7167ebc2 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -193,6 +193,11 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
     This is a fallback method when git worktree remove fails. It uses shutil.rmtree
     with error handling similar to cleanup_paths.
     
+    SAFETY: This function includes multiple safeguards to prevent accidental deletion:
+    - Only deletes directories under the worktree_dirs cache location
+    - Verifies the path is a worktree directory (not the original repo)
+    - Uses resolve() to normalize paths and prevent path traversal attacks
+    
     Args:
         worktree_dir: Path to the worktree directory to remove
     """
@@ -200,25 +205,154 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
         logger.info(f"_manual_cleanup_worktree_directory: Directory does not exist, skipping. worktree_dir={worktree_dir}")
         return
 
-    logger.info(f"_manual_cleanup_worktree_directory: Starting manual directory removal. worktree_dir={worktree_dir}")
-    
+    # SAFETY CHECK 1: Resolve paths to absolute, normalized paths
     try:
-        # On Windows, files may be locked - use ignore_errors similar to cleanup_paths
-        shutil.rmtree(worktree_dir, ignore_errors=True)
+        worktree_dir = worktree_dir.resolve()
+        worktree_dirs_resolved = worktree_dirs.resolve()
+    except (OSError, ValueError) as e:
+        logger.error(
+            f"_manual_cleanup_worktree_directory: Failed to resolve paths, aborting for safety. "
+            f"worktree_dir={worktree_dir}, error={e}"
+        )
+        return
+
+    # SAFETY CHECK 2: Ensure worktree_dir is actually under worktree_dirs
+    # This prevents deletion of the original repository or any other directory
+    try:
+        # Check if worktree_dir is a subdirectory of worktree_dirs
+        worktree_dir_parts = worktree_dir.parts
+        worktree_dirs_parts = worktree_dirs_resolved.parts
         
-        # Verify removal
-        if worktree_dir.exists():
-            logger.warning(
-                f"_manual_cleanup_worktree_directory: Directory still exists after removal attempt. "
-                f"worktree_dir={worktree_dir}"
+        # worktree_dir must have more parts than worktree_dirs and start with the same parts
+        if len(worktree_dir_parts) <= len(worktree_dirs_parts):
+            logger.error(
+                f"_manual_cleanup_worktree_directory: Path is not a subdirectory of worktree_dirs, aborting for safety. "
+                f"worktree_dir={worktree_dir}, worktree_dirs={worktree_dirs_resolved}"
             )
-        else:
-            logger.info(f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}")
+            return
+        
+        if worktree_dir_parts[:len(worktree_dirs_parts)] != worktree_dirs_parts:
+            logger.error(
+                f"_manual_cleanup_worktree_directory: Path is not under worktree_dirs, aborting for safety. "
+                f"worktree_dir={worktree_dir}, worktree_dirs={worktree_dirs_resolved}"
+            )
+            return
     except Exception as e:
         logger.error(
-            f"_manual_cleanup_worktree_directory: Failed to remove directory. "
+            f"_manual_cleanup_worktree_directory: Error during path validation, aborting for safety. "
             f"worktree_dir={worktree_dir}, error={e}"
         )
+        return
+
+    # SAFETY CHECK 3: Additional verification - ensure it's not the worktree_dirs root itself
+    if worktree_dir == worktree_dirs_resolved:
+        logger.error(
+            f"_manual_cleanup_worktree_directory: Attempted to delete worktree_dirs root, aborting for safety. "
+            f"worktree_dir={worktree_dir}"
+        )
+        return
+
+    logger.info(f"_manual_cleanup_worktree_directory: Starting manual directory removal. worktree_dir={worktree_dir}")
+    
+    # On Windows, files may be locked by processes - retry with delays
+    is_windows = sys.platform == "win32"
+    max_cleanup_retries = 3 if is_windows else 1
+    cleanup_retry_delay = 0.5
+    
+    # On Windows, try to remove read-only attributes before deletion
+    if is_windows:
+        try:
+            _remove_readonly_attributes_windows(worktree_dir)
+        except Exception as e:
+            logger.info(
+                f"_manual_cleanup_worktree_directory: Could not remove read-only attributes. "
+                f"worktree_dir={worktree_dir}, error={e}"
+            )
+    
+    for cleanup_attempt in range(max_cleanup_retries):
+        try:
+            cleanup_attempt_num = cleanup_attempt + 1
+            if cleanup_attempt_num > 1:
+                logger.info(
+                    f"_manual_cleanup_worktree_directory: Retrying directory removal. "
+                    f"attempt={cleanup_attempt_num}, retry_delay={cleanup_retry_delay}, worktree_dir={worktree_dir}"
+                )
+                time.sleep(cleanup_retry_delay)
+                cleanup_retry_delay *= 2  # Exponential backoff
+                # On retry, try removing read-only attributes again
+                if is_windows:
+                    try:
+                        _remove_readonly_attributes_windows(worktree_dir)
+                    except Exception:
+                        pass  # Ignore errors on retry
+            
+            # Try to remove the directory
+            # On Windows, use ignore_errors to handle locked files gracefully
+            shutil.rmtree(worktree_dir, ignore_errors=True)
+            
+            # Verify removal - wait a bit for Windows to release locks
+            if is_windows:
+                # Longer wait on Windows to allow file handles to be released
+                wait_time = 0.3 if cleanup_attempt_num < max_cleanup_retries else 0.1
+                time.sleep(wait_time)
+            
+            if not worktree_dir.exists():
+                logger.info(f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}")
+                return
+            elif cleanup_attempt_num < max_cleanup_retries:
+                # Directory still exists, will retry
+                logger.info(
+                    f"_manual_cleanup_worktree_directory: Directory still exists, will retry. "
+                    f"attempt={cleanup_attempt_num}, worktree_dir={worktree_dir}"
+                )
+            else:
+                # Last attempt failed
+                logger.warning(
+                    f"_manual_cleanup_worktree_directory: Directory still exists after all removal attempts. "
+                    f"attempts={cleanup_attempt_num}, worktree_dir={worktree_dir}. "
+                    f"This may indicate locked files that will be cleaned up later."
+                )
+                return
+                
+        except Exception as e:
+            if cleanup_attempt_num < max_cleanup_retries:
+                logger.info(
+                    f"_manual_cleanup_worktree_directory: Exception during removal, will retry. "
+                    f"attempt={cleanup_attempt_num}, worktree_dir={worktree_dir}, error={e}"
+                )
+            else:
+                logger.error(
+                    f"_manual_cleanup_worktree_directory: Failed to remove directory after all attempts. "
+                    f"attempts={cleanup_attempt_num}, worktree_dir={worktree_dir}, error={e}"
+                )
+                return
+
+
+def _remove_readonly_attributes_windows(path: Path) -> None:
+    """
+    Remove read-only attributes from files and directories on Windows.
+    
+    This helps with deletion of files that may have been marked read-only.
+    
+    Args:
+        path: Path to the file or directory to process
+    """
+    if not path.exists():
+        return
+    
+    try:
+        # Remove read-only attribute from the path itself
+        os.chmod(path, 0o777)
+    except (OSError, PermissionError):
+        pass  # Ignore errors - file may be locked
+    
+    # Recursively process directory contents
+    if path.is_dir():
+        try:
+            for item in path.iterdir():
+                _remove_readonly_attributes_windows(item)
+        except (OSError, PermissionError):
+            pass  # Ignore errors - directory may be locked or inaccessible
 
 
 def create_diff_patch_from_worktree(

From 5e1b108486faf44a7fa59c2d762acb86627ee5bb Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Mon, 8 Dec 2025 19:47:28 +0200
Subject: [PATCH 14/26] clean and optimize the git worktree deletion failure
 handling

---
 codeflash/code_utils/git_worktree_utils.py | 216 ++++++++++-----------
 1 file changed, 106 insertions(+), 110 deletions(-)

diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index f7167ebc2..2dde68e02 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -191,7 +191,7 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
     Manually remove a worktree directory, handling Windows file locking issues.
     
     This is a fallback method when git worktree remove fails. It uses shutil.rmtree
-    with error handling similar to cleanup_paths.
+    with custom error handling for Windows-specific issues.
     
     SAFETY: This function includes multiple safeguards to prevent accidental deletion:
     - Only deletes directories under the worktree_dirs cache location
@@ -205,154 +205,150 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
         logger.info(f"_manual_cleanup_worktree_directory: Directory does not exist, skipping. worktree_dir={worktree_dir}")
         return
 
-    # SAFETY CHECK 1: Resolve paths to absolute, normalized paths
-    try:
-        worktree_dir = worktree_dir.resolve()
-        worktree_dirs_resolved = worktree_dirs.resolve()
-    except (OSError, ValueError) as e:
-        logger.error(
-            f"_manual_cleanup_worktree_directory: Failed to resolve paths, aborting for safety. "
-            f"worktree_dir={worktree_dir}, error={e}"
-        )
-        return
-
-    # SAFETY CHECK 2: Ensure worktree_dir is actually under worktree_dirs
-    # This prevents deletion of the original repository or any other directory
-    try:
-        # Check if worktree_dir is a subdirectory of worktree_dirs
-        worktree_dir_parts = worktree_dir.parts
-        worktree_dirs_parts = worktree_dirs_resolved.parts
-        
-        # worktree_dir must have more parts than worktree_dirs and start with the same parts
-        if len(worktree_dir_parts) <= len(worktree_dirs_parts):
-            logger.error(
-                f"_manual_cleanup_worktree_directory: Path is not a subdirectory of worktree_dirs, aborting for safety. "
-                f"worktree_dir={worktree_dir}, worktree_dirs={worktree_dirs_resolved}"
-            )
-            return
-        
-        if worktree_dir_parts[:len(worktree_dirs_parts)] != worktree_dirs_parts:
-            logger.error(
-                f"_manual_cleanup_worktree_directory: Path is not under worktree_dirs, aborting for safety. "
-                f"worktree_dir={worktree_dir}, worktree_dirs={worktree_dirs_resolved}"
-            )
-            return
-    except Exception as e:
-        logger.error(
-            f"_manual_cleanup_worktree_directory: Error during path validation, aborting for safety. "
-            f"worktree_dir={worktree_dir}, error={e}"
-        )
-        return
-
-    # SAFETY CHECK 3: Additional verification - ensure it's not the worktree_dirs root itself
-    if worktree_dir == worktree_dirs_resolved:
-        logger.error(
-            f"_manual_cleanup_worktree_directory: Attempted to delete worktree_dirs root, aborting for safety. "
-            f"worktree_dir={worktree_dir}"
-        )
+    # Validate paths for safety
+    if not _validate_worktree_path_safety(worktree_dir):
         return
 
     logger.info(f"_manual_cleanup_worktree_directory: Starting manual directory removal. worktree_dir={worktree_dir}")
     
-    # On Windows, files may be locked by processes - retry with delays
+    # Attempt removal with retries on Windows
     is_windows = sys.platform == "win32"
-    max_cleanup_retries = 3 if is_windows else 1
-    cleanup_retry_delay = 0.5
+    max_retries = 3 if is_windows else 1
+    retry_delay = 0.5
     
-    # On Windows, try to remove read-only attributes before deletion
-    if is_windows:
-        try:
-            _remove_readonly_attributes_windows(worktree_dir)
-        except Exception as e:
+    for attempt in range(max_retries):
+        attempt_num = attempt + 1
+        
+        # Log retry attempts
+        if attempt_num > 1:
             logger.info(
-                f"_manual_cleanup_worktree_directory: Could not remove read-only attributes. "
-                f"worktree_dir={worktree_dir}, error={e}"
+                f"_manual_cleanup_worktree_directory: Retrying directory removal. "
+                f"attempt={attempt_num}, retry_delay={retry_delay}, worktree_dir={worktree_dir}"
             )
-    
-    for cleanup_attempt in range(max_cleanup_retries):
+            time.sleep(retry_delay)
+            retry_delay *= 2  # Exponential backoff
+        
+        # On Windows, use custom error handler to remove read-only attributes on the fly
+        # This is more efficient than pre-scanning the entire directory tree
+        error_handler = _create_windows_rmtree_error_handler() if is_windows else None
+        
         try:
-            cleanup_attempt_num = cleanup_attempt + 1
-            if cleanup_attempt_num > 1:
-                logger.info(
-                    f"_manual_cleanup_worktree_directory: Retrying directory removal. "
-                    f"attempt={cleanup_attempt_num}, retry_delay={cleanup_retry_delay}, worktree_dir={worktree_dir}"
-                )
-                time.sleep(cleanup_retry_delay)
-                cleanup_retry_delay *= 2  # Exponential backoff
-                # On retry, try removing read-only attributes again
-                if is_windows:
-                    try:
-                        _remove_readonly_attributes_windows(worktree_dir)
-                    except Exception:
-                        pass  # Ignore errors on retry
-            
-            # Try to remove the directory
-            # On Windows, use ignore_errors to handle locked files gracefully
-            shutil.rmtree(worktree_dir, ignore_errors=True)
+            if is_windows and error_handler:
+                shutil.rmtree(worktree_dir, onerror=error_handler)
+            else:
+                shutil.rmtree(worktree_dir, ignore_errors=True)
             
-            # Verify removal - wait a bit for Windows to release locks
+            # Brief wait on Windows to allow file handles to be released
             if is_windows:
-                # Longer wait on Windows to allow file handles to be released
-                wait_time = 0.3 if cleanup_attempt_num < max_cleanup_retries else 0.1
+                wait_time = 0.3 if attempt_num < max_retries else 0.1
                 time.sleep(wait_time)
             
+            # Check if removal was successful
             if not worktree_dir.exists():
                 logger.info(f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}")
                 return
-            elif cleanup_attempt_num < max_cleanup_retries:
-                # Directory still exists, will retry
+            
+            # Directory still exists
+            if attempt_num < max_retries:
                 logger.info(
                     f"_manual_cleanup_worktree_directory: Directory still exists, will retry. "
-                    f"attempt={cleanup_attempt_num}, worktree_dir={worktree_dir}"
+                    f"attempt={attempt_num}, worktree_dir={worktree_dir}"
                 )
             else:
-                # Last attempt failed
                 logger.warning(
-                    f"_manual_cleanup_worktree_directory: Directory still exists after all removal attempts. "
-                    f"attempts={cleanup_attempt_num}, worktree_dir={worktree_dir}. "
-                    f"This may indicate locked files that will be cleaned up later."
+                    f"_manual_cleanup_worktree_directory: Directory still exists after all attempts. "
+                    f"attempts={attempt_num}, worktree_dir={worktree_dir}. "
+                    f"Files may be locked and will be cleaned up later."
                 )
-                return
                 
         except Exception as e:
-            if cleanup_attempt_num < max_cleanup_retries:
+            if attempt_num < max_retries:
                 logger.info(
                     f"_manual_cleanup_worktree_directory: Exception during removal, will retry. "
-                    f"attempt={cleanup_attempt_num}, worktree_dir={worktree_dir}, error={e}"
+                    f"attempt={attempt_num}, error={e}"
                 )
             else:
                 logger.error(
-                    f"_manual_cleanup_worktree_directory: Failed to remove directory after all attempts. "
-                    f"attempts={cleanup_attempt_num}, worktree_dir={worktree_dir}, error={e}"
+                    f"_manual_cleanup_worktree_directory: Failed after all attempts. "
+                    f"attempts={attempt_num}, worktree_dir={worktree_dir}, error={e}"
                 )
-                return
 
 
-def _remove_readonly_attributes_windows(path: Path) -> None:
+def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
     """
-    Remove read-only attributes from files and directories on Windows.
-    
-    This helps with deletion of files that may have been marked read-only.
+    Validate that a path is safe to delete (must be under worktree_dirs).
     
     Args:
-        path: Path to the file or directory to process
+        worktree_dir: Path to validate
+        
+    Returns:
+        True if the path is safe to delete, False otherwise
     """
-    if not path.exists():
-        return
-    
+    # SAFETY CHECK 1: Resolve paths to absolute, normalized paths
+    try:
+        worktree_dir_resolved = worktree_dir.resolve()
+        worktree_dirs_resolved = worktree_dirs.resolve()
+    except (OSError, ValueError) as e:
+        logger.error(
+            f"_validate_worktree_path_safety: Failed to resolve paths, aborting for safety. "
+            f"worktree_dir={worktree_dir}, error={e}"
+        )
+        return False
+
+    # SAFETY CHECK 2: Ensure worktree_dir is a subdirectory of worktree_dirs
     try:
-        # Remove read-only attribute from the path itself
-        os.chmod(path, 0o777)
-    except (OSError, PermissionError):
-        pass  # Ignore errors - file may be locked
+        # Use relative_to to check if path is under worktree_dirs
+        worktree_dir_resolved.relative_to(worktree_dirs_resolved)
+    except ValueError:
+        logger.error(
+            f"_validate_worktree_path_safety: Path is not under worktree_dirs, aborting for safety. "
+            f"worktree_dir={worktree_dir_resolved}, worktree_dirs={worktree_dirs_resolved}"
+        )
+        return False
+
+    # SAFETY CHECK 3: Ensure it's not the worktree_dirs root itself
+    if worktree_dir_resolved == worktree_dirs_resolved:
+        logger.error(
+            f"_validate_worktree_path_safety: Attempted to delete worktree_dirs root, aborting for safety. "
+            f"worktree_dir={worktree_dir_resolved}"
+        )
+        return False
+
+    return True
+
+
+def _create_windows_rmtree_error_handler():
+    """
+    Create an error handler for shutil.rmtree that handles Windows-specific issues.
+    
+    This handler attempts to remove read-only attributes when encountering permission errors.
     
-    # Recursively process directory contents
-    if path.is_dir():
+    Returns:
+        A callable error handler for shutil.rmtree's onerror parameter
+    """
+    def handle_remove_error(func, path, exc_info):
+        """
+        Error handler for shutil.rmtree on Windows.
+        
+        Attempts to remove read-only attributes and retry the operation.
+        """
+        # Get the exception type
+        exc_type, exc_value, exc_traceback = exc_info
+        
+        # Only handle permission errors
+        if not isinstance(exc_value, (PermissionError, OSError)):
+            return
+        
         try:
-            for item in path.iterdir():
-                _remove_readonly_attributes_windows(item)
-        except (OSError, PermissionError):
-            pass  # Ignore errors - directory may be locked or inaccessible
+            # Try to change file permissions to make it writable
+            os.chmod(path, 0o777)
+            # Retry the failed operation
+            func(path)
+        except Exception:
+            # If it still fails, silently ignore (file is truly locked)
+            pass
+    
+    return handle_remove_error
 
 
 def create_diff_patch_from_worktree(

From 7404d01d8e101917e528788618c934b6843325ab Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 9 Dec 2025 10:49:52 +0200
Subject: [PATCH 15/26] clean up added logs

---
 codeflash/cli_cmds/console.py                |  2 -
 codeflash/code_utils/coverage_utils.py       | 28 +--------
 codeflash/discovery/discover_unit_tests.py   |  6 --
 codeflash/optimization/function_optimizer.py | 61 +-------------------
 codeflash/verification/test_runner.py        | 38 +-----------
 5 files changed, 6 insertions(+), 129 deletions(-)

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index 34585985f..c78b1b29a 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -142,8 +142,6 @@ def __init__(self) -> None:
 def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
     """Progress bar for test files."""
     if is_LSP_enabled():
-        lsp_log(LspTextMessage(text=description, takes_time=True))
-
         class DummyProgress:
             def advance(self, *args, **kwargs) -> None:
                 pass
diff --git a/codeflash/code_utils/coverage_utils.py b/codeflash/code_utils/coverage_utils.py
index 371f82d4c..dbbe56593 100644
--- a/codeflash/code_utils/coverage_utils.py
+++ b/codeflash/code_utils/coverage_utils.py
@@ -73,34 +73,8 @@ def prepare_coverage_files() -> tuple[Path, Path]:
     
     Returns tuple of (coverage_database_file, coverage_config_file).
     """
-    from codeflash.cli_cmds.console import logger
-    
-    logger.info("!lsp|coverage_utils.prepare_coverage_files: Starting coverage file preparation")
-    
-    logger.info("!lsp|coverage_utils.prepare_coverage_files: Getting coverage database file path")
     coverage_database_file = get_run_tmp_file(Path(".coverage"))
-    logger.info(f"!lsp|coverage_utils.prepare_coverage_files: Coverage database file: {coverage_database_file}")
-    
-    logger.info("!lsp|coverage_utils.prepare_coverage_files: Getting coverage config file path")
     coveragercfile = get_run_tmp_file(Path(".coveragerc"))
-    logger.info(f"!lsp|coverage_utils.prepare_coverage_files: Coverage config file: {coveragercfile}")
-    
     coveragerc_content = f"[run]\n branch = True\ndata_file={coverage_database_file}\n"
-    logger.info("!lsp|coverage_utils.prepare_coverage_files: Writing coverage config content")
-    logger.debug(f"coverage_utils.prepare_coverage_files: Config content:\n{coveragerc_content}")
-    
-    try:
-        coveragercfile.write_text(coveragerc_content)
-        logger.info("!lsp|coverage_utils.prepare_coverage_files: Coverage config file written successfully")
-    except Exception as e:
-        logger.error(
-            f"!lsp|coverage_utils.prepare_coverage_files: Failed to write coverage config file: "
-            f"{type(e).__name__}: {e}"
-        )
-        raise
-    
-    logger.info(
-        f"!lsp|coverage_utils.prepare_coverage_files: Coverage files prepared successfully. "
-        f"database={coverage_database_file}, config={coveragercfile}"
-    )
+    coveragercfile.write_text(coveragerc_content)
     return coverage_database_file, coveragercfile
diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index a42866c3f..a95c13f28 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -584,8 +584,6 @@ def discover_tests_pytest(
     project_root = cfg.project_root_path
 
     tmp_pickle_path = get_run_tmp_file("collected_tests.pkl")
-    
-    logger.info(f"!lsp|Starting pytest discovery. Cwd: {project_root}")
     discovery_script = Path(__file__).parent / "pytest_new_process_discovery.py"
 
     run_kwargs = {
@@ -612,12 +610,9 @@ def discover_tests_pytest(
                 **run_kwargs,
                 timeout=60,
             )
-            logger.info(f"!lsp|Pytest discovery finished with code {result.returncode}")
         except subprocess.TimeoutExpired:
-            logger.error("!lsp|Pytest discovery timed out after 60s")
             result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr="Timeout")
         except Exception as e:
-            logger.error(f"!lsp|Pytest discovery failed: {e}")
             result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr=str(e))
 
     try:
@@ -772,7 +767,6 @@ def process_test_files(
     jedi_project = jedi.Project(path=project_root_path, sys_path=jedi_sys_path)
 
     tests_cache = TestsCache(project_root_path)
-    logger.info("!lsp|Discovering tests and processing unit tests")
     with test_files_progress_bar(total=len(file_to_test_map), description="Processing test files") as (
         progress,
         task_id,
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index 1ccf81f11..457c6258d 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1597,56 +1597,23 @@ def establish_original_code_baseline(
         assert (test_framework := self.args.test_framework) in {"pytest", "unittest"}  # noqa: RUF018
         success = True
 
-        logger.info(
-            f"!lsp|function_optimizer.establish_baseline: Starting baseline establishment. "
-            f"function={self.function_to_optimize.function_name}, "
-            f"test_files_count={len(self.test_files.test_files)}, "
-            f"test_framework={test_framework}"
-        )
-        
         test_env = self.get_test_env(codeflash_loop_index=0, codeflash_test_iteration=0, codeflash_tracer_disable=1)
-        logger.info(
-            f"!lsp|function_optimizer.establish_baseline: Test environment created. "
-            f"CODEFLASH_TEST_ITERATION={test_env.get('CODEFLASH_TEST_ITERATION')}, "
-            f"CODEFLASH_LOOP_INDEX={test_env.get('CODEFLASH_LOOP_INDEX')}, "
-            f"PYTHONPATH={test_env.get('PYTHONPATH', 'not set')[:100]}..."
-        )
 
         if self.function_to_optimize.is_async:
-            logger.info("!lsp|function_optimizer.establish_baseline: Function is async, adding async decorator")
             from codeflash.code_utils.instrument_existing_tests import add_async_decorator_to_function
 
             success = add_async_decorator_to_function(
                 self.function_to_optimize.file_path, self.function_to_optimize, TestingMode.BEHAVIOR
             )
-            logger.info(f"!lsp|function_optimizer.establish_baseline: Async decorator added. success={success}")
 
         # Instrument codeflash capture
-        logger.info(
-            f"!lsp|function_optimizer.establish_baseline: About to enter progress_bar context. "
-            f"test_files={len(self.test_files.test_files)}, test_framework={test_framework}, "
-            f"function_name={self.function_to_optimize.function_name}"
-        )
         with progress_bar("Running tests to establish original code behavior..."):
             try:
-                logger.info("!lsp|function_optimizer.establish_baseline: Entered progress_bar context")
-                logger.info(
-                    f"!lsp|function_optimizer.establish_baseline: Calling instrument_codeflash_capture. "
-                    f"function={self.function_to_optimize.function_name}, "
-                    f"helper_classes={len(file_path_to_helper_classes)}"
-                )
                 instrument_codeflash_capture(
                     self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
                 )
-                logger.info("!lsp|function_optimizer.establish_baseline: instrument_codeflash_capture completed")
                 
                 total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE
-                logger.info(
-                    f"!lsp|function_optimizer.establish_baseline: About to call run_and_parse_tests. "
-                    f"testing_type=BEHAVIOR, enable_coverage={test_framework == 'pytest'}, "
-                    f"test_files_count={len(self.test_files.test_files)}, "
-                    f"testing_time={total_looping_time}"
-                )
                 behavioral_results, coverage_results = self.run_and_parse_tests(
                     testing_type=TestingMode.BEHAVIOR,
                     test_env=test_env,
@@ -1656,18 +1623,11 @@ def establish_original_code_baseline(
                     enable_coverage=test_framework == "pytest",
                     code_context=code_context,
                 )
-                logger.info(
-                    f"!lsp|function_optimizer.establish_baseline: run_and_parse_tests completed. "
-                    f"behavioral_results_count={len(behavioral_results) if behavioral_results else 0}, "
-                    f"coverage_results={'present' if coverage_results else 'None'}"
-                )
             finally:
-                logger.info("!lsp|function_optimizer.establish_baseline: Cleaning up - removing codeflash capture")
                 # Remove codeflash capture
                 self.write_code_and_helpers(
                     self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
                 )
-                logger.info("!lsp|function_optimizer.establish_baseline: Cleanup completed")
         if not behavioral_results:
             logger.warning(
                 f"force_lsp|Couldn't run any tests for original function {self.function_to_optimize.function_name}. Skipping optimization."
@@ -1964,21 +1924,10 @@ def run_and_parse_tests(
         unittest_loop_index: int | None = None,
         line_profiler_output_file: Path | None = None,
     ) -> tuple[TestResults | dict, CoverageData | None]:
-        logger.info(
-            f"!lsp|function_optimizer.run_and_parse_tests: Starting test execution. "
-            f"testing_type={testing_type}, optimization_iteration={optimization_iteration}, "
-            f"enable_coverage={enable_coverage}, test_files_count={len(test_files.test_files)}, "
-            f"testing_time={testing_time}"
-        )
         coverage_database_file = None
         coverage_config_file = None
         try:
             if testing_type == TestingMode.BEHAVIOR:
-                logger.info(
-                    f"!lsp|function_optimizer.run_and_parse_tests: Calling run_behavioral_tests. "
-                    f"test_framework={self.test_cfg.test_framework}, cwd={self.project_root}, "
-                    f"pytest_timeout={INDIVIDUAL_TESTCASE_TIMEOUT}"
-                )
                 result_file_path, run_result, coverage_database_file, coverage_config_file = run_behavioral_tests(
                     test_files,
                     test_framework=self.test_cfg.test_framework,
@@ -2016,15 +1965,9 @@ def run_and_parse_tests(
             else:
                 msg = f"Unexpected testing type: {testing_type}"
                 raise ValueError(msg)
-        except subprocess.TimeoutExpired as e:
-            test_files_str = ', '.join(str(f) for f in test_files.test_files)
-            logger.error(
-                f"!lsp|function_optimizer.run_and_parse_tests: Test execution timed out. "
-                f"testing_type={testing_type}, test_files=[{test_files_str}], "
-                f"timeout={e.timeout if hasattr(e, 'timeout') else 'unknown'}"
-            )
+        except subprocess.TimeoutExpired:
             logger.exception(
-                f"Error running tests in {test_files_str}.\nTimeout Error"
+                f"Error running tests in {', '.join(str(f) for f in test_files.test_files)}.\nTimeout Error"
             )
             return TestResults(), None
         if run_result.returncode != 0 and testing_type == TestingMode.BEHAVIOR:
diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index 1b96e26b8..fbeffe37a 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -31,8 +31,6 @@ def execute_test_subprocess(
     """
     is_windows = sys.platform == "win32"
 
-    logger.debug(f"execute_test_subprocess: Starting. platform={sys.platform}, timeout={timeout}s, cwd={cwd}")
-
     with custom_addopts():
         try:
             if is_windows:
@@ -74,7 +72,6 @@ def execute_test_subprocess(
                     stdout_content, stderr_content = process.communicate(timeout=timeout)
                     returncode = process.returncode
                 except subprocess.TimeoutExpired:
-                    logger.warning(f"execute_test_subprocess: Process timed out after {timeout}s, terminating...")
                     # On Windows, terminate the entire process tree
                     try:
                         process.kill()
@@ -86,41 +83,16 @@ def execute_test_subprocess(
                         cmd_list, timeout, output=stdout_content, stderr=stderr_content
                     )
 
-                logger.debug(
-                    f"execute_test_subprocess: Completed. returncode={returncode}, "
-                    f"stdout_len={len(stdout_content) if stdout_content else 0}, "
-                    f"stderr_len={len(stderr_content) if stderr_content else 0}"
-                )
-
-                # Log output for debugging
-                if returncode != 0:
-                    logger.warning(f"execute_test_subprocess: Non-zero return code: {returncode}")
-                    if stderr_content:
-                        logger.warning(f"execute_test_subprocess: stderr: {stderr_content[:2000]}")
-                    if stdout_content:
-                        logger.info(f"execute_test_subprocess: stdout: {stdout_content[:2000]}")
-                elif stdout_content or stderr_content:
-                    # Log a brief summary even on success for debugging
-                    if stderr_content:
-                        logger.debug(f"execute_test_subprocess: stderr preview: {stderr_content[:500]}")
-
                 return subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
             else:
                 # On Linux/Mac, use subprocess.run (works fine there)
                 result = subprocess.run(
                     cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False
                 )
-                logger.debug(
-                    f"execute_test_subprocess: Completed. returncode={result.returncode}, "
-                    f"stdout_len={len(result.stdout) if result.stdout else 0}, "
-                    f"stderr_len={len(result.stderr) if result.stderr else 0}"
-                )
                 return result
         except subprocess.TimeoutExpired:
-            logger.warning(f"execute_test_subprocess: TimeoutExpired after {timeout}s")
             raise
-        except Exception as e:
-            logger.error(f"execute_test_subprocess: Unexpected exception: {type(e).__name__}: {e}")
+        except Exception:
             raise
 
 
@@ -143,8 +115,6 @@ def run_behavioral_tests(
     """
     is_windows = sys.platform == "win32"
 
-    logger.debug(f"run_behavioral_tests: framework={test_framework}, enable_coverage={enable_coverage}")
-
     if test_framework == "pytest":
         test_files: list[str] = []
         for file in test_paths.test_files:
@@ -195,10 +165,8 @@ def run_behavioral_tests(
                 try:
                     if coverage_database_file.exists():
                         coverage_database_file.unlink()
-                except PermissionError as e:
-                    logger.warning(f"run_behavioral_tests: PermissionError deleting coverage database: {e}")
-                except Exception as e:
-                    logger.warning(f"run_behavioral_tests: Exception deleting coverage database: {e}")
+                except (PermissionError, Exception):
+                    pass
             else:
                 cov_erase = execute_test_subprocess(
                     shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30

From dff190dbd88049410ab5fcc44c737b5e5c7b8ec5 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 9 Dec 2025 14:41:45 +0200
Subject: [PATCH 16/26] fix conflict

---
 codeflash/verification/test_runner.py | 116 +++++---------------------
 1 file changed, 19 insertions(+), 97 deletions(-)

diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index fbeffe37a..a4f735603 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -104,7 +104,6 @@ def run_behavioral_tests(
     *,
     pytest_timeout: int | None = None,
     pytest_cmd: str = "pytest",
-    verbose: bool = False,
     pytest_target_runtime_seconds: int = TOTAL_LOOPING_TIME_EFFECTIVE,
     enable_coverage: bool = False,
 ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]:
@@ -115,11 +114,11 @@ def run_behavioral_tests(
     """
     is_windows = sys.platform == "win32"
 
-    if test_framework == "pytest":
+    if test_framework in {"pytest", "unittest"}:
         test_files: list[str] = []
         for file in test_paths.test_files:
             if file.test_type == TestType.REPLAY_TEST:
-                # TODO: Does this work for unittest framework?
+                # Replay tests need specific test targeting because one file contains tests for multiple functions
                 test_files.extend(
                     [
                         str(file.instrumented_behavior_file_path) + "::" + test.test_function
@@ -142,13 +141,14 @@ def run_behavioral_tests(
 
         common_pytest_args = [
             capture_mode,
-            f"--timeout={pytest_timeout}",
             "-q",
             "--codeflash_loops_scope=session",
             "--codeflash_min_loops=1",
             "--codeflash_max_loops=1",
             f"--codeflash_seconds={pytest_target_runtime_seconds}",
         ]
+        if pytest_timeout is not None:
+            common_pytest_args.insert(1, f"--timeout={pytest_timeout}")
 
         result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
         result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
@@ -205,18 +205,6 @@ def run_behavioral_tests(
                 env=pytest_test_env,
                 timeout=60,  # TODO: Make this dynamic
             )
-    elif test_framework == "unittest":
-        if enable_coverage:
-            msg = "Coverage is not supported yet for unittest framework"
-            raise ValueError(msg)
-        test_env["CODEFLASH_LOOP_INDEX"] = "1"
-        test_files = [file.instrumented_behavior_file_path for file in test_paths.test_files]
-        result_file_path, results = run_unittest_tests(
-            verbose=verbose, test_file_paths=test_files, test_env=test_env, cwd=cwd
-        )
-        logger.debug(
-            f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ""}"""
-        )
     else:
         msg = f"Unsupported test framework: {test_framework}"
         raise ValueError(msg)
@@ -237,42 +225,30 @@ def run_line_profile_tests(
     test_framework: str,
     *,
     pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
-    verbose: bool = False,
     pytest_timeout: int | None = None,
     pytest_min_loops: int = 5,  # noqa: ARG001
     pytest_max_loops: int = 100_000,  # noqa: ARG001
-    line_profiler_output_file: Path | None = None,
 ) -> tuple[Path, subprocess.CompletedProcess]:
-    if test_framework == "pytest":
+    if test_framework in {"pytest", "unittest"}:  # pytest runs both pytest and unittest tests
         pytest_cmd_list = (
             shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX)
             if pytest_cmd == "pytest"
             else shlex.split(pytest_cmd)
         )
-        test_files: list[str] = []
-        for file in test_paths.test_files:
-            if file.test_type in {TestType.REPLAY_TEST, TestType.EXISTING_UNIT_TEST} and file.tests_in_file:
-                test_files.extend(
-                    [
-                        str(file.benchmarking_file_path)
-                        + "::"
-                        + (test.test_class + "::" if test.test_class else "")
-                        + (test.test_function.split("[", 1)[0] if "[" in test.test_function else test.test_function)
-                        for test in file.tests_in_file
-                    ]
-                )
-            else:
-                test_files.append(str(file.benchmarking_file_path))
-        test_files = list(set(test_files))  # remove multiple calls in the same test function
+        # Always use file path - pytest discovers all tests including parametrized ones
+        test_files: list[str] = list(
+            {str(file.benchmarking_file_path) for file in test_paths.test_files}
+        )  # remove multiple calls in the same test function
         pytest_args = [
             "--capture=tee-sys",
-            f"--timeout={pytest_timeout}",
             "-q",
             "--codeflash_loops_scope=session",
             "--codeflash_min_loops=1",
             "--codeflash_max_loops=1",
             f"--codeflash_seconds={pytest_target_runtime_seconds}",
         ]
+        if pytest_timeout is not None:
+            pytest_args.insert(1, f"--timeout={pytest_timeout}")
         result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
         result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
         pytest_test_env = test_env.copy()
@@ -285,34 +261,10 @@ def run_line_profile_tests(
             env=pytest_test_env,
             timeout=60,  # TODO: Make this dynamic
         )
-    elif test_framework == "unittest":
-        test_env["CODEFLASH_LOOP_INDEX"] = "1"
-        test_env["LINE_PROFILE"] = "1"
-        test_files: list[str] = []
-        for file in test_paths.test_files:
-            if file.test_type in {TestType.REPLAY_TEST, TestType.EXISTING_UNIT_TEST} and file.tests_in_file:
-                test_files.extend(
-                    [
-                        str(file.benchmarking_file_path)
-                        + "::"
-                        + (test.test_class + "::" if test.test_class else "")
-                        + (test.test_function.split("[", 1)[0] if "[" in test.test_function else test.test_function)
-                        for test in file.tests_in_file
-                    ]
-                )
-            else:
-                test_files.append(str(file.benchmarking_file_path))
-        test_files = list(set(test_files))  # remove multiple calls in the same test function
-        line_profiler_output_file, results = run_unittest_tests(
-            verbose=verbose, test_file_paths=[Path(file) for file in test_files], test_env=test_env, cwd=cwd
-        )
-        logger.debug(
-            f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ""}"""
-        )
     else:
         msg = f"Unsupported test framework: {test_framework}"
         raise ValueError(msg)
-    return line_profiler_output_file, results
+    return result_file_path, results
 
 
 def run_benchmarking_tests(
@@ -323,41 +275,30 @@ def run_benchmarking_tests(
     test_framework: str,
     *,
     pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
-    verbose: bool = False,
     pytest_timeout: int | None = None,
     pytest_min_loops: int = 5,
     pytest_max_loops: int = 100_000,
 ) -> tuple[Path, subprocess.CompletedProcess]:
-    if test_framework == "pytest":
+    if test_framework in {"pytest", "unittest"}:  # pytest runs both pytest and unittest tests
         pytest_cmd_list = (
             shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX)
             if pytest_cmd == "pytest"
             else shlex.split(pytest_cmd)
         )
-        test_files: list[str] = []
-        for file in test_paths.test_files:
-            if file.test_type in {TestType.REPLAY_TEST, TestType.EXISTING_UNIT_TEST} and file.tests_in_file:
-                test_files.extend(
-                    [
-                        str(file.benchmarking_file_path)
-                        + "::"
-                        + (test.test_class + "::" if test.test_class else "")
-                        + (test.test_function.split("[", 1)[0] if "[" in test.test_function else test.test_function)
-                        for test in file.tests_in_file
-                    ]
-                )
-            else:
-                test_files.append(str(file.benchmarking_file_path))
-        test_files = list(set(test_files))  # remove multiple calls in the same test function
+        # Always use file path - pytest discovers all tests including parametrized ones
+        test_files: list[str] = list(
+            {str(file.benchmarking_file_path) for file in test_paths.test_files}
+        )  # remove multiple calls in the same test function
         pytest_args = [
             "--capture=tee-sys",
-            f"--timeout={pytest_timeout}",
             "-q",
             "--codeflash_loops_scope=session",
             f"--codeflash_min_loops={pytest_min_loops}",
             f"--codeflash_max_loops={pytest_max_loops}",
             f"--codeflash_seconds={pytest_target_runtime_seconds}",
         ]
+        if pytest_timeout is not None:
+            pytest_args.insert(1, f"--timeout={pytest_timeout}")
         result_file_path = get_run_tmp_file(Path("pytest_results.xml"))
         result_args = [f"--junitxml={result_file_path.as_posix()}", "-o", "junit_logging=all"]
         pytest_test_env = test_env.copy()
@@ -369,26 +310,7 @@ def run_benchmarking_tests(
             env=pytest_test_env,
             timeout=60,  # TODO: Make this dynamic
         )
-    elif test_framework == "unittest":
-        test_files = [file.benchmarking_file_path for file in test_paths.test_files]
-        result_file_path, results = run_unittest_tests(
-            verbose=verbose, test_file_paths=test_files, test_env=test_env, cwd=cwd
-        )
     else:
         msg = f"Unsupported test framework: {test_framework}"
         raise ValueError(msg)
     return result_file_path, results
-
-
-def run_unittest_tests(
-    *, verbose: bool, test_file_paths: list[Path], test_env: dict[str, str], cwd: Path
-) -> tuple[Path, subprocess.CompletedProcess]:
-    result_file_path = get_run_tmp_file(Path("unittest_results.xml"))
-    unittest_cmd_list = [SAFE_SYS_EXECUTABLE, "-m", "xmlrunner"]
-    log_level = ["-v"] if verbose else []
-    files = [str(file) for file in test_file_paths]
-    output_file = ["--output-file", str(result_file_path)]
-    results = execute_test_subprocess(
-        unittest_cmd_list + log_level + files + output_file, cwd=cwd, env=test_env, timeout=60
-    )
-    return result_file_path, results

From 04fbab9b80ac74b7dd51cd180084c2788734b3e2 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 9 Dec 2025 17:33:33 +0200
Subject: [PATCH 17/26] fix capture_mode for windows for failing tests

---
 codeflash/verification/test_runner.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index e63e78372..30de8667d 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -58,17 +58,16 @@ def execute_test_subprocess(
 
                 process = subprocess.Popen(
                     cmd_list,
-                    stdout=subprocess.PIPE,  # Capture stdout
-                    stderr=subprocess.PIPE,  # Capture stderr
-                    stdin=subprocess.DEVNULL,  # CRITICAL: Prevents child from waiting for input
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                    stdin=subprocess.DEVNULL,
                     cwd=cwd,
                     env=env,
-                    text=True,  # Return strings instead of bytes
-                    creationflags=creationflags,  # Windows-specific process group handling
+                    text=True,
+                    creationflags=creationflags,
                 )
 
                 try:
-                    # communicate() properly drains stdout/stderr avoiding deadlocks
                     stdout_content, stderr_content = process.communicate(timeout=timeout)
                     returncode = process.returncode
                 except subprocess.TimeoutExpired:
@@ -136,9 +135,9 @@ def run_behavioral_tests(
         )
         test_files = list(set(test_files))  # remove multiple calls in the same test function
 
-        # On Windows, use --capture=no to avoid subprocess output deadlocks
-        # On other platforms, use --capture=tee-sys to both capture and display output
-        capture_mode = "--capture=no" if is_windows else "--capture=tee-sys"
+        # On Windows, previously used --capture=no to avoid deadlocks with subprocess.run
+        # Now using Popen.communicate() which handles buffering correctly, so we can use capture=tee-sys
+        capture_mode = "--capture=tee-sys"
 
         common_pytest_args = [
             capture_mode,

From c44380166f3875dd25251e7b395ec91da6416485 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 9 Dec 2025 17:34:11 +0200
Subject: [PATCH 18/26] fix linting

---
 codeflash/verification/test_runner.py | 131 ++++++++++++--------------
 1 file changed, 60 insertions(+), 71 deletions(-)

diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index 30de8667d..1547d9bad 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
+import contextlib
 import shlex
 import subprocess
 import sys
 from pathlib import Path
 from typing import TYPE_CHECKING
-
-from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.code_utils import custom_addopts, get_run_tmp_file
 from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE
 from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE
@@ -23,8 +22,8 @@
 def execute_test_subprocess(
     cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
 ) -> subprocess.CompletedProcess:
-    """
-    Execute a subprocess with the given command list, working directory, environment variables, and timeout.
+    """Execute a subprocess with the given command list, working directory, environment variables, and timeout.
+
 
     On Windows, uses Popen with communicate() and process groups for proper cleanup.
     On other platforms, uses subprocess.run with capture_output.
@@ -32,67 +31,60 @@ def execute_test_subprocess(
     is_windows = sys.platform == "win32"
 
     with custom_addopts():
-        try:
-            if is_windows:
-                # WINDOWS SUBPROCESS FIX:
-                # On Windows, running pytest with coverage can hang indefinitely due to multiple issues:
-                #
-                # Problem 1: Pipe buffer deadlocks
-                #   - subprocess.run() with file handles can deadlock when the child process
-                #     produces output faster than the parent can read it
-                #   - Solution: Use Popen.communicate() which properly drains both stdout/stderr
-                #     concurrently using threads internally
-                #
-                # Problem 2: Child process waiting for stdin
-                #   - Some Windows processes (especially pytest) may wait for console input
-                #   - Solution: Use stdin=subprocess.DEVNULL to explicitly close stdin
-                #
-                # Problem 3: Orphaned child processes after timeout
-                #   - When killing a process on Windows, child processes may not be terminated
-                #   - Solution: Use CREATE_NEW_PROCESS_GROUP to allow proper process tree termination
-                #
-                # See: https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate
-
-                # CREATE_NEW_PROCESS_GROUP: Creates process in new group for proper termination
-                creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
-
-                process = subprocess.Popen(
-                    cmd_list,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    stdin=subprocess.DEVNULL,
-                    cwd=cwd,
-                    env=env,
-                    text=True,
-                    creationflags=creationflags,
-                )
-
-                try:
-                    stdout_content, stderr_content = process.communicate(timeout=timeout)
-                    returncode = process.returncode
-                except subprocess.TimeoutExpired:
-                    # On Windows, terminate the entire process tree
-                    try:
-                        process.kill()
-                    except OSError:
-                        pass
-                    # Drain remaining output after killing
-                    stdout_content, stderr_content = process.communicate(timeout=5)
-                    raise subprocess.TimeoutExpired(
-                        cmd_list, timeout, output=stdout_content, stderr=stderr_content
-                    )
+        if is_windows:
+            # WINDOWS SUBPROCESS FIX:
+            # On Windows, running pytest with coverage can hang indefinitely due to multiple issues:
+            #
+            # Problem 1: Pipe buffer deadlocks
+            #   - subprocess.run() with file handles can deadlock when the child process
+            #     produces output faster than the parent can read it
+            #   - Solution: Use Popen.communicate() which properly drains both stdout/stderr
+            #     concurrently using threads internally
+            #
+            # Problem 2: Child process waiting for stdin
+            #   - Some Windows processes (especially pytest) may wait for console input
+            #   - Solution: Use stdin=subprocess.DEVNULL to explicitly close stdin
+            #
+            # Problem 3: Orphaned child processes after timeout
+            #   - When killing a process on Windows, child processes may not be terminated
+            #   - Solution: Use CREATE_NEW_PROCESS_GROUP to allow proper process tree termination
+            #
+            # See: https://docs.python.org/3/library/subprocess.html#subprocess.Popen.communicate
+
+            # CREATE_NEW_PROCESS_GROUP: Creates process in new group for proper termination
+            creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
+
+            process = subprocess.Popen(
+                cmd_list,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                stdin=subprocess.DEVNULL,
+                cwd=cwd,
+                env=env,
+                text=True,
+                creationflags=creationflags,
+            )
 
-                return subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
-            else:
-                # On Linux/Mac, use subprocess.run (works fine there)
-                result = subprocess.run(
-                    cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False
-                )
-                return result
-        except subprocess.TimeoutExpired:
-            raise
-        except Exception:
-            raise
+            try:
+                stdout_content, stderr_content = process.communicate(timeout=timeout)
+                returncode = process.returncode
+            except subprocess.TimeoutExpired:
+                # On Windows, terminate the entire process tree
+                with contextlib.suppress(OSError):
+                    process.kill()
+
+                # Drain remaining output after killing
+                stdout_content, stderr_content = process.communicate(timeout=5)
+                raise subprocess.TimeoutExpired(
+                    cmd_list, timeout, output=stdout_content, stderr=stderr_content
+                ) from None
+
+            return subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
+
+        # On Linux/Mac, use subprocess.run (works fine there)
+        return subprocess.run(
+            cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False
+        )
 
 
 def run_behavioral_tests(
@@ -106,8 +98,7 @@ def run_behavioral_tests(
     pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
     enable_coverage: bool = False,
 ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]:
-    """
-    Run behavioral tests with optional coverage.
+    """Run behavioral tests with optional coverage.
 
     On Windows, uses --capture=no to avoid subprocess output deadlocks.
     """
@@ -162,13 +153,11 @@ def run_behavioral_tests(
             # On Windows, delete coverage database file directly instead of using 'coverage erase'
             # to avoid file locking issues
             if is_windows:
-                try:
-                    if coverage_database_file.exists():
+                if coverage_database_file.exists():
+                    with contextlib.suppress(PermissionError, OSError):
                         coverage_database_file.unlink()
-                except (PermissionError, Exception):
-                    pass
             else:
-                cov_erase = execute_test_subprocess(
+                execute_test_subprocess(
                     shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30
                 )
 

From 9336f3b4fba3d688fdbc48bc4628d15c97bf3db9 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 9 Dec 2025 17:42:19 +0200
Subject: [PATCH 19/26] fix linting

---
 codeflash/cli_cmds/console.py                |  1 +
 codeflash/code_utils/coverage_utils.py       |  5 +-
 codeflash/code_utils/git_worktree_utils.py   | 98 +++++++++++---------
 codeflash/discovery/discover_unit_tests.py   |  8 +-
 codeflash/optimization/function_optimizer.py |  2 +-
 codeflash/verification/test_runner.py        | 15 +--
 6 files changed, 63 insertions(+), 66 deletions(-)

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index c78b1b29a..4abe7522f 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -142,6 +142,7 @@ def __init__(self) -> None:
 def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
     """Progress bar for test files."""
     if is_LSP_enabled():
+
         class DummyProgress:
             def advance(self, *args, **kwargs) -> None:
                 pass
diff --git a/codeflash/code_utils/coverage_utils.py b/codeflash/code_utils/coverage_utils.py
index dbbe56593..a4cd1cd3a 100644
--- a/codeflash/code_utils/coverage_utils.py
+++ b/codeflash/code_utils/coverage_utils.py
@@ -68,9 +68,8 @@ def generate_candidates(source_code_path: Path) -> set[str]:
 
 
 def prepare_coverage_files() -> tuple[Path, Path]:
-    """
-    Prepare coverage configuration and output files.
-    
+    """Prepare coverage configuration and output files.
+
     Returns tuple of (coverage_database_file, coverage_config_file).
     """
     coverage_database_file = get_run_tmp_file(Path(".coverage"))
diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index 2dde68e02..b02230093 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -99,15 +99,15 @@ def create_detached_worktree(module_root: Path) -> Optional[Path]:
 
 
 def remove_worktree(worktree_dir: Path) -> None:
-    """
-    Remove a git worktree with robust error handling for Windows file locking issues.
-    
+    """Remove a git worktree with robust error handling for Windows file locking issues.
+
     This function handles Windows-specific issues where files may be locked by processes,
     causing 'Permission denied' errors. It implements retry logic with exponential backoff
     and falls back to manual directory removal if git worktree remove fails.
-    
+
     Args:
         worktree_dir: Path to the worktree directory to remove
+
     """
     if not worktree_dir or not worktree_dir.exists():
         logger.info(f"remove_worktree: Worktree does not exist, skipping removal. worktree_dir={worktree_dir}")
@@ -116,16 +116,20 @@ def remove_worktree(worktree_dir: Path) -> None:
     is_windows = sys.platform == "win32"
     max_retries = 3 if is_windows else 1
     retry_delay = 0.5  # Start with 500ms delay
-    
-    logger.info(f"remove_worktree: Starting worktree removal. worktree_dir={worktree_dir}, platform={sys.platform}, max_retries={max_retries}")
-    
+
+    logger.info(
+        f"remove_worktree: Starting worktree removal. worktree_dir={worktree_dir}, platform={sys.platform}, max_retries={max_retries}"
+    )
+
     # Try to get the repository and git root for worktree removal
     try:
         repository = git.Repo(worktree_dir, search_parent_directories=True)
         git_root = repository.working_dir or repository.git_dir
         logger.info(f"remove_worktree: Found repository. git_root={git_root}")
     except Exception as e:
-        logger.warning(f"remove_worktree: Could not access repository, attempting manual cleanup. worktree_dir={worktree_dir}, error={e}")
+        logger.warning(
+            f"remove_worktree: Could not access repository, attempting manual cleanup. worktree_dir={worktree_dir}, error={e}"
+        )
         # If we can't access the repository, try manual cleanup
         _manual_cleanup_worktree_directory(worktree_dir)
         return
@@ -134,14 +138,16 @@ def remove_worktree(worktree_dir: Path) -> None:
     for attempt in range(max_retries):
         try:
             attempt_num = attempt + 1
-            logger.info(f"remove_worktree: Attempting git worktree remove. attempt={attempt_num}, max_retries={max_retries}, worktree_dir={worktree_dir}")
+            logger.info(
+                f"remove_worktree: Attempting git worktree remove. attempt={attempt_num}, max_retries={max_retries}, worktree_dir={worktree_dir}"
+            )
             repository.git.worktree("remove", "--force", str(worktree_dir))
             logger.info(f"remove_worktree: Successfully removed worktree via git command. worktree_dir={worktree_dir}")
             return
         except git.exc.GitCommandError as e:
             error_msg = str(e).lower()
             is_permission_error = "permission denied" in error_msg or "access is denied" in error_msg
-            
+
             if is_permission_error and attempt < max_retries - 1:
                 # On Windows, file locks may be temporary - retry with exponential backoff
                 logger.info(
@@ -167,18 +173,18 @@ def remove_worktree(worktree_dir: Path) -> None:
     # Fallback: Try to remove worktree entry from git, then manually delete directory
     try:
         logger.info(f"remove_worktree: Attempting fallback cleanup. worktree_dir={worktree_dir}")
-        
+
         # Try to prune the worktree entry from git (this doesn't delete the directory)
         try:
             # Use git worktree prune to remove stale entries
             repository.git.worktree("prune")
-            logger.info(f"remove_worktree: Successfully pruned worktree entries")
+            logger.info("remove_worktree: Successfully pruned worktree entries")
         except Exception as prune_error:
             logger.info(f"remove_worktree: Could not prune worktree entries. error={prune_error}")
-        
+
         # Manually remove the directory
         _manual_cleanup_worktree_directory(worktree_dir)
-        
+
     except Exception as e:
         logger.error(
             f"remove_worktree: Failed to cleanup worktree directory after all attempts. "
@@ -187,22 +193,24 @@ def remove_worktree(worktree_dir: Path) -> None:
 
 
 def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
-    """
-    Manually remove a worktree directory, handling Windows file locking issues.
-    
+    """Manually remove a worktree directory, handling Windows file locking issues.
+
     This is a fallback method when git worktree remove fails. It uses shutil.rmtree
     with custom error handling for Windows-specific issues.
-    
+
     SAFETY: This function includes multiple safeguards to prevent accidental deletion:
     - Only deletes directories under the worktree_dirs cache location
     - Verifies the path is a worktree directory (not the original repo)
     - Uses resolve() to normalize paths and prevent path traversal attacks
-    
+
     Args:
         worktree_dir: Path to the worktree directory to remove
+
     """
     if not worktree_dir or not worktree_dir.exists():
-        logger.info(f"_manual_cleanup_worktree_directory: Directory does not exist, skipping. worktree_dir={worktree_dir}")
+        logger.info(
+            f"_manual_cleanup_worktree_directory: Directory does not exist, skipping. worktree_dir={worktree_dir}"
+        )
         return
 
     # Validate paths for safety
@@ -210,15 +218,15 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
         return
 
     logger.info(f"_manual_cleanup_worktree_directory: Starting manual directory removal. worktree_dir={worktree_dir}")
-    
+
     # Attempt removal with retries on Windows
     is_windows = sys.platform == "win32"
     max_retries = 3 if is_windows else 1
     retry_delay = 0.5
-    
+
     for attempt in range(max_retries):
         attempt_num = attempt + 1
-        
+
         # Log retry attempts
         if attempt_num > 1:
             logger.info(
@@ -227,27 +235,29 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
             )
             time.sleep(retry_delay)
             retry_delay *= 2  # Exponential backoff
-        
+
         # On Windows, use custom error handler to remove read-only attributes on the fly
         # This is more efficient than pre-scanning the entire directory tree
         error_handler = _create_windows_rmtree_error_handler() if is_windows else None
-        
+
         try:
             if is_windows and error_handler:
                 shutil.rmtree(worktree_dir, onerror=error_handler)
             else:
                 shutil.rmtree(worktree_dir, ignore_errors=True)
-            
+
             # Brief wait on Windows to allow file handles to be released
             if is_windows:
                 wait_time = 0.3 if attempt_num < max_retries else 0.1
                 time.sleep(wait_time)
-            
+
             # Check if removal was successful
             if not worktree_dir.exists():
-                logger.info(f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}")
+                logger.info(
+                    f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}"
+                )
                 return
-            
+
             # Directory still exists
             if attempt_num < max_retries:
                 logger.info(
@@ -260,7 +270,7 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
                     f"attempts={attempt_num}, worktree_dir={worktree_dir}. "
                     f"Files may be locked and will be cleaned up later."
                 )
-                
+
         except Exception as e:
             if attempt_num < max_retries:
                 logger.info(
@@ -275,14 +285,14 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
 
 
 def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
-    """
-    Validate that a path is safe to delete (must be under worktree_dirs).
-    
+    """Validate that a path is safe to delete (must be under worktree_dirs).
+
     Args:
         worktree_dir: Path to validate
-        
+
     Returns:
         True if the path is safe to delete, False otherwise
+
     """
     # SAFETY CHECK 1: Resolve paths to absolute, normalized paths
     try:
@@ -318,27 +328,27 @@ def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
 
 
 def _create_windows_rmtree_error_handler():
-    """
-    Create an error handler for shutil.rmtree that handles Windows-specific issues.
-    
+    """Create an error handler for shutil.rmtree that handles Windows-specific issues.
+
     This handler attempts to remove read-only attributes when encountering permission errors.
-    
+
     Returns:
         A callable error handler for shutil.rmtree's onerror parameter
+
     """
+
     def handle_remove_error(func, path, exc_info):
-        """
-        Error handler for shutil.rmtree on Windows.
-        
+        """Error handler for shutil.rmtree on Windows.
+
         Attempts to remove read-only attributes and retry the operation.
         """
         # Get the exception type
         exc_type, exc_value, exc_traceback = exc_info
-        
+
         # Only handle permission errors
         if not isinstance(exc_value, (PermissionError, OSError)):
             return
-        
+
         try:
             # Try to change file permissions to make it writable
             os.chmod(path, 0o777)
@@ -347,7 +357,7 @@ def handle_remove_error(func, path, exc_info):
         except Exception:
             # If it still fails, silently ignore (file is truly locked)
             pass
-    
+
     return handle_remove_error
 
 
diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index a95c13f28..d0c33d76f 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -600,13 +600,7 @@ def discover_tests_pytest(
     with custom_addopts():
         try:
             result = subprocess.run(
-                [
-                    SAFE_SYS_EXECUTABLE,
-                    discovery_script,
-                    str(project_root),
-                    str(tests_root),
-                    str(tmp_pickle_path),
-                ],
+                [SAFE_SYS_EXECUTABLE, discovery_script, str(project_root), str(tests_root), str(tmp_pickle_path)],
                 **run_kwargs,
                 timeout=60,
             )
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
index fcb267df3..6ff9fc53e 100644
--- a/codeflash/optimization/function_optimizer.py
+++ b/codeflash/optimization/function_optimizer.py
@@ -1708,7 +1708,7 @@ def establish_original_code_baseline(
                 instrument_codeflash_capture(
                     self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
                 )
-                
+
                 total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE
                 behavioral_results, coverage_results = self.run_and_parse_tests(
                     testing_type=TestingMode.BEHAVIOR,
diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
index 1547d9bad..9efb0cb93 100644
--- a/codeflash/verification/test_runner.py
+++ b/codeflash/verification/test_runner.py
@@ -6,6 +6,7 @@
 import sys
 from pathlib import Path
 from typing import TYPE_CHECKING
+
 from codeflash.code_utils.code_utils import custom_addopts, get_run_tmp_file
 from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE
 from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE
@@ -24,7 +25,6 @@ def execute_test_subprocess(
 ) -> subprocess.CompletedProcess:
     """Execute a subprocess with the given command list, working directory, environment variables, and timeout.
 
-
     On Windows, uses Popen with communicate() and process groups for proper cleanup.
     On other platforms, uses subprocess.run with capture_output.
     """
@@ -82,9 +82,7 @@ def execute_test_subprocess(
             return subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
 
         # On Linux/Mac, use subprocess.run (works fine there)
-        return subprocess.run(
-            cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False
-        )
+        return subprocess.run(cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False)
 
 
 def run_behavioral_tests(
@@ -178,12 +176,7 @@ def run_behavioral_tests(
             blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"]
 
             final_cmd = coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files
-            results = execute_test_subprocess(
-                final_cmd,
-                cwd=cwd,
-                env=pytest_test_env,
-                timeout=60,
-            )
+            results = execute_test_subprocess(final_cmd, cwd=cwd, env=pytest_test_env, timeout=600)
         else:
             blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS]
 
@@ -298,7 +291,7 @@ def run_benchmarking_tests(
             pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files,
             cwd=cwd,
             env=pytest_test_env,
-            timeout=60,  # TODO: Make this dynamic
+            timeout=600,  # TODO: Make this dynamic
         )
     else:
         msg = f"Unsupported test framework: {test_framework}"

From cc742fc75041d777cb439f7518245a896b6dfb1b Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Tue, 9 Dec 2025 18:18:52 +0200
Subject: [PATCH 20/26] fix pre-commit errors

---
 codeflash/cli_cmds/console.py              |  4 +--
 codeflash/code_utils/git_worktree_utils.py | 29 ++++++++++++++--------
 codeflash/discovery/discover_unit_tests.py | 24 +++++++++++++-----
 3 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
index 4abe7522f..9c7faeb1f 100644
--- a/codeflash/cli_cmds/console.py
+++ b/codeflash/cli_cmds/console.py
@@ -144,10 +144,10 @@ def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Pro
     if is_LSP_enabled():
 
         class DummyProgress:
-            def advance(self, *args, **kwargs) -> None:
+            def advance(self, *args: object, **kwargs: object) -> None:
                 pass
 
-        yield DummyProgress(), 0  # type: ignore
+        yield DummyProgress(), 0  # type: ignore[return-value]
         return
 
     with Progress(
diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index b02230093..c16bf2234 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -1,14 +1,16 @@
 from __future__ import annotations
 
 import configparser
-import os
 import shutil
 import subprocess
 import sys
 import tempfile
 import time
 from pathlib import Path
-from typing import Optional
+from typing import TYPE_CHECKING, Any, Optional
+
+if TYPE_CHECKING:
+    from collections.abc import Callable
 
 import git
 
@@ -143,7 +145,7 @@ def remove_worktree(worktree_dir: Path) -> None:
             )
             repository.git.worktree("remove", "--force", str(worktree_dir))
             logger.info(f"remove_worktree: Successfully removed worktree via git command. worktree_dir={worktree_dir}")
-            return
+            return  # noqa: TRY300
         except git.exc.GitCommandError as e:
             error_msg = str(e).lower()
             is_permission_error = "permission denied" in error_msg or "access is denied" in error_msg
@@ -327,7 +329,9 @@ def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
     return True
 
 
-def _create_windows_rmtree_error_handler():
+def _create_windows_rmtree_error_handler() -> Callable[
+    [Callable[[str], None], str, tuple[type[BaseException], BaseException, Any]], None
+]:
     """Create an error handler for shutil.rmtree that handles Windows-specific issues.
 
     This handler attempts to remove read-only attributes when encountering permission errors.
@@ -337,13 +341,15 @@ def _create_windows_rmtree_error_handler():
 
     """
 
-    def handle_remove_error(func, path, exc_info):
+    def handle_remove_error(
+        func: Callable[[str], None], path: str, exc_info: tuple[type[BaseException], BaseException, Any]
+    ) -> None:
         """Error handler for shutil.rmtree on Windows.
 
         Attempts to remove read-only attributes and retry the operation.
         """
         # Get the exception type
-        exc_type, exc_value, exc_traceback = exc_info
+        _exc_type, exc_value, _exc_traceback = exc_info
 
         # Only handle permission errors
         if not isinstance(exc_value, (PermissionError, OSError)):
@@ -351,12 +357,15 @@ def handle_remove_error(func, path, exc_info):
 
         try:
             # Try to change file permissions to make it writable
-            os.chmod(path, 0o777)
+            # Using permissive mask (0o777) is intentional for Windows file cleanup
+            Path(path).chmod(0o777)
             # Retry the failed operation
             func(path)
-        except Exception:
-            # If it still fails, silently ignore (file is truly locked)
-            pass
+        except Exception as e:
+            # If it still fails, log and ignore (file is truly locked)
+            logger.debug(
+                f"_create_windows_rmtree_error_handler: Failed to remove file after permission change. path={path}, error={e}"
+            )
 
     return handle_remove_error
 
diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index d0c33d76f..e6aedcf06 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -331,7 +331,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
             # Be conservative except when an alias is used (which requires exact method matching)
             for target_func in fnames:
                 if "." in target_func:
-                    class_name, method_name = target_func.split(".", 1)
+                    class_name, _method_name = target_func.split(".", 1)
                     if aname == class_name and not alias.asname:
                         self.found_any_target_function = True
                         self.found_qualified_name = target_func
@@ -588,10 +588,10 @@ def discover_tests_pytest(
 
     run_kwargs = {
         "cwd": project_root,
-        "check": False,
         "capture_output": True,
         "text": True,
         "stdin": subprocess.DEVNULL,
+        "timeout": 600,
     }
     if os.name == "nt":
         # Prevent console window spawning on Windows which can cause hangs in LSP
@@ -601,8 +601,8 @@ def discover_tests_pytest(
         try:
             result = subprocess.run(
                 [SAFE_SYS_EXECUTABLE, discovery_script, str(project_root), str(tests_root), str(tmp_pickle_path)],
+                check=False,
                 **run_kwargs,
-                timeout=60,
             )
         except subprocess.TimeoutExpired:
             result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr="Timeout")
@@ -610,11 +610,23 @@ def discover_tests_pytest(
             result = subprocess.CompletedProcess(args=[], returncode=-1, stdout="", stderr=str(e))
 
     try:
-        with tmp_pickle_path.open(mode="rb") as f:
-            exitcode, tests, pytest_rootdir = pickle.load(f)
+        # Check if pickle file exists before trying to read it
+        if not tmp_pickle_path.exists():
+            tests, pytest_rootdir = [], None
+            logger.warning(
+                f"Test discovery pickle file not found. "
+                f"Subprocess return code: {result.returncode}, stdout: {result.stdout}, stderr: {result.stderr}"
+            )
+            exitcode = result.returncode if result.returncode != 0 else -1
+        else:
+            with tmp_pickle_path.open(mode="rb") as f:
+                exitcode, tests, pytest_rootdir = pickle.load(f)
     except Exception as e:
         tests, pytest_rootdir = [], None
-        logger.exception(f"Failed to discover tests: {e}")
+        logger.exception(
+            f"Failed to discover tests: {e}. "
+            f"Subprocess return code: {result.returncode}, stdout: {result.stdout}, stderr: {result.stderr}"
+        )
         exitcode = -1
     finally:
         tmp_pickle_path.unlink(missing_ok=True)

From f824ea5877373c871bef599287fe6574c9b7aa51 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Wed, 10 Dec 2025 07:56:26 +0200
Subject: [PATCH 21/26] FIX for failing test
 test_function_discovery.py::test_filter_functions

---
 codeflash/discovery/functions_to_optimize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index b65ccf671..b8a27d463 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -731,7 +731,7 @@ def _resolve_path(path: Path | str) -> Path:
             non_modules_removed_count += len(_functions)
             continue
         try:
-            ast.parse(f"import {module_name_from_file_path(Path(file_path), project_root)}")
+            ast.parse(f"import {module_name_from_file_path(file_path_resolved, project_root)}")
         except SyntaxError:
             malformed_paths_count += 1
             continue

From e05aab24e80467b8fae2742a3d083648778ecd9e Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Wed, 10 Dec 2025 08:17:00 +0200
Subject: [PATCH 22/26] removing git worktree logs

---
 codeflash/code_utils/git_worktree_utils.py | 112 +++------------------
 1 file changed, 14 insertions(+), 98 deletions(-)

diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index c16bf2234..66030aa3b 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -14,7 +14,6 @@
 
 import git
 
-from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.compat import codeflash_cache_dir
 from codeflash.code_utils.git_utils import check_running_in_git_repo, git_root_dir
 
@@ -58,7 +57,6 @@ def create_worktree_snapshot_commit(worktree_dir: Path, commit_message: str) ->
 
 def create_detached_worktree(module_root: Path) -> Optional[Path]:
     if not check_running_in_git_repo(module_root):
-        logger.warning("Module is not in a git repository. Skipping worktree creation.")
         return None
     git_root = git_root_dir()
     current_time_str = time.strftime("%Y%m%d-%H%M%S")
@@ -76,7 +74,6 @@ def create_detached_worktree(module_root: Path) -> Optional[Path]:
     )
 
     if not uni_diff_text.strip():
-        logger.info("!lsp|No uncommitted changes to copy to worktree.")
         return worktree_dir
 
     # Write the diff to a temporary file
@@ -94,8 +91,8 @@ def create_detached_worktree(module_root: Path) -> Optional[Path]:
                 check=True,
             )
             create_worktree_snapshot_commit(worktree_dir, "Initial Snapshot")
-        except subprocess.CalledProcessError as e:
-            logger.error(f"Failed to apply patch to worktree: {e}")
+        except subprocess.CalledProcessError:
+            pass
 
         return worktree_dir
 
@@ -112,26 +109,17 @@ def remove_worktree(worktree_dir: Path) -> None:
 
     """
     if not worktree_dir or not worktree_dir.exists():
-        logger.info(f"remove_worktree: Worktree does not exist, skipping removal. worktree_dir={worktree_dir}")
         return
 
     is_windows = sys.platform == "win32"
     max_retries = 3 if is_windows else 1
     retry_delay = 0.5  # Start with 500ms delay
 
-    logger.info(
-        f"remove_worktree: Starting worktree removal. worktree_dir={worktree_dir}, platform={sys.platform}, max_retries={max_retries}"
-    )
-
     # Try to get the repository and git root for worktree removal
     try:
         repository = git.Repo(worktree_dir, search_parent_directories=True)
         git_root = repository.working_dir or repository.git_dir
-        logger.info(f"remove_worktree: Found repository. git_root={git_root}")
-    except Exception as e:
-        logger.warning(
-            f"remove_worktree: Could not access repository, attempting manual cleanup. worktree_dir={worktree_dir}, error={e}"
-        )
+    except Exception:
         # If we can't access the repository, try manual cleanup
         _manual_cleanup_worktree_directory(worktree_dir)
         return
@@ -140,11 +128,7 @@ def remove_worktree(worktree_dir: Path) -> None:
     for attempt in range(max_retries):
         try:
             attempt_num = attempt + 1
-            logger.info(
-                f"remove_worktree: Attempting git worktree remove. attempt={attempt_num}, max_retries={max_retries}, worktree_dir={worktree_dir}"
-            )
             repository.git.worktree("remove", "--force", str(worktree_dir))
-            logger.info(f"remove_worktree: Successfully removed worktree via git command. worktree_dir={worktree_dir}")
             return  # noqa: TRY300
         except git.exc.GitCommandError as e:
             error_msg = str(e).lower()
@@ -152,46 +136,28 @@ def remove_worktree(worktree_dir: Path) -> None:
 
             if is_permission_error and attempt < max_retries - 1:
                 # On Windows, file locks may be temporary - retry with exponential backoff
-                logger.info(
-                    f"remove_worktree: Permission denied, retrying. attempt={attempt_num}, "
-                    f"retry_delay={retry_delay}, worktree_dir={worktree_dir}, error={e}"
-                )
                 time.sleep(retry_delay)
                 retry_delay *= 2  # Exponential backoff
             else:
                 # Last attempt failed or non-permission error
-                logger.warning(
-                    f"remove_worktree: Git worktree remove failed, attempting fallback cleanup. "
-                    f"attempts={attempt_num}, worktree_dir={worktree_dir}, error={e}"
-                )
                 break
-        except Exception as e:
-            logger.warning(
-                f"remove_worktree: Unexpected error during git worktree remove, attempting fallback cleanup. "
-                f"worktree_dir={worktree_dir}, error={e}"
-            )
+        except Exception:
             break
 
     # Fallback: Try to remove worktree entry from git, then manually delete directory
     try:
-        logger.info(f"remove_worktree: Attempting fallback cleanup. worktree_dir={worktree_dir}")
-
         # Try to prune the worktree entry from git (this doesn't delete the directory)
         try:
             # Use git worktree prune to remove stale entries
             repository.git.worktree("prune")
-            logger.info("remove_worktree: Successfully pruned worktree entries")
-        except Exception as prune_error:
-            logger.info(f"remove_worktree: Could not prune worktree entries. error={prune_error}")
+        except Exception:
+            pass
 
         # Manually remove the directory
         _manual_cleanup_worktree_directory(worktree_dir)
 
-    except Exception as e:
-        logger.error(
-            f"remove_worktree: Failed to cleanup worktree directory after all attempts. "
-            f"worktree_dir={worktree_dir}, error={e}"
-        )
+    except Exception:
+        pass
 
 
 def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
@@ -210,17 +176,12 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
 
     """
     if not worktree_dir or not worktree_dir.exists():
-        logger.info(
-            f"_manual_cleanup_worktree_directory: Directory does not exist, skipping. worktree_dir={worktree_dir}"
-        )
         return
 
     # Validate paths for safety
     if not _validate_worktree_path_safety(worktree_dir):
         return
 
-    logger.info(f"_manual_cleanup_worktree_directory: Starting manual directory removal. worktree_dir={worktree_dir}")
-
     # Attempt removal with retries on Windows
     is_windows = sys.platform == "win32"
     max_retries = 3 if is_windows else 1
@@ -229,12 +190,7 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
     for attempt in range(max_retries):
         attempt_num = attempt + 1
 
-        # Log retry attempts
         if attempt_num > 1:
-            logger.info(
-                f"_manual_cleanup_worktree_directory: Retrying directory removal. "
-                f"attempt={attempt_num}, retry_delay={retry_delay}, worktree_dir={worktree_dir}"
-            )
             time.sleep(retry_delay)
             retry_delay *= 2  # Exponential backoff
 
@@ -255,35 +211,10 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
 
             # Check if removal was successful
             if not worktree_dir.exists():
-                logger.info(
-                    f"_manual_cleanup_worktree_directory: Successfully removed directory. worktree_dir={worktree_dir}"
-                )
                 return
 
-            # Directory still exists
-            if attempt_num < max_retries:
-                logger.info(
-                    f"_manual_cleanup_worktree_directory: Directory still exists, will retry. "
-                    f"attempt={attempt_num}, worktree_dir={worktree_dir}"
-                )
-            else:
-                logger.warning(
-                    f"_manual_cleanup_worktree_directory: Directory still exists after all attempts. "
-                    f"attempts={attempt_num}, worktree_dir={worktree_dir}. "
-                    f"Files may be locked and will be cleaned up later."
-                )
-
-        except Exception as e:
-            if attempt_num < max_retries:
-                logger.info(
-                    f"_manual_cleanup_worktree_directory: Exception during removal, will retry. "
-                    f"attempt={attempt_num}, error={e}"
-                )
-            else:
-                logger.error(
-                    f"_manual_cleanup_worktree_directory: Failed after all attempts. "
-                    f"attempts={attempt_num}, worktree_dir={worktree_dir}, error={e}"
-                )
+        except Exception:
+            pass
 
 
 def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
@@ -300,11 +231,7 @@ def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
     try:
         worktree_dir_resolved = worktree_dir.resolve()
         worktree_dirs_resolved = worktree_dirs.resolve()
-    except (OSError, ValueError) as e:
-        logger.error(
-            f"_validate_worktree_path_safety: Failed to resolve paths, aborting for safety. "
-            f"worktree_dir={worktree_dir}, error={e}"
-        )
+    except (OSError, ValueError):
         return False
 
     # SAFETY CHECK 2: Ensure worktree_dir is a subdirectory of worktree_dirs
@@ -312,18 +239,10 @@ def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
         # Use relative_to to check if path is under worktree_dirs
         worktree_dir_resolved.relative_to(worktree_dirs_resolved)
     except ValueError:
-        logger.error(
-            f"_validate_worktree_path_safety: Path is not under worktree_dirs, aborting for safety. "
-            f"worktree_dir={worktree_dir_resolved}, worktree_dirs={worktree_dirs_resolved}"
-        )
         return False
 
     # SAFETY CHECK 3: Ensure it's not the worktree_dirs root itself
     if worktree_dir_resolved == worktree_dirs_resolved:
-        logger.error(
-            f"_validate_worktree_path_safety: Attempted to delete worktree_dirs root, aborting for safety. "
-            f"worktree_dir={worktree_dir_resolved}"
-        )
         return False
 
     return True
@@ -361,11 +280,9 @@ def handle_remove_error(
             Path(path).chmod(0o777)
             # Retry the failed operation
             func(path)
-        except Exception as e:
-            # If it still fails, log and ignore (file is truly locked)
-            logger.debug(
-                f"_create_windows_rmtree_error_handler: Failed to remove file after permission change. path={path}, error={e}"
-            )
+        except Exception:
+            # If it still fails, silently ignore (file is truly locked)
+            pass
 
     return handle_remove_error
 
@@ -377,7 +294,6 @@ def create_diff_patch_from_worktree(
     uni_diff_text = repository.git.diff(None, "HEAD", *files, ignore_blank_lines=True, ignore_space_at_eol=True)
 
     if not uni_diff_text:
-        logger.warning("No changes found in worktree.")
         return None
 
     if not uni_diff_text.endswith("\n"):

From a12f0028b611da2db58b5c41dea1444e91574c9c Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Wed, 10 Dec 2025 08:27:45 +0200
Subject: [PATCH 23/26] fix linting after removing logs

---
 codeflash/code_utils/git_worktree_utils.py | 27 ++++++++--------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/codeflash/code_utils/git_worktree_utils.py b/codeflash/code_utils/git_worktree_utils.py
index 66030aa3b..613379693 100644
--- a/codeflash/code_utils/git_worktree_utils.py
+++ b/codeflash/code_utils/git_worktree_utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import configparser
+import contextlib
 import shutil
 import subprocess
 import sys
@@ -118,7 +119,6 @@ def remove_worktree(worktree_dir: Path) -> None:
     # Try to get the repository and git root for worktree removal
     try:
         repository = git.Repo(worktree_dir, search_parent_directories=True)
-        git_root = repository.working_dir or repository.git_dir
     except Exception:
         # If we can't access the repository, try manual cleanup
         _manual_cleanup_worktree_directory(worktree_dir)
@@ -127,7 +127,6 @@ def remove_worktree(worktree_dir: Path) -> None:
     # Attempt to remove worktree using git command with retries
     for attempt in range(max_retries):
         try:
-            attempt_num = attempt + 1
             repository.git.worktree("remove", "--force", str(worktree_dir))
             return  # noqa: TRY300
         except git.exc.GitCommandError as e:
@@ -145,20 +144,15 @@ def remove_worktree(worktree_dir: Path) -> None:
             break
 
     # Fallback: Try to remove worktree entry from git, then manually delete directory
-    try:
+    with contextlib.suppress(Exception):
         # Try to prune the worktree entry from git (this doesn't delete the directory)
-        try:
-            # Use git worktree prune to remove stale entries
-            repository.git.worktree("prune")
-        except Exception:
-            pass
+        # Use git worktree prune to remove stale entries
+        repository.git.worktree("prune")
 
-        # Manually remove the directory
+    # Manually remove the directory (always attempt, even if prune failed)
+    with contextlib.suppress(Exception):
         _manual_cleanup_worktree_directory(worktree_dir)
 
-    except Exception:
-        pass
-
 
 def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
     """Manually remove a worktree directory, handling Windows file locking issues.
@@ -213,7 +207,7 @@ def _manual_cleanup_worktree_directory(worktree_dir: Path) -> None:
             if not worktree_dir.exists():
                 return
 
-        except Exception:
+        except Exception:  # noqa: S110
             pass
 
 
@@ -242,10 +236,7 @@ def _validate_worktree_path_safety(worktree_dir: Path) -> bool:
         return False
 
     # SAFETY CHECK 3: Ensure it's not the worktree_dirs root itself
-    if worktree_dir_resolved == worktree_dirs_resolved:
-        return False
-
-    return True
+    return worktree_dir_resolved != worktree_dirs_resolved
 
 
 def _create_windows_rmtree_error_handler() -> Callable[
@@ -280,7 +271,7 @@ def handle_remove_error(
             Path(path).chmod(0o777)
             # Retry the failed operation
             func(path)
-        except Exception:
+        except Exception:  # noqa: S110
             # If it still fails, silently ignore (file is truly locked)
             pass
 

From 52f19e441d6cf7bcd5805217dd6890640a2c472a Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Wed, 10 Dec 2025 08:32:18 +0200
Subject: [PATCH 24/26] Fixed inconsistent path resolution which can cause
 relative_to() checks to fail in CI

---
 codeflash/discovery/functions_to_optimize.py | 33 ++++++++++++++------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
index b8a27d463..b53342e5b 100644
--- a/codeflash/discovery/functions_to_optimize.py
+++ b/codeflash/discovery/functions_to_optimize.py
@@ -665,24 +665,39 @@ def _resolve_path(path: Path | str) -> Path:
         # Use strict=False so we don't fail on paths that don't exist yet (e.g. worktree paths)
         return Path(path).resolve(strict=False)
 
+    def _resolve_path_consistent(path: Path | str) -> Path:
+        """Resolve path consistently: use strict resolution if path exists, otherwise non-strict."""
+        path_obj = Path(path)
+        if path_obj.exists():
+            try:
+                return path_obj.resolve()
+            except (OSError, RuntimeError):
+                return _resolve_path(path)
+        return _resolve_path(path)
+
     # Resolve all root paths to absolute paths for consistent comparison
-    tests_root_resolved = _resolve_path(tests_root)
-    module_root_resolved = _resolve_path(module_root)
+    # Use consistent resolution: strict for existing paths, non-strict for non-existent
+    tests_root_resolved = _resolve_path_consistent(tests_root)
+    module_root_resolved = _resolve_path_consistent(module_root)
 
     # Resolve ignore paths and submodule paths
-    ignore_paths_resolved = [_resolve_path(p) for p in ignore_paths]
-    submodule_paths_resolved = [_resolve_path(p) for p in submodule_paths]
+    ignore_paths_resolved = [_resolve_path_consistent(p) for p in ignore_paths]
+    submodule_paths_resolved = [_resolve_path_consistent(p) for p in submodule_paths]
 
     # We desperately need Python 3.10+ only support to make this code readable with structural pattern matching
     for file_path_path, functions in modified_functions.items():
         _functions = functions
         # Resolve file path to absolute path
         # Convert to Path if it's a string (e.g., from get_functions_within_git_diff)
-        file_path_obj = _resolve_path(file_path_path)
-        try:
-            file_path_resolved = file_path_obj.resolve()
-        except (OSError, RuntimeError):
-            file_path_resolved = file_path_obj.absolute() if not file_path_obj.is_absolute() else file_path_obj
+        file_path_obj = Path(file_path_path)
+        # Use consistent resolution: try strict first (for existing files), fall back to non-strict
+        if file_path_obj.exists():
+            try:
+                file_path_resolved = file_path_obj.resolve()
+            except (OSError, RuntimeError):
+                file_path_resolved = _resolve_path(file_path_path)
+        else:
+            file_path_resolved = _resolve_path(file_path_path)
 
         file_path = str(file_path_obj)
 

From bc779ff2bf8ea23efc97a68642adc113ed5a444d Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Wed, 10 Dec 2025 08:54:07 +0200
Subject: [PATCH 25/26] Updated trace_benchmarks_pytest to use the same
 Windows-safe subprocess handling as execute_test_subprocess

---
 codeflash/benchmarking/trace_benchmarks.py | 61 ++++++++++++++++------
 1 file changed, 46 insertions(+), 15 deletions(-)

diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py
index e59b06656..7b923239e 100644
--- a/codeflash/benchmarking/trace_benchmarks.py
+++ b/codeflash/benchmarking/trace_benchmarks.py
@@ -1,8 +1,10 @@
 from __future__ import annotations
 
+import contextlib
 import os
 import re
 import subprocess
+import sys
 from pathlib import Path
 
 from codeflash.cli_cmds.console import logger
@@ -17,21 +19,50 @@ def trace_benchmarks_pytest(
         benchmark_env["PYTHONPATH"] = str(project_root)
     else:
         benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root)
-    result = subprocess.run(
-        [
-            SAFE_SYS_EXECUTABLE,
-            Path(__file__).parent / "pytest_new_process_trace_benchmarks.py",
-            benchmarks_root,
-            tests_root,
-            trace_file,
-        ],
-        cwd=project_root,
-        check=False,
-        capture_output=True,
-        text=True,
-        env=benchmark_env,
-        timeout=timeout,
-    )
+    
+    is_windows = sys.platform == "win32"
+    cmd_list = [
+        SAFE_SYS_EXECUTABLE,
+        Path(__file__).parent / "pytest_new_process_trace_benchmarks.py",
+        benchmarks_root,
+        tests_root,
+        trace_file,
+    ]
+    
+    if is_windows:
+        # Use Windows-safe subprocess handling to avoid file locking issues
+        creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
+        process = subprocess.Popen(
+            cmd_list,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            stdin=subprocess.DEVNULL,
+            cwd=project_root,
+            env=benchmark_env,
+            text=True,
+            creationflags=creationflags,
+        )
+        try:
+            stdout_content, stderr_content = process.communicate(timeout=timeout)
+            returncode = process.returncode
+        except subprocess.TimeoutExpired:
+            with contextlib.suppress(OSError):
+                process.kill()
+            stdout_content, stderr_content = process.communicate(timeout=5)
+            raise subprocess.TimeoutExpired(
+                cmd_list, timeout, output=stdout_content, stderr=stderr_content
+            ) from None
+        result = subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
+    else:
+        result = subprocess.run(
+            cmd_list,
+            cwd=project_root,
+            check=False,
+            capture_output=True,
+            text=True,
+            env=benchmark_env,
+            timeout=timeout,
+        )
     if result.returncode != 0:
         if "ERROR collecting" in result.stdout:
             # Pattern matches "===== ERRORS =====" (any number of =) and captures everything after

From ac35af3e87fe2772212d820374b9493a754e5e19 Mon Sep 17 00:00:00 2001
From: Mohamed Ashraf <ashraf@codeflash.ai>
Date: Wed, 10 Dec 2025 08:54:30 +0200
Subject: [PATCH 26/26] pre-commit fixes

---
 codeflash/benchmarking/trace_benchmarks.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py
index 7b923239e..276a61948 100644
--- a/codeflash/benchmarking/trace_benchmarks.py
+++ b/codeflash/benchmarking/trace_benchmarks.py
@@ -19,7 +19,7 @@ def trace_benchmarks_pytest(
         benchmark_env["PYTHONPATH"] = str(project_root)
     else:
         benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root)
-    
+
     is_windows = sys.platform == "win32"
     cmd_list = [
         SAFE_SYS_EXECUTABLE,
@@ -28,7 +28,7 @@ def trace_benchmarks_pytest(
         tests_root,
         trace_file,
     ]
-    
+
     if is_windows:
         # Use Windows-safe subprocess handling to avoid file locking issues
         creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
@@ -49,19 +49,11 @@ def trace_benchmarks_pytest(
             with contextlib.suppress(OSError):
                 process.kill()
             stdout_content, stderr_content = process.communicate(timeout=5)
-            raise subprocess.TimeoutExpired(
-                cmd_list, timeout, output=stdout_content, stderr=stderr_content
-            ) from None
+            raise subprocess.TimeoutExpired(cmd_list, timeout, output=stdout_content, stderr=stderr_content) from None
         result = subprocess.CompletedProcess(cmd_list, returncode, stdout_content, stderr_content)
     else:
         result = subprocess.run(
-            cmd_list,
-            cwd=project_root,
-            check=False,
-            capture_output=True,
-            text=True,
-            env=benchmark_env,
-            timeout=timeout,
+            cmd_list, cwd=project_root, check=False, capture_output=True, text=True, env=benchmark_env, timeout=timeout
         )
     if result.returncode != 0:
         if "ERROR collecting" in result.stdout: