enhancements

mohammedahmed18 · mohammedahmed18 · commit ae080d01d43b · 2025-12-11T16:28:02.000+02:00
diff --git a/codeflash/code_utils/config_consts.py b/codeflash/code_utils/config_consts.py
@@ -21,7 +21,7 @@
 N_CANDIDATES_LP_LSP = 3
 
 # Code repair
-REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.35  # if the percentage of unmatched tests is greater than this, we won't fix it
+REPAIR_UNMATCHED_PERCENTAGE_LIMIT = 0.4  # if the fraction of unmatched tests (0.0-1.0) is greater than this, we won't fix it (lowering this value makes the repair stricter)
 MAX_REPAIRS_PER_TRACE = 3  # maximum number of repairs we will do for each function
 
 MAX_N_CANDIDATES = 5
diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -1854,6 +1854,46 @@ def get_results_not_matched_error(self) -> Failure:
         console.rule()
         return Failure("Test results did not match the test results of the original code.")
 
+    def repair_if_possible(
+        self,
+        candidate: OptimizedCandidate,
+        diffs: list[TestDiff],
+        code_context: CodeOptimizationContext,
+        test_results_count: int,
+        exp_type: str,
+    ) -> None:
+        if self.repair_counter >= MAX_REPAIRS_PER_TRACE:
+            logger.debug(f"Repair counter reached {MAX_REPAIRS_PER_TRACE}, skipping repair")
+            return
+        if candidate.source == OptimizedCandidateSource.REPAIR:
+            logger.debug("Candidate is already a repair, skipping repair")
+            return
+        if not diffs:
+            logger.debug("No diffs found, skipping repair")
+            return
+        result_unmatched_perc = len(diffs) / test_results_count
+        if result_unmatched_perc > REPAIR_UNMATCHED_PERCENTAGE_LIMIT:
+            logger.debug(f"Result unmatched percentage is {result_unmatched_perc * 100}%, skipping repair")
+            return
+
+        logger.debug(
+            f"Adding a candidate for repair, with {len(diffs)} diffs, ({result_unmatched_perc * 100}% unmatched)"
+        )
+        # start repairing
+        ai_service_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client
+        self.repair_counter += 1
+        self.future_all_code_repair.append(
+            self.repair_optimization(
+                original_source_code=code_context.read_writable_code.markdown,
+                modified_source_code=candidate.source_code.markdown,
+                test_diffs=diffs,
+                trace_id=self.function_trace_id[:-4] + exp_type if self.experiment_id else self.function_trace_id,
+                ai_service_client=ai_service_client,
+                optimization_id=candidate.optimization_id,
+                executor=self.executor,
+            )
+        )
+
     def run_optimized_candidate(
         self,
         *,
@@ -1919,36 +1959,7 @@ def run_optimized_candidate(
                 logger.info("h3|Test results matched ✅")
                 console.rule()
             else:
-
-                def repair_if_possible() -> None:
-                    if self.repair_counter >= MAX_REPAIRS_PER_TRACE:
-                        return
-
-                    result_unmatched_perc = len(diffs) / len(candidate_behavior_results)
-                    if (
-                        candidate.source == OptimizedCandidateSource.REPAIR
-                        or result_unmatched_perc > REPAIR_UNMATCHED_PERCENTAGE_LIMIT
-                    ):
-                        return
-
-                    ai_service_client = self.aiservice_client if exp_type == "EXP0" else self.local_aiservice_client
-                    logger.info("Adding this to the repair queue")
-                    self.repair_counter += 1
-                    self.future_all_code_repair.append(
-                        self.repair_optimization(
-                            original_source_code=code_context.read_writable_code.markdown,
-                            modified_source_code=candidate.source_code.markdown,
-                            test_diffs=diffs,
-                            trace_id=self.function_trace_id[:-4] + exp_type
-                            if self.experiment_id
-                            else self.function_trace_id,
-                            ai_service_client=ai_service_client,
-                            optimization_id=candidate.optimization_id,
-                            executor=self.executor,
-                        )
-                    )
-
-                repair_if_possible()
+                self.repair_if_possible(candidate, diffs, code_context, len(candidate_behavior_results), exp_type)
                 return self.get_results_not_matched_error()
 
             logger.info(f"loading|Running performance tests for candidate {optimization_candidate_index}...")
diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py
@@ -63,20 +63,19 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
             else ""
         )
 
-        test_src_code = original_test_result.id.get_src_code(original_test_result.file_name)
-        test_diff = TestDiff(
-            scope=TestDiffScope.RETURN_VALUE,
-            original_value=repr(original_test_result.return_value),
-            candidate_value=repr(cdd_test_result.return_value),
-            test_src_code=test_src_code,
-            candidate_pytest_error=cdd_pytest_error,
-            original_pass=original_test_result.did_pass,
-            candidate_pass=cdd_test_result.did_pass,
-            original_pytest_error=original_pytest_error,
-        )
         if not comparator(original_test_result.return_value, cdd_test_result.return_value, superset_obj=superset_obj):
-            test_diff.scope = TestDiffScope.RETURN_VALUE
-            test_diffs.append(test_diff)
+            test_diffs.append(
+                TestDiff(
+                    scope=TestDiffScope.RETURN_VALUE,
+                    original_value=repr(original_test_result.return_value),
+                    candidate_value=repr(cdd_test_result.return_value),
+                    test_src_code=original_test_result.id.get_src_code(original_test_result.file_name),
+                    candidate_pytest_error=cdd_pytest_error,
+                    original_pass=original_test_result.did_pass,
+                    candidate_pass=cdd_test_result.did_pass,
+                    original_pytest_error=original_pytest_error,
+                )
+            )
 
             try:
                 logger.debug(
@@ -92,21 +91,37 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR
         elif (original_test_result.stdout and cdd_test_result.stdout) and not comparator(
             original_test_result.stdout, cdd_test_result.stdout
         ):
-            test_diff.scope = TestDiffScope.STDOUT
-            test_diff.original_value = str(original_test_result.stdout)
-            test_diff.candidate_value = str(cdd_test_result.stdout)
-            test_diffs.append(test_diff)
+            test_diffs.append(
+                TestDiff(
+                    scope=TestDiffScope.STDOUT,
+                    original_value=str(original_test_result.stdout),
+                    candidate_value=str(cdd_test_result.stdout),
+                    test_src_code=original_test_result.id.get_src_code(original_test_result.file_name),
+                    candidate_pytest_error=cdd_pytest_error,
+                    original_pass=original_test_result.did_pass,
+                    candidate_pass=cdd_test_result.did_pass,
+                    original_pytest_error=original_pytest_error,
+                )
+            )
 
         elif original_test_result.test_type in {
             TestType.EXISTING_UNIT_TEST,
             TestType.CONCOLIC_COVERAGE_TEST,
             TestType.GENERATED_REGRESSION,
             TestType.REPLAY_TEST,
         } and (cdd_test_result.did_pass != original_test_result.did_pass):
-            test_diff.scope = TestDiffScope.DID_PASS
-            test_diff.original_value = str(original_test_result.did_pass)
-            test_diff.candidate_value = str(cdd_test_result.did_pass)
-            test_diffs.append(test_diff)
+            test_diffs.append(
+                TestDiff(
+                    scope=TestDiffScope.DID_PASS,
+                    original_value=str(original_test_result.did_pass),
+                    candidate_value=str(cdd_test_result.did_pass),
+                    test_src_code=original_test_result.id.get_src_code(original_test_result.file_name),
+                    candidate_pytest_error=cdd_pytest_error,
+                    original_pass=original_test_result.did_pass,
+                    candidate_pass=cdd_test_result.did_pass,
+                    original_pytest_error=original_pytest_error,
+                )
+            )
 
     sys.setrecursionlimit(original_recursion_limit)
     if did_all_timeout: