1 change: 1 addition & 0 deletions codeflash/code_utils/config_consts.py
@@ -8,6 +8,7 @@
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
N_TESTS_TO_GENERATE = 2
TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget
CONSISTENT_LOOP_COUNT = 3
COVERAGE_THRESHOLD = 60.0
MIN_TESTCASE_PASSED_THRESHOLD = 6
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
1 change: 0 additions & 1 deletion codeflash/code_utils/env_utils.py
@@ -19,7 +19,6 @@
def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool: # noqa
if not formatter_cmds or formatter_cmds[0] == "disabled":
return True

first_cmd = formatter_cmds[0]
cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]

54 changes: 49 additions & 5 deletions codeflash/verification/pytest_plugin.py
@@ -2,15 +2,17 @@

import contextlib
import inspect

# System Imports
import logging
import os
import platform
import re
import statistics
import sys
import time as _time_module
import warnings
from collections import deque

# System Imports
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
from unittest import TestCase
@@ -19,6 +21,8 @@
import pytest
from pluggy import HookspecMarker

from codeflash.code_utils.config_consts import CONSISTENT_LOOP_COUNT

if TYPE_CHECKING:
from _pytest.config import Config, Parser
from _pytest.main import Session
@@ -268,9 +272,30 @@ def __init__(self, config: Config) -> None:
level = logging.DEBUG if config.option.verbose > 1 else logging.INFO
logging.basicConfig(level=level)
self.logger = logging.getLogger(self.name)
self.current_loop_durations_in_seconds: list[float] = []

def dynamic_tolerance(self, avg: float) -> float:
if avg < 0.0001: # < 100 µs
return 0.7
if avg < 0.0005: # < 500 µs
return 0.5
if avg < 0.001: # < 1 ms
return 0.4
if avg < 0.01: # < 10 ms
return 0.2
if avg < 0.1: # < 100 ms
return 0.1
return 0.03 # > 0.1 s
aseembits93 (Contributor):
@mohammedahmed18 how did you set these numbers?

mohammedahmed18 (Contributor Author):

@aseembits93
these are just experimental numbers; the whole idea is that very small runtimes should have a high tolerance value, because the noise is relatively large for them.
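
A rough sketch of that intuition, assuming a hypothetical fixed ~20 µs of per-loop timing jitter (the jitter figure is illustrative only, not something the plugin measures): a fixed amount of noise is a large fraction of a short runtime and a negligible fraction of a long one, which is why the tolerance shrinks as the average duration grows.

# Illustrative only: JITTER_S is an assumed per-loop noise figure, not measured by the plugin.
JITTER_S = 20e-6  # hypothetical 20 µs of timing noise per loop

# average loop duration -> tolerance returned by dynamic_tolerance() in this PR
for avg_s, tol in [(50e-6, 0.7), (300e-6, 0.5), (800e-6, 0.4), (5e-3, 0.2), (50e-3, 0.1), (0.5, 0.03)]:
    relative_noise = JITTER_S / avg_s
    print(f"avg={avg_s * 1e3:9.3f} ms  relative noise ~ {relative_noise:8.3%}  tolerance={tol}")

At 50 µs the assumed jitter is ~40% of the runtime, so only a very loose tolerance avoids looping forever; at 500 ms it is ~0.004%, so 3% is already generous.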


@pytest.hookimpl
def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
if report.when == "call" and report.outcome == "passed":
self.current_loop_durations_in_seconds.append(report.duration)

@hookspec(firstresult=True)
def pytest_runtestloop(self, session: Session) -> bool:
durations = deque(maxlen=CONSISTENT_LOOP_COUNT)

"""Reimplement the test loop but loop for the user defined amount of time."""
if session.testsfailed and not session.config.option.continue_on_collection_errors:
msg = "{} error{} during collection".format(session.testsfailed, "s" if session.testsfailed != 1 else "")
Expand All @@ -284,9 +309,9 @@ def pytest_runtestloop(self, session: Session) -> bool:

count: int = 0

while total_time >= SHORTEST_AMOUNT_OF_TIME: # need to run at least one for normal tests
while total_time >= SHORTEST_AMOUNT_OF_TIME:
count += 1
total_time = self._get_total_time(session)
self.current_loop_durations_in_seconds.clear()

for index, item in enumerate(session.items):
item: pytest.Item = item # noqa: PLW0127, PLW2901
@@ -304,8 +329,27 @@
raise session.Failed(session.shouldfail)
if session.shouldstop:
raise session.Interrupted(session.shouldstop)

total_duration_in_seconds = sum(self.current_loop_durations_in_seconds)

if total_duration_in_seconds > 0:
durations.append(total_duration_in_seconds)
else:
durations.clear()

# Consistency check
if len(durations) == CONSISTENT_LOOP_COUNT:
avg = statistics.median(durations)
if avg == 0:
consistent = all(d == 0 for d in durations)
else:
consistent = all(abs(d - avg) / avg <= self.dynamic_tolerance(avg) for d in durations)
if consistent:
break

if self._timed_out(session, start_time, count):
break # exit loop
break

_ORIGINAL_TIME_SLEEP(self._get_delay_time(session))
return True

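Taken together, the new early-exit behavior can be read as the following simplified standalone sketch (the is_consistent helper name is introduced here for illustration; the thresholds, the median-based check, and CONSISTENT_LOOP_COUNT come from the diff above):

import statistics
from collections import deque

CONSISTENT_LOOP_COUNT = 3  # from codeflash/code_utils/config_consts.py in this PR

def dynamic_tolerance(avg: float) -> float:
    # Same thresholds as the plugin: shorter runtimes tolerate more relative noise.
    if avg < 0.0001:
        return 0.7
    if avg < 0.0005:
        return 0.5
    if avg < 0.001:
        return 0.4
    if avg < 0.01:
        return 0.2
    if avg < 0.1:
        return 0.1
    return 0.03

def is_consistent(durations: deque) -> bool:
    # The plugin stops looping once the last CONSISTENT_LOOP_COUNT loop totals
    # all fall within the dynamic tolerance of their median.
    if len(durations) < CONSISTENT_LOOP_COUNT:
        return False
    avg = statistics.median(durations)
    if avg == 0:
        return all(d == 0 for d in durations)
    return all(abs(d - avg) / avg <= dynamic_tolerance(avg) for d in durations)

# Example: three loop totals of ~52 ms deviate well under the 10% tolerance for
# that runtime, so the benchmarking loop would stop early instead of running
# for the full TOTAL_LOOPING_TIME budget.
recent = deque([0.0518, 0.0524, 0.0521], maxlen=CONSISTENT_LOOP_COUNT)
print(is_consistent(recent))  # True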