|
6 | 6 | from typing import List |
7 | 7 |
|
8 | 8 |
|
9 | | -# Regex patterns for test files (consolidated from CodeGraphRanker) |
| 9 | +# Anchored regex patterns for test files (boundary-aware to prevent false matches) |
| 10 | +# Uses (?:^|/) for start boundary and (?:/|$) for end boundary |
10 | 11 | TEST_PATTERNS = [ |
11 | | - r'test[s]?[/_]', # test/, tests/, test_ |
12 | | - r'[/_]test[s]?\.py$', # _test.py, _tests.py |
13 | | - r'\.test\.[jt]sx?$', # .test.js, .test.ts |
14 | | - r'\.spec\.[jt]sx?$', # .spec.js, .spec.ts |
15 | | - r'__tests__', # __tests__/ |
16 | | - r'conftest\.py$', # pytest config |
17 | | - r'fixtures?[/_]', # fixtures/ |
18 | | - r'mock[s]?[/_]', # mocks/ |
| 12 | + # test/tests as a whole path component: test/, src/tests/, but NOT "contest/", "latest/" |
| 13 | + r'(?:^|/)tests?(?:/|$)', |
| 14 | + # test_ prefix in filename: test_foo.py, but NOT "contest_foo.py" |
| 15 | + r'(?:^|/)test_[^/]+$', |
| 16 | + # _test / _tests suffix on Python files only: foo_test.py, foo_tests.py |
| 17 | + r'(?:^|/)[^/]+_tests?\.py$', |
| 18 | + # .test.js, .test.ts, .test.tsx, .test.jsx |
| 19 | + r'\.test\.[jt]sx?$', |
| 20 | + # .spec.js, .spec.ts, .spec.tsx, .spec.jsx |
| 21 | + r'\.spec\.[jt]sx?$', |
| 22 | + # __tests__ directory (Jest convention) |
| 23 | + r'(?:^|/)__tests__(?:/|$)', |
| 24 | + # conftest.py (pytest config) |
| 25 | + r'(?:^|/)conftest\.py$', |
| 26 | + # fixture/ or fixtures/ directory (whole path component) |
| 27 | + r'(?:^|/)fixtures?(?:/|$)', |
| 28 | + # mock/ or mocks/ directory (whole path component) |
| 29 | + r'(?:^|/)mocks?(?:/|$)', |
19 | 30 | ] |
20 | 31 |
|
| 32 | +# Pre-compile patterns for performance |
| 33 | +_COMPILED_PATTERNS = [re.compile(p) for p in TEST_PATTERNS] |
| 34 | + |
21 | 35 |
|
22 | 36 | def is_test_file(file_path: str) -> bool: |
23 | 37 | """ |
24 | | - Check if file is a test file using regex patterns. |
| 38 | + Check if file is a test file using anchored regex patterns. |
25 | 39 | |
26 | 40 | Args: |
27 | | - file_path: Path to check (can be relative or absolute) |
| 41 | + file_path: Path to check (can be relative or absolute, Windows or Unix) |
28 | 42 | |
29 | 43 | Returns: |
30 | 44 | True if file matches any test pattern |
31 | 45 | """ |
32 | 46 | if not file_path: |
33 | 47 | return False |
34 | | - file_path_lower = file_path.lower() |
35 | | - for pattern in TEST_PATTERNS: |
36 | | - if re.search(pattern, file_path_lower): |
| 48 | + # normalize: lowercase + Windows separators to Unix |
| 49 | + normalized = file_path.lower().replace('\\', '/') |
| 50 | + for pattern in _COMPILED_PATTERNS: |
| 51 | + if pattern.search(normalized): |
37 | 52 | return True |
38 | 53 | return False |
39 | 54 |
|
|
0 commit comments