Skip to content

Commit 94eb7ee

Browse files
committed
feat(dna): comprehensive Python framework support
- Add TestPattern and ConfigPattern dataclasses
- Add Django ORM, SQLAlchemy, Prisma, Tortoise ORM detection
- Add Django + DRF framework detection
- Add aiohttp, tornado framework detection
- Add Django middleware patterns (MIDDLEWARE, MiddlewareMixin, hooks)
- Add DRF permission_classes and authentication_classes detection
- Add test framework detection (pytest, unittest, django.test)
- Add mock library detection (unittest.mock, responses, pytest-mock)
- Add config pattern detection (dotenv, environs, django-environ, pydantic)
- Add secrets handling detection (AWS Secrets Manager, Vault, env vars)
- Update to_markdown() with test and config sections
1 parent cb65705 commit 94eb7ee

1 file changed

Lines changed: 223 additions & 9 deletions

File tree

backend/services/dna_extractor.py

Lines changed: 223 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,27 @@ class NamingConventions:
7474
file_style: str = "unknown"
7575

7676

77+
@dataclass
class TestPattern:
    """Detected testing patterns.

    Plain data holder; every field has a default so an empty instance
    means "nothing detected".
    """

    # The class name starts with "Test", so pytest would try to collect it
    # whenever a test module imports it and emit a collection warning
    # (dataclasses define __init__). Opt out explicitly. The attribute is
    # unannotated, so it is NOT a dataclass field and does not appear in
    # asdict() output or in the generated __init__.
    __test__ = False

    framework: Optional[str] = None  # pytest, unittest, nose
    fixture_style: Optional[str] = None  # pytest fixtures, setUp/tearDown
    mock_library: Optional[str] = None  # unittest.mock, pytest-mock, responses
    test_file_pattern: str = "test_*.py"
    has_conftest: bool = False
    has_factories: bool = False  # factory_boy, faker
    coverage_config: bool = False
87+
88+
89+
@dataclass
class ConfigPattern:
    """Detected configuration patterns.

    Attributes:
        env_loading: Library used to load environment files, e.g.
            python-dotenv, environs, django-environ.
        settings_pattern: How settings are organised, e.g. single file,
            split by env, pydantic.
        secrets_handling: Where secrets come from, e.g. env vars, vault,
            AWS secrets.
        config_validation: Whether settings are validated (pydantic
            Settings, dynaconf).
    """

    env_loading: Optional[str] = None
    settings_pattern: Optional[str] = None
    secrets_handling: Optional[str] = None
    config_validation: bool = False
96+
97+
7798
@dataclass
7899
class CodebaseDNA:
79100
"""Complete DNA profile of a codebase"""
@@ -86,12 +107,13 @@ class CodebaseDNA:
86107
error_patterns: ErrorPattern = field(default_factory=ErrorPattern)
87108
logging_patterns: LoggingPattern = field(default_factory=LoggingPattern)
88109
naming_conventions: NamingConventions = field(default_factory=NamingConventions)
110+
test_patterns: TestPattern = field(default_factory=TestPattern)
111+
config_patterns: ConfigPattern = field(default_factory=ConfigPattern)
89112
middleware_patterns: List[str] = field(default_factory=list)
90113
common_imports: List[str] = field(default_factory=list)
91114
skip_directories: List[str] = field(default_factory=list)
92115
api_versioning: Optional[str] = None
93116
router_pattern: Optional[str] = None
94-
test_patterns: Dict[str, Any] = field(default_factory=dict)
95117

96118
def to_dict(self) -> Dict:
97119
return asdict(self)
@@ -192,6 +214,34 @@ def to_markdown(self) -> str:
192214
md += f"**Router:** `{self.router_pattern}`\n"
193215
md += "\n"
194216

217+
# Test patterns
218+
if self.test_patterns.framework:
219+
md += "## Testing Patterns\n"
220+
md += f"**Framework:** {self.test_patterns.framework}\n"
221+
if self.test_patterns.fixture_style:
222+
md += f"**Fixture Style:** {self.test_patterns.fixture_style}\n"
223+
if self.test_patterns.mock_library:
224+
md += f"**Mock Library:** {self.test_patterns.mock_library}\n"
225+
md += f"**Test File Pattern:** `{self.test_patterns.test_file_pattern}`\n"
226+
if self.test_patterns.has_conftest:
227+
md += "**Has conftest.py:** Yes\n"
228+
if self.test_patterns.has_factories:
229+
md += "**Uses Factories:** Yes\n"
230+
md += "\n"
231+
232+
# Config patterns
233+
if self.config_patterns.env_loading or self.config_patterns.settings_pattern:
234+
md += "## Configuration Patterns\n"
235+
if self.config_patterns.env_loading:
236+
md += f"**Env Loading:** {self.config_patterns.env_loading}\n"
237+
if self.config_patterns.settings_pattern:
238+
md += f"**Settings Pattern:** {self.config_patterns.settings_pattern}\n"
239+
if self.config_patterns.secrets_handling:
240+
md += f"**Secrets Handling:** {self.config_patterns.secrets_handling}\n"
241+
if self.config_patterns.config_validation:
242+
md += "**Config Validation:** Yes (Pydantic/dynaconf)\n"
243+
md += "\n"
244+
195245
# Skip directories
196246
if self.skip_directories:
197247
md += "## Skip Directories\n"
@@ -245,13 +295,16 @@ def _discover_files(self, repo_path: Path) -> List[Path]:
245295
def _detect_framework(self, files: List[Path]) -> Optional[str]:
246296
"""Detect the primary framework used in the codebase"""
247297
framework_indicators = {
248-
'fastapi': ['from fastapi', 'FastAPI()', 'APIRouter'],
298+
'fastapi': ['from fastapi', 'FastAPI()', 'APIRouter', 'fastapi.routing'],
299+
'django-rest-framework': ['from rest_framework', 'rest_framework.views', 'APIView', 'ViewSet', 'serializers.Serializer'],
300+
'django': ['from django', 'django.conf', 'INSTALLED_APPS', 'django.urls', 'django.views'],
249301
'starlette': ['from starlette', 'Starlette()', 'starlette.routing'],
250-
'flask': ['from flask', 'Flask(__name__)', '@app.route'],
251-
'django': ['from django', 'django.conf', 'INSTALLED_APPS'],
252-
'express': ['require("express")', 'express()', 'app.use('],
253-
'nextjs': ['from next', 'getServerSideProps', 'getStaticProps'],
254-
'nestjs': ['@Module(', '@Injectable(', '@Controller('],
302+
'flask': ['from flask', 'Flask(__name__)', '@app.route', 'flask.Blueprint'],
303+
'aiohttp': ['from aiohttp', 'aiohttp.web', 'web.Application'],
304+
'tornado': ['from tornado', 'tornado.web', 'RequestHandler'],
305+
'express': ['require("express")', 'express()', 'app.use(', 'express.Router'],
306+
'nextjs': ['from next', 'getServerSideProps', 'getStaticProps', 'next/router'],
307+
'nestjs': ['@Module(', '@Injectable(', '@Controller(', 'NestFactory'],
255308
}
256309

257310
scores = Counter()
@@ -266,7 +319,11 @@ def _detect_framework(self, files: List[Path]) -> Optional[str]:
266319
pass
267320

268321
if scores:
269-
return scores.most_common(1)[0][0]
322+
top_framework = scores.most_common(1)[0][0]
323+
# DRF is always used WITH Django, so note both
324+
if top_framework == 'django-rest-framework':
325+
return 'django + DRF'
326+
return top_framework
270327
return None
271328

272329
def _extract_middleware_patterns(self, files: List[Path], framework: Optional[str]) -> List[str]:
@@ -292,13 +349,31 @@ def _extract_middleware_patterns(self, files: List[Path], framework: Optional[st
292349
for dep in deps:
293350
patterns.append(f'Depends({dep})')
294351

352+
# Django middleware
353+
if 'MIDDLEWARE' in content and ('django' in content or '.middleware' in content):
354+
patterns.append('Django MIDDLEWARE setting')
355+
if 'MiddlewareMixin' in content:
356+
patterns.append('MiddlewareMixin')
357+
if 'process_request' in content or 'process_response' in content:
358+
patterns.append('Django middleware hooks')
359+
360+
# DRF middleware/permissions
361+
if 'permission_classes' in content:
362+
perms = re.findall(r'permission_classes\s*=\s*\[([^\]]+)\]', content)
363+
for perm in perms:
364+
patterns.append(f'DRF permission_classes: {perm.strip()}')
365+
if 'authentication_classes' in content:
366+
patterns.append('DRF authentication_classes')
367+
295368
# Express middleware
296369
if 'app.use(' in content:
297370
patterns.append('app.use(middleware)')
298371

299372
# Flask decorators
300373
if '@app.before_request' in content:
301374
patterns.append('@app.before_request')
375+
if '@app.after_request' in content:
376+
patterns.append('@app.after_request')
302377

303378
except:
304379
pass
@@ -415,8 +490,36 @@ def _extract_database_patterns(self, files: List[Path], repo_path: Path) -> Data
415490
content = file_path.read_text(encoding='utf-8', errors='ignore')
416491

417492
# Check for Supabase
418-
if 'supabase' in content.lower():
493+
if 'supabase' in content.lower() and not pattern.orm_used:
419494
pattern.orm_used = 'Supabase'
495+
496+
# Check for Django ORM
497+
if 'from django.db import models' in content or 'models.Model' in content:
498+
pattern.orm_used = 'Django ORM'
499+
if 'models.UUIDField' in content:
500+
pattern.id_type = 'UUID (Django UUIDField)'
501+
elif 'models.AutoField' in content or 'models.BigAutoField' in content:
502+
pattern.id_type = 'AutoField (Django)'
503+
if 'models.DateTimeField' in content:
504+
pattern.timestamp_type = 'DateTimeField (Django)'
505+
if 'on_delete=models.CASCADE' in content:
506+
pattern.cascade_deletes = True
507+
508+
# Check for SQLAlchemy
509+
if 'from sqlalchemy' in content or 'sqlalchemy' in content:
510+
pattern.orm_used = 'SQLAlchemy'
511+
if 'UUID' in content:
512+
pattern.id_type = 'UUID (SQLAlchemy)'
513+
if 'DateTime' in content:
514+
pattern.timestamp_type = 'DateTime (SQLAlchemy)'
515+
516+
# Check for Prisma (JS/TS)
517+
if 'prisma' in content.lower() or '@prisma/client' in content:
518+
pattern.orm_used = 'Prisma'
519+
520+
# Check for Tortoise ORM
521+
if 'from tortoise' in content or 'tortoise.models' in content:
522+
pattern.orm_used = 'Tortoise ORM'
420523

421524
# Check SQL files for patterns
422525
if file_path.suffix == '.sql':
@@ -442,6 +545,10 @@ def _extract_database_patterns(self, files: List[Path], repo_path: Path) -> Data
442545
pattern.connection_pattern = 'Singleton: get_supabase_service()'
443546
elif 'create_client(' in content:
444547
pattern.connection_pattern = 'Direct: create_client()'
548+
elif 'DATABASES' in content and 'django' in content.lower():
549+
pattern.connection_pattern = 'Django DATABASES setting'
550+
elif 'create_engine(' in content:
551+
pattern.connection_pattern = 'SQLAlchemy: create_engine()'
445552

446553
except Exception as e:
447554
logger.debug(f"Error reading {file_path}: {e}")
@@ -612,6 +719,109 @@ def _extract_api_patterns(self, files: List[Path], repo_path: Path) -> tuple:
612719

613720
return api_versioning, router_pattern
614721

722+
def _extract_test_patterns(self, files: List[Path], repo_path: Path) -> TestPattern:
    """Extract testing patterns from codebase.

    Scans the text of every file in ``files`` for substring markers of
    test frameworks, mock libraries and data factories, and inspects
    ``repo_path`` for ``conftest.py`` and coverage configuration.

    The result does not depend on the order of ``files``: the framework is
    chosen by a fixed priority and the mock library by how many files use it.

    Args:
        files: Files whose contents are scanned.
        repo_path: Repository root used for the filesystem checks.

    Returns:
        A ``TestPattern``; fields keep their defaults when nothing matched.
    """
    pattern = TestPattern()

    # Check for conftest.py (pytest), ignoring copies that live inside
    # skipped directories (e.g. a vendored virtualenv shipping its own).
    # NOTE(review): assumes self.SKIP_DIRS holds plain directory names - confirm.
    skip_dirs = set(self.SKIP_DIRS)
    pattern.has_conftest = any(
        skip_dirs.isdisjoint(found.relative_to(repo_path).parts)
        for found in repo_path.rglob('conftest.py')
    )

    # pytest outranks the others because it can run django.test/unittest
    # cases; django.test outranks plain unittest because it builds on it.
    framework_priority = ('pytest', 'django.test', 'unittest')
    frameworks_seen = set()
    fixture_styles = {}  # framework name -> fixture style label
    mock_votes = Counter()  # mock library label -> number of files using it

    for file_path in files:
        # Best-effort scan: an unreadable file is logged and skipped.
        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
        except (OSError, ValueError) as e:
            logger.debug(f"Error reading {file_path}: {e}")
            continue

        # Detect test framework. django.test is checked before unittest so
        # that a Django test module importing unittest.mock is still
        # recognised as Django.
        if 'import pytest' in content or '@pytest' in content:
            frameworks_seen.add('pytest')
            if '@pytest.fixture' in content:
                fixture_styles['pytest'] = 'pytest fixtures'
        elif 'from django.test' in content:
            frameworks_seen.add('django.test')
            fixture_styles['django.test'] = 'Django TestCase'
        elif 'from unittest' in content or 'import unittest' in content:
            frameworks_seen.add('unittest')
            if 'def setUp(' in content or 'def tearDown(' in content:
                fixture_styles['unittest'] = 'setUp/tearDown'

        # Detect mock library (at most one vote per file)
        if 'from unittest.mock import' in content or 'from unittest import mock' in content:
            mock_votes['unittest.mock'] += 1
        elif 'import responses' in content:
            mock_votes['responses'] += 1
        elif 'pytest_mock' in content or 'mocker' in content:
            mock_votes['pytest-mock'] += 1
        elif '@patch(' in content:
            mock_votes['unittest.mock (decorator)'] += 1

        # Detect factories
        if (
            'factory_boy' in content
            or 'factory.Factory' in content
            or 'from faker import' in content
        ):
            pattern.has_factories = True

    # Highest-priority framework wins; the fixture style prefers the one
    # belonging to that framework and otherwise falls back to any seen.
    for name in framework_priority:
        if pattern.framework is None and name in frameworks_seen:
            pattern.framework = name
            pattern.fixture_style = fixture_styles.get(name)
    if pattern.fixture_style is None:
        for name in framework_priority:
            if name in fixture_styles:
                pattern.fixture_style = fixture_styles[name]
                break

    if mock_votes:
        pattern.mock_library = mock_votes.most_common(1)[0][0]

    # Check for coverage config. A bare pyproject.toml proves nothing, so
    # shared config files only count when they contain a coverage section.
    coverage_sections = {
        'pyproject.toml': '[tool.coverage',
        'setup.cfg': '[coverage:',
        'tox.ini': '[coverage:',
    }
    if (repo_path / '.coveragerc').exists():
        pattern.coverage_config = True
    else:
        for config_name, marker in coverage_sections.items():
            try:
                config_text = (repo_path / config_name).read_text(
                    encoding='utf-8', errors='ignore'
                )
            except (OSError, ValueError):
                continue  # missing or unreadable config file
            if marker in config_text:
                pattern.coverage_config = True
                break

    return pattern
775+
776+
def _extract_config_patterns(self, files: List[Path], repo_path: Path) -> ConfigPattern:
    """Extract configuration patterns from codebase.

    Scans the text of every file in ``files`` for substring markers of
    env-file loaders, settings libraries and secret stores, then looks at
    the layout of ``repo_path`` for settings files/directories.

    The result does not depend on the order of ``files``: within each
    category the most specific detection wins (e.g. AWS Secrets Manager is
    not overwritten by a later file that merely reads ``os.environ``).

    Args:
        files: Files whose contents are scanned.
        repo_path: Repository root used for the layout checks.

    Returns:
        A ``ConfigPattern``; fields keep their defaults when nothing matched.
    """
    pattern = ConfigPattern()

    # Earlier entries take precedence when several were seen across files.
    env_priority = ('python-dotenv', 'environs', 'django-environ', 'python-decouple')
    settings_priority = ('Pydantic Settings', 'Dynaconf', 'Django settings')
    secrets_priority = ('AWS Secrets Manager', 'HashiCorp Vault', 'Environment variables')

    env_seen = set()
    settings_seen = set()
    secrets_seen = set()

    for file_path in files:
        # Best-effort scan: an unreadable file is logged and skipped.
        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
        except (OSError, ValueError) as e:
            logger.debug(f"Error reading {file_path}: {e}")
            continue

        # Detect env loading
        if 'from dotenv import' in content or 'load_dotenv' in content:
            env_seen.add('python-dotenv')
        elif 'from environs import' in content:
            env_seen.add('environs')
        elif 'import environ' in content or 'django-environ' in content:
            env_seen.add('django-environ')
        elif 'from decouple import' in content:
            env_seen.add('python-decouple')

        # Detect settings pattern. Only BaseSettings counts: BaseModel is
        # used for arbitrary data models and says nothing about settings.
        if 'pydantic' in content and 'BaseSettings' in content:
            settings_seen.add('Pydantic Settings')
        elif 'dynaconf' in content:
            settings_seen.add('Dynaconf')
        elif 'DJANGO_SETTINGS_MODULE' in content:
            settings_seen.add('Django settings')

        # Detect secrets handling
        if 'boto3' in content and 'secretsmanager' in content:
            secrets_seen.add('AWS Secrets Manager')
        elif 'hvac' in content or 'vault' in content.lower():
            secrets_seen.add('HashiCorp Vault')
        elif 'os.getenv(' in content or 'os.environ' in content:
            secrets_seen.add('Environment variables')

    pattern.env_loading = next((n for n in env_priority if n in env_seen), None)
    pattern.settings_pattern = next(
        (n for n in settings_priority if n in settings_seen), None
    )
    pattern.secrets_handling = next(
        (n for n in secrets_priority if n in secrets_seen), None
    )
    pattern.config_validation = pattern.settings_pattern in ('Pydantic Settings', 'Dynaconf')

    # Check for specific config files. The layout only refines the answer
    # when no validating settings library was found; otherwise the label
    # would contradict config_validation=True.
    if not pattern.config_validation:
        if (repo_path / 'settings.py').exists():
            pattern.settings_pattern = 'Single settings file'
        elif (repo_path / 'settings').is_dir():
            pattern.settings_pattern = 'Split settings (by environment)'
        elif (repo_path / 'config').is_dir():
            pattern.settings_pattern = 'Config directory'

    return pattern
824+
615825
def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
616826
"""Extract complete DNA profile from a codebase"""
617827
repo_path = Path(repo_path)
@@ -641,6 +851,8 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
641851
error_patterns = self._extract_error_patterns(files)
642852
logging_patterns = self._extract_logging_patterns(files)
643853
naming_conventions = self._extract_naming_conventions(files)
854+
test_patterns = self._extract_test_patterns(files, repo_path)
855+
config_patterns = self._extract_config_patterns(files, repo_path)
644856
common_imports = self._extract_common_imports(files)
645857
api_versioning, router_pattern = self._extract_api_patterns(files, repo_path)
646858

@@ -654,6 +866,8 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
654866
error_patterns=error_patterns,
655867
logging_patterns=logging_patterns,
656868
naming_conventions=naming_conventions,
869+
test_patterns=test_patterns,
870+
config_patterns=config_patterns,
657871
middleware_patterns=middleware_patterns,
658872
common_imports=common_imports,
659873
skip_directories=list(self.SKIP_DIRS),

0 commit comments

Comments
 (0)