From ffad96222e7602aa8e3d697082c9f31bea6ed998 Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sun, 15 Feb 2026 12:49:59 -0500 Subject: [PATCH 1/2] feat(dna): auto-detect team rules from CLAUDE.md and similar files - Add RULES_FILES constant with priority order: CLAUDE.md, .cursorrules, .codeintel/rules.md, CONVENTIONS.md, .github/copilot-instructions.md, CODING_GUIDELINES.md - Add team_rules and team_rules_source fields to CodebaseDNA - Add Team Rules section to DNA markdown output - First found file wins (priority order) Now any AI using get_codebase_dna via MCP automatically gets the team's explicit coding rules without extra configuration. --- backend/services/dna_extractor.py | 40 +++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/backend/services/dna_extractor.py b/backend/services/dna_extractor.py index 5d3e830..110c6bc 100644 --- a/backend/services/dna_extractor.py +++ b/backend/services/dna_extractor.py @@ -114,6 +114,8 @@ class CodebaseDNA: skip_directories: List[str] = field(default_factory=list) api_versioning: Optional[str] = None router_pattern: Optional[str] = None + team_rules: Optional[str] = None + team_rules_source: Optional[str] = None def to_dict(self) -> Dict: return asdict(self) @@ -247,6 +249,14 @@ def to_markdown(self) -> str: md += "## Skip Directories\n" md += f"`{', '.join(self.skip_directories)}`\n\n" + # Team rules (explicit conventions from CLAUDE.md, .cursorrules, etc.) + if self.team_rules: + md += "## Team Rules\n" + if self.team_rules_source: + md += f"*Source: `{self.team_rules_source}`*\n\n" + md += self.team_rules + md += "\n" + return md @@ -257,6 +267,16 @@ class DNAExtractor: MAX_FILE_SIZE = 1024 * 1024 # 1MB MAX_FILES = 5000 + # Team rules files in priority order (first found wins) + RULES_FILES = [ + 'CLAUDE.md', + '.cursorrules', + '.codeintel/rules.md', + 'CONVENTIONS.md', + '.github/copilot-instructions.md', + 'CODING_GUIDELINES.md', + ] + def __init__(self): self.parsers = { 'python': Parser(Language(tspython.language())), @@ -317,6 +337,21 @@ def _safe_read_file(self, file_path: Path) -> Optional[str]: self._stats['read_errors'] += 1 return None + def _extract_team_rules(self, repo_path: Path) -> tuple[Optional[str], Optional[str]]: + """Extract team-defined rules from convention files. + + Returns: + Tuple of (rules_content, source_filename) or (None, None) if not found. + """ + for filename in self.RULES_FILES: + rules_path = repo_path / filename + if rules_path.exists() and rules_path.is_file(): + content = self._safe_read_file(rules_path) + if content: + logger.info(f"Found team rules in {filename}") + return content.strip(), filename + return None, None + def _detect_language(self, file_path: str) -> str: ext = Path(file_path).suffix.lower() return { @@ -930,6 +965,7 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA: config_patterns = self._extract_config_patterns(files, repo_path) common_imports = self._extract_common_imports(files) api_versioning, router_pattern = self._extract_api_patterns(files, repo_path) + team_rules, team_rules_source = self._extract_team_rules(repo_path) dna = CodebaseDNA( repo_id=repo_id, @@ -948,6 +984,8 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA: skip_directories=list(self.SKIP_DIRS), api_versioning=api_versioning, router_pattern=router_pattern, + team_rules=team_rules, + team_rules_source=team_rules_source, ) elapsed = time.time() - start_time @@ -1009,6 +1047,8 @@ def load_from_cache(self, repo_id: str) -> Optional[CodebaseDNA]: skip_directories=data.get('skip_directories', []), api_versioning=data.get('api_versioning'), router_pattern=data.get('router_pattern'), + team_rules=data.get('team_rules'), + team_rules_source=data.get('team_rules_source'), ) logger.debug("DNA loaded from cache", repo_id=repo_id) return dna From 0ce8c128120bfb3b268ccc04d746cb5ad158cc20 Mon Sep 17 00:00:00 2001 From: Devanshu Rajesh Chicholikar Date: Sun, 15 Feb 2026 13:17:08 -0500 Subject: [PATCH 2/2] fix(dna): restore missing fields in load_from_cache Add detected_framework, test_patterns, config_patterns, and middleware_patterns to cache reconstruction. --- backend/services/dna_extractor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/services/dna_extractor.py b/backend/services/dna_extractor.py index 110c6bc..e369353 100644 --- a/backend/services/dna_extractor.py +++ b/backend/services/dna_extractor.py @@ -1036,6 +1036,7 @@ def load_from_cache(self, repo_id: str) -> Optional[CodebaseDNA]: # Reconstruct CodebaseDNA from dict dna = CodebaseDNA( repo_id=data['repo_id'], + detected_framework=data.get('detected_framework'), language_distribution=data.get('language_distribution', {}), auth_patterns=AuthPattern(**data.get('auth_patterns', {})), service_patterns=ServicePattern(**data.get('service_patterns', {})), @@ -1043,6 +1044,9 @@ def load_from_cache(self, repo_id: str) -> Optional[CodebaseDNA]: error_patterns=ErrorPattern(**data.get('error_patterns', {})), logging_patterns=LoggingPattern(**data.get('logging_patterns', {})), naming_conventions=NamingConventions(**data.get('naming_conventions', {})), + test_patterns=TestPattern(**data.get('test_patterns', {})), + config_patterns=ConfigPattern(**data.get('config_patterns', {})), + middleware_patterns=data.get('middleware_patterns', []), common_imports=data.get('common_imports', []), skip_directories=data.get('skip_directories', []), api_versioning=data.get('api_versioning'),