Skip to content

Commit ffad962

Browse files
committed
feat(dna): auto-detect team rules from CLAUDE.md and similar files
- Add RULES_FILES constant with priority order: CLAUDE.md, .cursorrules, .codeintel/rules.md, CONVENTIONS.md, .github/copilot-instructions.md, CODING_GUIDELINES.md
- Add team_rules and team_rules_source fields to CodebaseDNA
- Add Team Rules section to DNA markdown output
- First found file wins (priority order)

Now any AI using get_codebase_dna via MCP automatically gets the team's explicit coding rules without extra configuration.
1 parent baa3703 commit ffad962

1 file changed

Lines changed: 40 additions & 0 deletions

File tree

backend/services/dna_extractor.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ class CodebaseDNA:
114114
skip_directories: List[str] = field(default_factory=list)
115115
api_versioning: Optional[str] = None
116116
router_pattern: Optional[str] = None
117+
team_rules: Optional[str] = None
118+
team_rules_source: Optional[str] = None
117119

118120
def to_dict(self) -> Dict:
119121
return asdict(self)
@@ -247,6 +249,14 @@ def to_markdown(self) -> str:
247249
md += "## Skip Directories\n"
248250
md += f"`{', '.join(self.skip_directories)}`\n\n"
249251

252+
# Team rules (explicit conventions from CLAUDE.md, .cursorrules, etc.)
253+
if self.team_rules:
254+
md += "## Team Rules\n"
255+
if self.team_rules_source:
256+
md += f"*Source: `{self.team_rules_source}`*\n\n"
257+
md += self.team_rules
258+
md += "\n"
259+
250260
return md
251261

252262

@@ -257,6 +267,16 @@ class DNAExtractor:
257267
MAX_FILE_SIZE = 1024 * 1024 # 1MB
258268
MAX_FILES = 5000
259269

270+
# Team rules files in priority order (first found wins)
271+
RULES_FILES = [
272+
'CLAUDE.md',
273+
'.cursorrules',
274+
'.codeintel/rules.md',
275+
'CONVENTIONS.md',
276+
'.github/copilot-instructions.md',
277+
'CODING_GUIDELINES.md',
278+
]
279+
260280
def __init__(self):
261281
self.parsers = {
262282
'python': Parser(Language(tspython.language())),
@@ -317,6 +337,21 @@ def _safe_read_file(self, file_path: Path) -> Optional[str]:
317337
self._stats['read_errors'] += 1
318338
return None
319339

340+
def _extract_team_rules(self, repo_path: Path) -> tuple[Optional[str], Optional[str]]:
    """Extract team-defined rules from convention files.

    Candidates listed in ``RULES_FILES`` are tried in order, relative to
    ``repo_path``. The first regular file that yields non-blank content
    wins; files that are missing, unreadable, empty, or whitespace-only
    are skipped so a lower-priority file can still be picked up.

    Args:
        repo_path: Repository root directory. A plain string path is
            accepted as well and is coerced to ``Path``.

    Returns:
        Tuple of (rules_content, source_filename) or (None, None) if not found.
        ``rules_content`` has leading/trailing whitespace stripped and is
        never an empty string.
    """
    # extract_dna declares repo_path as ``str``; coerce so the "/" join
    # below works whether a str or a Path was passed in.
    root = Path(repo_path)

    for filename in self.RULES_FILES:
        rules_path = root / filename
        # is_file() is already False for paths that do not exist.
        if not rules_path.is_file():
            continue

        content = self._safe_read_file(rules_path)
        if not content:
            continue

        # Strip before deciding: a whitespace-only file must not win the
        # priority race and return an empty rules string with a source.
        rules = content.strip()
        if not rules:
            continue

        logger.info(f"Found team rules in {filename}")
        return rules, filename

    return None, None
354+
320355
def _detect_language(self, file_path: str) -> str:
321356
ext = Path(file_path).suffix.lower()
322357
return {
@@ -930,6 +965,7 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
930965
config_patterns = self._extract_config_patterns(files, repo_path)
931966
common_imports = self._extract_common_imports(files)
932967
api_versioning, router_pattern = self._extract_api_patterns(files, repo_path)
968+
team_rules, team_rules_source = self._extract_team_rules(repo_path)
933969

934970
dna = CodebaseDNA(
935971
repo_id=repo_id,
@@ -948,6 +984,8 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
948984
skip_directories=list(self.SKIP_DIRS),
949985
api_versioning=api_versioning,
950986
router_pattern=router_pattern,
987+
team_rules=team_rules,
988+
team_rules_source=team_rules_source,
951989
)
952990

953991
elapsed = time.time() - start_time
@@ -1009,6 +1047,8 @@ def load_from_cache(self, repo_id: str) -> Optional[CodebaseDNA]:
10091047
skip_directories=data.get('skip_directories', []),
10101048
api_versioning=data.get('api_versioning'),
10111049
router_pattern=data.get('router_pattern'),
1050+
team_rules=data.get('team_rules'),
1051+
team_rules_source=data.get('team_rules_source'),
10121052
)
10131053
logger.debug("DNA loaded from cache", repo_id=repo_id)
10141054
return dna

0 commit comments

Comments
 (0)