Skip to content

Commit 03243cf

Browse files
authored
Merge pull request #244 from DevanshuNEU/feature/team-rules-detection
feat(dna): auto-detect team rules from CLAUDE.md and similar files
2 parents baa3703 + 0ce8c12 commit 03243cf

1 file changed

Lines changed: 44 additions & 0 deletions

File tree

backend/services/dna_extractor.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ class CodebaseDNA:
114114
skip_directories: List[str] = field(default_factory=list)
115115
api_versioning: Optional[str] = None
116116
router_pattern: Optional[str] = None
117+
team_rules: Optional[str] = None
118+
team_rules_source: Optional[str] = None
117119

118120
def to_dict(self) -> Dict:
    """Return this instance's fields as a dictionary, as produced by ``asdict``."""
    as_mapping = asdict(self)
    return as_mapping
@@ -247,6 +249,14 @@ def to_markdown(self) -> str:
247249
md += "## Skip Directories\n"
248250
md += f"`{', '.join(self.skip_directories)}`\n\n"
249251

252+
# Team rules (explicit conventions from CLAUDE.md, .cursorrules, etc.)
253+
if self.team_rules:
254+
md += "## Team Rules\n"
255+
if self.team_rules_source:
256+
md += f"*Source: `{self.team_rules_source}`*\n\n"
257+
md += self.team_rules
258+
md += "\n"
259+
250260
return md
251261

252262

@@ -257,6 +267,16 @@ class DNAExtractor:
257267
MAX_FILE_SIZE = 1024 * 1024 # 1MB
258268
MAX_FILES = 5000
259269

270+
# Team rules files in priority order (first found wins)
271+
RULES_FILES = [
272+
'CLAUDE.md',
273+
'.cursorrules',
274+
'.codeintel/rules.md',
275+
'CONVENTIONS.md',
276+
'.github/copilot-instructions.md',
277+
'CODING_GUIDELINES.md',
278+
]
279+
260280
def __init__(self):
261281
self.parsers = {
262282
'python': Parser(Language(tspython.language())),
@@ -317,6 +337,21 @@ def _safe_read_file(self, file_path: Path) -> Optional[str]:
317337
self._stats['read_errors'] += 1
318338
return None
319339

340+
def _extract_team_rules(self, repo_path: Path) -> tuple[Optional[str], Optional[str]]:
    """Extract team-defined rules from convention files.

    Walks ``self.RULES_FILES`` in order and returns the first file that
    yields non-blank content.

    Args:
        repo_path: Root directory of the repository. A plain string path is
            also accepted and coerced to ``Path``.

    Returns:
        Tuple of (rules_content, source_filename) or (None, None) if not found.
        ``rules_content`` has surrounding whitespace stripped and is never
        an empty string.
    """
    # extract_dna() annotates repo_path as str; coerce so the "/" join below
    # works whether the caller passes a str or a Path.
    root = Path(repo_path)
    for filename in self.RULES_FILES:
        rules_path = root / filename
        # is_file() is already False for a missing path, so a separate
        # exists() check is unnecessary.
        if not rules_path.is_file():
            continue
        content = self._safe_read_file(rules_path)
        if not content:
            continue
        rules = content.strip()
        # A whitespace-only file carries no rules: keep searching instead of
        # returning an empty string that would hide later candidates.
        if not rules:
            continue
        logger.info(f"Found team rules in {filename}")
        return rules, filename
    return None, None
354+
320355
def _detect_language(self, file_path: str) -> str:
321356
ext = Path(file_path).suffix.lower()
322357
return {
@@ -930,6 +965,7 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
930965
config_patterns = self._extract_config_patterns(files, repo_path)
931966
common_imports = self._extract_common_imports(files)
932967
api_versioning, router_pattern = self._extract_api_patterns(files, repo_path)
968+
team_rules, team_rules_source = self._extract_team_rules(repo_path)
933969

934970
dna = CodebaseDNA(
935971
repo_id=repo_id,
@@ -948,6 +984,8 @@ def extract_dna(self, repo_path: str, repo_id: str) -> CodebaseDNA:
948984
skip_directories=list(self.SKIP_DIRS),
949985
api_versioning=api_versioning,
950986
router_pattern=router_pattern,
987+
team_rules=team_rules,
988+
team_rules_source=team_rules_source,
951989
)
952990

953991
elapsed = time.time() - start_time
@@ -998,17 +1036,23 @@ def load_from_cache(self, repo_id: str) -> Optional[CodebaseDNA]:
9981036
# Reconstruct CodebaseDNA from dict
9991037
dna = CodebaseDNA(
10001038
repo_id=data['repo_id'],
1039+
detected_framework=data.get('detected_framework'),
10011040
language_distribution=data.get('language_distribution', {}),
10021041
auth_patterns=AuthPattern(**data.get('auth_patterns', {})),
10031042
service_patterns=ServicePattern(**data.get('service_patterns', {})),
10041043
database_patterns=DatabasePattern(**data.get('database_patterns', {})),
10051044
error_patterns=ErrorPattern(**data.get('error_patterns', {})),
10061045
logging_patterns=LoggingPattern(**data.get('logging_patterns', {})),
10071046
naming_conventions=NamingConventions(**data.get('naming_conventions', {})),
1047+
test_patterns=TestPattern(**data.get('test_patterns', {})),
1048+
config_patterns=ConfigPattern(**data.get('config_patterns', {})),
1049+
middleware_patterns=data.get('middleware_patterns', []),
10081050
common_imports=data.get('common_imports', []),
10091051
skip_directories=data.get('skip_directories', []),
10101052
api_versioning=data.get('api_versioning'),
10111053
router_pattern=data.get('router_pattern'),
1054+
team_rules=data.get('team_rules'),
1055+
team_rules_source=data.get('team_rules_source'),
10121056
)
10131057
logger.debug("DNA loaded from cache", repo_id=repo_id)
10141058
return dna

0 commit comments

Comments
 (0)