Skip to content

Commit 2a67304

Browse files
committed
feat(hooks): add memory validation on SessionStart
Validates .serena/memories consistency with the codebase on session start. Detects stale references to non-existent files/directories, deleted skills, and missing cross-memory references. Ported from t3chn/skills with adaptations for the skillbox structure.
1 parent 39bdbd7 commit 2a67304

4 files changed

Lines changed: 374 additions & 1 deletion

File tree

plugins/core/.claude-plugin/plugin.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "core",
33
"description": "Core workflow tools: beads task tracking, serena code navigation, conventional commits, discovery, context engineering",
4-
"version": "1.0.2",
4+
"version": "1.0.3",
55
"author": {
66
"name": "11me"
77
}

plugins/core/hooks/hooks.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
{
2121
"type": "command",
2222
"command": "python3 ${CLAUDE_PLUGIN_ROOT}/scripts/hooks/skill_suggester.py"
23+
},
24+
{
25+
"type": "command",
26+
"command": "bash ${CLAUDE_PLUGIN_ROOT}/scripts/hooks/memory_validator.sh"
2327
}
2428
]
2529
},
Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Memory Validator - Validates .serena/memories consistency with codebase.
4+
5+
Detects:
6+
- References to non-existent files/directories
7+
- References to deleted skills
8+
- Stale cross-references between memories
9+
10+
Usage:
11+
python3 memory_validator.py # Validate all memories
12+
python3 memory_validator.py --json # JSON output for hooks
13+
python3 memory_validator.py --quiet # Only output if issues found
14+
15+
Exit codes:
16+
0 - All memories valid (or no memories exist)
17+
1 - Stale references found
18+
2 - Error during validation
19+
"""
20+
21+
import json
22+
import re
23+
import sys
24+
from dataclasses import dataclass, field
25+
from pathlib import Path
26+
27+
28+
@dataclass
class StaleReference:
    """One reference inside a memory file that failed validation."""

    # File name of the memory that contains the reference.
    memory_file: str
    # Reference text exactly as it was extracted from the memory.
    reference: str
    # Kind of reference: 'path', 'skill' or 'memory'.
    reference_type: str
    # 1-based line number of the reference inside the memory file.
    line_number: int
    # Excerpt of the line that holds the reference (empty when not provided).
    context: str = ""
37+
38+
39+
@dataclass
class ValidationResult:
    """Aggregate outcome of one validation run over the memory files."""

    # Number of memory files that were loaded and inspected.
    total_memories: int = 0
    # Number of inspected memories that contain no stale reference.
    valid_memories: int = 0
    # Every stale reference found, across all inspected memories.
    stale_references: list[StaleReference] = field(default_factory=list)

    @property
    def is_valid(self) -> bool:
        """Return True when no stale reference has been recorded."""
        return not self.stale_references
50+
51+
52+
def get_project_root() -> Path | None:
53+
"""Find project root (directory containing .serena or .git)."""
54+
current = Path.cwd()
55+
56+
for parent in [current, *current.parents]:
57+
if (parent / ".serena").exists() or (parent / ".git").exists():
58+
return parent
59+
60+
return None
61+
62+
63+
def discover_skills(project_root: Path) -> set[str]:
    """Collect the names of all skills shipped by the project's plugins.

    A skill is any directory matching ``plugins/*/skills/<name>/SKILL.md``;
    its name is the directory that holds ``SKILL.md``
    (e.g. ``plugins/core/skills/beads-workflow/SKILL.md`` -> ``beads-workflow``).

    Args:
        project_root: Directory that is expected to contain ``plugins/``.

    Returns:
        The set of skill names; empty when ``plugins/`` does not exist.
    """
    plugins_root = project_root / "plugins"
    if not plugins_root.exists():
        return set()

    return {manifest.parent.name for manifest in plugins_root.glob("*/skills/*/SKILL.md")}
78+
79+
80+
def load_memories(project_root: Path) -> list[tuple[Path, str]]:
    """Load every readable memory file under ``.serena/memories``.

    Args:
        project_root: Directory that is expected to contain ``.serena/memories``.

    Returns:
        ``(path, content)`` pairs for each ``*.md`` file, ordered by path so
        that reports are deterministic. Files that cannot be read or are not
        valid UTF-8 are skipped; an empty list is returned when the memories
        directory does not exist.
    """
    memories_dir = project_root / ".serena" / "memories"
    if not memories_dir.is_dir():
        return []

    memories = []
    # glob() yields entries in arbitrary order; sort for stable output.
    for md_file in sorted(memories_dir.glob("*.md")):
        try:
            content = md_file.read_text(encoding="utf-8")
        except (OSError, UnicodeError):
            # Best effort: an unreadable memory must not abort validation,
            # but unrelated programming errors should still surface.
            continue
        memories.append((md_file, content))

    return memories
95+
96+
97+
# Backticked references matching any of these are not project filesystem paths.
_SKIP_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"^https?://",  # URLs
        r"^\$",  # Variables
        r"^/\w+$",  # Slash commands like /task, /commit
        r"^\w+\.\w+\.\w+/",  # Go modules: github.com/..., go.uber.org/...
        r"^[a-z]+-[a-z]+/[a-z]",  # Go packages: go-resty/resty
        r"^[a-z]+/[a-z]+$",  # Simple package paths: log/slog, net/http
        r"^google/",  # Google packages
        r"^uber-go/",  # Uber packages
        r"^errors\.",  # errors.Is/As
        r"YYYY",  # Date templates (checkpoint-YYYY-MM-DD.md)
        r"^\d+\.\d+",  # Version numbers
        r"^~",  # Home directory paths (external)
        r"^\.claude/",  # User's .claude directory (not in project)
        r"^mcp__",  # MCP tool names
    )
)

# A backticked path is only validated when it contains one of these markers,
# i.e. when it looks like part of the project structure.
_PATH_MARKERS = (
    ".md",
    ".py",
    ".json",
    ".sh",
    ".yaml",
    ".yml",
    "SKILL",
    "AGENT",
    ".claude",
    ".serena",
    "plugins/",
    "scripts/",
    "hooks/",
)

# Path references: `path/to/file.md`, `./skill-name/file`
_PATH_RE = re.compile(r"`(\.?\.?/?[a-zA-Z_][a-zA-Z0-9_-]*(?:/[a-zA-Z0-9_.-]+)+)`")
# Skill references in markdown links: [skill-name](./skill-name)
_SKILL_LINK_RE = re.compile(r"\[([a-z][a-z0-9-]*)\]\(\./([a-z][a-z0-9-]*)\)")
# Direct skill mentions: `skill-name/SKILL.md`
_SKILL_FILE_RE = re.compile(r"`([a-z][a-z0-9-]+)/SKILL\.md`")
# Same shape as above, without the backticks, applied to an extracted path.
_SKILL_FILE_PATH_RE = re.compile(r"[a-z][a-z0-9-]+/SKILL\.md")
# Memory references: read_memory('name.md')
_MEMORY_RE = re.compile(r"read_memory\(['\"]([^'\"]+)['\"]\)")


def _is_non_project_reference(ref: str) -> bool:
    """Return True when *ref* matches a pattern that marks it as external."""
    return any(pattern.search(ref) for pattern in _SKIP_PATTERNS)


def extract_references(content: str) -> list[tuple[str, str, int]]:
    """Extract checkable references from the text of one memory file.

    Lines inside fenced code blocks (delimited by lines starting with three
    backticks) are ignored. On every other line the following are collected,
    in this order:

    * backticked paths that look like project files  -> type ``"path"``
    * markdown links of the form ``[name](./name)``  -> type ``"skill"``
    * backticked ``name/SKILL.md`` mentions           -> type ``"skill"``
    * ``read_memory('name')`` calls                   -> type ``"memory"``

    A backticked ``name/SKILL.md`` is reported once, as a skill reference.
    It is not additionally reported as a path, because skills do not live at
    ``<project root>/name`` and the path check would flag it as stale.

    Args:
        content: Full text of the memory file.

    Returns:
        List of ``(reference, type, line_number)`` with 1-based line numbers.
    """
    refs = []
    in_code_block = False

    for line_num, line in enumerate(content.split("\n"), 1):
        # A fence line toggles the code-block state and carries no references.
        if line.strip().startswith("```"):
            in_code_block = not in_code_block
            continue
        if in_code_block:
            continue

        for match in _PATH_RE.findall(line):
            if _is_non_project_reference(match):
                continue
            if _SKILL_FILE_PATH_RE.fullmatch(match):
                # Handled below as a skill reference; avoid a duplicate entry.
                continue
            if any(marker in match for marker in _PATH_MARKERS):
                refs.append((match, "path", line_num))

        # Only the link target is validated; the link text is ignored.
        for _label, target in _SKILL_LINK_RE.findall(line):
            refs.append((target, "skill", line_num))

        for skill_name in _SKILL_FILE_RE.findall(line):
            refs.append((skill_name, "skill", line_num))

        for memory_name in _MEMORY_RE.findall(line):
            if not _is_non_project_reference(memory_name):
                refs.append((memory_name, "memory", line_num))

    return refs
189+
190+
191+
def validate_reference(
    ref: str, ref_type: str, project_root: Path, skills: set[str], memories_dir: Path
) -> bool:
    """Check whether a single extracted reference still resolves.

    Args:
        ref: Reference text as extracted from the memory.
        ref_type: ``"skill"``, ``"memory"`` or ``"path"``.
        project_root: Directory that relative paths are resolved against.
        skills: Names of the skills that currently exist.
        memories_dir: Directory holding the memory ``*.md`` files.

    Returns:
        True when the reference is valid. References of an unknown type are
        not checked and are reported as valid.
    """
    if ref_type == "skill":
        return ref in skills

    if ref_type == "memory":
        if (memories_dir / ref).exists():
            return True
        # Memories are stored as ``<name>.md`` but are often referred to by
        # bare name (``read_memory('overview')``); accept that form as well.
        return not ref.endswith(".md") and (memories_dir / f"{ref}.md").exists()

    if ref_type == "path":
        clean_ref = ref.removeprefix("./")

        # A bare name or a trailing-slash directory may denote a skill.
        if clean_ref.endswith("/") or "/" not in clean_ref:
            if clean_ref.rstrip("/") in skills:
                return True

        # Otherwise the reference must exist relative to the project root.
        return (project_root / clean_ref).exists()

    return True
218+
219+
220+
def validate_memories(project_root: Path | None = None) -> ValidationResult:
    """Validate every memory file of a project.

    Args:
        project_root: Project directory; when ``None`` it is discovered via
            ``get_project_root()``.

    Returns:
        A ``ValidationResult`` with the number of memories inspected, the
        number without stale references, and one ``StaleReference`` per
        failing reference. An empty result is returned when no project root
        can be determined.
    """
    root = get_project_root() if project_root is None else project_root
    if root is None:
        return ValidationResult()

    outcome = ValidationResult()
    known_skills = discover_skills(root)
    loaded = load_memories(root)
    memory_dir = root / ".serena" / "memories"

    outcome.total_memories = len(loaded)

    for memory_path, text in loaded:
        text_lines = text.split("\n")
        stale = [
            StaleReference(
                memory_file=memory_path.name,
                reference=ref,
                reference_type=ref_type,
                line_number=line_num,
                # Keep at most the first 100 characters of the offending line.
                context=(text_lines[line_num - 1] if line_num <= len(text_lines) else "")[:100],
            )
            for ref, ref_type, line_num in extract_references(text)
            if not validate_reference(ref, ref_type, root, known_skills, memory_dir)
        ]

        if stale:
            outcome.stale_references.extend(stale)
        else:
            outcome.valid_memories += 1

    return outcome
260+
261+
262+
def format_report(result: ValidationResult) -> str:
    """Render a validation result as a human-readable markdown report.

    Args:
        result: Outcome of a validation run.

    Returns:
        An empty string when there are no memories at all, a one-line
        confirmation when every memory is valid, and otherwise a report that
        lists the stale references grouped by memory file.
    """
    if result.is_valid:
        if result.total_memories == 0:
            # Nothing was checked, so there is nothing to say.
            return ""
        return f"✓ All {result.total_memories} memories are valid"

    # Group the findings per memory file, keeping first-seen order.
    grouped: dict[str, list[StaleReference]] = {}
    for stale in result.stale_references:
        if stale.memory_file not in grouped:
            grouped[stale.memory_file] = []
        grouped[stale.memory_file].append(stale)

    report = [
        f"## ⚠️ Memory Validation: {len(result.stale_references)} stale reference(s)",
        "",
    ]
    for memory_file, entries in grouped.items():
        report.append(f"**{memory_file}:**")
        report.extend(
            f" - Line {entry.line_number}: `{entry.reference}` ({entry.reference_type})"
            for entry in entries
        )
        report.append("")

    report.append("*Run `/checkpoint` after fixing to update memories.*")

    return "\n".join(report)
288+
289+
290+
def format_json(result: ValidationResult) -> str:
    """Serialize a validation result as an indented JSON document.

    Args:
        result: Outcome of a validation run.

    Returns:
        JSON text with the keys ``valid``, ``total_memories``,
        ``valid_memories``, ``stale_count`` and ``stale_references`` (one
        object with ``file``, ``reference``, ``type`` and ``line`` per entry).
    """
    stale_entries = []
    for stale in result.stale_references:
        stale_entries.append(
            {
                "file": stale.memory_file,
                "reference": stale.reference,
                "type": stale.reference_type,
                "line": stale.line_number,
            }
        )

    payload = {
        "valid": result.is_valid,
        "total_memories": result.total_memories,
        "valid_memories": result.valid_memories,
        "stale_count": len(result.stale_references),
        "stale_references": stale_entries,
    }
    return json.dumps(payload, indent=2)
310+
311+
312+
def main():
    """Command-line entry point.

    Flags:
        --json   Emit the result (or the error) as JSON.
        --quiet  Print nothing and exit 0 when every memory is valid.

    Exit codes: 0 when all memories are valid, 1 when stale references were
    found, 2 when validation itself raised an error.
    """
    import argparse

    cli = argparse.ArgumentParser(description="Validate memory consistency")
    cli.add_argument("--json", action="store_true", help="Output as JSON")
    cli.add_argument("--quiet", action="store_true", help="Only output if issues found")
    args = cli.parse_args()

    try:
        result = validate_memories(get_project_root())

        if args.quiet and result.is_valid:
            sys.exit(0)

        render = format_json if args.json else format_report
        output = render(result)

        # The text report is empty when there are no memories at all.
        if output:
            print(output)

        sys.exit(0 if result.is_valid else 1)

    except Exception as e:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the exits above are never intercepted here.
        if not args.json:
            print(f"Error: {e}", file=sys.stderr)
        else:
            print(json.dumps({"error": str(e)}))
        sys.exit(2)


if __name__ == "__main__":
    main()
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env bash
# memory_validator.sh - SessionStart hook that checks memory consistency.
#
# Runs memory_validator.py (located next to this script) in quiet mode so
# that stale references in .serena/memories to skills, files, or other
# memories are reported.
#
# Always exits 0: the hook is informational and must never block a session.

set -e

# Resolve the directory of this script, independent of the caller's cwd.
HOOK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VALIDATOR="$HOOK_DIR/memory_validator.py"

# Nothing to do when the validator script is not installed.
[[ -f "$VALIDATOR" ]] || exit 0

# Quiet mode prints only when issues are found; stderr is discarded and a
# non-zero status from the validator is ignored.
python3 "$VALIDATOR" --quiet 2>/dev/null || true

exit 0

0 commit comments

Comments
 (0)