From 8f6258e016ac772b8a6889022dc045cc79b83bf2 Mon Sep 17 00:00:00 2001 From: gdilla Date: Fri, 20 Mar 2026 13:22:07 -0700 Subject: [PATCH] Consolidate 4 skills into 1 with progressive disclosure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge codebase-memory-exploring, codebase-memory-tracing, codebase-memory-quality, and codebase-memory-reference into a single codebase-memory skill. Motivation: - 4 skills = 4 description entries at session startup when 1 suffices - Overlapping content (gotchas, tips) scattered with no single source - Per Anthropic's Agent Skills best practices, skills should use progressive disclosure with concise SKILL.md + reference files What changed: - cli.c: 4 embedded skill strings consolidated into 1 with decision matrix, all workflows, and gotchas - cli.h: CBM_SKILL_COUNT 4 → 1 - test_cli.c: updated to test single consolidated skill covering all capabilities from all 4 former skills Rebased onto current main — dropped cmd/assets/skills/ changes since that directory was removed upstream (skills are now embedded in cli.c only). --- src/cli/cli.c | 153 +++++++++++++++++++---------------------------- src/cli/cli.h | 4 +- tests/test_cli.c | 59 ++++++++---------- 3 files changed, 88 insertions(+), 128 deletions(-) diff --git a/src/cli/cli.c b/src/cli/cli.c index 05c9a560..483673d1 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -315,112 +315,82 @@ int cbm_replace_binary(const char *path, const unsigned char *data, int len, int } /* ── Skill file content (embedded) ────────────────────────────── */ +/* Consolidated from 4 separate skills into 1 with progressive disclosure. + * This embedded version is the single source of truth for the CLI installer. */ -static const char skill_exploring_content[] = +static const char skill_content[] = "---\n" - "name: codebase-memory-exploring\n" - "description: Codebase knowledge graph expert. 
ALWAYS invoke this skill when the user " - "explores code, searches for functions/classes/routes, asks about architecture, or needs " - "codebase orientation. Do not use Grep, Glob, or file search directly — use " - "codebase-memory-mcp search_graph and get_architecture first.\n" + "name: codebase-memory\n" + "description: Use the codebase knowledge graph for structural code queries. " + "Triggers on: explore the codebase, understand the architecture, what functions exist, " + "show me the structure, who calls this function, what does X call, trace the call chain, " + "find callers of, show dependencies, impact analysis, dead code, unused functions, " + "high fan-out, refactor candidates, code quality audit, graph query syntax, " + "Cypher query examples, edge types, how to use search_graph.\n" "---\n" "\n" - "# Codebase Exploration\n" + "# Codebase Memory — Knowledge Graph Tools\n" "\n" - "Use codebase-memory-mcp tools to explore the codebase:\n" + "Graph tools return precise structural results in ~500 tokens vs ~80K for grep.\n" "\n" - "## Workflow\n" - "1. `get_graph_schema` — understand what node/edge types exist\n" - "2. `search_graph` — find functions, classes, routes by pattern\n" - "3. `get_code_snippet` — read specific function implementations\n" - "4. `get_architecture` — get high-level project summary\n" + "## Quick Decision Matrix\n" "\n" - "## Tips\n" - "- Use `search_graph(name_pattern=\".*Pattern.*\")` for fuzzy matching\n" - "- Use `search_graph(label=\"Route\")` to find HTTP routes\n" - "- Use `search_graph(label=\"Function\", file_pattern=\"*.go\")` to scope by language\n"; - -static const char skill_tracing_content[] = - "---\n" - "name: codebase-memory-tracing\n" - "description: Call chain and dependency expert. ALWAYS invoke this skill when the user " - "asks who calls a function, what a function calls, needs impact analysis, or traces " - "dependencies. 
Do not grep for function names directly — use codebase-memory-mcp " - "trace_call_path first.\n" - "---\n" - "\n" - "# Call Tracing & Impact Analysis\n" - "\n" - "Use codebase-memory-mcp tools to trace call paths:\n" + "| Question | Tool call |\n" + "|----------|----------|\n" + "| Who calls X? | `trace_call_path(direction=\"inbound\")` |\n" + "| What does X call? | `trace_call_path(direction=\"outbound\")` |\n" + "| Full call context | `trace_call_path(direction=\"both\")` |\n" + "| Find by name pattern | `search_graph(name_pattern=\"...\")` |\n" + "| Dead code | `search_graph(max_degree=0, exclude_entry_points=true)` |\n" + "| Cross-service edges | `query_graph` with Cypher |\n" + "| Impact of local changes | `detect_changes()` |\n" + "| Risk-classified trace | `trace_call_path(risk_labels=true)` |\n" + "| Text search | `search_code` or Grep |\n" "\n" - "## Workflow\n" - "1. `search_graph(name_pattern=\".*FuncName.*\")` — find exact function name\n" - "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + " - "callees\n" - "3. `detect_changes` — find what changed and assess risk_labels\n" - "\n" - "## Direction Options\n" - "- `inbound` — who calls this function?\n" - "- `outbound` — what does this function call?\n" - "- `both` — full context (callers + callees)\n"; - -static const char skill_quality_content[] = - "---\n" - "name: codebase-memory-quality\n" - "description: Code quality analysis expert. ALWAYS invoke this skill when the user asks " - "about dead code, unused functions, complexity, refactor candidates, or cleanup " - "opportunities. Do not search files manually — use codebase-memory-mcp search_graph " - "with degree filters first.\n" - "---\n" + "## Exploration Workflow\n" + "1. `list_projects` — check if project is indexed\n" + "2. `get_graph_schema` — understand node/edge types\n" + "3. `search_graph(label=\"Function\", name_pattern=\".*Pattern.*\")` — find code\n" + "4. 
`get_code_snippet(qualified_name=\"project.path.FuncName\")` — read source\n" "\n" - "# Code Quality Analysis\n" + "## Tracing Workflow\n" + "1. `search_graph(name_pattern=\".*FuncName.*\")` — discover exact name\n" + "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\", depth=3)` — trace\n" + "3. `detect_changes()` — map git diff to affected symbols\n" "\n" - "Use codebase-memory-mcp tools for quality analysis:\n" + "## Quality Analysis\n" + "- Dead code: `search_graph(max_degree=0, exclude_entry_points=true)`\n" + "- High fan-out: `search_graph(min_degree=10, relationship=\"CALLS\", direction=\"outbound\")`\n" + "- High fan-in: `search_graph(min_degree=10, relationship=\"CALLS\", direction=\"inbound\")`\n" + "- Change coupling: `query_graph(query=\"MATCH (a)-[r:FILE_CHANGES_WITH]->(b) " + "WHERE r.coupling_score >= 0.5 RETURN a.name, b.name, r.coupling_score\")`\n" "\n" - "## Dead Code Detection\n" - "- `search_graph(max_degree=0, exclude_entry_points=true)` — find unreferenced functions\n" - "- `search_graph(max_degree=0, label=\"Function\")` — unreferenced functions only\n" - "\n" - "## Complexity Analysis\n" - "- `search_graph(min_degree=10)` — high fan-out functions\n" - "- `search_graph(label=\"Function\", sort_by=\"degree\")` — most-connected functions\n"; - -static const char skill_reference_content[] = - "---\n" - "name: codebase-memory-reference\n" - "description: Codebase-memory-mcp reference guide. ALWAYS invoke this skill when the user " - "asks about MCP tools, graph queries, Cypher syntax, edge types, or how to use the " - "knowledge graph. 
Do not guess tool parameters — load this reference first.\n" - "---\n" - "\n" - "# Codebase Memory MCP Reference\n" - "\n" - "## 14 total MCP Tools\n" - "- `index_repository` — index a project\n" - "- `index_status` — check indexing progress\n" - "- `detect_changes` — find what changed since last index\n" - "- `search_graph` — find nodes by pattern\n" - "- `search_code` — text search in source\n" - "- `query_graph` — Cypher query language\n" - "- `trace_call_path` — call chain traversal\n" - "- `get_code_snippet` — read function source\n" - "- `get_graph_schema` — node/edge type catalog\n" - "- `get_architecture` — high-level summary\n" - "- `list_projects` — indexed projects\n" - "- `delete_project` — remove a project\n" - "- `manage_adr` — architecture decision records\n" - "- `ingest_traces` — import runtime traces\n" + "## 14 MCP Tools\n" + "`index_repository`, `index_status`, `list_projects`, `delete_project`,\n" + "`search_graph`, `search_code`, `trace_call_path`, `detect_changes`,\n" + "`query_graph`, `get_graph_schema`, `get_code_snippet`, `get_architecture`,\n" + "`manage_adr`, `ingest_traces`\n" "\n" "## Edge Types\n" "CALLS, HTTP_CALLS, ASYNC_CALLS, IMPORTS, DEFINES, DEFINES_METHOD,\n" - "HANDLES, IMPLEMENTS, CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n" + "HANDLES, IMPLEMENTS, OVERRIDE, USAGE, FILE_CHANGES_WITH,\n" + "CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n" "\n" - "## Cypher Examples\n" + "## Cypher Examples (for query_graph)\n" "```\n" + "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path, r.confidence LIMIT 20\n" "MATCH (f:Function) WHERE f.name =~ '.*Handler.*' RETURN f.name, f.file_path\n" - "MATCH (a)-[r:CALLS]->(b) WHERE a.name = 'main' RETURN b.name\n" - "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path\n" - "```\n"; + "MATCH (a)-[r:FILE_CHANGES_WITH]->(b) WHERE r.coupling_score >= 0.5 " + "RETURN a.name, b.name\n" + "```\n" + "\n" + "## Gotchas\n" + "1. 
`search_graph(relationship=\"HTTP_CALLS\")` filters nodes by degree — " + "use `query_graph` with Cypher to see actual edges.\n" + "2. `query_graph` has a 200-row cap — use `search_graph` with degree filters for counting.\n" + "3. `trace_call_path` needs exact names — use `search_graph(name_pattern=...)` first.\n" + "4. `direction=\"outbound\"` misses cross-service callers — use `direction=\"both\"`.\n" + "5. Results default to 10 per page — check `has_more` and use `offset`.\n"; static const char codex_instructions_content[] = "# Codebase Knowledge Graph\n" @@ -437,10 +407,7 @@ static const char codex_instructions_content[] = "Always prefer graph tools over grep for code discovery.\n"; static const cbm_skill_t skills[CBM_SKILL_COUNT] = { - {"codebase-memory-exploring", skill_exploring_content}, - {"codebase-memory-tracing", skill_tracing_content}, - {"codebase-memory-quality", skill_quality_content}, - {"codebase-memory-reference", skill_reference_content}, + {"codebase-memory", skill_content}, }; const cbm_skill_t *cbm_get_skills(void) { diff --git a/src/cli/cli.h b/src/cli/cli.h index efa022e7..7f3f18cd 100644 --- a/src/cli/cli.h +++ b/src/cli/cli.h @@ -53,11 +53,11 @@ int cbm_replace_binary(const char *path, const unsigned char *data, int len, int /* ── Skill file management ────────────────────────────────────── */ /* Number of skill files. */ -#define CBM_SKILL_COUNT 4 +#define CBM_SKILL_COUNT 1 /* Skill name/content pair. */ typedef struct { - const char *name; /* e.g. "codebase-memory-exploring" */ + const char *name; /* e.g. 
"codebase-memory" */ const char *content; /* full SKILL.md content */ } cbm_skill_t; diff --git a/tests/test_cli.c b/tests/test_cli.c index 0e2434cc..b44a4ff4 100644 --- a/tests/test_cli.c +++ b/tests/test_cli.c @@ -533,41 +533,34 @@ TEST(cli_remove_old_monolithic_skill) { } TEST(cli_skill_files_content) { - /* Port of TestSkillFilesContent */ + /* Consolidated skill: all 4 former skills merged into one */ const cbm_skill_t *sk = cbm_get_skills(); - ASSERT_EQ(CBM_SKILL_COUNT, 4); + ASSERT_EQ(CBM_SKILL_COUNT, 1); + + /* The single consolidated skill must cover all use cases */ + ASSERT(strcmp(sk[0].name, "codebase-memory") == 0); + + /* Exploring capabilities */ + ASSERT(strstr(sk[0].content, "search_graph") != NULL); + ASSERT(strstr(sk[0].content, "get_graph_schema") != NULL); + + /* Tracing capabilities */ + ASSERT(strstr(sk[0].content, "trace_call_path") != NULL); + ASSERT(strstr(sk[0].content, "direction") != NULL); + ASSERT(strstr(sk[0].content, "detect_changes") != NULL); + + /* Quality capabilities */ + ASSERT(strstr(sk[0].content, "max_degree=0") != NULL); + ASSERT(strstr(sk[0].content, "exclude_entry_points") != NULL); + + /* Reference capabilities */ + ASSERT(strstr(sk[0].content, "query_graph") != NULL); + ASSERT(strstr(sk[0].content, "Cypher") != NULL); + ASSERT(strstr(sk[0].content, "14 MCP Tools") != NULL); + + /* Gotchas section (new — highest-value content per Anthropic best practices) */ + ASSERT(strstr(sk[0].content, "Gotchas") != NULL); - /* Check exploring skill */ - bool found_exploring = false, found_tracing = false; - bool found_quality = false, found_reference = false; - for (int i = 0; i < CBM_SKILL_COUNT; i++) { - if (strcmp(sk[i].name, "codebase-memory-exploring") == 0) { - found_exploring = true; - ASSERT(strstr(sk[i].content, "search_graph") != NULL); - ASSERT(strstr(sk[i].content, "get_graph_schema") != NULL); - } - if (strcmp(sk[i].name, "codebase-memory-tracing") == 0) { - found_tracing = true; - ASSERT(strstr(sk[i].content, 
"trace_call_path") != NULL); - ASSERT(strstr(sk[i].content, "direction") != NULL); - ASSERT(strstr(sk[i].content, "detect_changes") != NULL); - } - if (strcmp(sk[i].name, "codebase-memory-quality") == 0) { - found_quality = true; - ASSERT(strstr(sk[i].content, "max_degree=0") != NULL); - ASSERT(strstr(sk[i].content, "exclude_entry_points") != NULL); - } - if (strcmp(sk[i].name, "codebase-memory-reference") == 0) { - found_reference = true; - ASSERT(strstr(sk[i].content, "query_graph") != NULL); - ASSERT(strstr(sk[i].content, "Cypher") != NULL); - ASSERT(strstr(sk[i].content, "14 total") != NULL); - } - } - ASSERT_TRUE(found_exploring); - ASSERT_TRUE(found_tracing); - ASSERT_TRUE(found_quality); - ASSERT_TRUE(found_reference); PASS(); }