From 8f6258e016ac772b8a6889022dc045cc79b83bf2 Mon Sep 17 00:00:00 2001
From: gdilla <djinla@gmail.com>
Date: Fri, 20 Mar 2026 13:22:07 -0700
Subject: [PATCH] Consolidate 4 skills into 1 with progressive disclosure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Merge codebase-memory-exploring, codebase-memory-tracing,
codebase-memory-quality, and codebase-memory-reference into a single
codebase-memory skill.

Motivation:
- 4 skills = 4 description entries at session startup when 1 suffices
- Overlapping content (gotchas, tips) was scattered across the four
  skills with no single source of truth
- Per Anthropic's Agent Skills best practices, skills should use
  progressive disclosure with concise SKILL.md + reference files

What changed:
- cli.c: 4 embedded skill strings consolidated into 1 with decision
  matrix, all workflows, and gotchas
- cli.h: CBM_SKILL_COUNT 4 → 1
- test_cli.c: updated to test single consolidated skill covering all
  capabilities from all 4 former skills

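Rebased onto current main — dropped the cmd/assets/skills/ changes since
that directory was removed upstream (skills are now embedded in cli.c
only). The consolidated skill is therefore a single embedded SKILL.md
string; no separate reference files are added by this patch.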
---
 src/cli/cli.c    | 153 +++++++++++++++++++----------------------------
 src/cli/cli.h    |   4 +-
 tests/test_cli.c |  59 ++++++++----------
 3 files changed, 88 insertions(+), 128 deletions(-)

diff --git a/src/cli/cli.c b/src/cli/cli.c
index 05c9a560..483673d1 100644
--- a/src/cli/cli.c
+++ b/src/cli/cli.c
@@ -315,112 +315,82 @@ int cbm_replace_binary(const char *path, const unsigned char *data, int len, int
 }
 
 /* ── Skill file content (embedded) ────────────────────────────── */
+/* Consolidated from 4 separate skills into 1 with progressive disclosure.
+ * This embedded version is the single source of truth for the CLI installer. */
 
-static const char skill_exploring_content[] =
+static const char skill_content[] =
     "---\n"
-    "name: codebase-memory-exploring\n"
-    "description: Codebase knowledge graph expert. ALWAYS invoke this skill when the user "
-    "explores code, searches for functions/classes/routes, asks about architecture, or needs "
-    "codebase orientation. Do not use Grep, Glob, or file search directly — use "
-    "codebase-memory-mcp search_graph and get_architecture first.\n"
+    "name: codebase-memory\n"
+    "description: Use the codebase knowledge graph for structural code queries. "
+    "Triggers on: explore the codebase, understand the architecture, what functions exist, "
+    "show me the structure, who calls this function, what does X call, trace the call chain, "
+    "find callers of, show dependencies, impact analysis, dead code, unused functions, "
+    "high fan-out, refactor candidates, code quality audit, graph query syntax, "
+    "Cypher query examples, edge types, how to use search_graph.\n"
     "---\n"
     "\n"
-    "# Codebase Exploration\n"
+    "# Codebase Memory — Knowledge Graph Tools\n"
     "\n"
-    "Use codebase-memory-mcp tools to explore the codebase:\n"
+    "Graph tools return precise structural results in ~500 tokens vs ~80K for grep.\n"
     "\n"
-    "## Workflow\n"
-    "1. `get_graph_schema` — understand what node/edge types exist\n"
-    "2. `search_graph` — find functions, classes, routes by pattern\n"
-    "3. `get_code_snippet` — read specific function implementations\n"
-    "4. `get_architecture` — get high-level project summary\n"
+    "## Quick Decision Matrix\n"
     "\n"
-    "## Tips\n"
-    "- Use `search_graph(name_pattern=\".*Pattern.*\")` for fuzzy matching\n"
-    "- Use `search_graph(label=\"Route\")` to find HTTP routes\n"
-    "- Use `search_graph(label=\"Function\", file_pattern=\"*.go\")` to scope by language\n";
-
-static const char skill_tracing_content[] =
-    "---\n"
-    "name: codebase-memory-tracing\n"
-    "description: Call chain and dependency expert. ALWAYS invoke this skill when the user "
-    "asks who calls a function, what a function calls, needs impact analysis, or traces "
-    "dependencies. Do not grep for function names directly — use codebase-memory-mcp "
-    "trace_call_path first.\n"
-    "---\n"
-    "\n"
-    "# Call Tracing & Impact Analysis\n"
-    "\n"
-    "Use codebase-memory-mcp tools to trace call paths:\n"
+    "| Question | Tool call |\n"
+    "|----------|----------|\n"
+    "| Who calls X? | `trace_call_path(direction=\"inbound\")` |\n"
+    "| What does X call? | `trace_call_path(direction=\"outbound\")` |\n"
+    "| Full call context | `trace_call_path(direction=\"both\")` |\n"
+    "| Find by name pattern | `search_graph(name_pattern=\"...\")` |\n"
+    "| Dead code | `search_graph(max_degree=0, exclude_entry_points=true)` |\n"
+    "| Cross-service edges | `query_graph` with Cypher |\n"
+    "| Impact of local changes | `detect_changes()` |\n"
+    "| Risk-classified trace | `trace_call_path(risk_labels=true)` |\n"
+    "| Text search | `search_code` or Grep |\n"
     "\n"
-    "## Workflow\n"
-    "1. `search_graph(name_pattern=\".*FuncName.*\")` — find exact function name\n"
-    "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\")` — trace callers + "
-    "callees\n"
-    "3. `detect_changes` — find what changed and assess risk_labels\n"
-    "\n"
-    "## Direction Options\n"
-    "- `inbound` — who calls this function?\n"
-    "- `outbound` — what does this function call?\n"
-    "- `both` — full context (callers + callees)\n";
-
-static const char skill_quality_content[] =
-    "---\n"
-    "name: codebase-memory-quality\n"
-    "description: Code quality analysis expert. ALWAYS invoke this skill when the user asks "
-    "about dead code, unused functions, complexity, refactor candidates, or cleanup "
-    "opportunities. Do not search files manually — use codebase-memory-mcp search_graph "
-    "with degree filters first.\n"
-    "---\n"
+    "## Exploration Workflow\n"
+    "1. `list_projects` — check if project is indexed\n"
+    "2. `get_graph_schema` — understand node/edge types\n"
+    "3. `search_graph(label=\"Function\", name_pattern=\".*Pattern.*\")` — find code\n"
+    "4. `get_code_snippet(qualified_name=\"project.path.FuncName\")` — read source\n"
     "\n"
-    "# Code Quality Analysis\n"
+    "## Tracing Workflow\n"
+    "1. `search_graph(name_pattern=\".*FuncName.*\")` — discover exact name\n"
+    "2. `trace_call_path(function_name=\"FuncName\", direction=\"both\", depth=3)` — trace\n"
+    "3. `detect_changes()` — map git diff to affected symbols\n"
     "\n"
-    "Use codebase-memory-mcp tools for quality analysis:\n"
+    "## Quality Analysis\n"
+    "- Dead code: `search_graph(max_degree=0, exclude_entry_points=true)`\n"
+    "- High fan-out: `search_graph(min_degree=10, relationship=\"CALLS\", direction=\"outbound\")`\n"
+    "- High fan-in: `search_graph(min_degree=10, relationship=\"CALLS\", direction=\"inbound\")`\n"
+    "- Change coupling: `query_graph(query=\"MATCH (a)-[r:FILE_CHANGES_WITH]->(b) "
+    "WHERE r.coupling_score >= 0.5 RETURN a.name, b.name, r.coupling_score\")`\n"
     "\n"
-    "## Dead Code Detection\n"
-    "- `search_graph(max_degree=0, exclude_entry_points=true)` — find unreferenced functions\n"
-    "- `search_graph(max_degree=0, label=\"Function\")` — unreferenced functions only\n"
-    "\n"
-    "## Complexity Analysis\n"
-    "- `search_graph(min_degree=10)` — high fan-out functions\n"
-    "- `search_graph(label=\"Function\", sort_by=\"degree\")` — most-connected functions\n";
-
-static const char skill_reference_content[] =
-    "---\n"
-    "name: codebase-memory-reference\n"
-    "description: Codebase-memory-mcp reference guide. ALWAYS invoke this skill when the user "
-    "asks about MCP tools, graph queries, Cypher syntax, edge types, or how to use the "
-    "knowledge graph. Do not guess tool parameters — load this reference first.\n"
-    "---\n"
-    "\n"
-    "# Codebase Memory MCP Reference\n"
-    "\n"
-    "## 14 total MCP Tools\n"
-    "- `index_repository` — index a project\n"
-    "- `index_status` — check indexing progress\n"
-    "- `detect_changes` — find what changed since last index\n"
-    "- `search_graph` — find nodes by pattern\n"
-    "- `search_code` — text search in source\n"
-    "- `query_graph` — Cypher query language\n"
-    "- `trace_call_path` — call chain traversal\n"
-    "- `get_code_snippet` — read function source\n"
-    "- `get_graph_schema` — node/edge type catalog\n"
-    "- `get_architecture` — high-level summary\n"
-    "- `list_projects` — indexed projects\n"
-    "- `delete_project` — remove a project\n"
-    "- `manage_adr` — architecture decision records\n"
-    "- `ingest_traces` — import runtime traces\n"
+    "## 14 MCP Tools\n"
+    "`index_repository`, `index_status`, `list_projects`, `delete_project`,\n"
+    "`search_graph`, `search_code`, `trace_call_path`, `detect_changes`,\n"
+    "`query_graph`, `get_graph_schema`, `get_code_snippet`, `get_architecture`,\n"
+    "`manage_adr`, `ingest_traces`\n"
     "\n"
     "## Edge Types\n"
     "CALLS, HTTP_CALLS, ASYNC_CALLS, IMPORTS, DEFINES, DEFINES_METHOD,\n"
-    "HANDLES, IMPLEMENTS, CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n"
+    "HANDLES, IMPLEMENTS, OVERRIDE, USAGE, FILE_CHANGES_WITH,\n"
+    "CONTAINS_FILE, CONTAINS_FOLDER, CONTAINS_PACKAGE\n"
     "\n"
-    "## Cypher Examples\n"
+    "## Cypher Examples (for query_graph)\n"
     "```\n"
+    "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path, r.confidence LIMIT 20\n"
     "MATCH (f:Function) WHERE f.name =~ '.*Handler.*' RETURN f.name, f.file_path\n"
-    "MATCH (a)-[r:CALLS]->(b) WHERE a.name = 'main' RETURN b.name\n"
-    "MATCH (a)-[r:HTTP_CALLS]->(b) RETURN a.name, b.name, r.url_path\n"
-    "```\n";
+    "MATCH (a)-[r:FILE_CHANGES_WITH]->(b) WHERE r.coupling_score >= 0.5 "
+    "RETURN a.name, b.name\n"
+    "```\n"
+    "\n"
+    "## Gotchas\n"
+    "1. `search_graph(relationship=\"HTTP_CALLS\")` filters nodes by degree — "
+    "use `query_graph` with Cypher to see actual edges.\n"
+    "2. `query_graph` has a 200-row cap — use `search_graph` with degree filters for counting.\n"
+    "3. `trace_call_path` needs exact names — use `search_graph(name_pattern=...)` first.\n"
+    "4. `direction=\"outbound\"` misses cross-service callers — use `direction=\"both\"`.\n"
+    "5. Results default to 10 per page — check `has_more` and use `offset`.\n";
 
 static const char codex_instructions_content[] =
     "# Codebase Knowledge Graph\n"
@@ -437,10 +407,7 @@ static const char codex_instructions_content[] =
     "Always prefer graph tools over grep for code discovery.\n";
 
 static const cbm_skill_t skills[CBM_SKILL_COUNT] = {
-    {"codebase-memory-exploring", skill_exploring_content},
-    {"codebase-memory-tracing", skill_tracing_content},
-    {"codebase-memory-quality", skill_quality_content},
-    {"codebase-memory-reference", skill_reference_content},
+    {"codebase-memory", skill_content},
 };
 
 const cbm_skill_t *cbm_get_skills(void) {
diff --git a/src/cli/cli.h b/src/cli/cli.h
index efa022e7..7f3f18cd 100644
--- a/src/cli/cli.h
+++ b/src/cli/cli.h
@@ -53,11 +53,11 @@ int cbm_replace_binary(const char *path, const unsigned char *data, int len, int
 /* ── Skill file management ────────────────────────────────────── */
 
 /* Number of skill files. */
-#define CBM_SKILL_COUNT 4
+#define CBM_SKILL_COUNT 1
 
 /* Skill name/content pair. */
 typedef struct {
-    const char *name;    /* e.g. "codebase-memory-exploring" */
+    const char *name;    /* e.g. "codebase-memory" */
     const char *content; /* full SKILL.md content */
 } cbm_skill_t;
 
diff --git a/tests/test_cli.c b/tests/test_cli.c
index 0e2434cc..b44a4ff4 100644
--- a/tests/test_cli.c
+++ b/tests/test_cli.c
@@ -533,41 +533,34 @@ TEST(cli_remove_old_monolithic_skill) {
 }
 
 TEST(cli_skill_files_content) {
-    /* Port of TestSkillFilesContent */
+    /* Consolidated skill: all 4 former skills merged into one */
     const cbm_skill_t *sk = cbm_get_skills();
-    ASSERT_EQ(CBM_SKILL_COUNT, 4);
+    ASSERT_EQ(CBM_SKILL_COUNT, 1);
+
+    /* The single consolidated skill must cover all use cases */
+    ASSERT(strcmp(sk[0].name, "codebase-memory") == 0);
+
+    /* Exploring capabilities */
+    ASSERT(strstr(sk[0].content, "search_graph") != NULL);
+    ASSERT(strstr(sk[0].content, "get_graph_schema") != NULL);
+
+    /* Tracing capabilities */
+    ASSERT(strstr(sk[0].content, "trace_call_path") != NULL);
+    ASSERT(strstr(sk[0].content, "direction") != NULL);
+    ASSERT(strstr(sk[0].content, "detect_changes") != NULL);
+
+    /* Quality capabilities */
+    ASSERT(strstr(sk[0].content, "max_degree=0") != NULL);
+    ASSERT(strstr(sk[0].content, "exclude_entry_points") != NULL);
+
+    /* Reference capabilities */
+    ASSERT(strstr(sk[0].content, "query_graph") != NULL);
+    ASSERT(strstr(sk[0].content, "Cypher") != NULL);
+    ASSERT(strstr(sk[0].content, "14 MCP Tools") != NULL);
+
+    /* Gotchas section (new — highest-value content per Anthropic best practices) */
+    ASSERT(strstr(sk[0].content, "Gotchas") != NULL);
 
-    /* Check exploring skill */
-    bool found_exploring = false, found_tracing = false;
-    bool found_quality = false, found_reference = false;
-    for (int i = 0; i < CBM_SKILL_COUNT; i++) {
-        if (strcmp(sk[i].name, "codebase-memory-exploring") == 0) {
-            found_exploring = true;
-            ASSERT(strstr(sk[i].content, "search_graph") != NULL);
-            ASSERT(strstr(sk[i].content, "get_graph_schema") != NULL);
-        }
-        if (strcmp(sk[i].name, "codebase-memory-tracing") == 0) {
-            found_tracing = true;
-            ASSERT(strstr(sk[i].content, "trace_call_path") != NULL);
-            ASSERT(strstr(sk[i].content, "direction") != NULL);
-            ASSERT(strstr(sk[i].content, "detect_changes") != NULL);
-        }
-        if (strcmp(sk[i].name, "codebase-memory-quality") == 0) {
-            found_quality = true;
-            ASSERT(strstr(sk[i].content, "max_degree=0") != NULL);
-            ASSERT(strstr(sk[i].content, "exclude_entry_points") != NULL);
-        }
-        if (strcmp(sk[i].name, "codebase-memory-reference") == 0) {
-            found_reference = true;
-            ASSERT(strstr(sk[i].content, "query_graph") != NULL);
-            ASSERT(strstr(sk[i].content, "Cypher") != NULL);
-            ASSERT(strstr(sk[i].content, "14 total") != NULL);
-        }
-    }
-    ASSERT_TRUE(found_exploring);
-    ASSERT_TRUE(found_tracing);
-    ASSERT_TRUE(found_quality);
-    ASSERT_TRUE(found_reference);
     PASS();
 }