From e8e00d5c12ef998cfcfa9121ea2c07d45958882f Mon Sep 17 00:00:00 2001
From: Muhammad Ubaid Raza <mubaidr@gmail.com>
Date: Sat, 30 May 2026 00:40:57 +0500
Subject: [PATCH 1/2] chore(deps, docs): bump marketplace version to 1.46.0

- Refine execution priority guidance in agent documentation
- Improve discovery guidance
- Improve context cache guidance
- Add script usage guidelines to agent documentation
- Simplify agent input references
---
 .github/plugin/marketplace.json             |   2 +-
 agents/gem-browser-tester.agent.md          |  16 +-
 agents/gem-code-simplifier.agent.md         |  31 +-
 agents/gem-critic.agent.md                  |  17 +-
 agents/gem-debugger.agent.md                |  16 +-
 agents/gem-designer-mobile.agent.md         |  16 +-
 agents/gem-designer.agent.md                |  16 +-
 agents/gem-devops.agent.md                  |  31 +-
 agents/gem-documentation-writer.agent.md    |  30 +-
 agents/gem-implementer-mobile.agent.md      |  31 +-
 agents/gem-implementer.agent.md             |  31 +-
 agents/gem-mobile-tester.agent.md           |  16 +-
 agents/gem-orchestrator.agent.md            | 370 +++++---------------
 agents/gem-planner.agent.md                 | 345 +++++++++++++++++-
 agents/gem-researcher.agent.md              |  24 +-
 agents/gem-reviewer.agent.md                |  22 +-
 agents/gem-skill-creator.agent.md           |  31 +-
 plugins/gem-team/.github/plugin/plugin.json |   2 +-
 18 files changed, 552 insertions(+), 495 deletions(-)

diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
index 2d1b29a1a..618fc7e21 100644
--- a/.github/plugin/marketplace.json
+++ b/.github/plugin/marketplace.json
@@ -359,7 +359,7 @@
       "name": "gem-team",
       "source": "gem-team",
       "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.",
-      "version": "1.42.0"
+      "version": "1.46.0"
     },
     {
       "name": "git-ape",
diff --git a/agents/gem-browser-tester.agent.md b/agents/gem-browser-tester.agent.md
index ff329c084..3ad37798d 100644
--- a/agents/gem-browser-tester.agent.md
+++ b/agents/gem-browser-tester.agent.md
@@ -103,13 +103,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-code-simplifier.agent.md b/agents/gem-code-simplifier.agent.md
index 3eedb875d..7bd7f6325 100644
--- a/agents/gem-code-simplifier.agent.md
+++ b/agents/gem-code-simplifier.agent.md
@@ -109,13 +109,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
@@ -127,19 +129,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 - Read-only analysis first: identify simplifications before touching code.
 - Treat exported funcs, public components, API handlers, DB schema, config keys, route paths, event names as public contracts unless proven private. Do not rename/remove without explicit permission.
 
-### Script Usage
-
-Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers.
-
-Do not use scripts for normal code implementation.
-
-Script rules:
-
-- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`.
-- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`.
-- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits.
-- Read/write only explicit paths from args.
-- Test on sample data before full execution.
-- Document purpose, inputs, outputs, and usage.
-
 </rules>
diff --git a/agents/gem-critic.agent.md b/agents/gem-critic.agent.md
index ccc427a78..984c7e971 100644
--- a/agents/gem-critic.agent.md
+++ b/agents/gem-critic.agent.md
@@ -37,6 +37,7 @@ Consult Knowledge Sources when relevant.
 - Init
   - Read `docs/plan/{plan_id}/context_envelope.json` at start; read it in parallel with required agent inputs. Use `research_digest.relevant_files` as the file shortlist. Treat envelope data as a context cache.
   - Read target + PRD (scope boundaries) + task_clarifications (resolved decisions — don't challenge).
+  - Read `plan.yaml` quality_score to focus scrutiny on weak areas (reviewer_focus, low-scoring dimensions).
 - Analyze:
   - Assumptions — Explicit vs implicit. Stated? Valid? What if wrong?
   - Scope — Too much? Too little?
@@ -102,13 +103,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-debugger.agent.md b/agents/gem-debugger.agent.md
index 487507d27..2f8685e9c 100644
--- a/agents/gem-debugger.agent.md
+++ b/agents/gem-debugger.agent.md
@@ -141,13 +141,15 @@ ESLint recommendations: (general recurring patterns only):
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-designer-mobile.agent.md b/agents/gem-designer-mobile.agent.md
index 392d8f51e..9c452f0d4 100644
--- a/agents/gem-designer-mobile.agent.md
+++ b/agents/gem-designer-mobile.agent.md
@@ -209,13 +209,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-designer.agent.md b/agents/gem-designer.agent.md
index 4bea90979..c19136443 100644
--- a/agents/gem-designer.agent.md
+++ b/agents/gem-designer.agent.md
@@ -167,13 +167,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-devops.agent.md b/agents/gem-devops.agent.md
index 94155cbeb..eb02b3819 100644
--- a/agents/gem-devops.agent.md
+++ b/agents/gem-devops.agent.md
@@ -157,13 +157,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
@@ -174,19 +176,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 - YAGNI, KISS, DRY, idempotency.
 - Never implement application code. Return needs_approval when gates triggered.
 
-### Script Usage
-
-Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers.
-
-Do not use scripts for normal code implementation.
-
-Script rules:
-
-- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`.
-- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`.
-- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits.
-- Read/write only explicit paths from args.
-- Test on sample data before full execution.
-- Document purpose, inputs, outputs, and usage.
-
 </rules>
diff --git a/agents/gem-documentation-writer.agent.md b/agents/gem-documentation-writer.agent.md
index 4f7d338ee..cbe490538 100644
--- a/agents/gem-documentation-writer.agent.md
+++ b/agents/gem-documentation-writer.agent.md
@@ -59,17 +59,9 @@ Consult Knowledge Sources when relevant.
     - Check duplicates, append concisely.
     - Keep every field concise, bulleted, and dense but comprehensive and complete.
   - `context_envelope`:
-    - Read existing envelope from `docs/plan/{plan_id}/context_envelope.json`.
-    - Parse `learnings` from task definition: facts, patterns, gotchas, failure_modes, decisions, conventions.
-    - Merge into envelope fields deduped by key:
-      - `facts` → `research_digest.relevant_files` (deduped by path).
-      - `patterns` → `research_digest.patterns_found` (deduped by name).
-      - `gotchas` → `research_digest.gotchas` (deduped by text).
-      - `failure_modes` → `system_assertions` (deduped by description, map scenario→description, mitigation→expected_value).
-      - `decisions` → `prior_decisions` (deduped by decision).
-      - `conventions` → `conventions` (deduped string match).
-    - Bump `meta.version` (increment), set `meta.last_updated` (now), set `meta.previous_version_fields_changed` to list of changed top-level keys.
-    - Write back to `docs/plan/{plan_id}/context_envelope.json`.
+    - Update the existing envelope at `docs/plan/{plan_id}/context_envelope.json`:
+      - Merge the parsed `learnings` from the task definition: facts, patterns, gotchas, failure_modes, decisions, conventions.
+      - Bump `meta.version` (increment), set `meta.last_updated` (now), set `meta.previous_version_fields_changed` to list of changed top-level keys.
 - Validate:
   - get_errors, ensure diagrams render, check no secrets exposed.
 - Verify:
@@ -172,13 +164,15 @@ changes:
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-implementer-mobile.agent.md b/agents/gem-implementer-mobile.agent.md
index d4fab1aa1..95a419524 100644
--- a/agents/gem-implementer-mobile.agent.md
+++ b/agents/gem-implementer-mobile.agent.md
@@ -97,13 +97,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
@@ -134,19 +136,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 - Implement minimal_change.
 - If wrong→needs_revision w/ contradiction evidence.
 
-### Script Usage
-
-Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers.
-
-Do not use scripts for normal code implementation.
-
-Script rules:
-
-- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`.
-- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`.
-- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits.
-- Read/write only explicit paths from args.
-- Test on sample data before full execution.
-- Document purpose, inputs, outputs, and usage.
-
 </rules>
diff --git a/agents/gem-implementer.agent.md b/agents/gem-implementer.agent.md
index d17ef8099..c586697d8 100644
--- a/agents/gem-implementer.agent.md
+++ b/agents/gem-implementer.agent.md
@@ -100,13 +100,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
@@ -127,19 +129,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 - Implement minimal_change.
 - If diagnosis wrong→return needs_revision w/ contradiction evidence.
 
-### Script Usage
-
-Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers.
-
-Do not use scripts for normal code implementation.
-
-Script rules:
-
-- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`.
-- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`.
-- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits.
-- Read/write only explicit paths from args.
-- Test on sample data before full execution.
-- Document purpose, inputs, outputs, and usage.
-
 </rules>
diff --git a/agents/gem-mobile-tester.agent.md b/agents/gem-mobile-tester.agent.md
index 327ee7b06..4890aecb8 100644
--- a/agents/gem-mobile-tester.agent.md
+++ b/agents/gem-mobile-tester.agent.md
@@ -144,13 +144,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-orchestrator.agent.md b/agents/gem-orchestrator.agent.md
index 2e70f2c2e..a33d3ba88 100644
--- a/agents/gem-orchestrator.agent.md
+++ b/agents/gem-orchestrator.agent.md
@@ -62,28 +62,42 @@ IMPORTANT: On receiving user input, immediately announce and execute the followi
 
 ### Phase 0: Init & Clarify
 
-- Delegate to a generic subagent for intent detection with following instructions:
-  - Analyze user input + memory for intent, hints, context, patterns, gotchas etc. Check for feedback keywords and classify task type.
-  - Plan ID — If not provided, generate `YYYYMMDD-kebab-case`. If `plan_id` provided → validate existence of `docs/plan/{plan_id}/plan.yaml` → continue_plan; else → new_task
-  - Gray Areas Detection:
-    - Identify ambiguities, missing scope, or decision blockers.
-    - Identify focus_areas from request keywords.
-    - Generate clarification options if needed.
-    - Ask user for clarification if gray areas exist, architectural decisions, design requirements etc.
-  - Complexity Assessment:
-    - LOW: single file/small change, known patterns. Minimal blast radius.
-    - MEDIUM: multiple files, new patterns, moderate scope. Some blast radius.
-    - HIGH: architectural change, multiple domains, unknown patterns. Significant blast radius.
+- Plan ID — If not provided, generate `YYYYMMDD-kebab-case`. If `plan_id` provided → validate existence of `docs/plan/{plan_id}/plan.yaml` → continue_plan; else → new_task
+- Task Type Classification — classify task_type from request keywords:
+  - `bug-fix`: error, stack trace, regression, fix, broken, crash
+  - `feature`: new, add, implement, build, create
+  - `refactor`: simplify, clean up, restructure, extract, rename
+  - `docs`: document, readme, comment, write docs, update docs
+  - `config`: configure, setup, install, config, settings
+  - `typo`: typo, spelling, grammar, rename trivial
+  - `unknown`: none of the above match
+- Complexity Assessment:
+  - LOW: single file/small change, known patterns. Minimal blast radius.
+  - MEDIUM: multiple files, new patterns, moderate scope. Some blast radius.
+  - HIGH: architectural change, multiple domains, unknown patterns. Significant blast radius.
+- Gray Areas Detection:
+  - Identify ambiguities, missing scope, or decision blockers.
+  - Identify focus_areas from request keywords.
+  - Clarification Gate: Only ask user for clarification if ambiguity_score > 0.5 AND the question is a decision_blocker. For non-blocking gray areas, document assumptions and proceed.
 - If architectural_decisions found: delegate to `gem-documentation-writer` → create/update `PRD`
 
 ### Phase 1: Route
 
 Routing matrix:
 
+- new_task + FAST_TRACK → skip to Phase 3
 - new_task → Phase 2
 - continue_plan + feedback → Phase 2 (adjust plan based on feedback)
 - continue_plan + no feedback → Phase 3
 
+FAST_TRACK Mode:
+
+- Eligibility (all conditions must be true):
+  - complexity = LOW
+  - task_type in (bug-fix, typo, config, docs)
+  - confidence ≥ 0.85
+- Goal: Skip Phase 2; create the plan directly, then execute it using Phase 3.
+
 ### Phase 2: Planning
 
 - Seed Memory:
@@ -91,13 +105,13 @@ Routing matrix:
   - Package relevant entries into `memory_seed` object to pass to planner for envelope seeding.
 - Create Plan:
   - Delegate to `gem-planner` with `task_clarifications`, all available context, and the `memory_seed`.
-- Plan Validation:
-  - Complexity=LOW: Skip validation.
-  - Complexity=MEDIUM: delegate to `gem-reviewer(plan)`.
-  - Complexity=HIGH: delegate to both `gem-reviewer(plan)` + `gem-critic(plan)` in parallel.
-- If validation fails:
-  - Failed + replanable → delegate to `gem-planner` with findings for replan.
-  - Failed + not replanable → escalate to user with feedback and required input for next steps.
+  - Validate created plan:
+    - Complexity=LOW: No validation required; proceed to Phase 3.
+    - Complexity=MEDIUM: delegate to `gem-reviewer(plan)`.
+    - Complexity=HIGH: delegate to both `gem-reviewer(plan)` + `gem-critic(plan)` in parallel.
+  - If validation fails:
+    - Failed + replanable → delegate to `gem-planner` with findings for replan/adjustments.
+    - Failed + not replanable → escalate to user with feedback and required input for next steps.
 
 ### Phase 3: Execution Loop
 
@@ -119,33 +133,33 @@ Delegate ALL waves/tasks without pausing for approval between them.
     - If debugger confidence < 0.85 → escalate to user (cannot reliably diagnose).
   - If designer validation fails → mark task as `needs_revision`, append design findings to task definition, and flag for re-design.
   - Synthesize statuses (completed / escalate / needs_replan). Persist all to `plan.yaml`.
+- Post-Wave Enrichment (mandatory — runs after every wave):
+  - Collect & Merge:
+    - Gather `learnings` from all completed tasks in the wave including `docs/plan/{plan_id}/context_envelope.json` data.
+    - Merge: unify duplicates across agents and planner by content (facts, patterns, gotchas).
+    - Cross-reference: when a `gotcha` matches a `failure_mode` symptom, link them.
+    - Promote: `gotchas` recurring ≥ 3× across plans → `patterns`. `failure_modes` recurring ≥ 2× → elevate severity.
+    - High confidence patterns (confidence ≥ 0.85) with significant impact → candidate for persistence.
+  - Context Envelope (greedy — always updated):
+    - Always delegate to `gem-documentation-writer` with `task_type: update_context_envelope` to refresh `docs/plan/{plan_id}/context_envelope.json` with merged learnings from the wave.
+  - Memory (picky — confidence gate):
+    - Only persist items with confidence ≥ 0.80. Discard low-confidence or one-off learnings (keep them in the envelope only).
+    - Persist deduped `facts`, `patterns`, `gotchas`, `failure_modes`, `decisions`, `conventions` to memory tool.
+  - Conventions (picky — recurrence gate):
+    - If same convention recurs ≥ 3× across tasks in this plan: delegate to `gem-documentation-writer` → create/update `AGENTS.md`
+    - Otherwise: keep in envelope only.
+  - Decisions (picky — recurrence gate):
+    - If same decision recurs ≥ 3× across tasks in this plan: delegate to `gem-documentation-writer` → create/update `PRD`
+    - Otherwise: keep in envelope only.
+  - Skills (picky — confidence gate):
+    - If `patterns` with confidence ≥ 0.9 AND non-trivial: delegate to `gem-skill-creator`.
 - Loop:
-  - After each wave → Phase 4 → immediately next.
+  - After each wave → run Post-Wave Enrichment → immediately start the next wave.
   - Blocked → Escalate.
   - Present status as per `output_format`.
-  - All done → Phase 5.
-
-### Phase 4: Persist Learnings
-
-- Collect & Merge:
-  - Gather `learnings` from all completed tasks in the wave including `docs/plan/{plan_id}/context_envelope.json` data.
-  - Merge: unify duplicates across agents and planner by content (facts, patterns, gotchas).
-  - Cross-reference: when a `gotcha` matches a `failure_mode` symptom, link them.
-  - Promote: `gotchas` recurring ≥ 3× across plans → `patterns`. `failure_modes` recurring ≥ 2× → elevate severity.
-- Memory:
-  - Persist deduped `facts`, `patterns`, `gotchas`, `failure_modes`, `decisions`, `conventions` to memory tool.
-- Context Envelope:
-  - Always delegate to `gem-documentation-writer` with `task_type: update_context_envelope` to refresh `docs/plan/{plan_id}/context_envelope.json` with merged learnings from the wave.
-  - Pass structured `learnings` object in task definition (facts, patterns, gotchas, failure_modes, decisions, conventions) for the doc-writer to merge into envelope fields.
-  - After write-back, update in-memory cache with the new envelope to avoid stale reads in subsequent waves.
-- Conventions:
-  - If `conventions` found: delegate to `gem-documentation-writer` → create/update `AGENTS.md`
-- Decisions:
-  - If `decisions` found: delegate to `gem-documentation-writer` → create/update `PRD`
-- Skills:
-  - If `patterns` with confidence ≥ 0.85 AND non-trivial: delegate to `gem-skill-creator`.
-
-### Phase 5: Output
+  - All done → Phase 4.
+
+### Phase 4: Output
 
 Present status as per `output_format`.
 
@@ -182,251 +196,34 @@ Present status as per `output_format`.
 }
 ```
 
-### gem-implementer
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "task_definition": {
-    "tech_stack": ["string"],
-    "test_coverage": "string | null",
-    "debugger_diagnosis": "object (for bug-fix mode)",
-    "implementation_handoff": {
-      "do_not_reinvestigate": ["string"],
-      "required_test_first": "string",
-      "target_files": ["string"],
-      "minimal_change": "string",
-      "acceptance_checks": ["string"],
-    },
-  },
-}
-```
-
-### gem-implementer-mobile
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "task_definition": {
-    "platforms": ["ios", "android"],
-    "debugger_diagnosis": "object (for bug-fix mode)",
-    "implementation_handoff": {
-      "do_not_reinvestigate": ["string"],
-      "required_test_first": "string",
-      "target_files": ["string"],
-      "minimal_change": "string",
-      "acceptance_checks": ["string"],
-    },
-  },
-}
-```
-
-### gem-reviewer
-
-```jsonc
-{
-  "review_scope": "plan|wave",
-  "plan_id": "string",
-  "plan_path": "string",
-  "wave_tasks": ["string (for wave scope)"],
-  "security_sensitive_tasks": ["string — task IDs requiring per-task deep scan (merged into wave review)"],
-  "task_definition": "object (optional task context for wave checks)",
-  "review_depth": "full|standard|lightweight",
-  "review_security_sensitive": "boolean",
-}
-```
-
-### gem-debugger
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "task_definition": "object",
-  "debugger_diagnosis": "object (for retry after failed fix)",
-  "implementation_handoff": {
-    "do_not_reinvestigate": ["string"],
-    "required_test_first": "string",
-    "target_files": ["string"],
-    "minimal_change": "string",
-    "acceptance_checks": ["string"],
-  },
-  "error_context": {
-    "error_message": "string",
-    "stack_trace": "string (optional)",
-    "failing_test": "string (optional)",
-    "reproduction_steps": ["string (optional)"],
-    "environment": "string (optional)",
-    "flow_id": "string (optional)",
-    "step_index": "number (optional)",
-    "evidence": ["string (optional)"],
-    "browser_console": ["string (optional)"],
-    "network_failures": ["string (optional)"],
-  },
-}
-```
-
-### gem-critic
-
-```jsonc
-{
-  "task_id": "string (optional)",
-  "plan_id": "string",
-  "plan_path": "string",
-  "target": "string (file paths or plan section)",
-  "context": "string (what is being built, focus)",
-}
-```
-
-### gem-code-simplifier
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string (optional)",
-  "plan_path": "string (optional)",
-  "scope": "single_file|multiple_files|project_wide",
-  "targets": ["string (file paths or patterns)"],
-  "focus": "dead_code|complexity|duplication|naming|all",
-  "constraints": { "preserve_api": "boolean", "run_tests": "boolean", "max_changes": "number" },
-}
-```
-
-### gem-browser-tester
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "validation_matrix": [...],
-  "flows": [...],
-  "fixtures": {...},
-  "visual_regression": {...},
-  "contracts": [...]
-}
-```
-
-### gem-mobile-tester
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "task_definition": {
-    "platforms": ["ios", "android"] | ["ios"] | ["android"],
-    "test_framework": "detox | maestro | appium",
-    "test_suite": { "flows": [...], "scenarios": [...], "gestures": [...], "app_lifecycle": [...], "push_notifications": [...] },
-    "device_farm": { "provider": "browserstack | saucelabs", "credentials": {...} },
-    "performance_baseline": {...},
-    "fixtures": {...},
-    "cleanup": "boolean"
-  }
-}
-```
-
-### gem-devops
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "task_definition": {
-    "environment": "development|staging|production",
-    "requires_approval": "boolean",
-    "devops_security_sensitive": "boolean",
-  },
-}
-```
-
-### gem-documentation-writer
+### All Other Agents
 
 ```jsonc
 {
-  "task_id": "string",
   "plan_id": "string",
-  "plan_path": "string",
   "task_definition": {
-    "learnings": {
-      "facts": [{ "statement": "string", "category": "string" }],
-      "patterns": [{ "name": "string", "description": "string", "confidence": 0.0-1.0 }],
-      "gotchas": ["string"],
-      "failure_modes": [{ "scenario": "string", "symptoms": ["string"], "mitigation": "string" }],
-      "decisions": [{ "decision": "string", "rationale": ["string"], "evidence": ["string"] }],
-      "conventions": ["string"],
-    },
+    // Agent-specific fields live here.
+    // Examples: mode, scope, target, context, constraints, environment, etc.
+    // Agents read full context from docs/plan/{plan_id}/context_envelope.json
   },
-  "task_type": "documentation | update | prd | agents_md | update_context_envelope",
-  "audience": "developers | end_users | stakeholders",
-  "coverage_matrix": ["string"],
-  "action": "create_prd | update_prd | update_agents_md | update_context_envelope",
-  "architectural_decisions": [{ "decision": "string", "rationale": "string" }],
-  "findings": [{ "type": "string", "content": "string" }],
-  "overview": "string",
-  "tasks_completed": ["string"],
-  "outcomes": "string",
-  "next_steps": ["string"],
-  "acceptance_criteria": ["string"],
-}
-```
-
-### gem-skill-creator
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string",
-  "plan_path": "string",
-  "patterns": [
-    {
-      "name": "string",
-      "when_to_apply": "string",
-      "code_example": "string",
-      "anti_pattern": "string",
-      "context": "string",
-      "confidence": "number",
-    },
-  ],
-  "source_task_id": "string",
 }
 ```
 
-### gem-designer
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string (optional)",
-  "plan_path": "string (optional)",
-  "mode": "create|validate",
-  "scope": "component|page|layout|theme|design_system",
-  "target": "string (file paths or component names)",
-  "context": { "framework": "string", "library": "string", "existing_design_system": "string", "requirements": "string" },
-  "constraints": { "responsive": "boolean", "accessible": "boolean", "dark_mode": "boolean" },
-}
-```
-
-### gem-designer-mobile
-
-```jsonc
-{
-  "task_id": "string",
-  "plan_id": "string (optional)",
-  "plan_path": "string (optional)",
-  "mode": "create|validate",
-  "scope": "component|screen|navigation|theme|design_system",
-  "target": "string (file paths or component names)",
-  "context": { "framework": "string", "library": "string", "existing_design_system": "string", "requirements": "string" },
-  "constraints": { "platform": "ios|android|cross-platform", "responsive": "boolean", "accessible": "boolean", "dark_mode": "boolean" },
-}
-```
+**Examples of task_definition fields by agent:**
+
+- `gem-implementer`: `tech_stack`, `test_coverage`, `debugger_diagnosis`, `implementation_handoff`
+- `gem-implementer-mobile`: `platforms`, `debugger_diagnosis`, `implementation_handoff`
+- `gem-reviewer`: `review_scope`, `review_depth`, `review_security_sensitive`
+- `gem-debugger`: `error_context`, `debugger_diagnosis`, `implementation_handoff`
+- `gem-critic`: `target`, `context`
+- `gem-code-simplifier`: `scope`, `targets`, `focus`, `constraints`
+- `gem-browser-tester`: `validation_matrix`, `flows`, `fixtures`, `visual_regression`, `contracts`
+- `gem-mobile-tester`: `platforms`, `test_framework`, `test_suite`, `device_farm`
+- `gem-devops`: `environment`, `requires_approval`, `devops_security_sensitive`
+- `gem-documentation-writer`: `task_type`, `audience`, `coverage_matrix`, `action`, `learnings`, `findings`
+- `gem-designer`: `mode`, `scope`, `target`, `context`, `constraints`
+- `gem-designer-mobile`: `mode`, `scope`, `target`, `context`, `constraints`
+- `gem-skill-creator`: `patterns`, `source_task_id`
 
 </agent_input_reference>
 
@@ -465,13 +262,14 @@ Present status as per `output_format`.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit CLI args, read/write only paths passed as args, deterministic output, progress logs for long runs, error handling, non-zero exit on failure.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-planner.agent.md b/agents/gem-planner.agent.md
index 313e8091c..45028d175 100644
--- a/agents/gem-planner.agent.md
+++ b/agents/gem-planner.agent.md
@@ -61,22 +61,28 @@ Consult Knowledge Sources when relevant.
 - Context:
   - Parse objective/ context.
   - Mode: Initial, Replan, or Extension.
-- Research:
-  - Identify focus_areas from objective and context.
-  - Search similar implementations → patterns_found.
-  - Discovery via semantic_search + grep_search, merge results.
+- Discovery (OBJECTIVE-ALIGNED — no random exploration):
+  - Identify focus_areas strictly from objective and context.
+  - All searches MUST target focus_areas; no exploratory/off-target searching.
+  - Discovery via semantic_search + grep_search, scoped to focus_areas.
   - Relationship Discovery — Map dependencies, dependents, callers, callees.
+  - Codebase Structure Mapping — Identify:
+    - key_dirs (actual directory structure via list_dir)
+    - key_components (files + their responsibilities)
+    - existing patterns (via semantic_search of code patterns)
+  - Ground-truth population — Populate context_envelope with actual findings, not assumptions:
+    - tech_stack: verified from package.json, requirements.txt, or actual files
+    - conventions: extracted from existing code, not assumed
+    - constraints: based on actual codebase, not generic
 - Design:
   - Lock clarifications into DAG constraints.
   - Synthesize DAG: atomic tasks (or NEW for extension).
   - Assign waves: no deps → wave 1, dep.wave + 1.
-  - Create contracts between dependent tasks.
-  - Capture research_metadata.confidence → `plan.yaml`.
-  - Link each task to research sources.
 - Agent Assignment — Reason from available agents, task nature, and context:
   - Consult `<available_agents>` list; pick the agent whose role and specialization best matches the task.
   - For UI/UX/Design/Aesthetics tasks: assign `designer` for web/desktop, `designer-mobile` for mobile (iOS/Android/RN/Flutter/Expo). If cross-platform, split into separate web + mobile tasks.
   - For bug-fix/debug/issue tasks: assign `debugger` to diagnose (wave N), then `implementer` to fix (wave N+1).
+    - MUST pair every `debugger` task with a corresponding `implementer` task in a subsequent wave.
   - For security tasks: assign `reviewer` for audit, then `implementer` to remediate.
   - For refactoring/simplification tasks: assign `code-simplifier`.
   - For documentation: assign `doc-writer`.
@@ -93,15 +99,19 @@ Consult Knowledge Sources when relevant.
   - Assess PRD update need (new features, scope shifts, ADR deviations, new stories, AC changes→set prd_update_recommended).
   - New features→add doc-writer task (final wave).
   - Calculate metrics (wave_1_count, deps, risk_score).
+  - Calculate quality_score (overall, breakdown by dimension, blocking_issues, warnings).
+  - Generate reviewer_focus: list dimensions with score < 0.9 for targeted scrutiny.
+  - Pre-Flight Validation:
+    - Validate `plan.yaml` against `Plan Verification Criteria` before saving.
+    - If validation fails → fix issues inline, re-validate, then save.
+    - Do NOT save or output a broken plan.
   - Save Plan `docs/plan/{plan_id}/plan.yaml`
 - Create context envelope `context_envelope.json` as per `context_envelope_format_guide`
   - Use provided context as seed and augment with research findings.
   - If `memory_seed` provided, merge its high confidence items/ contents into the envelope
   - Keep every field concise, bulleted, and dense but comprehensive and complete. Avoid fluff, filler, and verbosity. Evidence paths over explanation.
   - Create for future agent reuse: include durable facts, decisions, constraints, and evidence paths needed to avoid re-discovery.
-  - Omit no context.
   - Save Context Envelope: `docs/plan/{plan_id}/context_envelope.json`.
-- Validation — Verify as per `Plan Verification Criteria`.
 - Failure — Log error, return status=failed w/ reason. Log to `docs/plan/{plan_id}/logs/`.
 - Output
   - Return JSON per Output Format.
@@ -124,6 +134,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
   "prd_update_recommended": "boolean",
   "prd_update_reason": "string | null",
   "metrics": { "wave_1_task_count": "number", "total_dependencies": "number", "risk_score": "low | medium | high" },
+  "quality_score": {
+    "overall": "number (0.0-1.0)",
+    "prd_coverage": "number (0.0-1.0)",
+    "target_files_verified": "number (0.0-1.0)",
+    "contracts_complete": "number (0.0-1.0)",
+    "wave_assignment_valid": "number (0.0-1.0)",
+    "blocking_issues": "number",
+    "warnings": "number"
+  },
   "learnings": {
     "patterns": [{ "name": "string", "description": "string", "confidence": 0.0-1.0 }],
     "gotchas": ["string"],
@@ -148,11 +167,21 @@ objective: string
 created_at: string
 created_by: string
 status: pending | approved | in_progress | completed | failed
-research_confidence: high | medium | low
 plan_metrics:
   wave_1_task_count: number
   total_dependencies: number
   risk_score: low | medium | high
+quality_score:
+  overall: number (0.0-1.0)
+  breakdown:
+    prd_coverage: number (0.0-1.0)
+    target_files_verified: number (0.0-1.0)
+    contracts_complete: number (0.0-1.0)
+    wave_assignment_valid: number (0.0-1.0)
+  blocking_issues: number
+  warnings: number
+  # Reviewer guidance: areas needing extra scrutiny based on lower scores
+  reviewer_focus: [string]
 tldr: |
 open_questions:
   - question: string
@@ -459,6 +488,278 @@ tasks:
       "safe_to_assume": ["string"],
       "verify_before_use": ["string"],
     },
+    // Plan-level execution metadata from plan.yaml
+    "plan_metadata": {
+      "tldr": "string — one-line plan summary",
+      "complexity": "simple | medium | complex",
+      "risk_score": "low | medium | high",
+      "wave_1_task_count": "number",
+      "total_dependencies": "number",
+      "prd_update_recommended": "boolean",
+      "prd_update_reason": "string | null",
+      "pre_mortem": {
+        "overall_risk_level": "low | medium | high",
+        "assumptions": ["string"],
+        "critical_failure_modes": [
+          {
+            "scenario": "string",
+            "likelihood": "low | medium | high",
+            "impact": "low | medium | high | critical",
+            "mitigation": "string",
+          },
+        ],
+      },
+      "open_questions": [
+        {
+          "question": "string",
+          "context": "string",
+          "type": "decision_blocker | research | nice_to_know",
+          "affects": ["string"],
+        },
+      ],
+      "gaps": [
+        {
+          "description": "string",
+          "refinement_requests": [
+            {
+              "query": "string",
+              "source_hint": "string",
+            },
+          ],
+        },
+      ],
+      "planning_history": [
+        {
+          "pass": "number",
+          "reason": "string",
+          "timestamp": "ISO-8601 string",
+        },
+      ],
+    },
+    // Researcher output — full findings, not just the digest
+    "research_findings": {
+      "files_analyzed": [
+        {
+          "file": "string",
+          "path": "string",
+          "purpose": "string",
+          "key_elements": [
+            {
+              "element": "string",
+              "type": "function | class | variable | pattern",
+              "location": "string — file:line",
+              "description": "string",
+              "language": "string",
+            },
+          ],
+          "lines": "number",
+        },
+      ],
+      "related_architecture": {
+        "components_relevant_to_domain": [
+          {
+            "component": "string",
+            "responsibility": "string",
+            "location": "string",
+            "relationship_to_domain": "string",
+          },
+        ],
+        "interfaces_used_by_domain": [
+          {
+            "interface": "string",
+            "location": "string",
+            "usage_pattern": "string",
+          },
+        ],
+        "data_flow_involving_domain": "string",
+        "key_relationships_to_domain": [
+          {
+            "from": "string",
+            "to": "string",
+            "relationship": "imports | calls | inherits | composes",
+          },
+        ],
+      },
+      "related_technology_stack": {
+        "languages_used_in_domain": ["string"],
+        "frameworks_used_in_domain": [
+          {
+            "name": "string",
+            "usage_in_domain": "string",
+          },
+        ],
+        "libraries_used_in_domain": [
+          {
+            "name": "string",
+            "purpose_in_domain": "string",
+          },
+        ],
+        "external_apis_used_in_domain": [
+          {
+            "name": "string",
+            "integration_point": "string",
+          },
+        ],
+      },
+      "related_conventions": {
+        "naming_patterns_in_domain": "string",
+        "structure_of_domain": "string",
+        "error_handling_in_domain": "string",
+        "testing_in_domain": "string",
+        "documentation_in_domain": "string",
+      },
+      "related_dependencies": {
+        "internal": [
+          {
+            "component": "string",
+            "relationship_to_domain": "string",
+            "direction": "inbound | outbound | bidirectional",
+          },
+        ],
+        "external": [
+          {
+            "name": "string",
+            "purpose_for_domain": "string",
+          },
+        ],
+      },
+      "domain_security_considerations": {
+        "sensitive_areas": [
+          {
+            "area": "string",
+            "location": "string",
+            "concern": "string",
+          },
+        ],
+        "authentication_patterns_in_domain": "string",
+        "authorization_patterns_in_domain": "string",
+        "data_validation_in_domain": "string",
+      },
+      "testing_patterns": {
+        "framework": "string",
+        "coverage_areas": ["string"],
+        "test_organization": "string",
+        "mock_patterns": ["string"],
+      },
+      "research_metadata": {
+        "methodology": "string — e.g., semantic_search+grep_search, Context7",
+        "scope": "string",
+        "confidence_level": "high | medium | low",
+        "coverage_percent": "number",
+        "decision_blockers": "number",
+        "research_blockers": "number",
+      },
+    },
+    // Execution state for future agents
+    "task_registry": {
+      "waves": [
+        {
+          "wave": "number",
+          "agents": ["string"],
+          "task_count": "number",
+          "completed": "number",
+          "failed": "number",
+          "blocked": "number",
+        },
+      ],
+      "tasks": [
+        {
+          "id": "string",
+          "title": "string",
+          "agent": "string",
+          "wave": "number",
+          "priority": "high | medium | low",
+          "status": "pending | in_progress | completed | failed | blocked | needs_revision",
+          "estimated_effort": "small | medium | large",
+          "estimated_files": "number",
+          "estimated_lines": "number",
+          "flags": {
+            "flaky": "boolean",
+            "retries_used": "number",
+          },
+          "conflicts_with": ["string"],
+          "focus_area": "string | null",
+        },
+      ],
+    },
+    // Trace of what was seeded vs. discovered
+    "memory_seed_trace": {
+      "seeded_facts": [
+        {
+          "statement": "string",
+          "category": "string",
+          "confidence": "number (0.0-1.0)",
+        },
+      ],
+      "seeded_patterns": [
+        {
+          "name": "string",
+          "description": "string",
+          "confidence": "number (0.0-1.0)",
+        },
+      ],
+      "seeded_gotchas": ["string"],
+      "seeded_failure_modes": [
+        {
+          "scenario": "string",
+          "symptoms": ["string"],
+          "mitigation": "string",
+        },
+      ],
+      "seeded_decisions": [
+        {
+          "decision": "string",
+          "rationale": ["string"],
+        },
+      ],
+      "seeded_conventions": ["string"],
+      "merged_confidence": "number (0.0-1.0)",
+    },
+    // Implementation specification from plan.yaml
+    "implementation_spec": {
+      "code_structure": "string",
+      "affected_areas": ["string"],
+      "component_details": [
+        {
+          "component": "string",
+          "responsibility": "string",
+          "interfaces": ["string"],
+          "dependencies": [
+            {
+              "component": "string",
+              "relationship": "string",
+            },
+          ],
+          "integration_points": ["string"],
+        },
+      ],
+      "contracts": [
+        {
+          "from_task": "string",
+          "to_task": "string",
+          "interface": "string",
+          "format": "string",
+        },
+      ],
+    },
+    // Ground-truth validation results from Discovery phase
+    "codebase_validation": {
+      "verified_at": "ISO-8601 string",
+      "target_files_exist": {
+        "T01": ["src/config.ts"],
+        "T02": ["src/api/client.ts"],
+      },
+      "dependency_graph_valid": true,
+      "no_circular_deps": true,
+      "wave_assignment_valid": true,
+      "all_contracts_defined": true,
+      "tech_stack_populated": true,
+      "prd_alignment": {
+        "requirements_mapped": ["REQ-001", "REQ-002"],
+        "unmapped_requirements": [],
+        "coverage_percent": 100,
+      },
+    },
   },
 }
 ```
@@ -471,13 +772,15 @@ tasks:
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover within focus_areas, narrow early with OR regexes, multi-pattern globs, and include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit CLI args, read/write only paths passed as args, deterministic output, progress logs for long runs, error handling, non-zero exit on failure.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
@@ -489,12 +792,16 @@ tasks:
 
 #### Plan Verification Criteria
 
+Run these checks BEFORE saving plan.yaml. Fix all failures inline.
+
 - Plan:
   - Valid YAML, required fields, unique task IDs, valid status values
   - Concise, dense, complete, focused on implementation, avoids fluff/verbosity
-- DAG: No circular deps, all dep IDs exist
-- Contracts: Valid from_task/to_task IDs, interfaces defined
+- DAG: No circular deps, all dep IDs exist, tasks with no deps are in wave 1
+- Contracts: Valid from_task/to_task IDs, interfaces defined (required at ALL complexity levels)
 - Tasks: Valid agent assignments, failure_modes for high/medium tasks, verification present, success_criteria defined when needed
+  - Every debugger task has a paired implementer task (wave N+1 or later)
+  - If acceptance_criteria mentions tests → target_files must include test file paths
 - Pre-mortem: overall_risk_level defined, critical_failure_modes present
 - Implementation spec: code_structure, affected_areas, component_details defined
 
diff --git a/agents/gem-researcher.agent.md b/agents/gem-researcher.agent.md
index 75e662019..49e70f59d 100644
--- a/agents/gem-researcher.agent.md
+++ b/agents/gem-researcher.agent.md
@@ -37,11 +37,11 @@ Consult Knowledge Sources when relevant.
 - Init
   - Read `docs/plan/{plan_id}/context_envelope.json` at start when it exists; read it in parallel with required agent inputs. Use `research_digest.relevant_files` as the file shortlist. Treat envelope data as a context cache.
 - Identify focus_area
-- Research Pass — Pattern discovery:
-  - Search similar implementations → patterns_found.
-  - Discovery via semantic_search + grep_search, merge results.
-  - Calculate confidence.
+- Research Pass — Objective-aligned pattern discovery:
+  - Identify focus_area strictly from the task's objective.
+  - Discovery via semantic_search + grep_search, scoped to focus_area.
   - Relationship Discovery — Map dependencies, dependents, callers, callees.
+  - Calculate confidence.
 - Early Exit:
   - If confidence ≥ 0.85 → skip relationships + detailed → Synthesize Phase.
   - If decision_blockers resolved AND confidence ≥ 0.8 → early exit.
@@ -229,13 +229,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover within focus_area, narrow early with OR regexes, multi-pattern globs, and include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit CLI args, read/write only paths passed as args, deterministic output, progress logs for long runs, error handling, non-zero exit on failure.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-reviewer.agent.md b/agents/gem-reviewer.agent.md
index 1626311eb..8286cd83f 100644
--- a/agents/gem-reviewer.agent.md
+++ b/agents/gem-reviewer.agent.md
@@ -40,6 +40,7 @@ Consult Knowledge Sources when relevant.
 - Init
   - Read `docs/plan/{plan_id}/context_envelope.json` at start; read it in parallel with required agent inputs. Use `research_digest.relevant_files` as the file shortlist. Treat envelope data as a context cache. Then parse review_scope: plan|wave.
   - Read `plan.yaml` + `PRD.yaml`.
+  - Use `quality_score.reviewer_focus` from `plan.yaml` to prioritize scrutiny on weak areas.
 
 ### Plan Review
 
@@ -49,8 +50,13 @@ Consult Knowledge Sources when relevant.
   - Atomicity (≤ 300 lines/task).
   - No circular deps, all IDs exist.
   - Wave parallelism, conflicts_with not parallel.
+  - Wave assignment: tasks with no dependencies are in wave 1.
   - Tasks have verification + acceptance_criteria.
+  - Test file inclusion: if acceptance_criteria mentions tests (contains 'test' or 'tests'), target_files must include corresponding test file paths.
   - PRD alignment, valid agents.
+  - Tech stack: context_envelope.tech_stack exists and is non-empty.
+  - Contracts: Every dependency edge must have a contract.
+  - Diagnose-then-fix: every debugger task has a paired implementer task in a later wave.
 - Status:
   - Critical → failed.
   - Non-critical → needs_revision.
@@ -125,13 +131,15 @@ Consult Knowledge Sources when relevant.
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit CLI args, read/write only paths passed as args, deterministic output, progress logs for long runs, error handling, non-zero exit on failure.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
diff --git a/agents/gem-skill-creator.agent.md b/agents/gem-skill-creator.agent.md
index 42c2d0911..fd2e3c50a 100644
--- a/agents/gem-skill-creator.agent.md
+++ b/agents/gem-skill-creator.agent.md
@@ -149,13 +149,15 @@ metadata:
 
 ### Execution
 
-- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound.
-- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs.
-- Discover first → read full set in parallel. Avoid line-by-line reads.
-- Narrow search with includePattern/excludePattern.
-- Autonomous execution.
-- Retry 3x.
-- JSON output only.
+- Execution priority: native tools → subagents/tasks → scripts → raw CLI.
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls.
+- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set.
+- Execute autonomously; ask only for true blockers.
+- Retry transient failures up to 3x.
+- Return JSON output only.
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports.
+  - Scripts: explicit CLI args, read/write only paths passed as args, deterministic output, progress logs for long runs, error handling, non-zero exit on failure.
+  - Test on sample/small input before full run.
 
 ### Constitutional
 
@@ -164,19 +166,4 @@ metadata:
 - Minimum content, nothing speculative.
 - Treat patterns as read-only source of truth. Deduplicate before creating.
 
-### Script Usage
-
-Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers.
-
-Do not use scripts for normal code implementation.
-
-Script rules:
-
-- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`.
-- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`.
-- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits.
-- Read/write only explicit paths from args.
-- Test on sample data before full execution.
-- Document purpose, inputs, outputs, and usage.
-
 </rules>
diff --git a/plugins/gem-team/.github/plugin/plugin.json b/plugins/gem-team/.github/plugin/plugin.json
index bfbec766b..a4544ce9e 100644
--- a/plugins/gem-team/.github/plugin/plugin.json
+++ b/plugins/gem-team/.github/plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "gem-team",
-  "version": "1.42.0",
+  "version": "1.46.0",
   "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.",
   "author": {
     "name": "mubaidr",

From 1e1cd22f88ba8515e7b8185a994621513131707c Mon Sep 17 00:00:00 2001
From: Muhammad Ubaid Raza <mubaidr@gmail.com>
Date: Sun, 31 May 2026 03:14:19 +0500
Subject: [PATCH 2/2] feat: bump marketplace version to 1.47.0 and enhance
 agent workflows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add Bug‑Fix Mode with validation gate for `debugger_diagnosis` tasks
- Expand allowed task types to include `research`
- Reduce subagent concurrency limit from 4 to 2
- Update design validation handling for flagged tasks
- Update marketplace plugin version reference to 1.47.0
---
 .github/plugin/marketplace.json             |  2 +-
 agents/gem-implementer.agent.md             | 21 +++++++++++++++------
 agents/gem-orchestrator.agent.md            |  6 ++++--
 agents/gem-planner.agent.md                 | 14 ++++++++------
 agents/gem-researcher.agent.md              |  3 ++-
 plugins/gem-team/.github/plugin/plugin.json |  2 +-
 plugins/gem-team/README.md                  |  3 ++-
 7 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json
index 618fc7e21..89a307bc3 100644
--- a/.github/plugin/marketplace.json
+++ b/.github/plugin/marketplace.json
@@ -359,7 +359,7 @@
       "name": "gem-team",
       "source": "gem-team",
       "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.",
-      "version": "1.46.0"
+      "version": "1.47.0"
     },
     {
       "name": "git-ape",
diff --git a/agents/gem-implementer.agent.md b/agents/gem-implementer.agent.md
index c586697d8..307db13bd 100644
--- a/agents/gem-implementer.agent.md
+++ b/agents/gem-implementer.agent.md
@@ -42,7 +42,9 @@ Consult Knowledge Sources when relevant.
   - Read — PRD sections, `DESIGN.md` tokens
 - Analyze:
   - Criteria — Understand acceptance_criteria.
-- TDD Cycle (Red → Green → Refactor → Verify):
+- Bug-Fix Mode Branch:
+  - If `task_definition.debugger_diagnosis` exists → follow Bug-Fix Mode (see Rules). Validation gate runs first.
+- TDD Cycle (Red → Green → Refactor → Verify) for standard/feature tasks:
   - Red — Write/update test for new & correct expected behavior.
   - Green — Write minimal code to pass.
     - Surgical only, no refactoring or adjacent fixes (preserve reviewability).
@@ -123,10 +125,17 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 
 #### Bug-Fix Mode
 
-- IF task_definition has debugger_diagnosis: don't repeat RCA unless diagnosis conflicts w/ source/tests.
-- Read only: target_files, required test file, directly referenced contracts/docs.
-- Start w/ required_test_first.
-- Implement minimal_change.
-- If diagnosis wrong→return needs_revision w/ contradiction evidence.
+When `task_definition.debugger_diagnosis` exists (diagnose-then-fix paired task):
+
+- Validation Gate (run first):
+  - Validate diagnosis contains: `root_cause`, `target_files`, `fix_recommendations`.
+  - If any field missing → return `needs_revision` immediately. Do NOT proceed with TDD.
+  - Use `implementation_handoff` as the authoritative work scope.
+- Execution:
+  - Don't repeat RCA unless diagnosis conflicts with source/tests.
+  - Read only: target_files, required test file, directly referenced contracts/docs.
+  - Start with required_test_first.
+  - Implement minimal_change.
+  - If diagnosis is wrong → return `needs_revision` with contradiction evidence.
 
 </rules>
diff --git a/agents/gem-orchestrator.agent.md b/agents/gem-orchestrator.agent.md
index a33d3ba88..32ccd54ca 100644
--- a/agents/gem-orchestrator.agent.md
+++ b/agents/gem-orchestrator.agent.md
@@ -70,7 +70,9 @@ IMPORTANT: On receiving user input, immediately announce and execute the followi
   - `docs`: document, readme, comment, write docs, update docs
   - `config`: configure, setup, install, config, settings
   - `typo`: typo, spelling, grammar, rename trivial
+  - `research`: research, investigate, explore, analyze, compare, evaluate, explain, understand
   - `unknown`: none of the above match
+  - If `unknown`: confidence ≥ 0.85 → default to `feature`; confidence < 0.85 → escalate to user for clarification.
 - Complexity Assessment:
   - LOW: single file/small change, known patterns. Minimal blast radius.
   - MEDIUM: multiple files, new patterns, moderate scope. Some blast radius.
@@ -124,10 +126,10 @@ Delegate ALL waves/tasks without pausing for approval between them.
   - Wave > 1: include contracts from task definitions.
   - Get pending (deps = completed, status = pending, wave = current).
   - Filter conflicts_with: same-file tasks serialize.
-  - Delegate to subagents (max 4 concurrent) as per `agent_input_reference`.
+  - Delegate to subagents (max 2 concurrent).
 - Integration Check:
   - Delegate to `gem-reviewer(wave scope)` for integration + security scan.
-  - ui|ux|design|interface|a11y tasks → validate with the designer agent matching the task's assigned agent (if task.agent is `designer-mobile`, use `gem-designer-mobile(validate)`; otherwise use `gem-designer(validate)`), run in parallel with `gem-reviewer(wave scope)`.
+  - Tasks with `flags.requires_design_validation: true` → validate with the designer agent matching the task's assigned agent (if task.agent is `designer-mobile`, use `gem-designer-mobile(validate)`; otherwise use `gem-designer(validate)`), run in parallel with `gem-reviewer(wave scope)`.
   - If reviewer fails → `gem-debugger` to diagnose:
     - If debugger confidence ≥ 0.85 → delegate to `gem-implementer` with diagnosis → re-verify.
     - If debugger confidence < 0.85 → escalate to user (cannot reliably diagnose).
diff --git a/agents/gem-planner.agent.md b/agents/gem-planner.agent.md
index 45028d175..eedb9d66a 100644
--- a/agents/gem-planner.agent.md
+++ b/agents/gem-planner.agent.md
@@ -83,6 +83,7 @@ Consult Knowledge Sources when relevant.
   - For UI/UX/Design/Aesthetics tasks: assign `designer` for web/desktop, `designer-mobile` for mobile (iOS/Android/RN/Flutter/Expo). If cross-platform, split into separate web + mobile tasks.
   - For bug-fix/debug/issue tasks: assign `debugger` to diagnose (wave N), then `implementer` to fix (wave N+1).
     - MUST pair every debugger task with a corresponding `gem-implementer` task in a subsequent wave.
+    - The implementer task MUST include a `debugger_diagnosis` field (populated from the debugger's output) in its `task_definition`.
   - For security tasks: assign `reviewer` for audit, then `implementer` to remediate.
   - For refactoring/simplification tasks: assign `code-simplifier`.
   - For documentation: assign `doc-writer`.
@@ -183,17 +184,17 @@ quality_score:
   # Reviewer guidance: areas needing extra scrutiny based on lower scores
   reviewer_focus: [string]
 tldr: |
-open_questions:
+open_questions: # Optional for LOW complexity; required for MEDIUM/HIGH
   - question: string
     context: string
     type: decision_blocker | research | nice_to_know
     affects: [string]
-gaps:
+gaps: # Optional for LOW complexity; required for MEDIUM/HIGH
   - description: string
     refinement_requests:
       - query: string
         source_hint: string
-pre_mortem:
+pre_mortem: # Optional for LOW complexity; required for MEDIUM/HIGH
   overall_risk_level: low | medium | high
   critical_failure_modes:
     - scenario: string
@@ -201,7 +202,7 @@ pre_mortem:
       impact: low | medium | high | critical
       mitigation: string
   assumptions: [string]
-implementation_specification:
+implementation_specification: # Optional for LOW complexity; required for MEDIUM/HIGH
   code_structure: string
   affected_areas: [string]
   component_details:
@@ -212,7 +213,7 @@ implementation_specification:
         - component: string
           relationship: string
       integration_points: [string]
-contracts:
+contracts: # Optional for LOW/MEDIUM; required for HIGH complexity
   - from_task: string
     to_task: string
     interface: string
@@ -230,6 +231,7 @@ tasks:
     flags:
       flaky: boolean
       retries_used: number
+      requires_design_validation: boolean # set true for ui/ux/design/a11y/style-related tasks
     dependencies: [string]
     conflicts_with: [string]
     context_files:
@@ -259,7 +261,7 @@ tasks:
     # gem-implementer:
     tech_stack: [string]
     test_coverage: string | null
-    debugger_diagnosis: object | null # from bug-fix fast path
+    debugger_diagnosis: object | null # REQUIRED when paired with a debugger task; null otherwise
     implementation_handoff:
       do_not_reinvestigate: [string]
       required_test_first: string
diff --git a/agents/gem-researcher.agent.md b/agents/gem-researcher.agent.md
index 49e70f59d..841295da4 100644
--- a/agents/gem-researcher.agent.md
+++ b/agents/gem-researcher.agent.md
@@ -60,7 +60,8 @@ Return ONLY valid JSON. Omit nulls and empty arrays.
 ```json
 {
   "status": "completed | failed | in_progress | needs_revision",
-  "task_id": "string | omit if unknown",
+  "task_id": "string | omit if unknown (researcher can run standalone before a task exists)",
+  "plan_id": "string",
   "failure_type": "transient | fixable | needs_replan | escalate | flaky | regression | new_failure | platform_specific",
   "confidence": 0.0-1.0,
   "complexity": "simple | medium | complex",
diff --git a/plugins/gem-team/.github/plugin/plugin.json b/plugins/gem-team/.github/plugin/plugin.json
index a4544ce9e..9ff0dfd5b 100644
--- a/plugins/gem-team/.github/plugin/plugin.json
+++ b/plugins/gem-team/.github/plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "gem-team",
-  "version": "1.46.0",
+  "version": "1.47.0",
   "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.",
   "author": {
     "name": "mubaidr",
diff --git a/plugins/gem-team/README.md b/plugins/gem-team/README.md
index 4e935dbd4..992bb771a 100644
--- a/plugins/gem-team/README.md
+++ b/plugins/gem-team/README.md
@@ -56,8 +56,9 @@ See [all supported installation options](#installation) below.
 
 ### Performance
 
-- **4x Faster** — Parallel execution with wave-based execution
+- **2x Faster** — Parallel, wave-based execution
 - **Pattern Reuse** — Codebase pattern discovery prevents reinventing wheels
+- **Context Efficiency** — Concise outputs, file-based context, and caching reduce LLM token usage by 80-90% compared to naive single-pass prompting
 
 ### Quality & Security