From e8e00d5c12ef998cfcfa9121ea2c07d45958882f Mon Sep 17 00:00:00 2001 From: Muhammad Ubaid Raza Date: Sat, 30 May 2026 00:40:57 +0500 Subject: [PATCH 1/2] chore(deps, docs): bump marketplace version to 1.46.0 - Refine execution priority guidance in agent documentation - Improve discovery guidance - Improve context cache guidance - Add script usage guidelines to agent documentation - Simplify agent input references --- .github/plugin/marketplace.json | 2 +- agents/gem-browser-tester.agent.md | 16 +- agents/gem-code-simplifier.agent.md | 31 +- agents/gem-critic.agent.md | 17 +- agents/gem-debugger.agent.md | 16 +- agents/gem-designer-mobile.agent.md | 16 +- agents/gem-designer.agent.md | 16 +- agents/gem-devops.agent.md | 31 +- agents/gem-documentation-writer.agent.md | 30 +- agents/gem-implementer-mobile.agent.md | 31 +- agents/gem-implementer.agent.md | 31 +- agents/gem-mobile-tester.agent.md | 16 +- agents/gem-orchestrator.agent.md | 370 +++++--------------- agents/gem-planner.agent.md | 345 +++++++++++++++++- agents/gem-researcher.agent.md | 24 +- agents/gem-reviewer.agent.md | 22 +- agents/gem-skill-creator.agent.md | 31 +- plugins/gem-team/.github/plugin/plugin.json | 2 +- 18 files changed, 552 insertions(+), 495 deletions(-) diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json index 2d1b29a1a..618fc7e21 100644 --- a/.github/plugin/marketplace.json +++ b/.github/plugin/marketplace.json @@ -359,7 +359,7 @@ "name": "gem-team", "source": "gem-team", "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.", - "version": "1.42.0" + "version": "1.46.0" }, { "name": "git-ape", diff --git a/agents/gem-browser-tester.agent.md b/agents/gem-browser-tester.agent.md index ff329c084..3ad37798d 100644 --- a/agents/gem-browser-tester.agent.md +++ b/agents/gem-browser-tester.agent.md @@ -103,13 +103,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. 
### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-code-simplifier.agent.md b/agents/gem-code-simplifier.agent.md index 3eedb875d..7bd7f6325 100644 --- a/agents/gem-code-simplifier.agent.md +++ b/agents/gem-code-simplifier.agent.md @@ -109,13 +109,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. 
+- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional @@ -127,19 +129,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays. - Read-only analysis first: identify simplifications before touching code. - Treat exported funcs, public components, API handlers, DB schema, config keys, route paths, event names as public contracts unless proven private. Do not rename/remove without explicit permission. -### Script Usage - -Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers. - -Do not use scripts for normal code implementation. - -Script rules: - -- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`. -- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`. -- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits. -- Read/write only explicit paths from args. -- Test on sample data before full execution. -- Document purpose, inputs, outputs, and usage. - diff --git a/agents/gem-critic.agent.md b/agents/gem-critic.agent.md index ccc427a78..984c7e971 100644 --- a/agents/gem-critic.agent.md +++ b/agents/gem-critic.agent.md @@ -37,6 +37,7 @@ Consult Knowledge Sources when relevant. 
- Init - Read `docs/plan/{plan_id}/context_envelope.json` at start; read it in parallel with required agent inputs. Use `research_digest.relevant_files` as the file shortlist. Treat envelope data as a context cache. - Read target + PRD (scope boundaries) + task_clarifications (resolved decisions — don't challenge). + - Read `plan.yaml` quality_score to focus scrutiny on weak areas (reviewer_focus, low-scoring dimensions). - Analyze: - Assumptions — Explicit vs implicit. Stated? Valid? What if wrong? - Scope — Too much? Too little? @@ -102,13 +103,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. 
### Constitutional diff --git a/agents/gem-debugger.agent.md b/agents/gem-debugger.agent.md index 487507d27..2f8685e9c 100644 --- a/agents/gem-debugger.agent.md +++ b/agents/gem-debugger.agent.md @@ -141,13 +141,15 @@ ESLint recommendations: (general recurring patterns only): ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-designer-mobile.agent.md b/agents/gem-designer-mobile.agent.md index 392d8f51e..9c452f0d4 100644 --- a/agents/gem-designer-mobile.agent.md +++ b/agents/gem-designer-mobile.agent.md @@ -209,13 +209,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. 
-- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-designer.agent.md b/agents/gem-designer.agent.md index 4bea90979..c19136443 100644 --- a/agents/gem-designer.agent.md +++ b/agents/gem-designer.agent.md @@ -167,13 +167,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. 
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-devops.agent.md b/agents/gem-devops.agent.md index 94155cbeb..eb02b3819 100644 --- a/agents/gem-devops.agent.md +++ b/agents/gem-devops.agent.md @@ -157,13 +157,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional @@ -174,19 +176,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays. - YAGNI, KISS, DRY, idempotency. - Never implement application code. Return needs_approval when gates triggered. 
-### Script Usage - -Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers. - -Do not use scripts for normal code implementation. - -Script rules: - -- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`. -- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`. -- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits. -- Read/write only explicit paths from args. -- Test on sample data before full execution. -- Document purpose, inputs, outputs, and usage. - diff --git a/agents/gem-documentation-writer.agent.md b/agents/gem-documentation-writer.agent.md index 4f7d338ee..cbe490538 100644 --- a/agents/gem-documentation-writer.agent.md +++ b/agents/gem-documentation-writer.agent.md @@ -59,17 +59,9 @@ Consult Knowledge Sources when relevant. - Check duplicates, append concisely. - Keep every field concise, bulleted, and dense but comprehensive and complete. - `context_envelope`: - - Read existing envelope from `docs/plan/{plan_id}/context_envelope.json`. - - Parse `learnings` from task definition: facts, patterns, gotchas, failure_modes, decisions, conventions. - - Merge into envelope fields deduped by key: - - `facts` → `research_digest.relevant_files` (deduped by path). - - `patterns` → `research_digest.patterns_found` (deduped by name). - - `gotchas` → `research_digest.gotchas` (deduped by text). - - `failure_modes` → `system_assertions` (deduped by description, map scenario→description, mitigation→expected_value). - - `decisions` → `prior_decisions` (deduped by decision). - - `conventions` → `conventions` (deduped string match). - - Bump `meta.version` (increment), set `meta.last_updated` (now), set `meta.previous_version_fields_changed` to list of changed top-level keys. - - Write back to `docs/plan/{plan_id}/context_envelope.json`. 
+ - Update existing envelope from `docs/plan/{plan_id}/context_envelope.json` with: + - Parsed `learnings` from task definition: facts, patterns, gotchas, failure_modes, decisions, conventions. + - Bump `meta.version` (increment), set `meta.last_updated` (now), set `meta.previous_version_fields_changed` to list of changed top-level keys. - Validate: - get_errors, ensure diagrams render, check no secrets exposed. - Verify: @@ -172,13 +164,15 @@ changes: ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-implementer-mobile.agent.md b/agents/gem-implementer-mobile.agent.md index d4fab1aa1..95a419524 100644 --- a/agents/gem-implementer-mobile.agent.md +++ b/agents/gem-implementer-mobile.agent.md @@ -97,13 +97,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. 
Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional @@ -134,19 +136,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays. - Implement minimal_change. - If wrong→needs_revision w/ contradiction evidence. -### Script Usage - -Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers. - -Do not use scripts for normal code implementation. - -Script rules: - -- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`. -- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`. -- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits. -- Read/write only explicit paths from args. -- Test on sample data before full execution. -- Document purpose, inputs, outputs, and usage. 
- diff --git a/agents/gem-implementer.agent.md b/agents/gem-implementer.agent.md index d17ef8099..c586697d8 100644 --- a/agents/gem-implementer.agent.md +++ b/agents/gem-implementer.agent.md @@ -100,13 +100,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional @@ -127,19 +129,4 @@ Return ONLY valid JSON. Omit nulls and empty arrays. - Implement minimal_change. - If diagnosis wrong→return needs_revision w/ contradiction evidence. -### Script Usage - -Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers. - -Do not use scripts for normal code implementation. - -Script rules: - -- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`. 
-- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`. -- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits. -- Read/write only explicit paths from args. -- Test on sample data before full execution. -- Document purpose, inputs, outputs, and usage. - diff --git a/agents/gem-mobile-tester.agent.md b/agents/gem-mobile-tester.agent.md index 327ee7b06..4890aecb8 100644 --- a/agents/gem-mobile-tester.agent.md +++ b/agents/gem-mobile-tester.agent.md @@ -144,13 +144,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. 
### Constitutional diff --git a/agents/gem-orchestrator.agent.md b/agents/gem-orchestrator.agent.md index 2e70f2c2e..a33d3ba88 100644 --- a/agents/gem-orchestrator.agent.md +++ b/agents/gem-orchestrator.agent.md @@ -62,28 +62,42 @@ IMPORTANT: On receiving user input, immediately announce and execute the followi ### Phase 0: Init & Clarify -- Delegate to a generic subagent for intent detection with following instructions: - - Analyze user input + memory for intent, hints, context, patterns, gotchas etc. Check for feedback keywords and classify task type. - - Plan ID — If not provided, generate `YYYYMMDD-kebab-case`. If `plan_id` provided → validate existence of `docs/plan/{plan_id}/plan.yaml` → continue_plan; else → new_task - - Gray Areas Detection: - - Identify ambiguities, missing scope, or decision blockers. - - Identify focus_areas from request keywords. - - Generate clarification options if needed. - - Ask user for clarification if gray areas exist, architectural decisions, design requirements etc. - - Complexity Assessment: - - LOW: single file/small change, known patterns. Minimal blast radius. - - MEDIUM: multiple files, new patterns, moderate scope. Some blast radius. - - HIGH: architectural change, multiple domains, unknown patterns. Significant blast radius. +- Plan ID — If not provided, generate `YYYYMMDD-kebab-case`. 
If `plan_id` provided → validate existence of `docs/plan/{plan_id}/plan.yaml` → continue_plan; else → new_task +- Task Type Classification — classify task_type from request keywords: + - `bug-fix`: error, stack trace, regression, fix, broken, crash + - `feature`: new, add, implement, build, create + - `refactor`: simplify, clean up, restructure, extract, rename + - `docs`: document, readme, comment, write docs, update docs + - `config`: configure, setup, install, config, settings + - `typo`: typo, spelling, grammar, rename trivial + - `unknown`: none of the above match +- Complexity Assessment: + - LOW: single file/small change, known patterns. Minimal blast radius. + - MEDIUM: multiple files, new patterns, moderate scope. Some blast radius. + - HIGH: architectural change, multiple domains, unknown patterns. Significant blast radius. +- Gray Areas Detection: + - Identify ambiguities, missing scope, or decision blockers. + - Identify focus_areas from request keywords. + - Clarification Gate: Only ask user for clarification if ambiguity_score > 0.5 AND the question is a decision_blocker. For non-blocking gray areas, document assumptions and proceed. - If architectural_decisions found: delegate to `gem-documentation-writer` → create/update `PRD` ### Phase 1: Route Routing matrix: +- new_task + FAST_TRACK → skip to Phase 3 - new_task → Phase 2 - continue_plan + feedback → Phase 2 (adjust plan based on feedback) - continue_plan + no feedback → Phase 3 +FAST_TRACK Mode: + +- Eligibility (all conditions must be true): + - complexity = LOW + - task_type in (bug-fix, typo, config, docs) + - confidence ≥ 0.85 +- Goal: Skip Phase 2. Create plan. Execute directly using Phase 3. + ### Phase 2: Planning - Seed Memory: @@ -91,13 +105,13 @@ Routing matrix: - Package relevant entries into `memory_seed` object to pass to planner for envelope seeding. - Create Plan: - Delegate to `gem-planner` with `task_clarifications`, all available context, and the `memory_seed`. 
-- Plan Validation: - - Complexity=LOW: Skip validation. - - Complexity=MEDIUM: delegate to `gem-reviewer(plan)`. - - Complexity=HIGH: delegate to both `gem-reviewer(plan)` + `gem-critic(plan)` in parallel. -- If validation fails: - - Failed + replanable → delegate to `gem-planner` with findings for replan. - - Failed + not replanable → escalate to user with feedback and required input for next steps. + - Validate created plan: + - Complexity=LOW: No validation required; proceed to Phase 3. + - Complexity=MEDIUM: delegate to `gem-reviewer(plan)`. + - Complexity=HIGH: delegate to both `gem-reviewer(plan)` + `gem-critic(plan)` in parallel. + - If validation fails: + - Failed + replanable → delegate to `gem-planner` with findings for replan/ adjustments. + - Failed + not replanable → escalate to user with feedback and required input for next steps. ### Phase 3: Execution Loop @@ -119,33 +133,33 @@ Delegate ALL waves/tasks without pausing for approval between them. - If debugger confidence < 0.85 → escalate to user (cannot reliably diagnose). - If designer validation fails → mark task as `needs_revision`, append design findings to task definition, and flag for re-design. - Synthesize statuses (completed / escalate / needs_replan). Persist all to `plan.yaml`. +- Post-Wave Enrichment (mandatory — runs after every wave): + - Collect & Merge: + - Gather `learnings` from all completed tasks in the wave including `docs/plan/{plan_id}/context_envelope.json` data. + - Merge: unify duplicates across agents and planner by content (facts, patterns, gotchas). + - Cross-reference: when a `gotcha` matches a `failure_mode` symptom, link them. + - Promote: `gotchas` recurring ≥ 3× across plans → `patterns`. `failure_modes` recurring ≥ 2× → elevate severity. + - High confidence patterns (confidence ≥ 0.85) with significant impact → candidate for persistence. 
+ - Context Envelope (greedy — always updated): + - Always delegate to `gem-documentation-writer` with `task_type: update_context_envelope` to refresh `docs/plan/{plan_id}/context_envelope.json` with merged learnings from the wave. + - Memory (picky — confidence gate): + - Only persist items with confidence ≥ 0.80. Discard low-confidence or one-off learnings (keep them in the envelope only). + - Persist deduped `facts`, `patterns`, `gotchas`, `failure_modes`, `decisions`, `conventions` to memory tool. + - Conventions (picky — recurrence gate): + - If same convention recurs ≥ 3× across tasks in this plan: delegate to `gem-documentation-writer` → create/update `AGENTS.md` + - Otherwise: keep in envelope only. + - Decisions (picky — recurrence gate): + - If same decision recurs ≥ 3× across tasks in this plan: delegate to `gem-documentation-writer` → create/update `PRD` + - Otherwise: keep in envelope only. + - Skills (picky — confidence gate): + - If `patterns` with confidence ≥ 0.9 AND non-trivial: delegate to `gem-skill-creator`. - Loop: - - After each wave → Phase 4 → immediately next. + - After each wave → run Post-Wave Enrichment → immediately next. - Blocked → Escalate. - Present status as per `output_format`. - - All done → Phase 5. - -### Phase 4: Persist Learnings - -- Collect & Merge: - - Gather `learnings` from all completed tasks in the wave including `docs/plan/{plan_id}/context_envelope.json` data. - - Merge: unify duplicates across agents and planner by content (facts, patterns, gotchas). - - Cross-reference: when a `gotcha` matches a `failure_mode` symptom, link them. - - Promote: `gotchas` recurring ≥ 3× across plans → `patterns`. `failure_modes` recurring ≥ 2× → elevate severity. -- Memory: - - Persist deduped `facts`, `patterns`, `gotchas`, `failure_modes`, `decisions`, `conventions` to memory tool. 
-- Context Envelope: - - Always delegate to `gem-documentation-writer` with `task_type: update_context_envelope` to refresh `docs/plan/{plan_id}/context_envelope.json` with merged learnings from the wave. - - Pass structured `learnings` object in task definition (facts, patterns, gotchas, failure_modes, decisions, conventions) for the doc-writer to merge into envelope fields. - - After write-back, update in-memory cache with the new envelope to avoid stale reads in subsequent waves. -- Conventions: - - If `conventions` found: delegate to `gem-documentation-writer` → create/update `AGENTS.md` -- Decisions: - - If `decisions` found: delegate to `gem-documentation-writer` → create/update `PRD` -- Skills: - - If `patterns` with confidence ≥ 0.85 AND non-trivial: delegate to `gem-skill-creator`. - -### Phase 5: Output + - All done → Phase 4. + +### Phase 4: Output Present status as per `output_format`. @@ -182,251 +196,34 @@ Present status as per `output_format`. } ``` -### gem-implementer - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "task_definition": { - "tech_stack": ["string"], - "test_coverage": "string | null", - "debugger_diagnosis": "object (for bug-fix mode)", - "implementation_handoff": { - "do_not_reinvestigate": ["string"], - "required_test_first": "string", - "target_files": ["string"], - "minimal_change": "string", - "acceptance_checks": ["string"], - }, - }, -} -``` - -### gem-implementer-mobile - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "task_definition": { - "platforms": ["ios", "android"], - "debugger_diagnosis": "object (for bug-fix mode)", - "implementation_handoff": { - "do_not_reinvestigate": ["string"], - "required_test_first": "string", - "target_files": ["string"], - "minimal_change": "string", - "acceptance_checks": ["string"], - }, - }, -} -``` - -### gem-reviewer - -```jsonc -{ - "review_scope": "plan|wave", - "plan_id": "string", - "plan_path": "string", 
- "wave_tasks": ["string (for wave scope)"], - "security_sensitive_tasks": ["string — task IDs requiring per-task deep scan (merged into wave review)"], - "task_definition": "object (optional task context for wave checks)", - "review_depth": "full|standard|lightweight", - "review_security_sensitive": "boolean", -} -``` - -### gem-debugger - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "task_definition": "object", - "debugger_diagnosis": "object (for retry after failed fix)", - "implementation_handoff": { - "do_not_reinvestigate": ["string"], - "required_test_first": "string", - "target_files": ["string"], - "minimal_change": "string", - "acceptance_checks": ["string"], - }, - "error_context": { - "error_message": "string", - "stack_trace": "string (optional)", - "failing_test": "string (optional)", - "reproduction_steps": ["string (optional)"], - "environment": "string (optional)", - "flow_id": "string (optional)", - "step_index": "number (optional)", - "evidence": ["string (optional)"], - "browser_console": ["string (optional)"], - "network_failures": ["string (optional)"], - }, -} -``` - -### gem-critic - -```jsonc -{ - "task_id": "string (optional)", - "plan_id": "string", - "plan_path": "string", - "target": "string (file paths or plan section)", - "context": "string (what is being built, focus)", -} -``` - -### gem-code-simplifier - -```jsonc -{ - "task_id": "string", - "plan_id": "string (optional)", - "plan_path": "string (optional)", - "scope": "single_file|multiple_files|project_wide", - "targets": ["string (file paths or patterns)"], - "focus": "dead_code|complexity|duplication|naming|all", - "constraints": { "preserve_api": "boolean", "run_tests": "boolean", "max_changes": "number" }, -} -``` - -### gem-browser-tester - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "validation_matrix": [...], - "flows": [...], - "fixtures": {...}, - "visual_regression": {...}, - "contracts": 
[...] -} -``` - -### gem-mobile-tester - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "task_definition": { - "platforms": ["ios", "android"] | ["ios"] | ["android"], - "test_framework": "detox | maestro | appium", - "test_suite": { "flows": [...], "scenarios": [...], "gestures": [...], "app_lifecycle": [...], "push_notifications": [...] }, - "device_farm": { "provider": "browserstack | saucelabs", "credentials": {...} }, - "performance_baseline": {...}, - "fixtures": {...}, - "cleanup": "boolean" - } -} -``` - -### gem-devops - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "task_definition": { - "environment": "development|staging|production", - "requires_approval": "boolean", - "devops_security_sensitive": "boolean", - }, -} -``` - -### gem-documentation-writer +### All Other Agents ```jsonc { - "task_id": "string", "plan_id": "string", - "plan_path": "string", "task_definition": { - "learnings": { - "facts": [{ "statement": "string", "category": "string" }], - "patterns": [{ "name": "string", "description": "string", "confidence": 0.0-1.0 }], - "gotchas": ["string"], - "failure_modes": [{ "scenario": "string", "symptoms": ["string"], "mitigation": "string" }], - "decisions": [{ "decision": "string", "rationale": ["string"], "evidence": ["string"] }], - "conventions": ["string"], - }, + // Agent-specific fields live here. + // Examples: mode, scope, target, context, constraints, environment, etc. 
+ // Agents read full context from docs/plan/{plan_id}/context_envelope.json }, - "task_type": "documentation | update | prd | agents_md | update_context_envelope", - "audience": "developers | end_users | stakeholders", - "coverage_matrix": ["string"], - "action": "create_prd | update_prd | update_agents_md | update_context_envelope", - "architectural_decisions": [{ "decision": "string", "rationale": "string" }], - "findings": [{ "type": "string", "content": "string" }], - "overview": "string", - "tasks_completed": ["string"], - "outcomes": "string", - "next_steps": ["string"], - "acceptance_criteria": ["string"], -} -``` - -### gem-skill-creator - -```jsonc -{ - "task_id": "string", - "plan_id": "string", - "plan_path": "string", - "patterns": [ - { - "name": "string", - "when_to_apply": "string", - "code_example": "string", - "anti_pattern": "string", - "context": "string", - "confidence": "number", - }, - ], - "source_task_id": "string", } ``` -### gem-designer - -```jsonc -{ - "task_id": "string", - "plan_id": "string (optional)", - "plan_path": "string (optional)", - "mode": "create|validate", - "scope": "component|page|layout|theme|design_system", - "target": "string (file paths or component names)", - "context": { "framework": "string", "library": "string", "existing_design_system": "string", "requirements": "string" }, - "constraints": { "responsive": "boolean", "accessible": "boolean", "dark_mode": "boolean" }, -} -``` - -### gem-designer-mobile - -```jsonc -{ - "task_id": "string", - "plan_id": "string (optional)", - "plan_path": "string (optional)", - "mode": "create|validate", - "scope": "component|screen|navigation|theme|design_system", - "target": "string (file paths or component names)", - "context": { "framework": "string", "library": "string", "existing_design_system": "string", "requirements": "string" }, - "constraints": { "platform": "ios|android|cross-platform", "responsive": "boolean", "accessible": "boolean", "dark_mode": "boolean" }, -} -``` 
+**Examples of task_definition fields by agent:** + +- `gem-implementer`: `tech_stack`, `test_coverage`, `debugger_diagnosis`, `implementation_handoff` +- `gem-implementer-mobile`: `platforms`, `debugger_diagnosis`, `implementation_handoff` +- `gem-reviewer`: `review_scope`, `review_depth`, `review_security_sensitive` +- `gem-debugger`: `error_context`, `debugger_diagnosis`, `implementation_handoff` +- `gem-critic`: `target`, `context` +- `gem-code-simplifier`: `scope`, `targets`, `focus`, `constraints` +- `gem-browser-tester`: `validation_matrix`, `flows`, `fixtures`, `visual_regression`, `contracts` +- `gem-mobile-tester`: `platforms`, `test_framework`, `test_suite`, `device_farm` +- `gem-devops`: `environment`, `requires_approval`, `devops_security_sensitive` +- `gem-documentation-writer`: `task_type`, `audience`, `coverage_matrix`, `action`, `learnings`, `findings` +- `gem-designer`: `mode`, `scope`, `target`, `context`, `constraints` +- `gem-designer-mobile`: `mode`, `scope`, `target`, `context`, `constraints` +- `gem-skill-creator`: `patterns`, `source_task_id` @@ -465,13 +262,14 @@ Present status as per `output_format`. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. 
+- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-planner.agent.md b/agents/gem-planner.agent.md index 313e8091c..45028d175 100644 --- a/agents/gem-planner.agent.md +++ b/agents/gem-planner.agent.md @@ -61,22 +61,28 @@ Consult Knowledge Sources when relevant. - Context: - Parse objective/ context. - Mode: Initial, Replan, or Extension. -- Research: - - Identify focus_areas from objective and context. - - Search similar implementations → patterns_found. - - Discovery via semantic_search + grep_search, merge results. +- Discovery (OBJECTIVE-ALIGNED — no random exploration): + - Identify focus_areas strictly from objective and context. + - All searches MUST target focus_areas; no exploratory/off-target searching. + - Discovery via semantic_search + grep_search, scoped to focus_areas. - Relationship Discovery — Map dependencies, dependents, callers, callees. + - Codebase Structure Mapping — Identify: + - key_dirs (actual directory structure via list_dir) + - key_components (files + their responsibilities) + - existing patterns (via semantic_search of code patterns) + - Ground-truth population — Populate context_envelope with actual findings, not assumptions: + - tech_stack: verified from package.json, requirements.txt, or actual files + - conventions: extracted from existing code, not assumed + - constraints: based on actual codebase, not generic - Design: - Lock clarifications into DAG constraints. - Synthesize DAG: atomic tasks (or NEW for extension). - Assign waves: no deps → wave 1, dep.wave + 1. - - Create contracts between dependent tasks. - - Capture research_metadata.confidence → `plan.yaml`. - - Link each task to research sources. 
- Agent Assignment — Reason from available agents, task nature, and context: - Consult `` list; pick the agent whose role and specialization best matches the task. - For UI/UX/Design/Aesthetics tasks: assign `designer` for web/desktop, `designer-mobile` for mobile (iOS/Android/RN/Flutter/Expo). If cross-platform, split into separate web + mobile tasks. - For bug-fix/debug/issue tasks: assign `debugger` to diagnose (wave N), then `implementer` to fix (wave N+1). + - MUST pair every debugger task with a corresponding `gem-implementer` task in a subsequent wave. - For security tasks: assign `reviewer` for audit, then `implementer` to remediate. - For refactoring/simplification tasks: assign `code-simplifier`. - For documentation: assign `doc-writer`. @@ -93,15 +99,19 @@ Consult Knowledge Sources when relevant. - Assess PRD update need (new features, scope shifts, ADR deviations, new stories, AC changes→set prd_update_recommended). - New features→add doc-writer task (final wave). - Calculate metrics (wave_1_count, deps, risk_score). + - Calculate quality_score (overall, breakdown by dimension, blocking_issues, warnings). + - Generate reviewer_focus: list dimensions with score < 0.9 for targeted scrutiny. + - Pre-Flight Validation: + - Validate plan.yaml against Plan Verification Criteria before saving + - If validation fails → fix issues inline, re-validate, then save + - Do NOT save and output a broken plan - Save Plan `docs/plan/{plan_id}/plan.yaml` - Create context envelope `context_envelope.json` as per `context_envelope_format_guide` - Use provided context as seed and augment with research findings. - If `memory_seed` provided, merge its high confidence items/ contents into the envelope - Keep every field concise, bulleted, and dense but comprehensive and complete. Avoid fluff, filler, and verbosity. Evidence paths over explanation. - Create for future agent reuse: include durable facts, decisions, constraints, and evidence paths needed to avoid re-discovery. 
- - Omit no context. - Save Context Envelope: `docs/plan/{plan_id}/context_envelope.json`. -- Validation — Verify as per `Plan Verification Criteria`. - Failure — Log error, return status=failed w/ reason. Log to `docs/plan/{plan_id}/logs/`. - Output - Return JSON per Output Format. @@ -124,6 +134,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. "prd_update_recommended": "boolean", "prd_update_reason": "string | null", "metrics": { "wave_1_task_count": "number", "total_dependencies": "number", "risk_score": "low | medium | high" }, + "quality_score": { + "overall": "number (0.0-1.0)", + "prd_coverage": "number (0.0-1.0)", + "target_files_verified": "number (0.0-1.0)", + "contracts_complete": "number (0.0-1.0)", + "wave_assignment_valid": "number (0.0-1.0)", + "blocking_issues": "number", + "warnings": "number" + }, "learnings": { "patterns": [{ "name": "string", "description": "string", "confidence": 0.0-1.0 }], "gotchas": ["string"], @@ -148,11 +167,21 @@ objective: string created_at: string created_by: string status: pending | approved | in_progress | completed | failed -research_confidence: high | medium | low plan_metrics: wave_1_task_count: number total_dependencies: number risk_score: low | medium | high +quality_score: + overall: number (0.0-1.0) + breakdown: + prd_coverage: number (0.0-1.0) + target_files_verified: number (0.0-1.0) + contracts_complete: number (0.0-1.0) + wave_assignment_valid: number (0.0-1.0) + blocking_issues: number + warnings: number + # Reviewer guidance: areas needing extra scrutiny based on lower scores + reviewer_focus: [string] tldr: | open_questions: - question: string @@ -459,6 +488,278 @@ tasks: "safe_to_assume": ["string"], "verify_before_use": ["string"], }, + // NEW: Plan-level execution metadata from plan.yaml + "plan_metadata": { + "tldr": "string — one-line plan summary", + "complexity": "simple | medium | complex", + "risk_score": "low | medium | high", + "wave_1_task_count": "number", + "total_dependencies": 
"number", + "prd_update_recommended": "boolean", + "prd_update_reason": "string | null", + "pre_mortem": { + "overall_risk_level": "low | medium | high", + "assumptions": ["string"], + "critical_failure_modes": [ + { + "scenario": "string", + "likelihood": "low | medium | high", + "impact": "low | medium | high | critical", + "mitigation": "string", + }, + ], + }, + "open_questions": [ + { + "question": "string", + "context": "string", + "type": "decision_blocker | research | nice_to_know", + "affects": ["string"], + }, + ], + "gaps": [ + { + "description": "string", + "refinement_requests": [ + { + "query": "string", + "source_hint": "string", + }, + ], + }, + ], + "planning_history": [ + { + "pass": "number", + "reason": "string", + "timestamp": "ISO-8601 string", + }, + ], + }, + // NEW: Researcher output — full findings, not just digest + "research_findings": { + "files_analyzed": [ + { + "file": "string", + "path": "string", + "purpose": "string", + "key_elements": [ + { + "element": "string", + "type": "function | class | variable | pattern", + "location": "string — file:line", + "description": "string", + "language": "string", + }, + ], + "lines": "number", + }, + ], + "related_architecture": { + "components_relevant_to_domain": [ + { + "component": "string", + "responsibility": "string", + "location": "string", + "relationship_to_domain": "string", + }, + ], + "interfaces_used_by_domain": [ + { + "interface": "string", + "location": "string", + "usage_pattern": "string", + }, + ], + "data_flow_involving_domain": "string", + "key_relationships_to_domain": [ + { + "from": "string", + "to": "string", + "relationship": "imports | calls | inherits | composes", + }, + ], + }, + "related_technology_stack": { + "languages_used_in_domain": ["string"], + "frameworks_used_in_domain": [ + { + "name": "string", + "usage_in_domain": "string", + }, + ], + "libraries_used_in_domain": [ + { + "name": "string", + "purpose_in_domain": "string", + }, + ], + 
"external_apis_used_in_domain": [ + { + "name": "string", + "integration_point": "string", + }, + ], + }, + "related_conventions": { + "naming_patterns_in_domain": "string", + "structure_of_domain": "string", + "error_handling_in_domain": "string", + "testing_in_domain": "string", + "documentation_in_domain": "string", + }, + "related_dependencies": { + "internal": [ + { + "component": "string", + "relationship_to_domain": "string", + "direction": "inbound | outbound | bidirectional", + }, + ], + "external": [ + { + "name": "string", + "purpose_for_domain": "string", + }, + ], + }, + "domain_security_considerations": { + "sensitive_areas": [ + { + "area": "string", + "location": "string", + "concern": "string", + }, + ], + "authentication_patterns_in_domain": "string", + "authorization_patterns_in_domain": "string", + "data_validation_in_domain": "string", + }, + "testing_patterns": { + "framework": "string", + "coverage_areas": ["string"], + "test_organization": "string", + "mock_patterns": ["string"], + }, + "research_metadata": { + "methodology": "string — e.g., semantic_search+grep_search, Context7", + "scope": "string", + "confidence_level": "high | medium | low", + "coverage_percent": "number", + "decision_blockers": "number", + "research_blockers": "number", + }, + }, + // NEW: Execution state for future agents + "task_registry": { + "waves": [ + { + "wave": "number", + "agents": ["string"], + "task_count": "number", + "completed": "number", + "failed": "number", + "blocked": "number", + }, + ], + "tasks": [ + { + "id": "string", + "title": "string", + "agent": "string", + "wave": "number", + "priority": "high | medium | low", + "status": "pending | in_progress | completed | failed | blocked | needs_revision", + "estimated_effort": "small | medium | large", + "estimated_files": "number", + "estimated_lines": "number", + "flags": { + "flaky": "boolean", + "retries_used": "number", + }, + "conflicts_with": ["string"], + "focus_area": "string | null", + }, + ], 
+ }, + // NEW: Trace what was seeded vs discovered + "memory_seed_trace": { + "seeded_facts": [ + { + "statement": "string", + "category": "string", + "confidence": "number (0.0-1.0)", + }, + ], + "seeded_patterns": [ + { + "name": "string", + "description": "string", + "confidence": "number (0.0-1.0)", + }, + ], + "seeded_gotchas": ["string"], + "seeded_failure_modes": [ + { + "scenario": "string", + "symptoms": ["string"], + "mitigation": "string", + }, + ], + "seeded_decisions": [ + { + "decision": "string", + "rationale": ["string"], + }, + ], + "seeded_conventions": ["string"], + "merged_confidence": "number (0.0-1.0)", + }, + // NEW: Implementation specification from plan.yaml + "implementation_spec": { + "code_structure": "string", + "affected_areas": ["string"], + "component_details": [ + { + "component": "string", + "responsibility": "string", + "interfaces": ["string"], + "dependencies": [ + { + "component": "string", + "relationship": "string", + }, + ], + "integration_points": ["string"], + }, + ], + "contracts": [ + { + "from_task": "string", + "to_task": "string", + "interface": "string", + "format": "string", + }, + ], + }, + // Ground-truth validation results from Discovery phase + "codebase_validation": { + "verified_at": "ISO-8601 string", + "target_files_exist": { + "T01": ["src/config.ts"], + "T02": ["src/api/client.ts"], + }, + "dependency_graph_valid": true, + "no_circular_deps": true, + "wave_assignment_valid": true, + "all_contracts_defined": true, + "tech_stack_populated": true, + "prd_alignment": { + "requirements_mapped": ["REQ-001", "REQ-002"], + "unmapped_requirements": [], + "coverage_percent": 100, + }, + }, }, } ``` @@ -471,13 +772,15 @@ tasks: ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. 
-- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional @@ -489,12 +792,16 @@ tasks: #### Plan Verification Criteria +Run these checks BEFORE saving plan.yaml. Fix all failures inline. 
+ - Plan: - Valid YAML, required fields, unique task IDs, valid status values - Concise, dense, complete, focused on implementation, avoids fluff/verbosity -- DAG: No circular deps, all dep IDs exist -- Contracts: Valid from_task/to_task IDs, interfaces defined +- DAG: No circular deps, all dep IDs exist, no_deps → wave_1 +- Contracts: Valid from_task/to_task IDs, interfaces defined (required for ALL complexity) - Tasks: Valid agent assignments, failure_modes for high/medium tasks, verification present, success_criteria defined when needed + - Every debugger task has a paired implementer task (wave N+1 or later) + - If acceptance_criteria mentions tests → target_files must include test file paths - Pre-mortem: overall_risk_level defined, critical_failure_modes present - Implementation spec: code_structure, affected_areas, component_details defined diff --git a/agents/gem-researcher.agent.md b/agents/gem-researcher.agent.md index 75e662019..49e70f59d 100644 --- a/agents/gem-researcher.agent.md +++ b/agents/gem-researcher.agent.md @@ -37,11 +37,11 @@ Consult Knowledge Sources when relevant. - Init - Read `docs/plan/{plan_id}/context_envelope.json` at start when it exists; read it in parallel with required agent inputs. Use `research_digest.relevant_files` as the file shortlist. Treat envelope data as a context cache. - Identify focus_area -- Research Pass — Pattern discovery: - - Search similar implementations → patterns_found. - - Discovery via semantic_search + grep_search, merge results. - - Calculate confidence. +- Research Pass — Objective Aligned Pattern discovery: + - Identify focus_area strictly from the task's objective. + - Discovery via semantic_search + grep_search, scoped to focus_area. - Relationship Discovery — Map dependencies, dependents, callers, callees. + - Calculate confidence. - Early Exit: - If confidence ≥ 0.85 → skip relationships + detailed → Synthesize Phase. - If decision_blockers resolved AND confidence ≥ 0.8 → early exit. 
@@ -229,13 +229,15 @@ Return ONLY valid JSON. Omit nulls and empty arrays. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional diff --git a/agents/gem-reviewer.agent.md b/agents/gem-reviewer.agent.md index 1626311eb..8286cd83f 100644 --- a/agents/gem-reviewer.agent.md +++ b/agents/gem-reviewer.agent.md @@ -40,6 +40,7 @@ Consult Knowledge Sources when relevant. - Init - Read `docs/plan/{plan_id}/context_envelope.json` at start; read it in parallel with required agent inputs. Use `research_digest.relevant_files` as the file shortlist. Treat envelope data as a context cache. Then parse review_scope: plan|wave. - Read `plan.yaml` + `PRD.yaml`. + - Use quality_score.reviewer_focus to prioritize scrutiny on weak areas. ### Plan Review @@ -49,8 +50,13 @@ Consult Knowledge Sources when relevant. - Atomicity (≤ 300 lines/task). - No circular deps, all IDs exist. 
- Wave parallelism, conflicts_with not parallel. + - Wave assignment: tasks with no dependencies are in wave 1. - Tasks have verification + acceptance_criteria. + - Test file inclusion: if acceptance_criteria mentions tests (contains 'test' or 'tests'), target_files must include corresponding test file paths. - PRD alignment, valid agents. + - Tech stack: context_envelope.tech_stack exists and is non-empty. + - Contracts: Every dependency edge must have a contract. + - Diagnose-then-fix: every debugger task has a paired implementer task in a later wave. - Status: - Critical → failed. - Non-critical → needs_revision. @@ -125,13 +131,15 @@ Consult Knowledge Sources when relevant. ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. 
### Constitutional diff --git a/agents/gem-skill-creator.agent.md b/agents/gem-skill-creator.agent.md index 42c2d0911..fd2e3c50a 100644 --- a/agents/gem-skill-creator.agent.md +++ b/agents/gem-skill-creator.agent.md @@ -149,13 +149,15 @@ metadata: ### Execution -- Priority: Tools > Tasks > Scripts > CLI. Batch independent I/O calls, prioritize I/O-bound. -- Plan and batch independent tool calls. Use `OR` regex for related patterns, multi-pattern globs. -- Discover first → read full set in parallel. Avoid line-by-line reads. -- Narrow search with includePattern/excludePattern. -- Autonomous execution. -- Retry 3x. -- JSON output only. +- Execution priority: native tools → subagents/tasks → scripts → raw CLI. +- Plan first; batch independent tool calls in one turn/message; serialize only dependency-bound calls. +- Discover broadly, narrow early with OR regexes/multi-globs/include/exclude filters, then parallel-read the full relevant file set. +- Execute autonomously; ask only for true blockers. +- Retry transient failures up to 3x. +- Return JSON output only. +- Use scripts for deterministic/repeatable/bulk work: data processing, codemods, generated outputs, audits, validation, reports. + - Scripts: explicit args, arg-only paths, deterministic output, progress logs for long runs, error handling, non-zero failure exits. + - Test on sample/small input before full run. ### Constitutional @@ -164,19 +166,4 @@ metadata: - Minimum content, nothing speculative. - Treat patterns as read-only source of truth. Deduplicate before creating. -### Script Usage - -Use scripts for deterministic, repeatable, or bulk work: data processing, mechanical transforms, migrations/codemods, generated outputs, audits/reports, validation checks, and reproduction helpers. - -Do not use scripts for normal code implementation. - -Script rules: - -- Store plan-specific scripts in `docs/plan/{plan_id}/scripts/`. -- Store skill-specific scripts in `docs/skills/{skill-name}/scripts/`. 
-- Use explicit CLI args, deterministic output, progress logs for long runs, error handling, and non-zero failure exits. -- Read/write only explicit paths from args. -- Test on sample data before full execution. -- Document purpose, inputs, outputs, and usage. - diff --git a/plugins/gem-team/.github/plugin/plugin.json b/plugins/gem-team/.github/plugin/plugin.json index bfbec766b..a4544ce9e 100644 --- a/plugins/gem-team/.github/plugin/plugin.json +++ b/plugins/gem-team/.github/plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "gem-team", - "version": "1.42.0", + "version": "1.46.0", "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.", "author": { "name": "mubaidr", From 1e1cd22f88ba8515e7b8185a994621513131707c Mon Sep 17 00:00:00 2001 From: Muhammad Ubaid Raza Date: Sun, 31 May 2026 03:14:19 +0500 Subject: [PATCH 2/2] feat: bump marketplace version to 1.47.0 and enhance agent workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add Bug‑Fix Mode with validation gate for `debugger_diagnosis` tasks - Expand allowed task types to include `research` - Reduce subagent concurrency limit from 4 to 2 - Update design validation handling for flagged tasks - Update marketplace plugin version reference to 1.47.0 --- .github/plugin/marketplace.json | 2 +- agents/gem-implementer.agent.md | 21 +++++++++++++++------ agents/gem-orchestrator.agent.md | 6 ++++-- agents/gem-planner.agent.md | 14 ++++++++------ agents/gem-researcher.agent.md | 3 ++- plugins/gem-team/.github/plugin/plugin.json | 2 +- plugins/gem-team/README.md | 3 ++- 7 files changed, 33 insertions(+), 18 deletions(-) diff --git a/.github/plugin/marketplace.json b/.github/plugin/marketplace.json index 618fc7e21..89a307bc3 100644 --- a/.github/plugin/marketplace.json +++ b/.github/plugin/marketplace.json @@ -359,7 +359,7 @@ "name": "gem-team", "source": "gem-team", "description": "Self-Learning Multi-agent 
orchestration framework for spec-driven development and automated verification.", - "version": "1.46.0" + "version": "1.47.0" }, { "name": "git-ape", diff --git a/agents/gem-implementer.agent.md b/agents/gem-implementer.agent.md index c586697d8..307db13bd 100644 --- a/agents/gem-implementer.agent.md +++ b/agents/gem-implementer.agent.md @@ -42,7 +42,9 @@ Consult Knowledge Sources when relevant. - Read — PRD sections, `DESIGN.md` tokens - Analyze: - Criteria — Understand acceptance_criteria. -- TDD Cycle (Red → Green → Refactor → Verify): +- Bug-Fix Mode Branch: + - If `task_definition.debugger_diagnosis` exists → follow Bug-Fix Mode (see Rules). Validation gate runs first. +- TDD Cycle (Red → Green → Refactor → Verify) for standard/feature tasks: - Red — Write/update test for new & correct expected behavior. - Green — Write minimal code to pass. - Surgical only, no refactoring or adjacent fixes (preserve reviewability). @@ -123,10 +125,17 @@ Return ONLY valid JSON. Omit nulls and empty arrays. #### Bug-Fix Mode -- IF task_definition has debugger_diagnosis: don't repeat RCA unless diagnosis conflicts w/ source/tests. -- Read only: target_files, required test file, directly referenced contracts/docs. -- Start w/ required_test_first. -- Implement minimal_change. -- If diagnosis wrong→return needs_revision w/ contradiction evidence. +When `task_definition.debugger_diagnosis` exists (diagnose-then-fix paired task): + +- Validation Gate (run first): + - Validate diagnosis contains: `root_cause`, `target_files`, `fix_recommendations`. + - If any field missing → return `needs_revision` immediately. Do NOT proceed with TDD. + - Use `implementation_handoff` as the authoritative work scope. +- Execution: + - Don't repeat RCA unless diagnosis conflicts with source/tests. + - Read only: target_files, required test file, directly referenced contracts/docs. + - Start w/ required_test_first. + - Implement minimal_change. 
+ - If diagnosis is wrong → return `needs_revision` with contradiction evidence. diff --git a/agents/gem-orchestrator.agent.md b/agents/gem-orchestrator.agent.md index a33d3ba88..32ccd54ca 100644 --- a/agents/gem-orchestrator.agent.md +++ b/agents/gem-orchestrator.agent.md @@ -70,7 +70,9 @@ IMPORTANT: On receiving user input, immediately announce and execute the followi - `docs`: document, readme, comment, write docs, update docs - `config`: configure, setup, install, config, settings - `typo`: typo, spelling, grammar, rename trivial + - `research`: research, investigate, explore, analyze, compare, evaluate, explain, understand - `unknown`: none of the above match + - If `unknown`: confidence ≥ 0.85 → default to `feature`; confidence < 0.85 → escalate to user for clarification - Complexity Assessment: - LOW: single file/small change, known patterns. Minimal blast radius. - MEDIUM: multiple files, new patterns, moderate scope. Some blast radius. @@ -124,10 +126,10 @@ Delegate ALL waves/tasks without pausing for approval between them. - Wave > 1: include contracts from task definitions. - Get pending (deps = completed, status = pending, wave = current). - Filter conflicts_with: same-file tasks serialize. - - Delegate to subagents (max 4 concurrent) as per `agent_input_reference`. + - Delegate to subagents (max 2 concurrent). - Integration Check: - Delegate to `gem-reviewer(wave scope)` for integration + security scan. - - ui|ux|design|interface|a11y tasks → validate with the designer agent matching the task's assigned agent (if task.agent is `designer-mobile`, use `gem-designer-mobile(validate)`; otherwise use `gem-designer(validate)`), run in parallel with `gem-reviewer(wave scope)`.
+ - Tasks with `flags.requires_design_validation: true` → validate with the designer agent matching the task's assigned agent (if task.agent is `designer-mobile`, use `gem-designer-mobile(validate)`; otherwise use `gem-designer(validate)`), run in parallel with `gem-reviewer(wave scope)`. - If reviewer fails → `gem-debugger` to diagnose: - If debugger confidence ≥ 0.85 → delegate to `gem-implementer` with diagnosis → re-verify. - If debugger confidence < 0.85 → escalate to user (cannot reliably diagnose). diff --git a/agents/gem-planner.agent.md b/agents/gem-planner.agent.md index 45028d175..eedb9d66a 100644 --- a/agents/gem-planner.agent.md +++ b/agents/gem-planner.agent.md @@ -83,6 +83,7 @@ Consult Knowledge Sources when relevant. - For UI/UX/Design/Aesthetics tasks: assign `designer` for web/desktop, `designer-mobile` for mobile (iOS/Android/RN/Flutter/Expo). If cross-platform, split into separate web + mobile tasks. - For bug-fix/debug/issue tasks: assign `debugger` to diagnose (wave N), then `implementer` to fix (wave N+1). - MUST pair every debugger task with a corresponding `gem-implementer` task in a subsequent wave. + - The implementer task MUST include `debugger_diagnosis` field (populated from debugger's output) in its task_definition. - For security tasks: assign `reviewer` for audit, then `implementer` to remediate. - For refactoring/simplification tasks: assign `code-simplifier`. - For documentation: assign `doc-writer`. 
@@ -183,17 +184,17 @@ quality_score: # Reviewer guidance: areas needing extra scrutiny based on lower scores reviewer_focus: [string] tldr: | -open_questions: +open_questions: # Optional for LOW complexity; required for MEDIUM/HIGH - question: string context: string type: decision_blocker | research | nice_to_know affects: [string] -gaps: +gaps: # Optional for LOW complexity; required for MEDIUM/HIGH - description: string refinement_requests: - query: string source_hint: string -pre_mortem: +pre_mortem: # Optional for LOW complexity; required for MEDIUM/HIGH overall_risk_level: low | medium | high critical_failure_modes: - scenario: string @@ -201,7 +202,7 @@ pre_mortem: impact: low | medium | high | critical mitigation: string assumptions: [string] -implementation_specification: +implementation_specification: # Optional for LOW complexity; required for MEDIUM/HIGH code_structure: string affected_areas: [string] component_details: @@ -212,7 +213,7 @@ implementation_specification: - component: string relationship: string integration_points: [string] -contracts: +contracts: # Optional for LOW/MEDIUM; required for HIGH complexity - from_task: string to_task: string interface: string @@ -230,6 +231,7 @@ tasks: flags: flaky: boolean retries_used: number + requires_design_validation: boolean # set true for ui/ux/design/a11y/style related tasks dependencies: [string] conflicts_with: [string] context_files: @@ -259,7 +261,7 @@ tasks: # gem-implementer: tech_stack: [string] test_coverage: string | null - debugger_diagnosis: object | null # from bug-fix fast path + debugger_diagnosis: object | null # REQUIRED when paired with a debugger task; null otherwise implementation_handoff: do_not_reinvestigate: [string] required_test_first: string diff --git a/agents/gem-researcher.agent.md b/agents/gem-researcher.agent.md index 49e70f59d..841295da4 100644 --- a/agents/gem-researcher.agent.md +++ b/agents/gem-researcher.agent.md @@ -60,7 +60,8 @@ Return ONLY valid JSON. 
Omit nulls and empty arrays. ```json { "status": "completed | failed | in_progress | needs_revision", - "task_id": "string | omit if unknown", + "task_id": "string | omit if unknown (researcher can run standalone before a task exists)", + "plan_id": "string", "failure_type": "transient | fixable | needs_replan | escalate | flaky | regression | new_failure | platform_specific", "confidence": 0.0-1.0, "complexity": "simple | medium | complex", diff --git a/plugins/gem-team/.github/plugin/plugin.json b/plugins/gem-team/.github/plugin/plugin.json index a4544ce9e..9ff0dfd5b 100644 --- a/plugins/gem-team/.github/plugin/plugin.json +++ b/plugins/gem-team/.github/plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "gem-team", - "version": "1.46.0", + "version": "1.47.0", "description": "Self-Learning Multi-agent orchestration framework for spec-driven development and automated verification.", "author": { "name": "mubaidr", diff --git a/plugins/gem-team/README.md b/plugins/gem-team/README.md index 4e935dbd4..992bb771a 100644 --- a/plugins/gem-team/README.md +++ b/plugins/gem-team/README.md @@ -56,8 +56,9 @@ See [all supported installation options](#installation) below. ### Performance -- **4x Faster** — Parallel execution with wave-based execution +- **2x Faster** — Parallel, wave-based execution - **Pattern Reuse** — Codebase pattern discovery prevents reinventing wheels +- **Context Efficiency** — Concise outputs, file-based context, and caching reduce LLM token usage by 80-90% compared to naive single-pass prompting ### Quality & Security