From d2cc8ed055dee5eaa289a028aa62174954677819 Mon Sep 17 00:00:00 2001 From: agent-of-mkmeral <265349452+agent-of-mkmeral@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:58:49 +0000 Subject: [PATCH 1/3] feat: add agent skills and use_agent to existing /strands command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Integrates new capabilities directly into the existing /strands command instead of a separate /strands-beta workflow, so all repos with /strands get the new features automatically. Changes to existing files: - process-input.cjs: Add adversarial-test and release-digest modes - agent_runner.py: Add use_agent tool + AgentSkills plugin loading - action.yml: Checkout and copy agent-skills to working directory New files: - agent-skills/task-adversarial-tester/SKILL.md - agent-skills/task-release-digest/SKILL.md New /strands commands: - /strands adversarial-test — Break PRs with edge case tests - /strands release-digest — Generate release digests with sub-agents Existing commands unchanged. Skills load gracefully — if agent-skills/ directory doesn't exist or AgentSkills plugin isn't available, the agent works exactly as before. 
--- .../actions/strands-agent-runner/action.yml | 15 ++- .../task-adversarial-tester/SKILL.md | 108 ++++++++++++++++++ .../agent-skills/task-release-digest/SKILL.md | 107 +++++++++++++++++ .../scripts/javascript/process-input.cjs | 34 +++++- .../scripts/python/agent_runner.py | 64 ++++++++++- 5 files changed, 321 insertions(+), 7 deletions(-) create mode 100644 strands-command/agent-skills/task-adversarial-tester/SKILL.md create mode 100644 strands-command/agent-skills/task-release-digest/SKILL.md diff --git a/strands-command/actions/strands-agent-runner/action.yml b/strands-command/actions/strands-agent-runner/action.yml index 057fb63..b6f1dfc 100644 --- a/strands-command/actions/strands-agent-runner/action.yml +++ b/strands-command/actions/strands-agent-runner/action.yml @@ -54,7 +54,7 @@ runs: jq -r .prompt strands-parsed-input.json >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT - # Checkout devtools repo for scripts + # Checkout devtools repo for scripts, SOPs, and agent skills - name: Checkout devtools uses: actions/checkout@v5 with: @@ -63,6 +63,7 @@ runs: sparse-checkout: | strands-command/scripts strands-command/agent-sops + strands-command/agent-skills path: devtools # Copy the devtools directory to the runner temp directory so the branch content cant overwrite the scripts executed here @@ -79,6 +80,18 @@ runs: ref: ${{ steps.read-input.outputs.ref }} repository: ${{ steps.read-input.outputs.head_repo || github.repository }} + # Copy agent-skills to working directory so the skills plugin can find them + - name: Copy agent-skills to working directory + shell: bash + run: | + if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ]; then + cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills ./agent-skills + echo "✅ Copied agent-skills to working directory" + ls -la ./agent-skills/ + else + echo "ℹ️ No agent-skills directory found (skills not available)" + fi + - name: Set up Python uses: actions/setup-python@v4 
with: diff --git a/strands-command/agent-skills/task-adversarial-tester/SKILL.md b/strands-command/agent-skills/task-adversarial-tester/SKILL.md new file mode 100644 index 0000000..c4f8fed --- /dev/null +++ b/strands-command/agent-skills/task-adversarial-tester/SKILL.md @@ -0,0 +1,108 @@ +--- +name: task-adversarial-tester +description: Break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. Produce artifacts — failing tests, reproduction scripts, and concrete evidence — that prove something is broken. +allowed-tools: shell use_github +--- +# Adversarial Tester + +## Role + +You are an Adversarial Tester. Your goal is to break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. You do NOT judge code quality or style. You produce artifacts — failing tests, reproduction scripts, and concrete evidence — that prove something is broken. If you can't break it, you say so. You never speculate without proof. + +## Principles + +1. **Prove, don't opine.** Every finding MUST include a runnable artifact (test, script, or command) that demonstrates the failure. +2. **Spec over implementation.** Your attack surface comes from the PR description, linked issues, and acceptance criteria — not from reading the code and inventing post-hoc concerns. +3. **Adversarial by design.** Assume the code is wrong until proven otherwise. +4. **Artifacts are the deliverable.** Your output is a set of pass/fail artifacts. If all pass, the code survived. If any fail, they speak for themselves. +5. **No overlap with the reviewer.** You don't comment on naming, style, architecture, or documentation. You break things. + +## Steps + +### 1. 
Setup Test Environment + +- Checkout the PR branch +- Read `AGENTS.md`, `CONTRIBUTING.md`, `DEVELOPMENT.md` to understand the project's test infrastructure +- Run the existing test suite to establish a baseline (pass count, fail count) +- Create a progress tracking notebook + +### 2. Understand the Attack Surface + +- Read the PR description and linked issue thoroughly +- Use `use_github` GraphQL to identify all changed files +- Extract explicit and implicit acceptance criteria +- Identify the public API surface being added or modified +- Categorize: new feature, bugfix, refactor, dependency change, config change +- Note any claims the author makes ("handles X", "backward compatible", "no breaking changes") +- Document your attack surface as a checklist: + - Input boundaries and edge cases + - Error paths and failure modes + - Concurrency and ordering assumptions + - Backward compatibility claims + - Security-sensitive areas + - Integration points + +### 3. Adversarial Test Generation + +#### 3.1 Edge Case Testing +- Identify all input parameters and their documented boundaries +- Write tests for: empty inputs, null/None values, maximum values, negative numbers, special characters, unicode, extremely long strings +- Test type coercion boundaries +- Test combinations of edge case inputs + +#### 3.2 Error Path Testing +- Map every error handler in the changed code +- Write tests that trigger each error path +- Verify error messages are correct and don't leak internals +- Test cascading failures +- Test resource cleanup on error + +#### 3.3 Concurrency & Race Condition Testing +- If the code has shared state, write concurrent access tests +- Test ordering assumptions +- Test timeout and cancellation paths +- Test re-entrancy if applicable + +#### 3.4 Backward Compatibility Testing +- If the PR claims backward compatibility, write tests proving or disproving it +- Test that existing public API contracts still hold +- Test serialization/deserialization with old formats 
if applicable + +#### 3.5 Security Testing +- Test for injection attacks if the code processes user input +- Test for credential/secret leakage in error messages or logs +- Test for path traversal if file operations are involved +- Test authorization boundaries if applicable + +### 4. Execute and Classify Results + +- Run all adversarial tests +- Classify each result as PASS (code survived) or FAIL (bug found) +- For each FAIL, verify it's a genuine bug (not a test setup issue) +- Re-run failures to confirm they're deterministic + +### 5. Report Findings + +Post a structured comment on the PR: + +``` +## Adversarial Test Results + +**Attack Surface:** [summary of what was tested] +**Tests Run:** N | **Passed:** N | **Failed:** N + +### 🔴 Failures (Bugs Found) +[For each failure: description, reproduction command, expected vs actual] + +### 🟢 Passed (Code Survived) +[Brief summary of attack vectors that didn't find issues] + +### ⚠️ Could Not Test +[Any areas that couldn't be tested and why] +``` + +## Desired Outcome + +- A set of runnable test artifacts that exercise edge cases and error paths +- Clear pass/fail results with reproduction steps for any bugs found +- Honest "survived" verdict when the code holds up diff --git a/strands-command/agent-skills/task-release-digest/SKILL.md b/strands-command/agent-skills/task-release-digest/SKILL.md new file mode 100644 index 0000000..5b13c5c --- /dev/null +++ b/strands-command/agent-skills/task-release-digest/SKILL.md @@ -0,0 +1,107 @@ +--- +name: task-release-digest +description: Generate a comprehensive release digest by analyzing merged PRs across Strands packages. Uses sub-agents via use_agent to parallelize per-package analysis, then synthesizes results into a unified digest. +allowed-tools: shell use_github use_agent http_request +--- +# Release Digest Generator + +## Role + +You are a Release Digest orchestrator. 
Your goal is to generate a comprehensive release digest covering recent changes across multiple Strands packages. You use sub-agents (via `use_agent`) to parallelize per-package analysis, then synthesize results into a unified digest. + +## Packages + +The Strands ecosystem includes these key packages: +- `strands-agents/sdk-python` — Core Python SDK +- `strands-agents/sdk-typescript` — Core TypeScript SDK +- `strands-agents/tools` — Official tool implementations +- `strands-agents/agent-builder` — Agent builder utilities +- `strands-agents/docs` — Documentation + +## Steps + +### 1. Determine Time Range + +- Accept a time range (e.g., "last 2 weeks", "since v1.14.0", specific dates) +- Default to the last 2 weeks if no range is specified +- Calculate the start and end dates + +### 2. Spawn Per-Package Sub-Agents + +For each package, use `use_agent` to spawn a sub-agent that: +- Queries merged PRs in the time range using GitHub GraphQL API +- Categorizes PRs: features, bug fixes, docs, chores +- Identifies the top 3-5 most impactful changes +- Extracts brief code examples for major features +- Returns a structured summary + +**Sub-agent system prompt template:** +``` +You are analyzing merged PRs for the {package} repository. +Time range: {start_date} to {end_date}. + +Query merged PRs using GitHub GraphQL API. For each PR, determine: +1. Category: feature, bugfix, docs, chore, refactor +2. User impact: high, medium, low +3. One-line summary + +Return a structured JSON summary with: +- package: string +- total_prs: number +- features: [{pr_number, title, summary, impact}] +- bugfixes: [{pr_number, title, summary, impact}] +- other_count: number +``` + +### 3. Collect and Synthesize Results + +- Wait for all sub-agents to complete +- Merge results into a unified view +- Identify cross-package themes (e.g., "streaming improvements across SDK and tools") +- Rank features by impact + +### 4. 
Generate Digest + +Format the digest as a GitHub issue comment: + +```markdown +# 📦 Strands Release Digest — {date_range} + +## Highlights +[Top 3-5 changes across all packages with brief descriptions] + +## By Package + +### sdk-python +**{N} PRs merged** | {features} features | {fixes} fixes +- 🚀 [Feature Title](PR link) — one-line description +- 🐛 [Fix Title](PR link) — one-line description + +### sdk-typescript +... + +### tools +... + +## Cross-Package Themes +[Any patterns noticed across packages] + +## Stats +| Package | PRs | Features | Fixes | Docs | +|---------|-----|----------|-------|------| +| sdk-python | N | N | N | N | +| ... | ... | ... | ... | ... | +| **Total** | **N** | **N** | **N** | **N** | +``` + +### 5. Post Results + +- Post the digest as a comment on the triggering issue +- Include a summary of sub-agent execution (how many packages analyzed, any failures) + +## Desired Outcome + +- A well-formatted release digest covering all active Strands packages +- Parallel execution via sub-agents for faster analysis +- Clear categorization and impact assessment +- Cross-package theme identification diff --git a/strands-command/scripts/javascript/process-input.cjs b/strands-command/scripts/javascript/process-input.cjs index 82de3b4..f68cfd7 100644 --- a/strands-command/scripts/javascript/process-input.cjs +++ b/strands-command/scripts/javascript/process-input.cjs @@ -81,15 +81,35 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); - const scriptFiles = { + // Skill-based modes use the AgentSkills plugin at runtime — no SOP file needed. + // The system prompt just sets the context; the agent activates the skill itself. 
+ const skillModes = ['adversarial-test', 'release-digest']; + + const sopFiles = { 'implementer': 'devtools/strands-command/agent-sops/task-implementer.sop.md', 'refiner': 'devtools/strands-command/agent-sops/task-refiner.sop.md', 'release-notes': 'devtools/strands-command/agent-sops/task-release-notes.sop.md', 'reviewer': 'devtools/strands-command/agent-sops/task-reviewer.sop.md' }; - - const scriptFile = scriptFiles[mode] || scriptFiles['refiner']; - const systemPrompt = fs.readFileSync(scriptFile, 'utf8'); + + let systemPrompt; + + if (skillModes.includes(mode)) { + // Skill-based modes — the AgentSkills plugin provides the full instructions via SKILL.md. + // Map command names to skill names for activation. + const skillNameMap = { + 'adversarial-test': 'task-adversarial-tester', + 'release-digest': 'task-release-digest', + }; + const skillName = skillNameMap[mode] || mode; + + systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK. +You have access to agent skills. Use the 'skills' tool to activate the '${skillName}' skill, then follow its instructions.`; + } else { + // SOP-based modes + const scriptFile = sopFiles[mode] || sopFiles['refiner']; + systemPrompt = fs.readFileSync(scriptFile, 'utf8'); + } let prompt = (isPullRequest) ? 
'The pull request id is:' @@ -107,7 +127,11 @@ module.exports = async (context, github, core, inputs) => { // Determine mode based on explicit command first, then context let mode; - if (command.startsWith('release-notes') || command.startsWith('release notes')) { + if (command.startsWith('adversarial-test') || command.startsWith('adversarial test')) { + mode = 'adversarial-test'; + } else if (command.startsWith('release-digest') || command.startsWith('release digest')) { + mode = 'release-digest'; + } else if (command.startsWith('release-notes') || command.startsWith('release notes')) { mode = 'release-notes'; } else if (command.startsWith('implement')) { mode = 'implementer'; diff --git a/strands-command/scripts/python/agent_runner.py b/strands-command/scripts/python/agent_runner.py index 0fdb1cc..b886cbf 100644 --- a/strands-command/scripts/python/agent_runner.py +++ b/strands-command/scripts/python/agent_runner.py @@ -9,6 +9,7 @@ import os import sys from datetime import datetime +from pathlib import Path from typing import Any import boto3 @@ -19,7 +20,7 @@ from strands.models import BedrockModel, CacheConfig from botocore.config import Config -from strands_tools import http_request, shell +from strands_tools import http_request, shell, use_agent # Import local GitHub tools we need from github_tools import ( @@ -157,6 +158,10 @@ def _get_all_tools() -> list[Any]: shell, http_request, + # Sub-agent creation — enables orchestrator pattern + # The parent agent can spawn specialized sub-agents for parallel tasks + use_agent, + # GitHub issue tools create_issue, get_issue, @@ -181,6 +186,53 @@ def _get_all_tools() -> list[Any]: ] +def _load_skills_plugin(): + """Load agent skills from the agent-skills directory if available. + + Returns AgentSkills plugin instance or None if skills aren't available. + Skills are loaded from agent-skills/ which is copied to the working directory + by the GitHub Action. 
+ """ + try: + from strands.vended_plugins.skills import AgentSkills + except ImportError: + print("ℹ️ AgentSkills plugin not available (strands.vended_plugins.skills not found)") + return None + + # Look for skills directory in the working directory + # The action.yml copies agent-skills/ to the working directory + possible_paths = [ + Path("agent-skills"), # Working directory (copied by action.yml) + Path("devtools/strands-command/agent-skills"), # Before copy step + ] + + skills_dir = None + for path in possible_paths: + if path.exists() and path.is_dir(): + skills_dir = path + break + + if skills_dir is None: + print("ℹ️ No agent-skills directory found (skills not available)") + return None + + try: + plugin = AgentSkills(skills=str(skills_dir)) + skills = plugin.get_available_skills() + + if skills: + print(f"✅ AgentSkills plugin: {len(skills)} skills loaded") + for skill in skills: + print(f" - {skill.name}: {skill.description[:60]}...") + return plugin + else: + print("⚠️ AgentSkills plugin: no skills found in directory") + return None + except Exception as e: + print(f"⚠️ Failed to load skills: {e}") + return None + + def run_agent(query: str): """Run the agent with the provided query.""" try: @@ -229,6 +281,12 @@ def run_agent(query: str): else: raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") + # Load agent skills plugin (gracefully degrades if not available) + plugins = [] + skills_plugin = _load_skills_plugin() + if skills_plugin: + plugins.append(skills_plugin) + # Create agent with optional trace attributes for Langfuse agent_kwargs = { "model": model, @@ -236,12 +294,16 @@ def run_agent(query: str): "tools": tools, "session_manager": session_manager, } + + if plugins: + agent_kwargs["plugins"] = plugins if trace_attributes: agent_kwargs["trace_attributes"] = trace_attributes agent = Agent(**agent_kwargs) + print(f"🤖 Agent created with {len(tools)} tools and {len(plugins)} plugins") print("Processing user query...") result = 
agent(query) From 110116988bfde1e3f5d280c8f886ebdc9fab6efe Mon Sep 17 00:00:00 2001 From: agent-of-mkmeral <265349452+agent-of-mkmeral@users.noreply.github.com> Date: Thu, 9 Apr 2026 23:07:08 +0000 Subject: [PATCH 2/3] refactor: separate beta agent with own runner, same pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reworked per @mkmeral's feedback — the beta agent is a separate agent with its own runner, but goes through the same pipeline. Design: - agent_runner.py: UNCHANGED from main (zero impact on normal agent) - beta_agent_runner.py: NEW — imports shared infra from agent_runner.py, adds AgentSkills plugin + use_agent tool - process-input.cjs: Routes /strands beta through same pipeline with agent_type='beta' flag - action.yml: Reads agent_type, conditionally runs the right runner and only copies agent-skills for beta How it works: - /strands → standard agent (exactly as before) - /strands beta → beta agent (skills + use_agent + extensible) - Same pipeline: parse → run → finalize - Beta-only modes (adversarial-test, release-digest) auto-promote to beta --- .../actions/strands-agent-runner/action.yml | 17 +- .../scripts/javascript/process-input.cjs | 91 +++++--- .../scripts/python/agent_runner.py | 64 +----- .../scripts/python/beta_agent_runner.py | 209 ++++++++++++++++++ 4 files changed, 284 insertions(+), 97 deletions(-) create mode 100644 strands-command/scripts/python/beta_agent_runner.py diff --git a/strands-command/actions/strands-agent-runner/action.yml b/strands-command/actions/strands-agent-runner/action.yml index b6f1dfc..daa73c0 100644 --- a/strands-command/actions/strands-agent-runner/action.yml +++ b/strands-command/actions/strands-agent-runner/action.yml @@ -47,6 +47,7 @@ runs: echo "ref=$(jq -r .branch_name strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "session_id=$(jq -r .session_id strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "head_repo=$(jq -r '.head_repo // ""' 
strands-parsed-input.json)" >> $GITHUB_OUTPUT + echo "agent_type=$(jq -r '.agent_type // "standard"' strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "system_prompt<<EOF" >> $GITHUB_OUTPUT jq -r .system_prompt strands-parsed-input.json >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT @@ -80,8 +81,10 @@ runs: ref: ${{ steps.read-input.outputs.ref }} repository: ${{ steps.read-input.outputs.head_repo || github.repository }} - # Copy agent-skills to working directory so the skills plugin can find them + # Copy agent-skills to working directory (beta agent only) + # The AgentSkills plugin looks for skills in the working directory - name: Copy agent-skills to working directory + if: steps.read-input.outputs.agent_type == 'beta' shell: bash run: | if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ]; then @@ -248,8 +251,18 @@ runs: # Evals Configuration (input overrides Secrets Manager) EVALS_SQS_QUEUE_ARN: ${{ inputs.evals_sqs_queue_arn || steps.secrets.outputs.evals_sqs_queue_arn }} + + # Agent type (standard or beta) + AGENT_TYPE: ${{ steps.read-input.outputs.agent_type }} run: | - uv run --no-project ${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python/agent_runner.py "$INPUT_TASK" + SCRIPTS_DIR="${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python" + if [ "$AGENT_TYPE" = "beta" ]; then + echo "🧪 Running beta agent" + uv run --no-project "$SCRIPTS_DIR/beta_agent_runner.py" "$INPUT_TASK" + else + echo "🤖 Running standard agent" + uv run --no-project "$SCRIPTS_DIR/agent_runner.py" "$INPUT_TASK" + fi - name: Capture repository state shell: bash diff --git a/strands-command/scripts/javascript/process-input.cjs b/strands-command/scripts/javascript/process-input.cjs index f68cfd7..6aa95e4 100644 --- a/strands-command/scripts/javascript/process-input.cjs +++ b/strands-command/scripts/javascript/process-input.cjs @@ -76,40 +76,48 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { return 
{ branchName, headRepo }; } -function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) { +function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, agentType) { const sessionId = inputs.session_id || (mode === 'implementer' ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); - // Skill-based modes use the AgentSkills plugin at runtime — no SOP file needed. - // The system prompt just sets the context; the agent activates the skill itself. - const skillModes = ['adversarial-test', 'release-digest']; - - const sopFiles = { - 'implementer': 'devtools/strands-command/agent-sops/task-implementer.sop.md', - 'refiner': 'devtools/strands-command/agent-sops/task-refiner.sop.md', - 'release-notes': 'devtools/strands-command/agent-sops/task-release-notes.sop.md', - 'reviewer': 'devtools/strands-command/agent-sops/task-reviewer.sop.md' - }; - - let systemPrompt; - - if (skillModes.includes(mode)) { - // Skill-based modes — the AgentSkills plugin provides the full instructions via SKILL.md. - // Map command names to skill names for activation. + // Beta agent uses skill-based system prompts — the AgentSkills plugin provides + // the full instructions via SKILL.md files. The system prompt just sets context + // and tells the agent which skill to activate. + if (agentType === 'beta') { const skillNameMap = { 'adversarial-test': 'task-adversarial-tester', 'release-digest': 'task-release-digest', }; - const skillName = skillNameMap[mode] || mode; + const skillName = skillNameMap[mode]; - systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK. + let systemPrompt; + if (skillName) { + systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK. You have access to agent skills. 
Use the 'skills' tool to activate the '${skillName}' skill, then follow its instructions.`; - } else { - // SOP-based modes - const scriptFile = sopFiles[mode] || sopFiles['refiner']; - systemPrompt = fs.readFileSync(scriptFile, 'utf8'); + } else { + // Generic beta prompt for commands without a specific skill mapping + systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills and sub-agent orchestration.`; + } + + let prompt = (isPullRequest) + ? 'The pull request id is:' + : 'The issue id is:'; + prompt += `${issueId}\n${command}\nreview and continue`; + + return { sessionId, systemPrompt, prompt }; } + + // Standard agent uses SOP-based system prompts + const scriptFiles = { + 'implementer': 'devtools/strands-command/agent-sops/task-implementer.sop.md', + 'refiner': 'devtools/strands-command/agent-sops/task-refiner.sop.md', + 'release-notes': 'devtools/strands-command/agent-sops/task-release-notes.sop.md', + 'reviewer': 'devtools/strands-command/agent-sops/task-reviewer.sop.md' + }; + + const scriptFile = scriptFiles[mode] || scriptFiles['refiner']; + const systemPrompt = fs.readFileSync(scriptFile, 'utf8'); let prompt = (isPullRequest) ? 'The pull request id is:' @@ -124,31 +132,49 @@ module.exports = async (context, github, core, inputs) => { const { issueId, command, issue } = await getIssueInfo(github, context, inputs); const isPullRequest = !!issue.data.pull_request; + + // Check if this is a beta command: /strands beta + let agentType = 'standard'; + let effectiveCommand = command; + + if (command.startsWith('beta ') || command === 'beta') { + agentType = 'beta'; + effectiveCommand = command.replace(/^beta\s*/, '').trim(); + console.log(`Beta agent requested. 
Effective command: "${effectiveCommand}"`); + } // Determine mode based on explicit command first, then context let mode; - if (command.startsWith('adversarial-test') || command.startsWith('adversarial test')) { + if (effectiveCommand.startsWith('adversarial-test') || effectiveCommand.startsWith('adversarial test')) { mode = 'adversarial-test'; - } else if (command.startsWith('release-digest') || command.startsWith('release digest')) { + } else if (effectiveCommand.startsWith('release-digest') || effectiveCommand.startsWith('release digest')) { mode = 'release-digest'; - } else if (command.startsWith('release-notes') || command.startsWith('release notes')) { + } else if (effectiveCommand.startsWith('release-notes') || effectiveCommand.startsWith('release notes')) { mode = 'release-notes'; - } else if (command.startsWith('implement')) { + } else if (effectiveCommand.startsWith('implement')) { mode = 'implementer'; - } else if (command.startsWith('review')) { + } else if (effectiveCommand.startsWith('review')) { mode = 'reviewer'; - } else if (command.startsWith('refine')) { + } else if (effectiveCommand.startsWith('refine')) { mode = 'refiner'; } else { // Default behavior when no explicit command: PR -> implementer, Issue -> refiner mode = isPullRequest ? 
'implementer' : 'refiner'; } - console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}`); + + // Beta-only modes: adversarial-test and release-digest require the beta agent + const betaOnlyModes = ['adversarial-test', 'release-digest']; + if (betaOnlyModes.includes(mode) && agentType !== 'beta') { + agentType = 'beta'; + console.log(`Mode '${mode}' requires beta agent — auto-promoting to beta`); + } + + console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}, Agent: ${agentType}`); const { branchName, headRepo } = await determineBranch(github, context, issueId, mode, isPullRequest); console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`); - const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs); + const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, effectiveCommand, branchName, inputs, agentType); console.log(`Session ID: ${sessionId}`); console.log(`Task prompt: "${prompt}"`); @@ -159,7 +185,8 @@ module.exports = async (context, github, core, inputs) => { system_prompt: systemPrompt, prompt: prompt, issue_id: issueId, - head_repo: headRepo + head_repo: headRepo, + agent_type: agentType, }; fs.writeFileSync('strands-parsed-input.json', JSON.stringify(outputs, null, 2)); diff --git a/strands-command/scripts/python/agent_runner.py b/strands-command/scripts/python/agent_runner.py index b886cbf..0fdb1cc 100644 --- a/strands-command/scripts/python/agent_runner.py +++ b/strands-command/scripts/python/agent_runner.py @@ -9,7 +9,6 @@ import os import sys from datetime import datetime -from pathlib import Path from typing import Any import boto3 @@ -20,7 +19,7 @@ from strands.models import BedrockModel, CacheConfig from botocore.config import Config -from strands_tools import http_request, shell, use_agent +from strands_tools import http_request, shell # Import local GitHub tools we need from github_tools 
import ( @@ -158,10 +157,6 @@ def _get_all_tools() -> list[Any]: shell, http_request, - # Sub-agent creation — enables orchestrator pattern - # The parent agent can spawn specialized sub-agents for parallel tasks - use_agent, - # GitHub issue tools create_issue, get_issue, @@ -186,53 +181,6 @@ def _get_all_tools() -> list[Any]: ] -def _load_skills_plugin(): - """Load agent skills from the agent-skills directory if available. - - Returns AgentSkills plugin instance or None if skills aren't available. - Skills are loaded from agent-skills/ which is copied to the working directory - by the GitHub Action. - """ - try: - from strands.vended_plugins.skills import AgentSkills - except ImportError: - print("ℹ️ AgentSkills plugin not available (strands.vended_plugins.skills not found)") - return None - - # Look for skills directory in the working directory - # The action.yml copies agent-skills/ to the working directory - possible_paths = [ - Path("agent-skills"), # Working directory (copied by action.yml) - Path("devtools/strands-command/agent-skills"), # Before copy step - ] - - skills_dir = None - for path in possible_paths: - if path.exists() and path.is_dir(): - skills_dir = path - break - - if skills_dir is None: - print("ℹ️ No agent-skills directory found (skills not available)") - return None - - try: - plugin = AgentSkills(skills=str(skills_dir)) - skills = plugin.get_available_skills() - - if skills: - print(f"✅ AgentSkills plugin: {len(skills)} skills loaded") - for skill in skills: - print(f" - {skill.name}: {skill.description[:60]}...") - return plugin - else: - print("⚠️ AgentSkills plugin: no skills found in directory") - return None - except Exception as e: - print(f"⚠️ Failed to load skills: {e}") - return None - - def run_agent(query: str): """Run the agent with the provided query.""" try: @@ -281,12 +229,6 @@ def run_agent(query: str): else: raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") - # Load agent skills plugin (gracefully 
degrades if not available) - plugins = [] - skills_plugin = _load_skills_plugin() - if skills_plugin: - plugins.append(skills_plugin) - # Create agent with optional trace attributes for Langfuse agent_kwargs = { "model": model, @@ -294,16 +236,12 @@ def run_agent(query: str): "tools": tools, "session_manager": session_manager, } - - if plugins: - agent_kwargs["plugins"] = plugins if trace_attributes: agent_kwargs["trace_attributes"] = trace_attributes agent = Agent(**agent_kwargs) - print(f"🤖 Agent created with {len(tools)} tools and {len(plugins)} plugins") print("Processing user query...") result = agent(query) diff --git a/strands-command/scripts/python/beta_agent_runner.py b/strands-command/scripts/python/beta_agent_runner.py new file mode 100644 index 0000000..63e4bc5 --- /dev/null +++ b/strands-command/scripts/python/beta_agent_runner.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +""" +Strands Beta Agent Runner + +A separate agent runner with extended capabilities (skills, sub-agents, etc.). +Reuses shared infrastructure from agent_runner.py — same pipeline, different agent. + +Usage: /strands beta +""" + +import json +import os +import sys +from pathlib import Path +from typing import Any + +from strands import Agent +from strands.session import S3SessionManager +from strands.models import BedrockModel, CacheConfig +from botocore.config import Config + +from strands_tools import http_request, shell, use_agent + +# Reuse shared infrastructure from the standard runner +from agent_runner import ( + _get_all_tools, + _get_trace_attributes, + _send_eval_trigger, + _setup_langfuse_telemetry, + STRANDS_BUDGET_TOKENS, + STRANDS_MAX_TOKENS, + STRANDS_MODEL_ID, + STRANDS_REGION, +) + +DEFAULT_SYSTEM_PROMPT = "You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills and sub-agent orchestration." + + +def _get_beta_tools() -> list[Any]: + """Get tools for the beta agent. 
+ + Starts with all standard tools, then adds beta-only tools. + This ensures the beta agent is a strict superset of the standard agent. + """ + tools = _get_all_tools() + + # Add beta-only tools (use_agent is already imported at module level) + # Check if use_agent is already in the list (it is in the current version) + tool_names = {getattr(t, '__name__', str(t)) for t in tools} + if 'use_agent' not in tool_names: + tools.append(use_agent) + + return tools + + +def _load_skills_plugin(): + """Load agent skills from the agent-skills directory if available. + + Returns AgentSkills plugin instance or None if skills aren't available. + Skills are loaded from agent-skills/ which is copied to the working directory + by the GitHub Action. + """ + try: + from strands.vended_plugins.skills import AgentSkills + except ImportError: + print("ℹ️ AgentSkills plugin not available (strands.vended_plugins.skills not found)") + return None + + # Look for skills directory in the working directory + # The action.yml copies agent-skills/ to the working directory + possible_paths = [ + Path("agent-skills"), # Working directory (copied by action.yml) + Path("devtools/strands-command/agent-skills"), # Before copy step + ] + + skills_dir = None + for path in possible_paths: + if path.exists() and path.is_dir(): + skills_dir = path + break + + if skills_dir is None: + print("ℹ️ No agent-skills directory found (skills not available)") + return None + + try: + plugin = AgentSkills(skills=str(skills_dir)) + skills = plugin.get_available_skills() + + if skills: + print(f"✅ AgentSkills plugin: {len(skills)} skills loaded") + for skill in skills: + print(f" - {skill.name}: {skill.description[:60]}...") + return plugin + else: + print("⚠️ AgentSkills plugin: no skills found in directory") + return None + except Exception as e: + print(f"⚠️ Failed to load skills: {e}") + return None + + +def run_beta_agent(query: str): + """Run the beta agent with extended capabilities.""" + try: + # Shared 
infrastructure from agent_runner.py + telemetry_enabled = _setup_langfuse_telemetry() + trace_attributes = _get_trace_attributes() if telemetry_enabled else {} + + # Beta agent tools (superset of standard) + tools = _get_beta_tools() + + # Same model configuration as standard agent + additional_request_fields = {} + additional_request_fields["anthropic_beta"] = ["interleaved-thinking-2025-05-14"] + additional_request_fields["thinking"] = { + "type": "enabled", + "budget_tokens": STRANDS_BUDGET_TOKENS, + } + + model = BedrockModel( + model_id=STRANDS_MODEL_ID, + max_tokens=STRANDS_MAX_TOKENS, + region_name=STRANDS_REGION, + boto_client_config=Config( + read_timeout=900, + connect_timeout=900, + retries={"max_attempts": 3, "mode": "adaptive"}, + ), + cache_config=CacheConfig(strategy="auto"), + additional_request_fields=additional_request_fields, + cache_prompt="default", + cache_tools="default", + ) + + system_prompt = os.getenv("INPUT_SYSTEM_PROMPT", DEFAULT_SYSTEM_PROMPT) + session_id = os.getenv("SESSION_ID") + s3_bucket = os.getenv("S3_SESSION_BUCKET") + + if s3_bucket and session_id: + print(f"🤖 Using session manager with session ID: {session_id}") + session_manager = S3SessionManager( + session_id=session_id, + bucket=s3_bucket, + prefix=os.getenv("GITHUB_REPOSITORY", ""), + ) + else: + raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") + + # Beta-only: Load agent skills plugin + plugins = [] + skills_plugin = _load_skills_plugin() + if skills_plugin: + plugins.append(skills_plugin) + + # Create beta agent + agent_kwargs = { + "model": model, + "system_prompt": system_prompt, + "tools": tools, + "session_manager": session_manager, + } + + if plugins: + agent_kwargs["plugins"] = plugins + + if trace_attributes: + agent_kwargs["trace_attributes"] = trace_attributes + + agent = Agent(**agent_kwargs) + + print(f"🧪 Beta agent created with {len(tools)} tools and {len(plugins)} plugins") + print("Processing user query...") + result = agent(query) + 
+ print(f"\n\nAgent Result 🤖\nStop Reason: {result.stop_reason}\nMessage: {json.dumps(result.message, indent=2)}") + + # Eval trigger (shared infrastructure) + unique_session_id = trace_attributes.get("session.id", session_id) + eval_type = session_id.split("-")[0] if "-" in session_id else session_id + _send_eval_trigger(unique_session_id, eval_type) + + except Exception as e: + error_msg = f"❌ Beta agent execution failed: {e}" + print(error_msg) + raise e + + +def main() -> None: + """Main entry point for the beta agent runner.""" + try: + if len(sys.argv) < 2: + raise ValueError("Task argument is required") + + task = " ".join(sys.argv[1:]) + if not task.strip(): + raise ValueError("Task cannot be empty") + print(f"🧪 Running beta agent with task: {task}") + + run_beta_agent(task) + + except Exception as e: + error_msg = f"Fatal error: {e}" + print(error_msg) + sys.exit(1) + + +if __name__ == "__main__": + main() From fffde4541472afe8cd9bad542c450fd134faa8c3 Mon Sep 17 00:00:00 2001 From: agent-of-mkmeral <265349452+agent-of-mkmeral@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:56:51 +0000 Subject: [PATCH 3/3] =?UTF-8?q?feat:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20system=20prompt,=20PTC,=20skill=20activation,=20SOP?= =?UTF-8?q?=20conversion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Meaningful system prompt (BETA_SYSTEM_PROMPT.md) - Created proper system prompt based on agent guidelines - Loaded from file by both process-input.cjs and beta_agent_runner.py 2. Programmatic tool caller (strands-agents/tools#387) - Added local copy of programmatic_tool_caller.py - Beta runner loads from strands_tools first, falls back to local copy 3. Skill activation based on command mode - /strands beta review → activates task-reviewer skill - /strands beta implement → activates task-implementer skill - Maps all modes to their corresponding skills via SKILL_MAP - agent.tool.skills(skill_name=...) 
called after agent creation 4. Meta-reasoner skill + SOPs as skills - Added task-meta-reasoner/SKILL.md - Runtime SOP→skill conversion: reads .sop.md files, adds YAML frontmatter, writes as SKILL.md — no source file duplication - Existing dedicated skills take precedence over converted SOPs 5. Pipeline changes - process-input.cjs outputs agent_mode in parsed JSON - action.yml reads agent_mode, passes as AGENT_MODE env var - action.yml copies agent-sops to working dir for conversion - Added meta-reason command routing --- .../actions/strands-agent-runner/action.yml | 6 + .../agent-skills/BETA_SYSTEM_PROMPT.md | 60 ++++ .../agent-skills/task-meta-reasoner/SKILL.md | 79 +++++ .../scripts/javascript/process-input.cjs | 54 +-- .../scripts/python/beta_agent_runner.py | 245 +++++++++++++- .../python/programmatic_tool_caller.py | 316 ++++++++++++++++++ 6 files changed, 724 insertions(+), 36 deletions(-) create mode 100644 strands-command/agent-skills/BETA_SYSTEM_PROMPT.md create mode 100644 strands-command/agent-skills/task-meta-reasoner/SKILL.md create mode 100644 strands-command/scripts/python/programmatic_tool_caller.py diff --git a/strands-command/actions/strands-agent-runner/action.yml b/strands-command/actions/strands-agent-runner/action.yml index daa73c0..d83fd56 100644 --- a/strands-command/actions/strands-agent-runner/action.yml +++ b/strands-command/actions/strands-agent-runner/action.yml @@ -47,6 +47,7 @@ runs: echo "ref=$(jq -r .branch_name strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "session_id=$(jq -r .session_id strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "head_repo=$(jq -r '.head_repo // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT + echo "agent_mode=$(jq -r '.agent_mode // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "agent_type=$(jq -r '.agent_type // "standard"' strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "system_prompt<> $GITHUB_OUTPUT jq -r .system_prompt strands-parsed-input.json >> $GITHUB_OUTPUT @@ -90,6 +91,10 @@ 
runs: if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ]; then cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills ./agent-skills echo "✅ Copied agent-skills to working directory" + if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-sops" ]; then + cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-sops ./agent-sops + echo "✅ Copied agent-sops to working directory (for runtime skill conversion)" + fi ls -la ./agent-skills/ else echo "ℹ️ No agent-skills directory found (skills not available)" @@ -254,6 +259,7 @@ runs: # Agent type (standard or beta) AGENT_TYPE: ${{ steps.read-input.outputs.agent_type }} + AGENT_MODE: ${{ steps.read-input.outputs.agent_mode }} run: | SCRIPTS_DIR="${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python" if [ "$AGENT_TYPE" = "beta" ]; then diff --git a/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md b/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md new file mode 100644 index 0000000..4c86db2 --- /dev/null +++ b/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md @@ -0,0 +1,60 @@ +# Strands Agent (Beta) — /strands Command + +**Identity**: AI agent for the Strands Agents project, invoked via `/strands beta` in GitHub issues and PRs. +**Runtime**: GitHub Actions, triggered by `/strands beta ` comments. + +--- + +## Guidelines + +Follow the [Strands Agent Guidelines](https://github.com/strands-agents/docs/blob/main/team/AGENT_GUIDELINES.md): + +- **Add value or stay silent.** If you don't have something concrete to contribute, don't act. +- **Keep it short.** Lead with what matters, then stop. Use `
<details>` blocks for long analysis. +- **Approvals need reasoning.** Justify decisions — especially rejections. +- **Prove, don't opine.** Provide evidence — tests, scripts, code — not speculation. + +--- + +## Capabilities + +You are an extended agent with access to: +- **Agent Skills** — Task-specific SOPs loaded on-demand via the `skills` tool +- **Sub-Agents** — Delegate subtasks to specialized agents via `use_agent` +- **Programmatic Tool Calling** — Execute Python code that calls tools as async functions + +### Skills + +Use the `skills` tool to activate task-specific instructions. Available skills are shown in your context. When a skill is activated, follow its instructions precisely. + +### Sub-Agents + +Use `use_agent` to spawn sub-agents for parallelizable work (e.g., per-package analysis, independent reviews). Each sub-agent gets its own context and tools. + +--- + +## Behavior + +1. **Understand the task** — Read the issue/PR, comments, and linked references thoroughly before acting. +2. **Activate the right skill** — If your task maps to a skill, activate it first. +3. **Work incrementally** — Commit progress, post updates, iterate on feedback. +4. **Be honest about limitations** — If you can't do something, say so. + +--- + +## Output Format + +- Use GitHub-flavored markdown +- Structure with headers, tables, and code blocks +- Keep top-level summaries under 200 words +- Use `<details>
` blocks for verbose content + +--- + +## Anti-Patterns (NEVER) + +- Don't post walls of text without structure +- Don't approve without review +- Don't speculate without evidence +- Don't repeat what the user already said +- Don't create noise — every comment should move things forward diff --git a/strands-command/agent-skills/task-meta-reasoner/SKILL.md b/strands-command/agent-skills/task-meta-reasoner/SKILL.md new file mode 100644 index 0000000..b363700 --- /dev/null +++ b/strands-command/agent-skills/task-meta-reasoner/SKILL.md @@ -0,0 +1,79 @@ +--- +name: task-meta-reasoner +description: Meta-reasoning gate that evaluates whether to accept, defer, redirect, reject, or escalate an issue, PR, or task before any work begins. Questions the premise at a high level — assessing layer ownership, existing solutions, architectural alignment, scope, and roadmap fit. Always proposes alternatives, even for seemingly obvious requests. Use this skill as the first checkpoint before task-refiner, task-implementer, task-reviewer, or task-adversarial-tester to prevent wasted effort on misaligned, duplicate, or out-of-scope work. +allowed-tools: shell use_github +--- +# Meta-Reasoner + +## Role + +You are a Meta-Reasoner. Your goal is to evaluate whether a given issue, pull request, or task should be accepted, deferred, or rejected — before any implementation, review, or refinement work begins. You question the request at a high level: Do we need to do this? Is it our concern? Is this the right approach? Is this a duplicate? Does a simpler solution already exist? + +## Principles + +1. **Question the premise.** Don't assume the request is valid — interrogate it. +2. **Check for duplicates.** Search existing issues, PRs, and discussions before accepting. +3. **Assess scope.** Is this the right layer? The right repo? The right team? +4. **Propose alternatives.** Even for good requests, suggest simpler paths. +5. **Be decisive.** Your output is a clear verdict with reasoning. 
+ +## Steps + +### 1. Understand the Request + +- Read the issue/PR description, title, and any linked references +- Identify the core ask — what does the requester actually want? +- Note any assumptions the requester is making + +### 2. Evaluate Fit + +- **Layer ownership**: Is this our concern or should it be upstream/downstream? +- **Existing solutions**: Does something already solve this? Search issues, docs, and code. +- **Architectural alignment**: Does this fit the project's direction? +- **Scope**: Is this too big? Too small? Should it be split or combined? +- **Roadmap fit**: Is this on the roadmap? If not, should it be? + +### 3. Search for Duplicates + +- Search open and closed issues for similar requests +- Check recent PRs for related work +- Look for existing documentation that addresses the concern + +### 4. Propose Alternatives + +Even if you plan to accept, always propose at least one alternative: +- A simpler approach +- An existing solution that might work +- A different scope (smaller or larger) +- Deferring to a better time + +### 5. Render Verdict + +Post a structured comment: + +``` +## Meta-Reasoning Assessment + +**Verdict:** ACCEPT / DEFER / REDIRECT / REJECT / ESCALATE + +**Core Ask:** [one sentence] + +**Assessment:** +- Layer ownership: ✅/❌ [explanation] +- Existing solutions: ✅/❌ [explanation] +- Architectural fit: ✅/❌ [explanation] +- Scope: ✅/❌ [explanation] +- Duplicates: ✅/❌ [explanation] + +**Alternatives Considered:** +1. [alternative 1] +2. 
[alternative 2] + +**Recommendation:** [what to do next] +``` + +## Desired Outcome + +- A clear accept/defer/reject decision with reasoning +- No wasted effort on misaligned work +- Alternatives surfaced even for accepted tasks diff --git a/strands-command/scripts/javascript/process-input.cjs b/strands-command/scripts/javascript/process-input.cjs index 6aa95e4..140dd8c 100644 --- a/strands-command/scripts/javascript/process-input.cjs +++ b/strands-command/scripts/javascript/process-input.cjs @@ -81,23 +81,32 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); - // Beta agent uses skill-based system prompts — the AgentSkills plugin provides - // the full instructions via SKILL.md files. The system prompt just sets context - // and tells the agent which skill to activate. + // Beta agent uses BETA_SYSTEM_PROMPT.md (loaded by the runner) + skill activation. + // The system prompt here is just a thin context layer — the real instructions come + // from the BETA_SYSTEM_PROMPT.md file and the activated skill. if (agentType === 'beta') { - const skillNameMap = { - 'adversarial-test': 'task-adversarial-tester', - 'release-digest': 'task-release-digest', - }; - const skillName = skillNameMap[mode]; + // Read BETA_SYSTEM_PROMPT.md if available — provides the base system prompt + let systemPrompt = ''; + const promptPaths = [ + 'devtools/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md', + 'agent-skills/BETA_SYSTEM_PROMPT.md', + ]; + + for (const promptPath of promptPaths) { + try { + if (fs.existsSync(promptPath)) { + systemPrompt = fs.readFileSync(promptPath, 'utf8'); + console.log(`Loaded beta system prompt from ${promptPath}`); + break; + } + } catch (e) { + console.log(`Could not read ${promptPath}: ${e.message}`); + } + } - let systemPrompt; - if (skillName) { - systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK. 
-You have access to agent skills. Use the 'skills' tool to activate the '${skillName}' skill, then follow its instructions.`; - } else { - // Generic beta prompt for commands without a specific skill mapping - systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills and sub-agent orchestration.`; + // Fallback if file not found + if (!systemPrompt) { + systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills, sub-agent orchestration, and programmatic tool calling.`; } let prompt = (isPullRequest) @@ -105,7 +114,7 @@ You have access to agent skills. Use the 'skills' tool to activate the '${skillN : 'The issue id is:'; prompt += `${issueId}\n${command}\nreview and continue`; - return { sessionId, systemPrompt, prompt }; + return { sessionId, systemPrompt, prompt, mode }; } // Standard agent uses SOP-based system prompts @@ -124,7 +133,7 @@ You have access to agent skills. 
Use the 'skills' tool to activate the '${skillN : 'The issue id is:'; prompt += `${issueId}\n${command}\nreview and continue`; - return { sessionId, systemPrompt, prompt }; + return { sessionId, systemPrompt, prompt, mode }; } module.exports = async (context, github, core, inputs) => { @@ -149,6 +158,8 @@ module.exports = async (context, github, core, inputs) => { mode = 'adversarial-test'; } else if (effectiveCommand.startsWith('release-digest') || effectiveCommand.startsWith('release digest')) { mode = 'release-digest'; + } else if (effectiveCommand.startsWith('meta-reason') || effectiveCommand.startsWith('meta reason')) { + mode = 'meta-reason'; } else if (effectiveCommand.startsWith('release-notes') || effectiveCommand.startsWith('release notes')) { mode = 'release-notes'; } else if (effectiveCommand.startsWith('implement')) { @@ -158,12 +169,12 @@ module.exports = async (context, github, core, inputs) => { } else if (effectiveCommand.startsWith('refine')) { mode = 'refiner'; } else { - // Default behavior when no explicit command: PR -> implementer, Issue -> refiner - mode = isPullRequest ? 'implementer' : 'refiner'; + // Default behavior when no explicit command: PR -> reviewer, Issue -> refiner + mode = isPullRequest ? 
'reviewer' : 'refiner'; } - // Beta-only modes: adversarial-test and release-digest require the beta agent - const betaOnlyModes = ['adversarial-test', 'release-digest']; + // Beta-only modes require the beta agent + const betaOnlyModes = ['adversarial-test', 'release-digest', 'meta-reason']; if (betaOnlyModes.includes(mode) && agentType !== 'beta') { agentType = 'beta'; console.log(`Mode '${mode}' requires beta agent — auto-promoting to beta`); @@ -187,6 +198,7 @@ module.exports = async (context, github, core, inputs) => { issue_id: issueId, head_repo: headRepo, agent_type: agentType, + agent_mode: mode, }; fs.writeFileSync('strands-parsed-input.json', JSON.stringify(outputs, null, 2)); diff --git a/strands-command/scripts/python/beta_agent_runner.py b/strands-command/scripts/python/beta_agent_runner.py index 63e4bc5..4523468 100644 --- a/strands-command/scripts/python/beta_agent_runner.py +++ b/strands-command/scripts/python/beta_agent_runner.py @@ -2,14 +2,17 @@ """ Strands Beta Agent Runner -A separate agent runner with extended capabilities (skills, sub-agents, etc.). -Reuses shared infrastructure from agent_runner.py — same pipeline, different agent. +A separate agent runner with extended capabilities (skills, sub-agents, +programmatic tool calling, etc.). Reuses shared infrastructure from +agent_runner.py — same pipeline, different agent. Usage: /strands beta """ import json import os +import re +import shutil import sys from pathlib import Path from typing import Any @@ -33,8 +36,82 @@ STRANDS_REGION, ) -DEFAULT_SYSTEM_PROMPT = "You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills and sub-agent orchestration." +# --------------------------------------------------------------------------- +# System prompt +# --------------------------------------------------------------------------- + +def _load_system_prompt() -> str: + """Load the beta agent system prompt. + + Priority: + 1. 
INPUT_SYSTEM_PROMPT env var (set by process-input.cjs) + 2. BETA_SYSTEM_PROMPT.md file in agent-skills directory + 3. Minimal fallback + """ + env_prompt = os.getenv("INPUT_SYSTEM_PROMPT", "").strip() + if env_prompt: + return env_prompt + + # Try loading from file + possible_paths = [ + Path("agent-skills/BETA_SYSTEM_PROMPT.md"), + Path("devtools/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md"), + ] + + for path in possible_paths: + try: + if path.exists(): + with open(path, "r", encoding="utf-8") as f: + content = f.read() + print(f"✅ System prompt loaded from {path}") + return content + except Exception as e: + print(f"⚠️ Failed to read {path}: {e}") + + return "You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills, sub-agent orchestration, and programmatic tool calling." + + +# --------------------------------------------------------------------------- +# Programmatic Tool Caller (local copy from strands-agents/tools#387) +# --------------------------------------------------------------------------- + +def _load_programmatic_tool_caller(): + """Try to load programmatic_tool_caller from strands_tools or local copy. + + Priority: + 1. strands_tools.programmatic_tool_caller (when merged into tools package) + 2. 
Local copy at scripts/python/programmatic_tool_caller.py + """ + try: + from strands_tools import programmatic_tool_caller + print("✅ programmatic_tool_caller loaded from strands_tools") + return programmatic_tool_caller + except ImportError: + pass + + # Try local copy + try: + scripts_dir = Path(__file__).parent + local_ptc = scripts_dir / "programmatic_tool_caller.py" + if local_ptc.exists(): + import importlib.util + spec = importlib.util.spec_from_file_location("programmatic_tool_caller", local_ptc) + if spec and spec.loader: + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + print("✅ programmatic_tool_caller loaded from local copy") + return mod.programmatic_tool_caller + except Exception as e: + print(f"⚠️ Failed to load local programmatic_tool_caller: {e}") + + print("ℹ️ programmatic_tool_caller not available") + return None + + +# --------------------------------------------------------------------------- +# Tools +# --------------------------------------------------------------------------- def _get_beta_tools() -> list[Any]: """Get tools for the beta agent. 
@@ -44,21 +121,114 @@ def _get_beta_tools() -> list[Any]: """ tools = _get_all_tools() - # Add beta-only tools (use_agent is already imported at module level) - # Check if use_agent is already in the list (it is in the current version) - tool_names = {getattr(t, '__name__', str(t)) for t in tools} - if 'use_agent' not in tool_names: + # Add beta-only tools + tool_names = {getattr(t, "__name__", str(t)) for t in tools} + + if "use_agent" not in tool_names: tools.append(use_agent) + # Add programmatic tool caller + ptc = _load_programmatic_tool_caller() + if ptc is not None: + tools.append(ptc) + return tools +# --------------------------------------------------------------------------- +# Skills +# --------------------------------------------------------------------------- + +# Map from command mode → skill name +SKILL_MAP = { + "adversarial-test": "task-adversarial-tester", + "release-digest": "task-release-digest", + "meta-reason": "task-meta-reasoner", + "reviewer": "task-reviewer", + "review": "task-reviewer", + "implementer": "task-implementer", + "implement": "task-implementer", + "refiner": "task-refiner", + "refine": "task-refiner", + "release-notes": "task-release-notes", +} + + +def _convert_sops_to_skills(skills_dir: Path, sops_dir: Path) -> int: + """Convert existing SOP files to SKILL.md format at runtime. + + Reads .sop.md files from the SOPs directory, adds YAML frontmatter, + and writes them as SKILL.md files in the skills directory. + No source files are modified — conversion is one-way into the skills dir. + + Returns the number of SOPs converted. + """ + if not sops_dir.exists(): + return 0 + + # SOP name → metadata for frontmatter + sop_metadata = { + "task-implementer": { + "description": "Implement tasks defined in GitHub issues using test-driven development. 
Write code following existing patterns, create comprehensive tests, generate documentation, and create pull requests for review.", + "allowed_tools": "shell use_github", + }, + "task-refiner": { + "description": "Review and refine feature requests in GitHub issues. Identify ambiguities, post clarifying questions, gather missing information, and prepare issues for implementation.", + "allowed_tools": "shell use_github", + }, + "task-release-notes": { + "description": "Generate high-quality release notes for software releases. Analyze merged PRs between git references, identify major features and bug fixes, extract code examples, and format into well-structured markdown.", + "allowed_tools": "shell use_github", + }, + "task-reviewer": { + "description": "Review code changes in pull requests. Analyze diffs, understand context, and add targeted review comments to improve code quality, maintainability, and adherence to project standards.", + "allowed_tools": "shell use_github", + }, + } + + converted = 0 + for sop_file in sops_dir.glob("*.sop.md"): + # Extract skill name: task-implementer.sop.md → task-implementer + skill_name = sop_file.stem.replace(".sop", "") + skill_dir = skills_dir / skill_name + + # Skip if skill already exists (don't overwrite dedicated skills) + if (skill_dir / "SKILL.md").exists(): + continue + + metadata = sop_metadata.get(skill_name, {}) + description = metadata.get("description", f"Skill converted from {sop_file.name}") + allowed_tools = metadata.get("allowed_tools", "shell use_github") + + # Read SOP content + try: + sop_content = sop_file.read_text(encoding="utf-8") + except Exception as e: + print(f"⚠️ Failed to read {sop_file}: {e}") + continue + + # Build SKILL.md with frontmatter + skill_content = f"""--- +name: {skill_name} +description: {description} +allowed-tools: {allowed_tools} +--- +{sop_content}""" + + # Write to skills directory + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(skill_content, 
encoding="utf-8") + converted += 1 + print(f" ✅ Converted SOP → skill: {skill_name}") + + return converted + + def _load_skills_plugin(): """Load agent skills from the agent-skills directory if available. + Also converts existing SOPs to skills at runtime (without duplicating source files). Returns AgentSkills plugin instance or None if skills aren't available. - Skills are loaded from agent-skills/ which is copied to the working directory - by the GitHub Action. """ try: from strands.vended_plugins.skills import AgentSkills @@ -66,11 +236,10 @@ def _load_skills_plugin(): print("ℹ️ AgentSkills plugin not available (strands.vended_plugins.skills not found)") return None - # Look for skills directory in the working directory - # The action.yml copies agent-skills/ to the working directory + # Look for skills directory possible_paths = [ - Path("agent-skills"), # Working directory (copied by action.yml) - Path("devtools/strands-command/agent-skills"), # Before copy step + Path("agent-skills"), + Path("devtools/strands-command/agent-skills"), ] skills_dir = None @@ -83,6 +252,18 @@ def _load_skills_plugin(): print("ℹ️ No agent-skills directory found (skills not available)") return None + # Convert SOPs to skills at runtime + possible_sop_paths = [ + Path("devtools/strands-command/agent-sops"), + Path("agent-sops"), + ] + for sops_dir in possible_sop_paths: + if sops_dir.exists(): + converted = _convert_sops_to_skills(skills_dir, sops_dir) + if converted > 0: + print(f"✅ Converted {converted} SOPs to skills") + break + try: plugin = AgentSkills(skills=str(skills_dir)) skills = plugin.get_available_skills() @@ -100,6 +281,33 @@ def _load_skills_plugin(): return None +def _activate_skill_for_mode(agent: Agent, mode: str) -> None: + """Activate the appropriate skill based on the command mode. + + Maps the command mode (e.g., "review", "implement") to a skill name + and invokes it via agent.tool.skills(). 
This front-loads the skill + instructions into the agent's context before it starts working. + """ + skill_name = SKILL_MAP.get(mode) + if not skill_name: + print(f"ℹ️ No skill mapped for mode '{mode}'") + return + + if "skills" not in agent.tool_names: + print(f"⚠️ skills tool not available, can't activate '{skill_name}'") + return + + try: + agent.tool.skills(skill_name=skill_name, record_direct_tool_call=True) + print(f"✅ Activated skill: {skill_name}") + except Exception as e: + print(f"⚠️ Failed to activate skill '{skill_name}': {e}") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + def run_beta_agent(query: str): """Run the beta agent with extended capabilities.""" try: @@ -133,7 +341,7 @@ def run_beta_agent(query: str): cache_tools="default", ) - system_prompt = os.getenv("INPUT_SYSTEM_PROMPT", DEFAULT_SYSTEM_PROMPT) + system_prompt = _load_system_prompt() session_id = os.getenv("SESSION_ID") s3_bucket = os.getenv("S3_SESSION_BUCKET") @@ -147,7 +355,7 @@ def run_beta_agent(query: str): else: raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") - # Beta-only: Load agent skills plugin + # Beta-only: Load agent skills plugin (includes SOP→skill conversion) plugins = [] skills_plugin = _load_skills_plugin() if skills_plugin: @@ -170,6 +378,13 @@ def run_beta_agent(query: str): agent = Agent(**agent_kwargs) print(f"🧪 Beta agent created with {len(tools)} tools and {len(plugins)} plugins") + + # Auto-activate skill based on command mode + # The mode is embedded in the session_id by process-input.cjs (e.g., "reviewer-123") + mode = os.getenv("AGENT_MODE", "") + if mode: + _activate_skill_for_mode(agent, mode) + print("Processing user query...") result = agent(query) diff --git a/strands-command/scripts/python/programmatic_tool_caller.py b/strands-command/scripts/python/programmatic_tool_caller.py new file mode 100644 index 
0000000..0100820 --- /dev/null +++ b/strands-command/scripts/python/programmatic_tool_caller.py @@ -0,0 +1,316 @@ +"""Programmatic Tool Calling for Strands Agents. + +This module provides a tool that enables programmatic/code-based tool invocation, +similar to Anthropic's Programmatic Tool Calling feature. It allows an agent to +write Python code that calls other tools as functions, reducing API round-trips +and enabling complex orchestration logic. + +Tools are exposed as async functions (e.g., `await calculator(expression="2+2")`). +The code runs in an async context automatically - no boilerplate needed. + +Usage: +```python +from strands import Agent +from strands_tools import programmatic_tool_caller, calculator + +agent = Agent(tools=[programmatic_tool_caller, calculator]) + +result = agent.tool.programmatic_tool_caller( + code=''' +result = await calculator(expression="2 + 2") +print(f"Result: {result}") + +# Parallel execution +results = await asyncio.gather( + calculator(expression="10 * 1"), + calculator(expression="10 * 2"), +) +print(f"Parallel: {results}") +''' +) +``` + +Environment Variables: +- PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS: Comma-separated list of allowed tools +- PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES: Comma-separated list of extra modules to inject + into the namespace (e.g., "json,re,math,collections"). `asyncio` is always available. +- BYPASS_TOOL_CONSENT: Skip user confirmation if "true" + +Namespace: + The execution namespace matches python_repl's base: `{"__name__": "__main__"}`. + `asyncio` is always injected (required for async tool calls). + Additional modules can be added via PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES env var. + Tool functions are injected as async callables (e.g., `await shell(command="ls")`). + +Limitations: Tools that use interrupts (human-in-the-loop) are not supported. 
The SDK +blocks interrupts during direct/programmatic tool calls — there is no mechanism to pause +execution, collect human input, and resume in this context. If an interrupt-capable tool +is called, it will raise a RuntimeError which surfaces as a failed tool result back to +the agent. +""" + +import asyncio +import importlib +import logging +import os +import sys +import textwrap +import traceback +from io import StringIO +from typing import Any, Callable, Dict, Optional + +from rich import box +from rich.panel import Panel +from rich.syntax import Syntax +from rich.table import Table +from strands import tool +from strands.types.tools import ToolContext + +from strands_tools.utils import console_util +from strands_tools.utils.user_input import get_user_input + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Tool Execution Helpers +# ============================================================================= + + +def _execute_tool(agent: Any, tool_name: str, tool_input: Dict[str, Any]) -> Any: + """Execute a tool through the agent's tool caller. + + Uses agent.tool.{tool_name}() which properly handles all tool types including MCP tools.
+ """ + if agent is None: + raise RuntimeError("No agent available for tool execution") + + try: + # Use agent.tool.{tool_name}() which works for ALL tool types (including MCP tools) + # record_direct_tool_call=False prevents polluting message history during programmatic calls + tool_func = getattr(agent.tool, tool_name) + result = tool_func(record_direct_tool_call=False, **tool_input) + + if isinstance(result, dict): + if result.get("status") == "error": + error_content = result.get("content", [{"text": "Unknown error"}]) + error_text = error_content[0].get("text", "Unknown error") if error_content else "Unknown error" + raise RuntimeError(f"Tool error: {error_text}") + + content = result.get("content", []) + if content and isinstance(content, list): + text_parts = [item["text"] for item in content if isinstance(item, dict) and "text" in item] + if text_parts: + return "\n".join(text_parts) + return str(result) + + return result + + except AttributeError as e: + raise RuntimeError(f"Tool '{tool_name}' not found in registry") from e + except RuntimeError: + raise + except Exception as e: + logger.error(f"Error executing tool '{tool_name}': {e}") + raise RuntimeError(f"Failed to execute tool '{tool_name}': {e}") from e + + +def _create_async_tool_function(agent: Any, tool_name: str) -> Callable: + """Create an async function wrapper for a tool.""" + + async def tool_function(**kwargs: Any) -> Any: + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, lambda: _execute_tool(agent, tool_name, kwargs)) + + return tool_function + + +def _get_allowed_tools(agent: Any) -> set[str]: + """Get allowed tools from env var or default to all (except self).""" + all_tools = set(agent.tool_registry.registry.keys()) - {"programmatic_tool_caller"} + + env_allowed = os.environ.get("PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS", "").strip() + if env_allowed: + allowed_list = [t.strip() for t in env_allowed.split(",") if t.strip()] + return all_tools & set(allowed_list) + +
return all_tools + + +def _build_namespace(available_tools: set[str], agent: Any) -> Dict[str, Any]: + """Build the execution namespace. + + Base namespace matches python_repl: ``{"__name__": "__main__"}``. + ``asyncio`` is always injected (required for async tool wrappers). + Additional stdlib modules can be injected via the + ``PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES`` environment variable + (comma-separated module names, e.g. ``json,re,math,collections``). + Tool functions are injected as async callables. + + Returns: + Namespace dict ready for ``exec()``. + """ + # Base namespace — matches python_repl + namespace: Dict[str, Any] = { + "__name__": "__main__", + } + + # asyncio is always required (async wrapper) + namespace["asyncio"] = asyncio + + # Extra modules from env var + extra_modules = os.environ.get("PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES", "").strip() + if extra_modules: + for mod_name in extra_modules.split(","): + mod_name = mod_name.strip() + if not mod_name: + continue + try: + namespace[mod_name] = importlib.import_module(mod_name) + except ImportError: + logger.warning(f"Could not import extra module '{mod_name}', skipping") + + # Inject tools as async functions + for tool_name in available_tools: + namespace[tool_name] = _create_async_tool_function(agent, tool_name) + + return namespace + + +# ============================================================================= +# Main Tool +# ============================================================================= + + +@tool(context=True) +def programmatic_tool_caller( + code: str, + tool_context: Optional[ToolContext] = None, +) -> Dict[str, Any]: + """Execute Python code with access to agent tools as async functions. + + Tools are available as async functions - use `await` to call them. + Code runs in async context automatically, no boilerplate needed. 
+ + Example: + ```python + # Simple tool call + result = await calculator(expression="2 + 2") + print(result) + + # Loop with tool calls + for i in range(3): + r = await calculator(expression=f"{i} * 10") + print(r) + + # Parallel execution + results = await asyncio.gather( + calculator(expression="1+1"), + calculator(expression="2+2"), + ) + print(results) + ``` + + Environment Variables: + PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS: Comma-separated list of tools to expose + PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES: Comma-separated list of extra modules + to inject into the namespace (e.g., "json,re,math") + BYPASS_TOOL_CONSENT: Skip confirmation if "true" + + Args: + code: Python code to execute. Use `await tool_name(...)` to call tools. + tool_context: Injected automatically. + + Returns: + Dict with status and print() output only. + """ + console = console_util.create() + bypass_consent = os.environ.get("BYPASS_TOOL_CONSENT", "").lower() == "true" + + try: + if tool_context is None or tool_context.agent is None: + return { + "status": "error", + "content": [{"text": "No agent context available. This tool requires an agent."}], + } + + agent = tool_context.agent + + # Show code preview + console.print( + Panel( + Syntax(code, "python", theme="monokai", line_numbers=True), + title="[bold blue]Programmatic Tool Calling[/]", + border_style="blue", + ) + ) + + # Get allowed tools + available_tools = _get_allowed_tools(agent) + + tools_table = Table(show_header=True, header_style="bold cyan", box=box.SIMPLE) + tools_table.add_column("Available Tools", style="green") + for tool_name in sorted(available_tools): + tools_table.add_row(f"await {tool_name}(...)") + console.print(tools_table) + + # User confirmation + if not bypass_consent: + user_input = get_user_input("Execute this code? 
[y/*]") + if user_input.lower().strip() != "y": + cancel_reason = user_input if user_input.strip() != "n" else get_user_input("Reason:") + return { + "status": "error", + "content": [{"text": f"Cancelled. Reason: {cancel_reason}"}], + } + + # Build execution namespace (matches python_repl base + tools) + exec_namespace = _build_namespace(available_tools, agent) + + console.print("[green]Executing...[/]") + + # Execute code in async context + # Wrap user code in async function for await support + indented_code = textwrap.indent(code, " ") + wrapped_code = f"async def __user_code__():\n{indented_code}\n" + + # Capture output + stdout_capture = StringIO() + stderr_capture = StringIO() + old_stdout, old_stderr = sys.stdout, sys.stderr + + try: + sys.stdout = stdout_capture + sys.stderr = stderr_capture + + # Use compile() for better error tracebacks + compiled = compile(wrapped_code, "", "exec") + exec(compiled, exec_namespace) + asyncio.run(exec_namespace["__user_code__"]()) + + captured_output = stdout_capture.getvalue() + errors = stderr_capture.getvalue() + if errors: + captured_output += f"\n[stderr]\n{errors}" + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + console.print("[bold green]✓ Done[/]") + if captured_output.strip(): + console.print(Panel(captured_output, title="[bold green]Output[/]", border_style="green")) + + return { + "status": "success", + "content": [{"text": captured_output.strip() if captured_output.strip() else "(no output)"}], + } + + except SyntaxError: + error_msg = f"Syntax error:\n{traceback.format_exc()}" + console.print(Panel(error_msg, title="[bold red]Error[/]", border_style="red")) + return {"status": "error", "content": [{"text": error_msg}]} + + except Exception: + error_msg = f"Execution error:\n{traceback.format_exc()}" + console.print(Panel(error_msg, title="[bold red]Error[/]", border_style="red")) + return {"status": "error", "content": [{"text": error_msg}]}