diff --git a/strands-command/actions/strands-agent-runner/action.yml b/strands-command/actions/strands-agent-runner/action.yml index 057fb63..d83fd56 100644 --- a/strands-command/actions/strands-agent-runner/action.yml +++ b/strands-command/actions/strands-agent-runner/action.yml @@ -47,6 +47,8 @@ runs: echo "ref=$(jq -r .branch_name strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "session_id=$(jq -r .session_id strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "head_repo=$(jq -r '.head_repo // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT + echo "agent_mode=$(jq -r '.agent_mode // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT + echo "agent_type=$(jq -r '.agent_type // "standard"' strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "system_prompt<> $GITHUB_OUTPUT jq -r .system_prompt strands-parsed-input.json >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT @@ -54,7 +56,7 @@ runs: jq -r .prompt strands-parsed-input.json >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT - # Checkout devtools repo for scripts + # Checkout devtools repo for scripts, SOPs, and agent skills - name: Checkout devtools uses: actions/checkout@v5 with: @@ -63,6 +65,7 @@ runs: sparse-checkout: | strands-command/scripts strands-command/agent-sops + strands-command/agent-skills path: devtools # Copy the devtools directory to the runner temp directory so the branch content cant overwrite the scripts executed here @@ -79,6 +82,24 @@ runs: ref: ${{ steps.read-input.outputs.ref }} repository: ${{ steps.read-input.outputs.head_repo || github.repository }} + # Copy agent-skills to working directory (beta agent only) + # The AgentSkills plugin looks for skills in the working directory + - name: Copy agent-skills to working directory + if: steps.read-input.outputs.agent_type == 'beta' + shell: bash + run: | + if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ]; then + cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills ./agent-skills + echo "โœ… 
Copied agent-skills to working directory" + if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-sops" ]; then + cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-sops ./agent-sops + echo "โœ… Copied agent-sops to working directory (for runtime skill conversion)" + fi + ls -la ./agent-skills/ + else + echo "โ„น๏ธ No agent-skills directory found (skills not available)" + fi + - name: Set up Python uses: actions/setup-python@v4 with: @@ -235,8 +256,19 @@ runs: # Evals Configuration (input overrides Secrets Manager) EVALS_SQS_QUEUE_ARN: ${{ inputs.evals_sqs_queue_arn || steps.secrets.outputs.evals_sqs_queue_arn }} + + # Agent type (standard or beta) + AGENT_TYPE: ${{ steps.read-input.outputs.agent_type }} + AGENT_MODE: ${{ steps.read-input.outputs.agent_mode }} run: | - uv run --no-project ${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python/agent_runner.py "$INPUT_TASK" + SCRIPTS_DIR="${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python" + if [ "$AGENT_TYPE" = "beta" ]; then + echo "๐Ÿงช Running beta agent" + uv run --no-project "$SCRIPTS_DIR/beta_agent_runner.py" "$INPUT_TASK" + else + echo "๐Ÿค– Running standard agent" + uv run --no-project "$SCRIPTS_DIR/agent_runner.py" "$INPUT_TASK" + fi - name: Capture repository state shell: bash diff --git a/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md b/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md new file mode 100644 index 0000000..4c86db2 --- /dev/null +++ b/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md @@ -0,0 +1,60 @@ +# Strands Agent (Beta) โ€” /strands Command + +**Identity**: AI agent for the Strands Agents project, invoked via `/strands beta` in GitHub issues and PRs. +**Runtime**: GitHub Actions, triggered by `/strands beta ` comments. 
+ +--- + +## Guidelines + +Follow the [Strands Agent Guidelines](https://github.com/strands-agents/docs/blob/main/team/AGENT_GUIDELINES.md): + +- **Add value or stay silent.** If you don't have something concrete to contribute, don't act. +- **Keep it short.** Lead with what matters, then stop. Use `
` blocks for long analysis. +- **Approvals need reasoning.** Justify decisions โ€” especially rejections. +- **Prove, don't opine.** Provide evidence โ€” tests, scripts, code โ€” not speculation. + +--- + +## Capabilities + +You are an extended agent with access to: +- **Agent Skills** โ€” Task-specific SOPs loaded on-demand via the `skills` tool +- **Sub-Agents** โ€” Delegate subtasks to specialized agents via `use_agent` +- **Programmatic Tool Calling** โ€” Execute Python code that calls tools as async functions + +### Skills + +Use the `skills` tool to activate task-specific instructions. Available skills are shown in your context. When a skill is activated, follow its instructions precisely. + +### Sub-Agents + +Use `use_agent` to spawn sub-agents for parallelizable work (e.g., per-package analysis, independent reviews). Each sub-agent gets its own context and tools. + +--- + +## Behavior + +1. **Understand the task** โ€” Read the issue/PR, comments, and linked references thoroughly before acting. +2. **Activate the right skill** โ€” If your task maps to a skill, activate it first. +3. **Work incrementally** โ€” Commit progress, post updates, iterate on feedback. +4. **Be honest about limitations** โ€” If you can't do something, say so. + +--- + +## Output Format + +- Use GitHub-flavored markdown +- Structure with headers, tables, and code blocks +- Keep top-level summaries under 200 words +- Use `
` blocks for verbose content + +--- + +## Anti-Patterns (NEVER) + +- Don't post walls of text without structure +- Don't approve without review +- Don't speculate without evidence +- Don't repeat what the user already said +- Don't create noise โ€” every comment should move things forward diff --git a/strands-command/agent-skills/task-adversarial-tester/SKILL.md b/strands-command/agent-skills/task-adversarial-tester/SKILL.md new file mode 100644 index 0000000..c4f8fed --- /dev/null +++ b/strands-command/agent-skills/task-adversarial-tester/SKILL.md @@ -0,0 +1,108 @@ +--- +name: task-adversarial-tester +description: Break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. Produce artifacts โ€” failing tests, reproduction scripts, and concrete evidence โ€” that prove something is broken. +allowed-tools: shell use_github +--- +# Adversarial Tester + +## Role + +You are an Adversarial Tester. Your goal is to break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. You do NOT judge code quality or style. You produce artifacts โ€” failing tests, reproduction scripts, and concrete evidence โ€” that prove something is broken. If you can't break it, you say so. You never speculate without proof. + +## Principles + +1. **Prove, don't opine.** Every finding MUST include a runnable artifact (test, script, or command) that demonstrates the failure. +2. **Spec over implementation.** Your attack surface comes from the PR description, linked issues, and acceptance criteria โ€” not from reading the code and inventing post-hoc concerns. +3. **Adversarial by design.** Assume the code is wrong until proven otherwise. +4. **Artifacts are the deliverable.** Your output is a set of pass/fail artifacts. If all pass, the code survived. If any fail, they speak for themselves. +5. 
**No overlap with the reviewer.** You don't comment on naming, style, architecture, or documentation. You break things. + +## Steps + +### 1. Setup Test Environment + +- Checkout the PR branch +- Read `AGENTS.md`, `CONTRIBUTING.md`, `DEVELOPMENT.md` to understand the project's test infrastructure +- Run the existing test suite to establish a baseline (pass count, fail count) +- Create a progress tracking notebook + +### 2. Understand the Attack Surface + +- Read the PR description and linked issue thoroughly +- Use `use_github` GraphQL to identify all changed files +- Extract explicit and implicit acceptance criteria +- Identify the public API surface being added or modified +- Categorize: new feature, bugfix, refactor, dependency change, config change +- Note any claims the author makes ("handles X", "backward compatible", "no breaking changes") +- Document your attack surface as a checklist: + - Input boundaries and edge cases + - Error paths and failure modes + - Concurrency and ordering assumptions + - Backward compatibility claims + - Security-sensitive areas + - Integration points + +### 3. 
Adversarial Test Generation + +#### 3.1 Edge Case Testing +- Identify all input parameters and their documented boundaries +- Write tests for: empty inputs, null/None values, maximum values, negative numbers, special characters, unicode, extremely long strings +- Test type coercion boundaries +- Test combinations of edge case inputs + +#### 3.2 Error Path Testing +- Map every error handler in the changed code +- Write tests that trigger each error path +- Verify error messages are correct and don't leak internals +- Test cascading failures +- Test resource cleanup on error + +#### 3.3 Concurrency & Race Condition Testing +- If the code has shared state, write concurrent access tests +- Test ordering assumptions +- Test timeout and cancellation paths +- Test re-entrancy if applicable + +#### 3.4 Backward Compatibility Testing +- If the PR claims backward compatibility, write tests proving or disproving it +- Test that existing public API contracts still hold +- Test serialization/deserialization with old formats if applicable + +#### 3.5 Security Testing +- Test for injection attacks if the code processes user input +- Test for credential/secret leakage in error messages or logs +- Test for path traversal if file operations are involved +- Test authorization boundaries if applicable + +### 4. Execute and Classify Results + +- Run all adversarial tests +- Classify each result as PASS (code survived) or FAIL (bug found) +- For each FAIL, verify it's a genuine bug (not a test setup issue) +- Re-run failures to confirm they're deterministic + +### 5. 
Report Findings + +Post a structured comment on the PR: + +``` +## Adversarial Test Results + +**Attack Surface:** [summary of what was tested] +**Tests Run:** N | **Passed:** N | **Failed:** N + +### ๐Ÿ”ด Failures (Bugs Found) +[For each failure: description, reproduction command, expected vs actual] + +### ๐ŸŸข Passed (Code Survived) +[Brief summary of attack vectors that didn't find issues] + +### โš ๏ธ Could Not Test +[Any areas that couldn't be tested and why] +``` + +## Desired Outcome + +- A set of runnable test artifacts that exercise edge cases and error paths +- Clear pass/fail results with reproduction steps for any bugs found +- Honest "survived" verdict when the code holds up diff --git a/strands-command/agent-skills/task-meta-reasoner/SKILL.md b/strands-command/agent-skills/task-meta-reasoner/SKILL.md new file mode 100644 index 0000000..b363700 --- /dev/null +++ b/strands-command/agent-skills/task-meta-reasoner/SKILL.md @@ -0,0 +1,79 @@ +--- +name: task-meta-reasoner +description: Meta-reasoning gate that evaluates whether to accept, defer, redirect, reject, or escalate an issue, PR, or task before any work begins. Questions the premise at a high level โ€” assessing layer ownership, existing solutions, architectural alignment, scope, and roadmap fit. Always proposes alternatives, even for seemingly obvious requests. Use this skill as the first checkpoint before task-refiner, task-implementer, task-reviewer, or task-adversarial-tester to prevent wasted effort on misaligned, duplicate, or out-of-scope work. +allowed-tools: shell use_github +--- +# Meta-Reasoner + +## Role + +You are a Meta-Reasoner. Your goal is to evaluate whether a given issue, pull request, or task should be accepted, deferred, or rejected โ€” before any implementation, review, or refinement work begins. You question the request at a high level: Do we need to do this? Is it our concern? Is this the right approach? Is this a duplicate? Does a simpler solution already exist? 
+ +## Principles + +1. **Question the premise.** Don't assume the request is valid โ€” interrogate it. +2. **Check for duplicates.** Search existing issues, PRs, and discussions before accepting. +3. **Assess scope.** Is this the right layer? The right repo? The right team? +4. **Propose alternatives.** Even for good requests, suggest simpler paths. +5. **Be decisive.** Your output is a clear verdict with reasoning. + +## Steps + +### 1. Understand the Request + +- Read the issue/PR description, title, and any linked references +- Identify the core ask โ€” what does the requester actually want? +- Note any assumptions the requester is making + +### 2. Evaluate Fit + +- **Layer ownership**: Is this our concern or should it be upstream/downstream? +- **Existing solutions**: Does something already solve this? Search issues, docs, and code. +- **Architectural alignment**: Does this fit the project's direction? +- **Scope**: Is this too big? Too small? Should it be split or combined? +- **Roadmap fit**: Is this on the roadmap? If not, should it be? + +### 3. Search for Duplicates + +- Search open and closed issues for similar requests +- Check recent PRs for related work +- Look for existing documentation that addresses the concern + +### 4. Propose Alternatives + +Even if you plan to accept, always propose at least one alternative: +- A simpler approach +- An existing solution that might work +- A different scope (smaller or larger) +- Deferring to a better time + +### 5. Render Verdict + +Post a structured comment: + +``` +## Meta-Reasoning Assessment + +**Verdict:** ACCEPT / DEFER / REDIRECT / REJECT / ESCALATE + +**Core Ask:** [one sentence] + +**Assessment:** +- Layer ownership: โœ…/โŒ [explanation] +- Existing solutions: โœ…/โŒ [explanation] +- Architectural fit: โœ…/โŒ [explanation] +- Scope: โœ…/โŒ [explanation] +- Duplicates: โœ…/โŒ [explanation] + +**Alternatives Considered:** +1. [alternative 1] +2. 
[alternative 2] + +**Recommendation:** [what to do next] +``` + +## Desired Outcome + +- A clear accept/defer/reject decision with reasoning +- No wasted effort on misaligned work +- Alternatives surfaced even for accepted tasks diff --git a/strands-command/agent-skills/task-release-digest/SKILL.md b/strands-command/agent-skills/task-release-digest/SKILL.md new file mode 100644 index 0000000..5b13c5c --- /dev/null +++ b/strands-command/agent-skills/task-release-digest/SKILL.md @@ -0,0 +1,107 @@ +--- +name: task-release-digest +description: Generate a comprehensive release digest by analyzing merged PRs across Strands packages. Uses sub-agents via use_agent to parallelize per-package analysis, then synthesizes results into a unified digest. +allowed-tools: shell use_github use_agent http_request +--- +# Release Digest Generator + +## Role + +You are a Release Digest orchestrator. Your goal is to generate a comprehensive release digest covering recent changes across multiple Strands packages. You use sub-agents (via `use_agent`) to parallelize per-package analysis, then synthesize results into a unified digest. + +## Packages + +The Strands ecosystem includes these key packages: +- `strands-agents/sdk-python` โ€” Core Python SDK +- `strands-agents/sdk-typescript` โ€” Core TypeScript SDK +- `strands-agents/tools` โ€” Official tool implementations +- `strands-agents/agent-builder` โ€” Agent builder utilities +- `strands-agents/docs` โ€” Documentation + +## Steps + +### 1. Determine Time Range + +- Accept a time range (e.g., "last 2 weeks", "since v1.14.0", specific dates) +- Default to the last 2 weeks if no range is specified +- Calculate the start and end dates + +### 2. 
Spawn Per-Package Sub-Agents + +For each package, use `use_agent` to spawn a sub-agent that: +- Queries merged PRs in the time range using GitHub GraphQL API +- Categorizes PRs: features, bug fixes, docs, chores +- Identifies the top 3-5 most impactful changes +- Extracts brief code examples for major features +- Returns a structured summary + +**Sub-agent system prompt template:** +``` +You are analyzing merged PRs for the {package} repository. +Time range: {start_date} to {end_date}. + +Query merged PRs using GitHub GraphQL API. For each PR, determine: +1. Category: feature, bugfix, docs, chore, refactor +2. User impact: high, medium, low +3. One-line summary + +Return a structured JSON summary with: +- package: string +- total_prs: number +- features: [{pr_number, title, summary, impact}] +- bugfixes: [{pr_number, title, summary, impact}] +- other_count: number +``` + +### 3. Collect and Synthesize Results + +- Wait for all sub-agents to complete +- Merge results into a unified view +- Identify cross-package themes (e.g., "streaming improvements across SDK and tools") +- Rank features by impact + +### 4. Generate Digest + +Format the digest as a GitHub issue comment: + +```markdown +# ๐Ÿ“ฆ Strands Release Digest โ€” {date_range} + +## Highlights +[Top 3-5 changes across all packages with brief descriptions] + +## By Package + +### sdk-python +**{N} PRs merged** | {features} features | {fixes} fixes +- ๐Ÿš€ [Feature Title](PR link) โ€” one-line description +- ๐Ÿ› [Fix Title](PR link) โ€” one-line description + +### sdk-typescript +... + +### tools +... + +## Cross-Package Themes +[Any patterns noticed across packages] + +## Stats +| Package | PRs | Features | Fixes | Docs | +|---------|-----|----------|-------|------| +| sdk-python | N | N | N | N | +| ... | ... | ... | ... | ... | +| **Total** | **N** | **N** | **N** | **N** | +``` + +### 5. 
Post Results + +- Post the digest as a comment on the triggering issue +- Include a summary of sub-agent execution (how many packages analyzed, any failures) + +## Desired Outcome + +- A well-formatted release digest covering all active Strands packages +- Parallel execution via sub-agents for faster analysis +- Clear categorization and impact assessment +- Cross-package theme identification diff --git a/strands-command/scripts/javascript/process-input.cjs b/strands-command/scripts/javascript/process-input.cjs index 82de3b4..140dd8c 100644 --- a/strands-command/scripts/javascript/process-input.cjs +++ b/strands-command/scripts/javascript/process-input.cjs @@ -76,11 +76,48 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { return { branchName, headRepo }; } -function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) { +function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, agentType) { const sessionId = inputs.session_id || (mode === 'implementer' ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); + // Beta agent uses BETA_SYSTEM_PROMPT.md (loaded by the runner) + skill activation. + // The system prompt here is just a thin context layer โ€” the real instructions come + // from the BETA_SYSTEM_PROMPT.md file and the activated skill. 
+ if (agentType === 'beta') { + // Read BETA_SYSTEM_PROMPT.md if available โ€” provides the base system prompt + let systemPrompt = ''; + const promptPaths = [ + 'devtools/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md', + 'agent-skills/BETA_SYSTEM_PROMPT.md', + ]; + + for (const promptPath of promptPaths) { + try { + if (fs.existsSync(promptPath)) { + systemPrompt = fs.readFileSync(promptPath, 'utf8'); + console.log(`Loaded beta system prompt from ${promptPath}`); + break; + } + } catch (e) { + console.log(`Could not read ${promptPath}: ${e.message}`); + } + } + + // Fallback if file not found + if (!systemPrompt) { + systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills, sub-agent orchestration, and programmatic tool calling.`; + } + + let prompt = (isPullRequest) + ? 'The pull request id is:' + : 'The issue id is:'; + prompt += `${issueId}\n${command}\nreview and continue`; + + return { sessionId, systemPrompt, prompt, mode }; + } + + // Standard agent uses SOP-based system prompts const scriptFiles = { 'implementer': 'devtools/strands-command/agent-sops/task-implementer.sop.md', 'refiner': 'devtools/strands-command/agent-sops/task-refiner.sop.md', @@ -96,7 +133,7 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) : 'The issue id is:'; prompt += `${issueId}\n${command}\nreview and continue`; - return { sessionId, systemPrompt, prompt }; + return { sessionId, systemPrompt, prompt, mode }; } module.exports = async (context, github, core, inputs) => { @@ -104,27 +141,51 @@ module.exports = async (context, github, core, inputs) => { const { issueId, command, issue } = await getIssueInfo(github, context, inputs); const isPullRequest = !!issue.data.pull_request; + + // Check if this is a beta command: /strands beta + let agentType = 'standard'; + let effectiveCommand = command; + + if (command.startsWith('beta ') || command === 'beta') { + agentType = 
'beta'; + effectiveCommand = command.replace(/^beta\s*/, '').trim(); + console.log(`Beta agent requested. Effective command: "${effectiveCommand}"`); + } // Determine mode based on explicit command first, then context let mode; - if (command.startsWith('release-notes') || command.startsWith('release notes')) { + if (effectiveCommand.startsWith('adversarial-test') || effectiveCommand.startsWith('adversarial test')) { + mode = 'adversarial-test'; + } else if (effectiveCommand.startsWith('release-digest') || effectiveCommand.startsWith('release digest')) { + mode = 'release-digest'; + } else if (effectiveCommand.startsWith('meta-reason') || effectiveCommand.startsWith('meta reason')) { + mode = 'meta-reason'; + } else if (effectiveCommand.startsWith('release-notes') || effectiveCommand.startsWith('release notes')) { mode = 'release-notes'; - } else if (command.startsWith('implement')) { + } else if (effectiveCommand.startsWith('implement')) { mode = 'implementer'; - } else if (command.startsWith('review')) { + } else if (effectiveCommand.startsWith('review')) { mode = 'reviewer'; - } else if (command.startsWith('refine')) { + } else if (effectiveCommand.startsWith('refine')) { mode = 'refiner'; } else { - // Default behavior when no explicit command: PR -> implementer, Issue -> refiner - mode = isPullRequest ? 'implementer' : 'refiner'; + // Default behavior when no explicit command: PR -> reviewer, Issue -> refiner + mode = isPullRequest ? 
'reviewer' : 'refiner'; } - console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}`); + + // Beta-only modes require the beta agent + const betaOnlyModes = ['adversarial-test', 'release-digest', 'meta-reason']; + if (betaOnlyModes.includes(mode) && agentType !== 'beta') { + agentType = 'beta'; + console.log(`Mode '${mode}' requires beta agent โ€” auto-promoting to beta`); + } + + console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}, Agent: ${agentType}`); const { branchName, headRepo } = await determineBranch(github, context, issueId, mode, isPullRequest); console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`); - const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs); + const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, effectiveCommand, branchName, inputs, agentType); console.log(`Session ID: ${sessionId}`); console.log(`Task prompt: "${prompt}"`); @@ -135,7 +196,9 @@ module.exports = async (context, github, core, inputs) => { system_prompt: systemPrompt, prompt: prompt, issue_id: issueId, - head_repo: headRepo + head_repo: headRepo, + agent_type: agentType, + agent_mode: mode, }; fs.writeFileSync('strands-parsed-input.json', JSON.stringify(outputs, null, 2)); diff --git a/strands-command/scripts/python/beta_agent_runner.py b/strands-command/scripts/python/beta_agent_runner.py new file mode 100644 index 0000000..4523468 --- /dev/null +++ b/strands-command/scripts/python/beta_agent_runner.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python3 +""" +Strands Beta Agent Runner + +A separate agent runner with extended capabilities (skills, sub-agents, +programmatic tool calling, etc.). Reuses shared infrastructure from +agent_runner.py โ€” same pipeline, different agent. 
+ +Usage: /strands beta +""" + +import json +import os +import re +import shutil +import sys +from pathlib import Path +from typing import Any + +from strands import Agent +from strands.session import S3SessionManager +from strands.models import BedrockModel, CacheConfig +from botocore.config import Config + +from strands_tools import http_request, shell, use_agent + +# Reuse shared infrastructure from the standard runner +from agent_runner import ( + _get_all_tools, + _get_trace_attributes, + _send_eval_trigger, + _setup_langfuse_telemetry, + STRANDS_BUDGET_TOKENS, + STRANDS_MAX_TOKENS, + STRANDS_MODEL_ID, + STRANDS_REGION, +) + + +# --------------------------------------------------------------------------- +# System prompt +# --------------------------------------------------------------------------- + +def _load_system_prompt() -> str: + """Load the beta agent system prompt. + + Priority: + 1. INPUT_SYSTEM_PROMPT env var (set by process-input.cjs) + 2. BETA_SYSTEM_PROMPT.md file in agent-skills directory + 3. Minimal fallback + """ + env_prompt = os.getenv("INPUT_SYSTEM_PROMPT", "").strip() + if env_prompt: + return env_prompt + + # Try loading from file + possible_paths = [ + Path("agent-skills/BETA_SYSTEM_PROMPT.md"), + Path("devtools/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md"), + ] + + for path in possible_paths: + try: + if path.exists(): + with open(path, "r", encoding="utf-8") as f: + content = f.read() + print(f"โœ… System prompt loaded from {path}") + return content + except Exception as e: + print(f"โš ๏ธ Failed to read {path}: {e}") + + return "You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills, sub-agent orchestration, and programmatic tool calling." 
+ + +# --------------------------------------------------------------------------- +# Programmatic Tool Caller (local copy from strands-agents/tools#387) +# --------------------------------------------------------------------------- + +def _load_programmatic_tool_caller(): + """Try to load programmatic_tool_caller from strands_tools or local copy. + + Priority: + 1. strands_tools.programmatic_tool_caller (when merged into tools package) + 2. Local copy at scripts/python/programmatic_tool_caller.py + """ + try: + from strands_tools import programmatic_tool_caller + print("โœ… programmatic_tool_caller loaded from strands_tools") + return programmatic_tool_caller + except ImportError: + pass + + # Try local copy + try: + scripts_dir = Path(__file__).parent + local_ptc = scripts_dir / "programmatic_tool_caller.py" + if local_ptc.exists(): + import importlib.util + spec = importlib.util.spec_from_file_location("programmatic_tool_caller", local_ptc) + if spec and spec.loader: + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + print("โœ… programmatic_tool_caller loaded from local copy") + return mod.programmatic_tool_caller + except Exception as e: + print(f"โš ๏ธ Failed to load local programmatic_tool_caller: {e}") + + print("โ„น๏ธ programmatic_tool_caller not available") + return None + + +# --------------------------------------------------------------------------- +# Tools +# --------------------------------------------------------------------------- + +def _get_beta_tools() -> list[Any]: + """Get tools for the beta agent. + + Starts with all standard tools, then adds beta-only tools. + This ensures the beta agent is a strict superset of the standard agent. 
+ """ + tools = _get_all_tools() + + # Add beta-only tools + tool_names = {getattr(t, "__name__", str(t)) for t in tools} + + if "use_agent" not in tool_names: + tools.append(use_agent) + + # Add programmatic tool caller + ptc = _load_programmatic_tool_caller() + if ptc is not None: + tools.append(ptc) + + return tools + + +# --------------------------------------------------------------------------- +# Skills +# --------------------------------------------------------------------------- + +# Map from command mode โ†’ skill name +SKILL_MAP = { + "adversarial-test": "task-adversarial-tester", + "release-digest": "task-release-digest", + "meta-reason": "task-meta-reasoner", + "reviewer": "task-reviewer", + "review": "task-reviewer", + "implementer": "task-implementer", + "implement": "task-implementer", + "refiner": "task-refiner", + "refine": "task-refiner", + "release-notes": "task-release-notes", +} + + +def _convert_sops_to_skills(skills_dir: Path, sops_dir: Path) -> int: + """Convert existing SOP files to SKILL.md format at runtime. + + Reads .sop.md files from the SOPs directory, adds YAML frontmatter, + and writes them as SKILL.md files in the skills directory. + No source files are modified โ€” conversion is one-way into the skills dir. + + Returns the number of SOPs converted. + """ + if not sops_dir.exists(): + return 0 + + # SOP name โ†’ metadata for frontmatter + sop_metadata = { + "task-implementer": { + "description": "Implement tasks defined in GitHub issues using test-driven development. Write code following existing patterns, create comprehensive tests, generate documentation, and create pull requests for review.", + "allowed_tools": "shell use_github", + }, + "task-refiner": { + "description": "Review and refine feature requests in GitHub issues. 
Identify ambiguities, post clarifying questions, gather missing information, and prepare issues for implementation.", + "allowed_tools": "shell use_github", + }, + "task-release-notes": { + "description": "Generate high-quality release notes for software releases. Analyze merged PRs between git references, identify major features and bug fixes, extract code examples, and format into well-structured markdown.", + "allowed_tools": "shell use_github", + }, + "task-reviewer": { + "description": "Review code changes in pull requests. Analyze diffs, understand context, and add targeted review comments to improve code quality, maintainability, and adherence to project standards.", + "allowed_tools": "shell use_github", + }, + } + + converted = 0 + for sop_file in sops_dir.glob("*.sop.md"): + # Extract skill name: task-implementer.sop.md โ†’ task-implementer + skill_name = sop_file.stem.replace(".sop", "") + skill_dir = skills_dir / skill_name + + # Skip if skill already exists (don't overwrite dedicated skills) + if (skill_dir / "SKILL.md").exists(): + continue + + metadata = sop_metadata.get(skill_name, {}) + description = metadata.get("description", f"Skill converted from {sop_file.name}") + allowed_tools = metadata.get("allowed_tools", "shell use_github") + + # Read SOP content + try: + sop_content = sop_file.read_text(encoding="utf-8") + except Exception as e: + print(f"โš ๏ธ Failed to read {sop_file}: {e}") + continue + + # Build SKILL.md with frontmatter + skill_content = f"""--- +name: {skill_name} +description: {description} +allowed-tools: {allowed_tools} +--- +{sop_content}""" + + # Write to skills directory + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(skill_content, encoding="utf-8") + converted += 1 + print(f" โœ… Converted SOP โ†’ skill: {skill_name}") + + return converted + + +def _load_skills_plugin(): + """Load agent skills from the agent-skills directory if available. 
+ + Also converts existing SOPs to skills at runtime (without duplicating source files). + Returns AgentSkills plugin instance or None if skills aren't available. + """ + try: + from strands.vended_plugins.skills import AgentSkills + except ImportError: + print("โ„น๏ธ AgentSkills plugin not available (strands.vended_plugins.skills not found)") + return None + + # Look for skills directory + possible_paths = [ + Path("agent-skills"), + Path("devtools/strands-command/agent-skills"), + ] + + skills_dir = None + for path in possible_paths: + if path.exists() and path.is_dir(): + skills_dir = path + break + + if skills_dir is None: + print("โ„น๏ธ No agent-skills directory found (skills not available)") + return None + + # Convert SOPs to skills at runtime + possible_sop_paths = [ + Path("devtools/strands-command/agent-sops"), + Path("agent-sops"), + ] + for sops_dir in possible_sop_paths: + if sops_dir.exists(): + converted = _convert_sops_to_skills(skills_dir, sops_dir) + if converted > 0: + print(f"โœ… Converted {converted} SOPs to skills") + break + + try: + plugin = AgentSkills(skills=str(skills_dir)) + skills = plugin.get_available_skills() + + if skills: + print(f"โœ… AgentSkills plugin: {len(skills)} skills loaded") + for skill in skills: + print(f" - {skill.name}: {skill.description[:60]}...") + return plugin + else: + print("โš ๏ธ AgentSkills plugin: no skills found in directory") + return None + except Exception as e: + print(f"โš ๏ธ Failed to load skills: {e}") + return None + + +def _activate_skill_for_mode(agent: Agent, mode: str) -> None: + """Activate the appropriate skill based on the command mode. + + Maps the command mode (e.g., "review", "implement") to a skill name + and invokes it via agent.tool.skills(). This front-loads the skill + instructions into the agent's context before it starts working. 
+ """ + skill_name = SKILL_MAP.get(mode) + if not skill_name: + print(f"โ„น๏ธ No skill mapped for mode '{mode}'") + return + + if "skills" not in agent.tool_names: + print(f"โš ๏ธ skills tool not available, can't activate '{skill_name}'") + return + + try: + agent.tool.skills(skill_name=skill_name, record_direct_tool_call=True) + print(f"โœ… Activated skill: {skill_name}") + except Exception as e: + print(f"โš ๏ธ Failed to activate skill '{skill_name}': {e}") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def run_beta_agent(query: str): + """Run the beta agent with extended capabilities.""" + try: + # Shared infrastructure from agent_runner.py + telemetry_enabled = _setup_langfuse_telemetry() + trace_attributes = _get_trace_attributes() if telemetry_enabled else {} + + # Beta agent tools (superset of standard) + tools = _get_beta_tools() + + # Same model configuration as standard agent + additional_request_fields = {} + additional_request_fields["anthropic_beta"] = ["interleaved-thinking-2025-05-14"] + additional_request_fields["thinking"] = { + "type": "enabled", + "budget_tokens": STRANDS_BUDGET_TOKENS, + } + + model = BedrockModel( + model_id=STRANDS_MODEL_ID, + max_tokens=STRANDS_MAX_TOKENS, + region_name=STRANDS_REGION, + boto_client_config=Config( + read_timeout=900, + connect_timeout=900, + retries={"max_attempts": 3, "mode": "adaptive"}, + ), + cache_config=CacheConfig(strategy="auto"), + additional_request_fields=additional_request_fields, + cache_prompt="default", + cache_tools="default", + ) + + system_prompt = _load_system_prompt() + session_id = os.getenv("SESSION_ID") + s3_bucket = os.getenv("S3_SESSION_BUCKET") + + if s3_bucket and session_id: + print(f"๐Ÿค– Using session manager with session ID: {session_id}") + session_manager = S3SessionManager( + session_id=session_id, + bucket=s3_bucket, + 
prefix=os.getenv("GITHUB_REPOSITORY", ""), + ) + else: + raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") + + # Beta-only: Load agent skills plugin (includes SOPโ†’skill conversion) + plugins = [] + skills_plugin = _load_skills_plugin() + if skills_plugin: + plugins.append(skills_plugin) + + # Create beta agent + agent_kwargs = { + "model": model, + "system_prompt": system_prompt, + "tools": tools, + "session_manager": session_manager, + } + + if plugins: + agent_kwargs["plugins"] = plugins + + if trace_attributes: + agent_kwargs["trace_attributes"] = trace_attributes + + agent = Agent(**agent_kwargs) + + print(f"๐Ÿงช Beta agent created with {len(tools)} tools and {len(plugins)} plugins") + + # Auto-activate skill based on command mode + # The mode is embedded in the session_id by process-input.cjs (e.g., "reviewer-123") + mode = os.getenv("AGENT_MODE", "") + if mode: + _activate_skill_for_mode(agent, mode) + + print("Processing user query...") + result = agent(query) + + print(f"\n\nAgent Result ๐Ÿค–\nStop Reason: {result.stop_reason}\nMessage: {json.dumps(result.message, indent=2)}") + + # Eval trigger (shared infrastructure) + unique_session_id = trace_attributes.get("session.id", session_id) + eval_type = session_id.split("-")[0] if "-" in session_id else session_id + _send_eval_trigger(unique_session_id, eval_type) + + except Exception as e: + error_msg = f"โŒ Beta agent execution failed: {e}" + print(error_msg) + raise e + + +def main() -> None: + """Main entry point for the beta agent runner.""" + try: + if len(sys.argv) < 2: + raise ValueError("Task argument is required") + + task = " ".join(sys.argv[1:]) + if not task.strip(): + raise ValueError("Task cannot be empty") + print(f"๐Ÿงช Running beta agent with task: {task}") + + run_beta_agent(task) + + except Exception as e: + error_msg = f"Fatal error: {e}" + print(error_msg) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git 
a/strands-command/scripts/python/programmatic_tool_caller.py b/strands-command/scripts/python/programmatic_tool_caller.py new file mode 100644 index 0000000..0100820 --- /dev/null +++ b/strands-command/scripts/python/programmatic_tool_caller.py @@ -0,0 +1,316 @@ +"""Programmatic Tool Calling for Strands Agents. + +This module provides a tool that enables programmatic/code-based tool invocation, +similar to Anthropic's Programmatic Tool Calling feature. It allows an agent to +write Python code that calls other tools as functions, reducing API round-trips +and enabling complex orchestration logic. + +Tools are exposed as async functions (e.g., `await calculator(expression="2+2")`). +The code runs in an async context automatically - no boilerplate needed. + +Usage: +```python +from strands import Agent +from strands_tools import programmatic_tool_caller, calculator + +agent = Agent(tools=[programmatic_tool_caller, calculator]) + +result = agent.tool.programmatic_tool_caller( + code=''' +result = await calculator(expression="2 + 2") +print(f"Result: {result}") + +# Parallel execution +results = await asyncio.gather( + calculator(expression="10 * 1"), + calculator(expression="10 * 2"), +) +print(f"Parallel: {results}") +''' +) +``` + +Environment Variables: +- PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS: Comma-separated list of allowed tools +- PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES: Comma-separated list of extra modules to inject + into the namespace (e.g., "json,re,math,collections"). `asyncio` is always available. +- BYPASS_TOOL_CONSENT: Skip user confirmation if "true" + +Namespace: + The execution namespace matches python_repl's base: `{"__name__": "__main__"}`. + `asyncio` is always injected (required for async tool calls). + Additional modules can be added via PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES env var. + Tool functions are injected as async callables (e.g., `await shell(command="ls")`). 
+ +Limitations: Tools that use interrupts (human-in-the-loop) are not supported. The SDK +blocks interrupts during direct/programmatic tool calls — there is no mechanism to pause +execution, collect human input, and resume in this context. If an interrupt-capable tool +is called, it will raise a RuntimeError which surfaces as a failed tool result back to +the agent. +""" + +import asyncio +import importlib +import logging +import os +import sys +import textwrap +import traceback +from io import StringIO +from typing import Any, Callable, Dict, Optional + +from rich import box +from rich.panel import Panel +from rich.syntax import Syntax +from rich.table import Table +from strands import tool +from strands.types.tools import ToolContext + +from strands_tools.utils import console_util +from strands_tools.utils.user_input import get_user_input + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Tool Execution Helpers +# ============================================================================= + + +def _execute_tool(agent: Any, tool_name: str, tool_input: Dict[str, Any]) -> Any: + """Execute a tool through the agent's tool caller. + + Uses agent.tool.<tool_name>() which properly handles all tool types including MCP tools. 
+ """ + if agent is None: + raise RuntimeError("No agent available for tool execution") + + try: + # Use agent.tool.<tool_name>() which works for ALL tool types (including MCP tools) + # record_direct_tool_call=False prevents polluting message history during programmatic calls + tool_func = getattr(agent.tool, tool_name) + result = tool_func(record_direct_tool_call=False, **tool_input) + + if isinstance(result, dict): + if result.get("status") == "error": + error_content = result.get("content", [{"text": "Unknown error"}]) + error_text = error_content[0].get("text", "Unknown error") if error_content else "Unknown error" + raise RuntimeError(f"Tool error: {error_text}") + + content = result.get("content", []) + if content and isinstance(content, list): + text_parts = [item["text"] for item in content if isinstance(item, dict) and "text" in item] + if text_parts: + return "\n".join(text_parts) + return str(result) + + return result + + except AttributeError as e: + raise RuntimeError(f"Tool '{tool_name}' not found in registry") from e + except RuntimeError: + raise + except Exception as e: + logger.error(f"Error executing tool '{tool_name}': {e}") + raise RuntimeError(f"Failed to execute tool '{tool_name}': {e}") from e + + +def _create_async_tool_function(agent: Any, tool_name: str) -> Callable: + """Create an async function wrapper for a tool.""" + + async def tool_function(**kwargs: Any) -> Any: + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, lambda: _execute_tool(agent, tool_name, kwargs)) + + return tool_function + + +def _get_allowed_tools(agent: Any) -> set[str]: + """Get allowed tools from env var or default to all (except self).""" + all_tools = set(agent.tool_registry.registry.keys()) - {"programmatic_tool_caller"} + + env_allowed = os.environ.get("PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS", "").strip() + if env_allowed: + allowed_list = [t.strip() for t in env_allowed.split(",") if t.strip()] + return all_tools & set(allowed_list) + + 
return all_tools + + +def _build_namespace(available_tools: set[str], agent: Any) -> Dict[str, Any]: + """Build the execution namespace. + + Base namespace matches python_repl: ``{"__name__": "__main__"}``. + ``asyncio`` is always injected (required for async tool wrappers). + Additional stdlib modules can be injected via the + ``PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES`` environment variable + (comma-separated module names, e.g. ``json,re,math,collections``). + Tool functions are injected as async callables. + + Returns: + Namespace dict ready for ``exec()``. + """ + # Base namespace โ€” matches python_repl + namespace: Dict[str, Any] = { + "__name__": "__main__", + } + + # asyncio is always required (async wrapper) + namespace["asyncio"] = asyncio + + # Extra modules from env var + extra_modules = os.environ.get("PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES", "").strip() + if extra_modules: + for mod_name in extra_modules.split(","): + mod_name = mod_name.strip() + if not mod_name: + continue + try: + namespace[mod_name] = importlib.import_module(mod_name) + except ImportError: + logger.warning(f"Could not import extra module '{mod_name}', skipping") + + # Inject tools as async functions + for tool_name in available_tools: + namespace[tool_name] = _create_async_tool_function(agent, tool_name) + + return namespace + + +# ============================================================================= +# Main Tool +# ============================================================================= + + +@tool(context=True) +def programmatic_tool_caller( + code: str, + tool_context: Optional[ToolContext] = None, +) -> Dict[str, Any]: + """Execute Python code with access to agent tools as async functions. + + Tools are available as async functions - use `await` to call them. + Code runs in async context automatically, no boilerplate needed. 
+ + Example: + ```python + # Simple tool call + result = await calculator(expression="2 + 2") + print(result) + + # Loop with tool calls + for i in range(3): + r = await calculator(expression=f"{i} * 10") + print(r) + + # Parallel execution + results = await asyncio.gather( + calculator(expression="1+1"), + calculator(expression="2+2"), + ) + print(results) + ``` + + Environment Variables: + PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS: Comma-separated list of tools to expose + PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES: Comma-separated list of extra modules + to inject into the namespace (e.g., "json,re,math") + BYPASS_TOOL_CONSENT: Skip confirmation if "true" + + Args: + code: Python code to execute. Use `await tool_name(...)` to call tools. + tool_context: Injected automatically. + + Returns: + Dict with status and print() output only. + """ + console = console_util.create() + bypass_consent = os.environ.get("BYPASS_TOOL_CONSENT", "").lower() == "true" + + try: + if tool_context is None or tool_context.agent is None: + return { + "status": "error", + "content": [{"text": "No agent context available. This tool requires an agent."}], + } + + agent = tool_context.agent + + # Show code preview + console.print( + Panel( + Syntax(code, "python", theme="monokai", line_numbers=True), + title="[bold blue]Programmatic Tool Calling[/]", + border_style="blue", + ) + ) + + # Get allowed tools + available_tools = _get_allowed_tools(agent) + + tools_table = Table(show_header=True, header_style="bold cyan", box=box.SIMPLE) + tools_table.add_column("Available Tools", style="green") + for tool_name in sorted(available_tools): + tools_table.add_row(f"await {tool_name}(...)") + console.print(tools_table) + + # User confirmation + if not bypass_consent: + user_input = get_user_input("Execute this code? 
[y/*]") + if user_input.lower().strip() != "y": + cancel_reason = user_input if user_input.strip() != "n" else get_user_input("Reason:") + return { + "status": "error", + "content": [{"text": f"Cancelled. Reason: {cancel_reason}"}], + } + + # Build execution namespace (matches python_repl base + tools) + exec_namespace = _build_namespace(available_tools, agent) + + console.print("[green]Executing...[/]") + + # Execute code in async context + # Wrap user code in async function for await support + indented_code = textwrap.indent(code, " ") + wrapped_code = f"async def __user_code__():\n{indented_code}\n" + + # Capture output + stdout_capture = StringIO() + stderr_capture = StringIO() + old_stdout, old_stderr = sys.stdout, sys.stderr + + try: + sys.stdout = stdout_capture + sys.stderr = stderr_capture + + # Use compile() for better error tracebacks + compiled = compile(wrapped_code, "", "exec") + exec(compiled, exec_namespace) + asyncio.run(exec_namespace["__user_code__"]()) + + captured_output = stdout_capture.getvalue() + errors = stderr_capture.getvalue() + if errors: + captured_output += f"\n[stderr]\n{errors}" + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + console.print("[bold green]โœ“ Done[/]") + if captured_output.strip(): + console.print(Panel(captured_output, title="[bold green]Output[/]", border_style="green")) + + return { + "status": "success", + "content": [{"text": captured_output.strip() if captured_output.strip() else "(no output)"}], + } + + except SyntaxError: + error_msg = f"Syntax error:\n{traceback.format_exc()}" + console.print(Panel(error_msg, title="[bold red]Error[/]", border_style="red")) + return {"status": "error", "content": [{"text": error_msg}]} + + except Exception: + error_msg = f"Execution error:\n{traceback.format_exc()}" + console.print(Panel(error_msg, title="[bold red]Error[/]", border_style="red")) + return {"status": "error", "content": [{"text": error_msg}]}