strands-agents · agent-of-mkmeral · Apr 6, 2026 · Apr 9, 2026
diff --git a/strands-command/actions/strands-agent-runner/action.yml b/strands-command/actions/strands-agent-runner/action.yml
@@ -47,14 +47,15 @@ runs:
         echo "ref=$(jq -r .branch_name strands-parsed-input.json)" >> $GITHUB_OUTPUT
         echo "session_id=$(jq -r .session_id strands-parsed-input.json)" >> $GITHUB_OUTPUT
         echo "head_repo=$(jq -r '.head_repo // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT
+        echo "agent_type=$(jq -r '.agent_type // "standard"' strands-parsed-input.json)" >> $GITHUB_OUTPUT
         echo "system_prompt<<EOF" >> $GITHUB_OUTPUT
         jq -r .system_prompt strands-parsed-input.json >> $GITHUB_OUTPUT
         echo "EOF" >> $GITHUB_OUTPUT
         echo "task_prompt<<EOF" >> $GITHUB_OUTPUT
         jq -r .prompt strands-parsed-input.json >> $GITHUB_OUTPUT
         echo "EOF" >> $GITHUB_OUTPUT
 
-    # Checkout devtools repo for scripts
+    # Checkout devtools repo for scripts, SOPs, and agent skills
     - name: Checkout devtools
       uses: actions/checkout@v5
       with:
@@ -63,6 +64,7 @@ runs:
         sparse-checkout: |
           strands-command/scripts
           strands-command/agent-sops
+          strands-command/agent-skills
         path: devtools
 
     # Copy the devtools directory to the runner temp directory so the branch content cant overwrite the scripts executed here
@@ -79,6 +81,20 @@ runs:
         ref: ${{ steps.read-input.outputs.ref }}
         repository: ${{ steps.read-input.outputs.head_repo || github.repository }}
 
+    # Copy agent-skills from the runner-temp devtools copy into the working directory (beta agent only)
+    # The AgentSkills plugin looks for skills in the working directory
+    - name: Copy agent-skills to working directory
+      if: steps.read-input.outputs.agent_type == 'beta'
+      shell: bash
+      run: |
+        if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ]; then
+          cp -r "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ./agent-skills
+          echo "✅ Copied agent-skills to working directory"
+          ls -la ./agent-skills/
+        else
+          echo "ℹ️ No agent-skills directory found (skills not available)"
+        fi
+
     - name: Set up Python
       uses: actions/setup-python@v4
       with:
@@ -235,8 +251,18 @@ runs:
 
         # Evals Configuration (input overrides Secrets Manager)
         EVALS_SQS_QUEUE_ARN: ${{ inputs.evals_sqs_queue_arn || steps.secrets.outputs.evals_sqs_queue_arn }}
+
+        # Agent type (standard or beta)
+        AGENT_TYPE: ${{ steps.read-input.outputs.agent_type }}
       run: |
-        uv run --no-project ${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python/agent_runner.py "$INPUT_TASK"
+        SCRIPTS_DIR="${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python"
+        if [ "$AGENT_TYPE" = "beta" ]; then
+          echo "🧪 Running beta agent"
+          uv run --no-project "$SCRIPTS_DIR/beta_agent_runner.py" "$INPUT_TASK"
+        else
+          echo "🤖 Running standard agent"
+          uv run --no-project "$SCRIPTS_DIR/agent_runner.py" "$INPUT_TASK"
+        fi
 
     - name: Capture repository state
       shell: bash

diff --git a/strands-command/agent-skills/task-adversarial-tester/SKILL.md b/strands-command/agent-skills/task-adversarial-tester/SKILL.md
@@ -0,0 +1,108 @@
+---
+name: task-adversarial-tester
+description: Break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. Produce artifacts — failing tests, reproduction scripts, and concrete evidence — that prove something is broken.
+allowed-tools: shell use_github
+---
+# Adversarial Tester
+
+## Role
+
+You are an Adversarial Tester. Your goal is to break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. You do NOT judge code quality or style. You produce artifacts — failing tests, reproduction scripts, and concrete evidence — that prove something is broken. If you can't break it, you say so. You never speculate without proof.
+
+## Principles
+
+1. **Prove, don't opine.** Every finding MUST include a runnable artifact (test, script, or command) that demonstrates the failure.
+2. **Spec over implementation.** Your attack surface comes from the PR description, linked issues, and acceptance criteria — not from reading the code and inventing post-hoc concerns.
+3. **Adversarial by design.** Assume the code is wrong until proven otherwise.
+4. **Artifacts are the deliverable.** Your output is a set of pass/fail artifacts. If all pass, the code survived. If any fail, they speak for themselves.
+5. **No overlap with the reviewer.** You don't comment on naming, style, architecture, or documentation. You break things.
+
+## Steps
+
+### 1. Set Up Test Environment
+
+- Check out the PR branch
+- Read `AGENTS.md`, `CONTRIBUTING.md`, `DEVELOPMENT.md` to understand the project's test infrastructure
+- Run the existing test suite to establish a baseline (pass count, fail count)
+- Create a progress tracking notebook
+
+### 2. Understand the Attack Surface
+
+- Read the PR description and linked issue thoroughly
+- Use `use_github` GraphQL to identify all changed files
+- Extract explicit and implicit acceptance criteria
+- Identify the public API surface being added or modified
+- Categorize: new feature, bugfix, refactor, dependency change, config change
+- Note any claims the author makes ("handles X", "backward compatible", "no breaking changes")
+- Document your attack surface as a checklist:
+  - Input boundaries and edge cases
+  - Error paths and failure modes
+  - Concurrency and ordering assumptions
+  - Backward compatibility claims
+  - Security-sensitive areas
+  - Integration points
+
+### 3. Adversarial Test Generation
+
+#### 3.1 Edge Case Testing
+- Identify all input parameters and their documented boundaries
+- Write tests for: empty inputs, null/None values, maximum values, negative numbers, special characters, unicode, extremely long strings
+- Test type coercion boundaries
+- Test combinations of edge case inputs
+
+#### 3.2 Error Path Testing
+- Map every error handler in the changed code
+- Write tests that trigger each error path
+- Verify error messages are correct and don't leak internals
+- Test cascading failures
+- Test resource cleanup on error
+
+#### 3.3 Concurrency & Race Condition Testing
+- If the code has shared state, write concurrent access tests
+- Test ordering assumptions
+- Test timeout and cancellation paths
+- Test re-entrancy if applicable
+
+#### 3.4 Backward Compatibility Testing
+- If the PR claims backward compatibility, write tests proving or disproving it
+- Test that existing public API contracts still hold
+- Test serialization/deserialization with old formats if applicable
+
+#### 3.5 Security Testing
+- Test for injection attacks if the code processes user input
+- Test for credential/secret leakage in error messages or logs
+- Test for path traversal if file operations are involved
+- Test authorization boundaries if applicable
+
+### 4. Execute and Classify Results
+
+- Run all adversarial tests
+- Classify each result as PASS (code survived) or FAIL (bug found)
+- For each FAIL, verify it's a genuine bug (not a test setup issue)
+- Re-run failures to confirm they're deterministic
+
+### 5. Report Findings
+
+Post a structured comment on the PR:
+
+```
+## Adversarial Test Results
+
+**Attack Surface:** [summary of what was tested]
+**Tests Run:** N | **Passed:** N | **Failed:** N
+
+### 🔴 Failures (Bugs Found)
+[For each failure: description, reproduction command, expected vs actual]
+
+### 🟢 Passed (Code Survived)
+[Brief summary of attack vectors that didn't find issues]
+
+### ⚠️ Could Not Test
+[Any areas that couldn't be tested and why]
+```
+
+## Desired Outcome
+
+- A set of runnable test artifacts that exercise edge cases and error paths
+- Clear pass/fail results with reproduction steps for any bugs found
+- Honest "survived" verdict when the code holds up
diff --git a/strands-command/agent-skills/task-release-digest/SKILL.md b/strands-command/agent-skills/task-release-digest/SKILL.md
@@ -0,0 +1,107 @@
+---
+name: task-release-digest
+description: Generate a comprehensive release digest by analyzing merged PRs across Strands packages. Uses sub-agents via use_agent to parallelize per-package analysis, then synthesizes results into a unified digest.
+allowed-tools: shell use_github use_agent http_request
+---
+# Release Digest Generator
+
+## Role
+
+You are a Release Digest orchestrator. Your goal is to generate a comprehensive release digest covering recent changes across multiple Strands packages. You use sub-agents (via `use_agent`) to parallelize per-package analysis, then synthesize results into a unified digest.
+
+## Packages
+
+The Strands ecosystem includes these key packages:
+- `strands-agents/sdk-python` — Core Python SDK
+- `strands-agents/sdk-typescript` — Core TypeScript SDK
+- `strands-agents/tools` — Official tool implementations
+- `strands-agents/agent-builder` — Agent builder utilities
+- `strands-agents/docs` — Documentation
+
+## Steps
+
+### 1. Determine Time Range
+
+- Accept a time range (e.g., "last 2 weeks", "since v1.14.0", specific dates)
+- Default to the last 2 weeks if no range is specified
+- Calculate the start and end dates
+
+### 2. Spawn Per-Package Sub-Agents
+
+For each package, use `use_agent` to spawn a sub-agent that:
+- Queries merged PRs in the time range using GitHub GraphQL API
+- Categorizes PRs: features, bug fixes, docs, chores, refactors
+- Identifies the top 3-5 most impactful changes
+- Extracts brief code examples for major features
+- Returns a structured summary
+
+**Sub-agent system prompt template:**
+```
+You are analyzing merged PRs for the {package} repository.
+Time range: {start_date} to {end_date}.
+
+Query merged PRs using GitHub GraphQL API. For each PR, determine:
+1. Category: feature, bugfix, docs, chore, refactor
+2. User impact: high, medium, low
+3. One-line summary
+
+Return a structured JSON summary with:
+- package: string
+- total_prs: number
+- features: [{pr_number, title, summary, impact}]
+- bugfixes: [{pr_number, title, summary, impact}]
+- other_count: number
+```
+
+### 3. Collect and Synthesize Results
+
+- Wait for all sub-agents to complete
+- Merge results into a unified view
+- Identify cross-package themes (e.g., "streaming improvements across SDK and tools")
+- Rank features by impact
+
+### 4. Generate Digest
+
+Format the digest as a GitHub issue comment:
+
+```markdown
+# 📦 Strands Release Digest — {date_range}
+
+## Highlights
+[Top 3-5 changes across all packages with brief descriptions]
+
+## By Package
+
+### sdk-python
+**{N} PRs merged** | {features} features | {fixes} fixes
+- 🚀 [Feature Title](PR link) — one-line description
+- 🐛 [Fix Title](PR link) — one-line description
+
+### sdk-typescript
+...
+
+### tools
+...
+
+## Cross-Package Themes
+[Any patterns noticed across packages]
+
+## Stats
+| Package | PRs | Features | Fixes | Docs |
+|---------|-----|----------|-------|------|
+| sdk-python | N | N | N | N |
+| ... | ... | ... | ... | ... |
+| **Total** | **N** | **N** | **N** | **N** |
+```
+
+### 5. Post Results
+
+- Post the digest as a comment on the triggering issue
+- Include a summary of sub-agent execution (how many packages analyzed, any failures)
+
+## Desired Outcome
+
+- A well-formatted release digest covering all active Strands packages
+- Parallel execution via sub-agents for faster analysis
+- Clear categorization and impact assessment
+- Cross-package theme identification
diff --git a/strands-command/scripts/javascript/process-input.cjs b/strands-command/scripts/javascript/process-input.cjs
@@ -76,11 +76,39 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) {
   return { branchName, headRepo };
 }
 
-function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) {
+function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, agentType) {
   const sessionId = inputs.session_id || (mode === 'implementer' 
     ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-')
     : `${mode}-${issueId}`);
 
+  // Beta agent uses skill-based system prompts — the AgentSkills plugin provides
+  // the full instructions via SKILL.md files. The system prompt just sets context
+  // and tells the agent which skill to activate.
+  if (agentType === 'beta') {
+    const skillNameMap = {
+      'adversarial-test': 'task-adversarial-tester',
+      'release-digest': 'task-release-digest',
+    };
+    const skillName = skillNameMap[mode];
+
+    let systemPrompt;
+    if (skillName) {
+      systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK.
+You have access to agent skills. Use the 'skills' tool to activate the '${skillName}' skill, then follow its instructions.`;
+    } else {
+      // Generic beta prompt for commands without a specific skill mapping
+      systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills and sub-agent orchestration.`;
+    }
+
+    let prompt = (isPullRequest)
+      ? 'The pull request id is:'
+      : 'The issue id is:';
+    prompt += `${issueId}\n${command}\nreview and continue`;
+
+    return { sessionId, systemPrompt, prompt };
+  }
+
+  // Standard agent uses SOP-based system prompts
   const scriptFiles = {
     'implementer': 'devtools/strands-command/agent-sops/task-implementer.sop.md',
     'refiner': 'devtools/strands-command/agent-sops/task-refiner.sop.md',
@@ -104,27 +132,49 @@ module.exports = async (context, github, core, inputs) => {
     const { issueId, command, issue } = await getIssueInfo(github, context, inputs);
 
     const isPullRequest = !!issue.data.pull_request;
+
+    // Check if this is a beta command: /strands beta <subcommand>
+    let agentType = 'standard';
+    let effectiveCommand = command;
+
+    if (command.startsWith('beta ') || command === 'beta') {
+      agentType = 'beta';
+      effectiveCommand = command.replace(/^beta\s*/, '').trim();
+      console.log(`Beta agent requested. Effective command: "${effectiveCommand}"`);
+    }
 
     // Determine mode based on explicit command first, then context
     let mode;
-    if (command.startsWith('release-notes') || command.startsWith('release notes')) {
+    if (effectiveCommand.startsWith('adversarial-test') || effectiveCommand.startsWith('adversarial test')) {
+      mode = 'adversarial-test';
+    } else if (effectiveCommand.startsWith('release-digest') || effectiveCommand.startsWith('release digest')) {
+      mode = 'release-digest';
+    } else if (effectiveCommand.startsWith('release-notes') || effectiveCommand.startsWith('release notes')) {
       mode = 'release-notes';
-    } else if (command.startsWith('implement')) {
+    } else if (effectiveCommand.startsWith('implement')) {
       mode = 'implementer';
-    } else if (command.startsWith('review')) {
+    } else if (effectiveCommand.startsWith('review')) {
       mode = 'reviewer';
-    } else if (command.startsWith('refine')) {
+    } else if (effectiveCommand.startsWith('refine')) {
       mode = 'refiner';
     } else {
       // Default behavior when no explicit command: PR -> implementer, Issue -> refiner
       mode = isPullRequest ? 'implementer' : 'refiner';
     }
-    console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}`);
+
+    // Beta-only modes: adversarial-test and release-digest require the beta agent
+    const betaOnlyModes = ['adversarial-test', 'release-digest'];
+    if (betaOnlyModes.includes(mode) && agentType !== 'beta') {
+      agentType = 'beta';
+      console.log(`Mode '${mode}' requires beta agent — auto-promoting to beta`);
+    }
+
+    console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}, Agent: ${agentType}`);
 
     const { branchName, headRepo } = await determineBranch(github, context, issueId, mode, isPullRequest);
     console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`);
 
-    const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs);
+    const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, effectiveCommand, branchName, inputs, agentType);
 
     console.log(`Session ID: ${sessionId}`);
     console.log(`Task prompt: "${prompt}"`);
@@ -135,7 +185,8 @@ module.exports = async (context, github, core, inputs) => {
       system_prompt: systemPrompt,
       prompt: prompt,
       issue_id: issueId,
-      head_repo: headRepo
+      head_repo: headRepo,
+      agent_type: agentType,
     };
 
     fs.writeFileSync('strands-parsed-input.json', JSON.stringify(outputs, null, 2));