diff --git a/strands-command/actions/strands-agent-runner/action.yml b/strands-command/actions/strands-agent-runner/action.yml index 057fb63..d83fd56 100644 --- a/strands-command/actions/strands-agent-runner/action.yml +++ b/strands-command/actions/strands-agent-runner/action.yml @@ -47,6 +47,8 @@ runs: echo "ref=$(jq -r .branch_name strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "session_id=$(jq -r .session_id strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "head_repo=$(jq -r '.head_repo // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT + echo "agent_mode=$(jq -r '.agent_mode // ""' strands-parsed-input.json)" >> $GITHUB_OUTPUT + echo "agent_type=$(jq -r '.agent_type // "standard"' strands-parsed-input.json)" >> $GITHUB_OUTPUT echo "system_prompt<> $GITHUB_OUTPUT jq -r .system_prompt strands-parsed-input.json >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT @@ -54,7 +56,7 @@ runs: jq -r .prompt strands-parsed-input.json >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT - # Checkout devtools repo for scripts + # Checkout devtools repo for scripts, SOPs, and agent skills - name: Checkout devtools uses: actions/checkout@v5 with: @@ -63,6 +65,7 @@ runs: sparse-checkout: | strands-command/scripts strands-command/agent-sops + strands-command/agent-skills path: devtools # Copy the devtools directory to the runner temp directory so the branch content cant overwrite the scripts executed here @@ -79,6 +82,24 @@ runs: ref: ${{ steps.read-input.outputs.ref }} repository: ${{ steps.read-input.outputs.head_repo || github.repository }} + # Copy agent-skills to working directory (beta agent only) + # The AgentSkills plugin looks for skills in the working directory + - name: Copy agent-skills to working directory + if: steps.read-input.outputs.agent_type == 'beta' + shell: bash + run: | + if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills" ]; then + cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-skills ./agent-skills + echo "โœ… 
Copied agent-skills to working directory" + if [ -d "${{ runner.temp }}/strands-agent-runner/strands-command/agent-sops" ]; then + cp -r ${{ runner.temp }}/strands-agent-runner/strands-command/agent-sops ./agent-sops + echo "โœ… Copied agent-sops to working directory (for runtime skill conversion)" + fi + ls -la ./agent-skills/ + else + echo "โ„น๏ธ No agent-skills directory found (skills not available)" + fi + - name: Set up Python uses: actions/setup-python@v4 with: @@ -235,8 +256,19 @@ runs: # Evals Configuration (input overrides Secrets Manager) EVALS_SQS_QUEUE_ARN: ${{ inputs.evals_sqs_queue_arn || steps.secrets.outputs.evals_sqs_queue_arn }} + + # Agent type (standard or beta) + AGENT_TYPE: ${{ steps.read-input.outputs.agent_type }} + AGENT_MODE: ${{ steps.read-input.outputs.agent_mode }} run: | - uv run --no-project ${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python/agent_runner.py "$INPUT_TASK" + SCRIPTS_DIR="${{ runner.temp }}/strands-agent-runner/strands-command/scripts/python" + if [ "$AGENT_TYPE" = "beta" ]; then + echo "๐Ÿงช Running beta agent" + uv run --no-project "$SCRIPTS_DIR/beta_agent_runner.py" "$INPUT_TASK" + else + echo "๐Ÿค– Running standard agent" + uv run --no-project "$SCRIPTS_DIR/agent_runner.py" "$INPUT_TASK" + fi - name: Capture repository state shell: bash diff --git a/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md b/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md new file mode 100644 index 0000000..4c86db2 --- /dev/null +++ b/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md @@ -0,0 +1,60 @@ +# Strands Agent (Beta) โ€” /strands Command + +**Identity**: AI agent for the Strands Agents project, invoked via `/strands beta` in GitHub issues and PRs. +**Runtime**: GitHub Actions, triggered by `/strands beta ` comments. 
+ +--- + +## Guidelines + +Follow the [Strands Agent Guidelines](https://github.com/strands-agents/docs/blob/main/team/AGENT_GUIDELINES.md): + +- **Add value or stay silent.** If you don't have something concrete to contribute, don't act. +- **Keep it short.** Lead with what matters, then stop. Use `
` blocks for long analysis. +- **Approvals need reasoning.** Justify decisions โ€” especially rejections. +- **Prove, don't opine.** Provide evidence โ€” tests, scripts, code โ€” not speculation. + +--- + +## Capabilities + +You are an extended agent with access to: +- **Agent Skills** โ€” Task-specific SOPs loaded on-demand via the `skills` tool +- **Sub-Agents** โ€” Delegate subtasks to specialized agents via `use_agent` +- **Programmatic Tool Calling** โ€” Execute Python code that calls tools as async functions + +### Skills + +Use the `skills` tool to activate task-specific instructions. Available skills are shown in your context. When a skill is activated, follow its instructions precisely. + +### Sub-Agents + +Use `use_agent` to spawn sub-agents for parallelizable work (e.g., per-package analysis, independent reviews). Each sub-agent gets its own context and tools. + +--- + +## Behavior + +1. **Understand the task** โ€” Read the issue/PR, comments, and linked references thoroughly before acting. +2. **Activate the right skill** โ€” If your task maps to a skill, activate it first. +3. **Work incrementally** โ€” Commit progress, post updates, iterate on feedback. +4. **Be honest about limitations** โ€” If you can't do something, say so. + +--- + +## Output Format + +- Use GitHub-flavored markdown +- Structure with headers, tables, and code blocks +- Keep top-level summaries under 200 words +- Use `
` blocks for verbose content + +--- + +## Anti-Patterns (NEVER) + +- Don't post walls of text without structure +- Don't approve without review +- Don't speculate without evidence +- Don't repeat what the user already said +- Don't create noise โ€” every comment should move things forward diff --git a/strands-command/agent-skills/task-adversarial-tester/SKILL.md b/strands-command/agent-skills/task-adversarial-tester/SKILL.md new file mode 100644 index 0000000..c4f8fed --- /dev/null +++ b/strands-command/agent-skills/task-adversarial-tester/SKILL.md @@ -0,0 +1,108 @@ +--- +name: task-adversarial-tester +description: Break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. Produce artifacts โ€” failing tests, reproduction scripts, and concrete evidence โ€” that prove something is broken. +allowed-tools: shell use_github +--- +# Adversarial Tester + +## Role + +You are an Adversarial Tester. Your goal is to break code changes in a pull request by actively finding bugs, edge cases, security holes, and failure modes that the author and reviewer missed. You do NOT judge code quality or style. You produce artifacts โ€” failing tests, reproduction scripts, and concrete evidence โ€” that prove something is broken. If you can't break it, you say so. You never speculate without proof. + +## Principles + +1. **Prove, don't opine.** Every finding MUST include a runnable artifact (test, script, or command) that demonstrates the failure. +2. **Spec over implementation.** Your attack surface comes from the PR description, linked issues, and acceptance criteria โ€” not from reading the code and inventing post-hoc concerns. +3. **Adversarial by design.** Assume the code is wrong until proven otherwise. +4. **Artifacts are the deliverable.** Your output is a set of pass/fail artifacts. If all pass, the code survived. If any fail, they speak for themselves. +5. 
**No overlap with the reviewer.** You don't comment on naming, style, architecture, or documentation. You break things. + +## Steps + +### 1. Setup Test Environment + +- Checkout the PR branch +- Read `AGENTS.md`, `CONTRIBUTING.md`, `DEVELOPMENT.md` to understand the project's test infrastructure +- Run the existing test suite to establish a baseline (pass count, fail count) +- Create a progress tracking notebook + +### 2. Understand the Attack Surface + +- Read the PR description and linked issue thoroughly +- Use `use_github` GraphQL to identify all changed files +- Extract explicit and implicit acceptance criteria +- Identify the public API surface being added or modified +- Categorize: new feature, bugfix, refactor, dependency change, config change +- Note any claims the author makes ("handles X", "backward compatible", "no breaking changes") +- Document your attack surface as a checklist: + - Input boundaries and edge cases + - Error paths and failure modes + - Concurrency and ordering assumptions + - Backward compatibility claims + - Security-sensitive areas + - Integration points + +### 3. 
Adversarial Test Generation + +#### 3.1 Edge Case Testing +- Identify all input parameters and their documented boundaries +- Write tests for: empty inputs, null/None values, maximum values, negative numbers, special characters, unicode, extremely long strings +- Test type coercion boundaries +- Test combinations of edge case inputs + +#### 3.2 Error Path Testing +- Map every error handler in the changed code +- Write tests that trigger each error path +- Verify error messages are correct and don't leak internals +- Test cascading failures +- Test resource cleanup on error + +#### 3.3 Concurrency & Race Condition Testing +- If the code has shared state, write concurrent access tests +- Test ordering assumptions +- Test timeout and cancellation paths +- Test re-entrancy if applicable + +#### 3.4 Backward Compatibility Testing +- If the PR claims backward compatibility, write tests proving or disproving it +- Test that existing public API contracts still hold +- Test serialization/deserialization with old formats if applicable + +#### 3.5 Security Testing +- Test for injection attacks if the code processes user input +- Test for credential/secret leakage in error messages or logs +- Test for path traversal if file operations are involved +- Test authorization boundaries if applicable + +### 4. Execute and Classify Results + +- Run all adversarial tests +- Classify each result as PASS (code survived) or FAIL (bug found) +- For each FAIL, verify it's a genuine bug (not a test setup issue) +- Re-run failures to confirm they're deterministic + +### 5. 
Report Findings + +Post a structured comment on the PR: + +``` +## Adversarial Test Results + +**Attack Surface:** [summary of what was tested] +**Tests Run:** N | **Passed:** N | **Failed:** N + +### ๐Ÿ”ด Failures (Bugs Found) +[For each failure: description, reproduction command, expected vs actual] + +### ๐ŸŸข Passed (Code Survived) +[Brief summary of attack vectors that didn't find issues] + +### โš ๏ธ Could Not Test +[Any areas that couldn't be tested and why] +``` + +## Desired Outcome + +- A set of runnable test artifacts that exercise edge cases and error paths +- Clear pass/fail results with reproduction steps for any bugs found +- Honest "survived" verdict when the code holds up diff --git a/strands-command/agent-skills/task-meta-reasoner/SKILL.md b/strands-command/agent-skills/task-meta-reasoner/SKILL.md new file mode 100644 index 0000000..b363700 --- /dev/null +++ b/strands-command/agent-skills/task-meta-reasoner/SKILL.md @@ -0,0 +1,79 @@ +--- +name: task-meta-reasoner +description: Meta-reasoning gate that evaluates whether to accept, defer, redirect, reject, or escalate an issue, PR, or task before any work begins. Questions the premise at a high level โ€” assessing layer ownership, existing solutions, architectural alignment, scope, and roadmap fit. Always proposes alternatives, even for seemingly obvious requests. Use this skill as the first checkpoint before task-refiner, task-implementer, task-reviewer, or task-adversarial-tester to prevent wasted effort on misaligned, duplicate, or out-of-scope work. +allowed-tools: shell use_github +--- +# Meta-Reasoner + +## Role + +You are a Meta-Reasoner. Your goal is to evaluate whether a given issue, pull request, or task should be accepted, deferred, or rejected โ€” before any implementation, review, or refinement work begins. You question the request at a high level: Do we need to do this? Is it our concern? Is this the right approach? Is this a duplicate? Does a simpler solution already exist? 
+ +## Principles + +1. **Question the premise.** Don't assume the request is valid โ€” interrogate it. +2. **Check for duplicates.** Search existing issues, PRs, and discussions before accepting. +3. **Assess scope.** Is this the right layer? The right repo? The right team? +4. **Propose alternatives.** Even for good requests, suggest simpler paths. +5. **Be decisive.** Your output is a clear verdict with reasoning. + +## Steps + +### 1. Understand the Request + +- Read the issue/PR description, title, and any linked references +- Identify the core ask โ€” what does the requester actually want? +- Note any assumptions the requester is making + +### 2. Evaluate Fit + +- **Layer ownership**: Is this our concern or should it be upstream/downstream? +- **Existing solutions**: Does something already solve this? Search issues, docs, and code. +- **Architectural alignment**: Does this fit the project's direction? +- **Scope**: Is this too big? Too small? Should it be split or combined? +- **Roadmap fit**: Is this on the roadmap? If not, should it be? + +### 3. Search for Duplicates + +- Search open and closed issues for similar requests +- Check recent PRs for related work +- Look for existing documentation that addresses the concern + +### 4. Propose Alternatives + +Even if you plan to accept, always propose at least one alternative: +- A simpler approach +- An existing solution that might work +- A different scope (smaller or larger) +- Deferring to a better time + +### 5. Render Verdict + +Post a structured comment: + +``` +## Meta-Reasoning Assessment + +**Verdict:** ACCEPT / DEFER / REDIRECT / REJECT / ESCALATE + +**Core Ask:** [one sentence] + +**Assessment:** +- Layer ownership: โœ…/โŒ [explanation] +- Existing solutions: โœ…/โŒ [explanation] +- Architectural fit: โœ…/โŒ [explanation] +- Scope: โœ…/โŒ [explanation] +- Duplicates: โœ…/โŒ [explanation] + +**Alternatives Considered:** +1. [alternative 1] +2. 
[alternative 2] + +**Recommendation:** [what to do next] +``` + +## Desired Outcome + +- A clear accept/defer/reject decision with reasoning +- No wasted effort on misaligned work +- Alternatives surfaced even for accepted tasks diff --git a/strands-command/agent-skills/task-release-digest/SKILL.md b/strands-command/agent-skills/task-release-digest/SKILL.md new file mode 100644 index 0000000..5b13c5c --- /dev/null +++ b/strands-command/agent-skills/task-release-digest/SKILL.md @@ -0,0 +1,107 @@ +--- +name: task-release-digest +description: Generate a comprehensive release digest by analyzing merged PRs across Strands packages. Uses sub-agents via use_agent to parallelize per-package analysis, then synthesizes results into a unified digest. +allowed-tools: shell use_github use_agent http_request +--- +# Release Digest Generator + +## Role + +You are a Release Digest orchestrator. Your goal is to generate a comprehensive release digest covering recent changes across multiple Strands packages. You use sub-agents (via `use_agent`) to parallelize per-package analysis, then synthesize results into a unified digest. + +## Packages + +The Strands ecosystem includes these key packages: +- `strands-agents/sdk-python` โ€” Core Python SDK +- `strands-agents/sdk-typescript` โ€” Core TypeScript SDK +- `strands-agents/tools` โ€” Official tool implementations +- `strands-agents/agent-builder` โ€” Agent builder utilities +- `strands-agents/docs` โ€” Documentation + +## Steps + +### 1. Determine Time Range + +- Accept a time range (e.g., "last 2 weeks", "since v1.14.0", specific dates) +- Default to the last 2 weeks if no range is specified +- Calculate the start and end dates + +### 2. 
Spawn Per-Package Sub-Agents + +For each package, use `use_agent` to spawn a sub-agent that: +- Queries merged PRs in the time range using GitHub GraphQL API +- Categorizes PRs: features, bug fixes, docs, chores +- Identifies the top 3-5 most impactful changes +- Extracts brief code examples for major features +- Returns a structured summary + +**Sub-agent system prompt template:** +``` +You are analyzing merged PRs for the {package} repository. +Time range: {start_date} to {end_date}. + +Query merged PRs using GitHub GraphQL API. For each PR, determine: +1. Category: feature, bugfix, docs, chore, refactor +2. User impact: high, medium, low +3. One-line summary + +Return a structured JSON summary with: +- package: string +- total_prs: number +- features: [{pr_number, title, summary, impact}] +- bugfixes: [{pr_number, title, summary, impact}] +- other_count: number +``` + +### 3. Collect and Synthesize Results + +- Wait for all sub-agents to complete +- Merge results into a unified view +- Identify cross-package themes (e.g., "streaming improvements across SDK and tools") +- Rank features by impact + +### 4. Generate Digest + +Format the digest as a GitHub issue comment: + +```markdown +# ๐Ÿ“ฆ Strands Release Digest โ€” {date_range} + +## Highlights +[Top 3-5 changes across all packages with brief descriptions] + +## By Package + +### sdk-python +**{N} PRs merged** | {features} features | {fixes} fixes +- ๐Ÿš€ [Feature Title](PR link) โ€” one-line description +- ๐Ÿ› [Fix Title](PR link) โ€” one-line description + +### sdk-typescript +... + +### tools +... + +## Cross-Package Themes +[Any patterns noticed across packages] + +## Stats +| Package | PRs | Features | Fixes | Docs | +|---------|-----|----------|-------|------| +| sdk-python | N | N | N | N | +| ... | ... | ... | ... | ... | +| **Total** | **N** | **N** | **N** | **N** | +``` + +### 5. 
Post Results + +- Post the digest as a comment on the triggering issue +- Include a summary of sub-agent execution (how many packages analyzed, any failures) + +## Desired Outcome + +- A well-formatted release digest covering all active Strands packages +- Parallel execution via sub-agents for faster analysis +- Clear categorization and impact assessment +- Cross-package theme identification diff --git a/strands-command/scripts/javascript/process-input.cjs b/strands-command/scripts/javascript/process-input.cjs index 82de3b4..140dd8c 100644 --- a/strands-command/scripts/javascript/process-input.cjs +++ b/strands-command/scripts/javascript/process-input.cjs @@ -76,11 +76,48 @@ async function determineBranch(github, context, issueId, mode, isPullRequest) { return { branchName, headRepo }; } -function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) { +function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs, agentType) { const sessionId = inputs.session_id || (mode === 'implementer' ? `${mode}-${branchName}`.replace(/[\/\\]/g, '-') : `${mode}-${issueId}`); + // Beta agent uses BETA_SYSTEM_PROMPT.md (loaded by the runner) + skill activation. + // The system prompt here is just a thin context layer โ€” the real instructions come + // from the BETA_SYSTEM_PROMPT.md file and the activated skill. 
+ if (agentType === 'beta') { + // Read BETA_SYSTEM_PROMPT.md if available โ€” provides the base system prompt + let systemPrompt = ''; + const promptPaths = [ + 'devtools/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md', + 'agent-skills/BETA_SYSTEM_PROMPT.md', + ]; + + for (const promptPath of promptPaths) { + try { + if (fs.existsSync(promptPath)) { + systemPrompt = fs.readFileSync(promptPath, 'utf8'); + console.log(`Loaded beta system prompt from ${promptPath}`); + break; + } + } catch (e) { + console.log(`Could not read ${promptPath}: ${e.message}`); + } + } + + // Fallback if file not found + if (!systemPrompt) { + systemPrompt = `You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills, sub-agent orchestration, and programmatic tool calling.`; + } + + let prompt = (isPullRequest) + ? 'The pull request id is:' + : 'The issue id is:'; + prompt += `${issueId}\n${command}\nreview and continue`; + + return { sessionId, systemPrompt, prompt, mode }; + } + + // Standard agent uses SOP-based system prompts const scriptFiles = { 'implementer': 'devtools/strands-command/agent-sops/task-implementer.sop.md', 'refiner': 'devtools/strands-command/agent-sops/task-refiner.sop.md', @@ -96,7 +133,7 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) : 'The issue id is:'; prompt += `${issueId}\n${command}\nreview and continue`; - return { sessionId, systemPrompt, prompt }; + return { sessionId, systemPrompt, prompt, mode }; } module.exports = async (context, github, core, inputs) => { @@ -104,27 +141,51 @@ module.exports = async (context, github, core, inputs) => { const { issueId, command, issue } = await getIssueInfo(github, context, inputs); const isPullRequest = !!issue.data.pull_request; + + // Check if this is a beta command: /strands beta + let agentType = 'standard'; + let effectiveCommand = command; + + if (command.startsWith('beta ') || command === 'beta') { + agentType = 
'beta'; + effectiveCommand = command.replace(/^beta\s*/, '').trim(); + console.log(`Beta agent requested. Effective command: "${effectiveCommand}"`); + } // Determine mode based on explicit command first, then context let mode; - if (command.startsWith('release-notes') || command.startsWith('release notes')) { + if (effectiveCommand.startsWith('adversarial-test') || effectiveCommand.startsWith('adversarial test')) { + mode = 'adversarial-test'; + } else if (effectiveCommand.startsWith('release-digest') || effectiveCommand.startsWith('release digest')) { + mode = 'release-digest'; + } else if (effectiveCommand.startsWith('meta-reason') || effectiveCommand.startsWith('meta reason')) { + mode = 'meta-reason'; + } else if (effectiveCommand.startsWith('release-notes') || effectiveCommand.startsWith('release notes')) { mode = 'release-notes'; - } else if (command.startsWith('implement')) { + } else if (effectiveCommand.startsWith('implement')) { mode = 'implementer'; - } else if (command.startsWith('review')) { + } else if (effectiveCommand.startsWith('review')) { mode = 'reviewer'; - } else if (command.startsWith('refine')) { + } else if (effectiveCommand.startsWith('refine')) { mode = 'refiner'; } else { - // Default behavior when no explicit command: PR -> implementer, Issue -> refiner - mode = isPullRequest ? 'implementer' : 'refiner'; + // Default behavior when no explicit command: PR -> reviewer, Issue -> refiner + mode = isPullRequest ? 
'reviewer' : 'refiner'; } - console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}`); + + // Beta-only modes require the beta agent + const betaOnlyModes = ['adversarial-test', 'release-digest', 'meta-reason']; + if (betaOnlyModes.includes(mode) && agentType !== 'beta') { + agentType = 'beta'; + console.log(`Mode '${mode}' requires beta agent โ€” auto-promoting to beta`); + } + + console.log(`Is PR: ${isPullRequest}, Command: "${command}", Mode: ${mode}, Agent: ${agentType}`); const { branchName, headRepo } = await determineBranch(github, context, issueId, mode, isPullRequest); console.log(`Building prompts - mode: ${mode}, issue: ${issueId}, is PR: ${isPullRequest}`); - const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs); + const { sessionId, systemPrompt, prompt } = buildPrompts(mode, issueId, isPullRequest, effectiveCommand, branchName, inputs, agentType); console.log(`Session ID: ${sessionId}`); console.log(`Task prompt: "${prompt}"`); @@ -135,7 +196,9 @@ module.exports = async (context, github, core, inputs) => { system_prompt: systemPrompt, prompt: prompt, issue_id: issueId, - head_repo: headRepo + head_repo: headRepo, + agent_type: agentType, + agent_mode: mode, }; fs.writeFileSync('strands-parsed-input.json', JSON.stringify(outputs, null, 2)); diff --git a/strands-command/scripts/python/beta_agent_runner.py b/strands-command/scripts/python/beta_agent_runner.py new file mode 100644 index 0000000..4523468 --- /dev/null +++ b/strands-command/scripts/python/beta_agent_runner.py @@ -0,0 +1,424 @@ +#!/usr/bin/env python3 +""" +Strands Beta Agent Runner + +A separate agent runner with extended capabilities (skills, sub-agents, +programmatic tool calling, etc.). Reuses shared infrastructure from +agent_runner.py โ€” same pipeline, different agent. 
+ +Usage: /strands beta +""" + +import json +import os +import re +import shutil +import sys +from pathlib import Path +from typing import Any + +from strands import Agent +from strands.session import S3SessionManager +from strands.models import BedrockModel, CacheConfig +from botocore.config import Config + +from strands_tools import http_request, shell, use_agent + +# Reuse shared infrastructure from the standard runner +from agent_runner import ( + _get_all_tools, + _get_trace_attributes, + _send_eval_trigger, + _setup_langfuse_telemetry, + STRANDS_BUDGET_TOKENS, + STRANDS_MAX_TOKENS, + STRANDS_MODEL_ID, + STRANDS_REGION, +) + + +# --------------------------------------------------------------------------- +# System prompt +# --------------------------------------------------------------------------- + +def _load_system_prompt() -> str: + """Load the beta agent system prompt. + + Priority: + 1. INPUT_SYSTEM_PROMPT env var (set by process-input.cjs) + 2. BETA_SYSTEM_PROMPT.md file in agent-skills directory + 3. Minimal fallback + """ + env_prompt = os.getenv("INPUT_SYSTEM_PROMPT", "").strip() + if env_prompt: + return env_prompt + + # Try loading from file + possible_paths = [ + Path("agent-skills/BETA_SYSTEM_PROMPT.md"), + Path("devtools/strands-command/agent-skills/BETA_SYSTEM_PROMPT.md"), + ] + + for path in possible_paths: + try: + if path.exists(): + with open(path, "r", encoding="utf-8") as f: + content = f.read() + print(f"โœ… System prompt loaded from {path}") + return content + except Exception as e: + print(f"โš ๏ธ Failed to read {path}: {e}") + + return "You are an autonomous GitHub agent powered by Strands Agents SDK with extended capabilities including agent skills, sub-agent orchestration, and programmatic tool calling." 
+ + +# --------------------------------------------------------------------------- +# Programmatic Tool Caller (local copy from strands-agents/tools#387) +# --------------------------------------------------------------------------- + +def _load_programmatic_tool_caller(): + """Try to load programmatic_tool_caller from strands_tools or local copy. + + Priority: + 1. strands_tools.programmatic_tool_caller (when merged into tools package) + 2. Local copy at scripts/python/programmatic_tool_caller.py + """ + try: + from strands_tools import programmatic_tool_caller + print("โœ… programmatic_tool_caller loaded from strands_tools") + return programmatic_tool_caller + except ImportError: + pass + + # Try local copy + try: + scripts_dir = Path(__file__).parent + local_ptc = scripts_dir / "programmatic_tool_caller.py" + if local_ptc.exists(): + import importlib.util + spec = importlib.util.spec_from_file_location("programmatic_tool_caller", local_ptc) + if spec and spec.loader: + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + print("โœ… programmatic_tool_caller loaded from local copy") + return mod.programmatic_tool_caller + except Exception as e: + print(f"โš ๏ธ Failed to load local programmatic_tool_caller: {e}") + + print("โ„น๏ธ programmatic_tool_caller not available") + return None + + +# --------------------------------------------------------------------------- +# Tools +# --------------------------------------------------------------------------- + +def _get_beta_tools() -> list[Any]: + """Get tools for the beta agent. + + Starts with all standard tools, then adds beta-only tools. + This ensures the beta agent is a strict superset of the standard agent. 
+ """ + tools = _get_all_tools() + + # Add beta-only tools + tool_names = {getattr(t, "__name__", str(t)) for t in tools} + + if "use_agent" not in tool_names: + tools.append(use_agent) + + # Add programmatic tool caller + ptc = _load_programmatic_tool_caller() + if ptc is not None: + tools.append(ptc) + + return tools + + +# --------------------------------------------------------------------------- +# Skills +# --------------------------------------------------------------------------- + +# Map from command mode โ†’ skill name +SKILL_MAP = { + "adversarial-test": "task-adversarial-tester", + "release-digest": "task-release-digest", + "meta-reason": "task-meta-reasoner", + "reviewer": "task-reviewer", + "review": "task-reviewer", + "implementer": "task-implementer", + "implement": "task-implementer", + "refiner": "task-refiner", + "refine": "task-refiner", + "release-notes": "task-release-notes", +} + + +def _convert_sops_to_skills(skills_dir: Path, sops_dir: Path) -> int: + """Convert existing SOP files to SKILL.md format at runtime. + + Reads .sop.md files from the SOPs directory, adds YAML frontmatter, + and writes them as SKILL.md files in the skills directory. + No source files are modified โ€” conversion is one-way into the skills dir. + + Returns the number of SOPs converted. + """ + if not sops_dir.exists(): + return 0 + + # SOP name โ†’ metadata for frontmatter + sop_metadata = { + "task-implementer": { + "description": "Implement tasks defined in GitHub issues using test-driven development. Write code following existing patterns, create comprehensive tests, generate documentation, and create pull requests for review.", + "allowed_tools": "shell use_github", + }, + "task-refiner": { + "description": "Review and refine feature requests in GitHub issues. 
Identify ambiguities, post clarifying questions, gather missing information, and prepare issues for implementation.", + "allowed_tools": "shell use_github", + }, + "task-release-notes": { + "description": "Generate high-quality release notes for software releases. Analyze merged PRs between git references, identify major features and bug fixes, extract code examples, and format into well-structured markdown.", + "allowed_tools": "shell use_github", + }, + "task-reviewer": { + "description": "Review code changes in pull requests. Analyze diffs, understand context, and add targeted review comments to improve code quality, maintainability, and adherence to project standards.", + "allowed_tools": "shell use_github", + }, + } + + converted = 0 + for sop_file in sops_dir.glob("*.sop.md"): + # Extract skill name: task-implementer.sop.md โ†’ task-implementer + skill_name = sop_file.stem.replace(".sop", "") + skill_dir = skills_dir / skill_name + + # Skip if skill already exists (don't overwrite dedicated skills) + if (skill_dir / "SKILL.md").exists(): + continue + + metadata = sop_metadata.get(skill_name, {}) + description = metadata.get("description", f"Skill converted from {sop_file.name}") + allowed_tools = metadata.get("allowed_tools", "shell use_github") + + # Read SOP content + try: + sop_content = sop_file.read_text(encoding="utf-8") + except Exception as e: + print(f"โš ๏ธ Failed to read {sop_file}: {e}") + continue + + # Build SKILL.md with frontmatter + skill_content = f"""--- +name: {skill_name} +description: {description} +allowed-tools: {allowed_tools} +--- +{sop_content}""" + + # Write to skills directory + skill_dir.mkdir(parents=True, exist_ok=True) + (skill_dir / "SKILL.md").write_text(skill_content, encoding="utf-8") + converted += 1 + print(f" โœ… Converted SOP โ†’ skill: {skill_name}") + + return converted + + +def _load_skills_plugin(): + """Load agent skills from the agent-skills directory if available. 
+ + Also converts existing SOPs to skills at runtime (without duplicating source files). + Returns AgentSkills plugin instance or None if skills aren't available. + """ + try: + from strands.vended_plugins.skills import AgentSkills + except ImportError: + print("โ„น๏ธ AgentSkills plugin not available (strands.vended_plugins.skills not found)") + return None + + # Look for skills directory + possible_paths = [ + Path("agent-skills"), + Path("devtools/strands-command/agent-skills"), + ] + + skills_dir = None + for path in possible_paths: + if path.exists() and path.is_dir(): + skills_dir = path + break + + if skills_dir is None: + print("โ„น๏ธ No agent-skills directory found (skills not available)") + return None + + # Convert SOPs to skills at runtime + possible_sop_paths = [ + Path("devtools/strands-command/agent-sops"), + Path("agent-sops"), + ] + for sops_dir in possible_sop_paths: + if sops_dir.exists(): + converted = _convert_sops_to_skills(skills_dir, sops_dir) + if converted > 0: + print(f"โœ… Converted {converted} SOPs to skills") + break + + try: + plugin = AgentSkills(skills=str(skills_dir)) + skills = plugin.get_available_skills() + + if skills: + print(f"โœ… AgentSkills plugin: {len(skills)} skills loaded") + for skill in skills: + print(f" - {skill.name}: {skill.description[:60]}...") + return plugin + else: + print("โš ๏ธ AgentSkills plugin: no skills found in directory") + return None + except Exception as e: + print(f"โš ๏ธ Failed to load skills: {e}") + return None + + +def _activate_skill_for_mode(agent: Agent, mode: str) -> None: + """Activate the appropriate skill based on the command mode. + + Maps the command mode (e.g., "review", "implement") to a skill name + and invokes it via agent.tool.skills(). This front-loads the skill + instructions into the agent's context before it starts working. 
+ """ + skill_name = SKILL_MAP.get(mode) + if not skill_name: + print(f"โ„น๏ธ No skill mapped for mode '{mode}'") + return + + if "skills" not in agent.tool_names: + print(f"โš ๏ธ skills tool not available, can't activate '{skill_name}'") + return + + try: + agent.tool.skills(skill_name=skill_name, record_direct_tool_call=True) + print(f"โœ… Activated skill: {skill_name}") + except Exception as e: + print(f"โš ๏ธ Failed to activate skill '{skill_name}': {e}") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def run_beta_agent(query: str): + """Run the beta agent with extended capabilities.""" + try: + # Shared infrastructure from agent_runner.py + telemetry_enabled = _setup_langfuse_telemetry() + trace_attributes = _get_trace_attributes() if telemetry_enabled else {} + + # Beta agent tools (superset of standard) + tools = _get_beta_tools() + + # Same model configuration as standard agent + additional_request_fields = {} + additional_request_fields["anthropic_beta"] = ["interleaved-thinking-2025-05-14"] + additional_request_fields["thinking"] = { + "type": "enabled", + "budget_tokens": STRANDS_BUDGET_TOKENS, + } + + model = BedrockModel( + model_id=STRANDS_MODEL_ID, + max_tokens=STRANDS_MAX_TOKENS, + region_name=STRANDS_REGION, + boto_client_config=Config( + read_timeout=900, + connect_timeout=900, + retries={"max_attempts": 3, "mode": "adaptive"}, + ), + cache_config=CacheConfig(strategy="auto"), + additional_request_fields=additional_request_fields, + cache_prompt="default", + cache_tools="default", + ) + + system_prompt = _load_system_prompt() + session_id = os.getenv("SESSION_ID") + s3_bucket = os.getenv("S3_SESSION_BUCKET") + + if s3_bucket and session_id: + print(f"๐Ÿค– Using session manager with session ID: {session_id}") + session_manager = S3SessionManager( + session_id=session_id, + bucket=s3_bucket, + 
prefix=os.getenv("GITHUB_REPOSITORY", ""), + ) + else: + raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set") + + # Beta-only: Load agent skills plugin (includes SOPโ†’skill conversion) + plugins = [] + skills_plugin = _load_skills_plugin() + if skills_plugin: + plugins.append(skills_plugin) + + # Create beta agent + agent_kwargs = { + "model": model, + "system_prompt": system_prompt, + "tools": tools, + "session_manager": session_manager, + } + + if plugins: + agent_kwargs["plugins"] = plugins + + if trace_attributes: + agent_kwargs["trace_attributes"] = trace_attributes + + agent = Agent(**agent_kwargs) + + print(f"๐Ÿงช Beta agent created with {len(tools)} tools and {len(plugins)} plugins") + + # Auto-activate skill based on command mode + # The mode is embedded in the session_id by process-input.cjs (e.g., "reviewer-123") + mode = os.getenv("AGENT_MODE", "") + if mode: + _activate_skill_for_mode(agent, mode) + + print("Processing user query...") + result = agent(query) + + print(f"\n\nAgent Result ๐Ÿค–\nStop Reason: {result.stop_reason}\nMessage: {json.dumps(result.message, indent=2)}") + + # Eval trigger (shared infrastructure) + unique_session_id = trace_attributes.get("session.id", session_id) + eval_type = session_id.split("-")[0] if "-" in session_id else session_id + _send_eval_trigger(unique_session_id, eval_type) + + except Exception as e: + error_msg = f"โŒ Beta agent execution failed: {e}" + print(error_msg) + raise e + + +def main() -> None: + """Main entry point for the beta agent runner.""" + try: + if len(sys.argv) < 2: + raise ValueError("Task argument is required") + + task = " ".join(sys.argv[1:]) + if not task.strip(): + raise ValueError("Task cannot be empty") + print(f"๐Ÿงช Running beta agent with task: {task}") + + run_beta_agent(task) + + except Exception as e: + error_msg = f"Fatal error: {e}" + print(error_msg) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git 
a/strands-command/scripts/python/programmatic_tool_caller.py b/strands-command/scripts/python/programmatic_tool_caller.py new file mode 100644 index 0000000..0100820 --- /dev/null +++ b/strands-command/scripts/python/programmatic_tool_caller.py @@ -0,0 +1,316 @@ +"""Programmatic Tool Calling for Strands Agents. + +This module provides a tool that enables programmatic/code-based tool invocation, +similar to Anthropic's Programmatic Tool Calling feature. It allows an agent to +write Python code that calls other tools as functions, reducing API round-trips +and enabling complex orchestration logic. + +Tools are exposed as async functions (e.g., `await calculator(expression="2+2")`). +The code runs in an async context automatically - no boilerplate needed. + +Usage: +```python +from strands import Agent +from strands_tools import programmatic_tool_caller, calculator + +agent = Agent(tools=[programmatic_tool_caller, calculator]) + +result = agent.tool.programmatic_tool_caller( + code=''' +result = await calculator(expression="2 + 2") +print(f"Result: {result}") + +# Parallel execution +results = await asyncio.gather( + calculator(expression="10 * 1"), + calculator(expression="10 * 2"), +) +print(f"Parallel: {results}") +''' +) +``` + +Environment Variables: +- PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS: Comma-separated list of allowed tools +- PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES: Comma-separated list of extra modules to inject + into the namespace (e.g., "json,re,math,collections"). `asyncio` is always available. +- BYPASS_TOOL_CONSENT: Skip user confirmation if "true" + +Namespace: + The execution namespace matches python_repl's base: `{"__name__": "__main__"}`. + `asyncio` is always injected (required for async tool calls). + Additional modules can be added via PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES env var. + Tool functions are injected as async callables (e.g., `await shell(command="ls")`). 
+ +Limitations: Tools that use interrupts (human-in-the-loop) are not supported. The SDK +blocks interrupts during direct/programmatic tool calls — there is no mechanism to pause +execution, collect human input, and resume in this context. If an interrupt-capable tool +is called, it will raise a RuntimeError which surfaces as a failed tool result back to +the agent. +""" + +import asyncio +import importlib +import logging +import os +import sys +import textwrap +import traceback +from io import StringIO +from typing import Any, Callable, Dict, Optional + +from rich import box +from rich.panel import Panel +from rich.syntax import Syntax +from rich.table import Table +from strands import tool +from strands.types.tools import ToolContext + +from strands_tools.utils import console_util +from strands_tools.utils.user_input import get_user_input + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Tool Execution Helpers +# ============================================================================= + + +def _execute_tool(agent: Any, tool_name: str, tool_input: Dict[str, Any]) -> Any: + """Execute a tool through the agent's tool caller. + + Uses agent.tool.<tool_name>() which properly handles all tool types including MCP tools. 
+ """ + if agent is None: + raise RuntimeError("No agent available for tool execution") + + try: + # Use agent.tool.<tool_name>() which works for ALL tool types (including MCP tools) + # record_direct_tool_call=False prevents polluting message history during programmatic calls + tool_func = getattr(agent.tool, tool_name) + result = tool_func(record_direct_tool_call=False, **tool_input) + + if isinstance(result, dict): + if result.get("status") == "error": + error_content = result.get("content", [{"text": "Unknown error"}]) + error_text = error_content[0].get("text", "Unknown error") if error_content else "Unknown error" + raise RuntimeError(f"Tool error: {error_text}") + + content = result.get("content", []) + if content and isinstance(content, list): + text_parts = [item["text"] for item in content if isinstance(item, dict) and "text" in item] + if text_parts: + return "\n".join(text_parts) + return str(result) + + return result + + except AttributeError as e: + raise RuntimeError(f"Tool '{tool_name}' not found in registry") from e + except RuntimeError: + raise + except Exception as e: + logger.error(f"Error executing tool '{tool_name}': {e}") + raise RuntimeError(f"Failed to execute tool '{tool_name}': {e}") from e + + +def _create_async_tool_function(agent: Any, tool_name: str) -> Callable: + """Create an async function wrapper for a tool.""" + + async def tool_function(**kwargs: Any) -> Any: + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, lambda: _execute_tool(agent, tool_name, kwargs)) + + return tool_function + + +def _get_allowed_tools(agent: Any) -> set[str]: + """Get allowed tools from env var or default to all (except self).""" + all_tools = set(agent.tool_registry.registry.keys()) - {"programmatic_tool_caller"} + + env_allowed = os.environ.get("PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS", "").strip() + if env_allowed: + allowed_list = [t.strip() for t in env_allowed.split(",") if t.strip()] + return all_tools & set(allowed_list) + + 
return all_tools + + +def _build_namespace(available_tools: set[str], agent: Any) -> Dict[str, Any]: + """Build the execution namespace. + + Base namespace matches python_repl: ``{"__name__": "__main__"}``. + ``asyncio`` is always injected (required for async tool wrappers). + Additional stdlib modules can be injected via the + ``PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES`` environment variable + (comma-separated module names, e.g. ``json,re,math,collections``). + Tool functions are injected as async callables. + + Returns: + Namespace dict ready for ``exec()``. + """ + # Base namespace โ€” matches python_repl + namespace: Dict[str, Any] = { + "__name__": "__main__", + } + + # asyncio is always required (async wrapper) + namespace["asyncio"] = asyncio + + # Extra modules from env var + extra_modules = os.environ.get("PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES", "").strip() + if extra_modules: + for mod_name in extra_modules.split(","): + mod_name = mod_name.strip() + if not mod_name: + continue + try: + namespace[mod_name] = importlib.import_module(mod_name) + except ImportError: + logger.warning(f"Could not import extra module '{mod_name}', skipping") + + # Inject tools as async functions + for tool_name in available_tools: + namespace[tool_name] = _create_async_tool_function(agent, tool_name) + + return namespace + + +# ============================================================================= +# Main Tool +# ============================================================================= + + +@tool(context=True) +def programmatic_tool_caller( + code: str, + tool_context: Optional[ToolContext] = None, +) -> Dict[str, Any]: + """Execute Python code with access to agent tools as async functions. + + Tools are available as async functions - use `await` to call them. + Code runs in async context automatically, no boilerplate needed. 
+ + Example: + ```python + # Simple tool call + result = await calculator(expression="2 + 2") + print(result) + + # Loop with tool calls + for i in range(3): + r = await calculator(expression=f"{i} * 10") + print(r) + + # Parallel execution + results = await asyncio.gather( + calculator(expression="1+1"), + calculator(expression="2+2"), + ) + print(results) + ``` + + Environment Variables: + PROGRAMMATIC_TOOL_CALLER_ALLOWED_TOOLS: Comma-separated list of tools to expose + PROGRAMMATIC_TOOL_CALLER_EXTRA_MODULES: Comma-separated list of extra modules + to inject into the namespace (e.g., "json,re,math") + BYPASS_TOOL_CONSENT: Skip confirmation if "true" + + Args: + code: Python code to execute. Use `await tool_name(...)` to call tools. + tool_context: Injected automatically. + + Returns: + Dict with status and print() output only. + """ + console = console_util.create() + bypass_consent = os.environ.get("BYPASS_TOOL_CONSENT", "").lower() == "true" + + try: + if tool_context is None or tool_context.agent is None: + return { + "status": "error", + "content": [{"text": "No agent context available. This tool requires an agent."}], + } + + agent = tool_context.agent + + # Show code preview + console.print( + Panel( + Syntax(code, "python", theme="monokai", line_numbers=True), + title="[bold blue]Programmatic Tool Calling[/]", + border_style="blue", + ) + ) + + # Get allowed tools + available_tools = _get_allowed_tools(agent) + + tools_table = Table(show_header=True, header_style="bold cyan", box=box.SIMPLE) + tools_table.add_column("Available Tools", style="green") + for tool_name in sorted(available_tools): + tools_table.add_row(f"await {tool_name}(...)") + console.print(tools_table) + + # User confirmation + if not bypass_consent: + user_input = get_user_input("Execute this code? 
[y/*]") + if user_input.lower().strip() != "y": + cancel_reason = user_input if user_input.strip() != "n" else get_user_input("Reason:") + return { + "status": "error", + "content": [{"text": f"Cancelled. Reason: {cancel_reason}"}], + } + + # Build execution namespace (matches python_repl base + tools) + exec_namespace = _build_namespace(available_tools, agent) + + console.print("[green]Executing...[/]") + + # Execute code in async context + # Wrap user code in async function for await support + indented_code = textwrap.indent(code, " ") + wrapped_code = f"async def __user_code__():\n{indented_code}\n" + + # Capture output + stdout_capture = StringIO() + stderr_capture = StringIO() + old_stdout, old_stderr = sys.stdout, sys.stderr + + try: + sys.stdout = stdout_capture + sys.stderr = stderr_capture + + # Use compile() for better error tracebacks + compiled = compile(wrapped_code, "", "exec") + exec(compiled, exec_namespace) + asyncio.run(exec_namespace["__user_code__"]()) + + captured_output = stdout_capture.getvalue() + errors = stderr_capture.getvalue() + if errors: + captured_output += f"\n[stderr]\n{errors}" + finally: + sys.stdout = old_stdout + sys.stderr = old_stderr + + console.print("[bold green]โœ“ Done[/]") + if captured_output.strip(): + console.print(Panel(captured_output, title="[bold green]Output[/]", border_style="green")) + + return { + "status": "success", + "content": [{"text": captured_output.strip() if captured_output.strip() else "(no output)"}], + } + + except SyntaxError: + error_msg = f"Syntax error:\n{traceback.format_exc()}" + console.print(Panel(error_msg, title="[bold red]Error[/]", border_style="red")) + return {"status": "error", "content": [{"text": error_msg}]} + + except Exception: + error_msg = f"Execution error:\n{traceback.format_exc()}" + console.print(Panel(error_msg, title="[bold red]Error[/]", border_style="red")) + return {"status": "error", "content": [{"text": error_msg}]}