diff --git a/docs/ref/extensions/sandbox/upstash_box/sandbox.md b/docs/ref/extensions/sandbox/upstash_box/sandbox.md new file mode 100644 index 0000000000..fd92217711 --- /dev/null +++ b/docs/ref/extensions/sandbox/upstash_box/sandbox.md @@ -0,0 +1,3 @@ +# `Sandbox` + +::: agents.extensions.sandbox.upstash_box.sandbox diff --git a/docs/sandbox/clients.md b/docs/sandbox/clients.md index bd21da63d3..3612d8b512 100644 --- a/docs/sandbox/clients.md +++ b/docs/sandbox/clients.md @@ -96,6 +96,7 @@ For provider-specific setup notes and links for the checked-in extension example | `E2BSandboxClient` | `openai-agents[e2b]` | [E2B runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/e2b_runner.py) | | `ModalSandboxClient` | `openai-agents[modal]` | [Modal runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/modal_runner.py) | | `RunloopSandboxClient` | `openai-agents[runloop]` | [Runloop runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/runloop/runner.py) | +| `UpstashBoxSandboxClient` | `openai-agents[upstash-box]` | [Upstash Box runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/upstash_box_runner.py) | | `VercelSandboxClient` | `openai-agents[vercel]` | [Vercel runner](https://github.com/openai/openai-agents-python/blob/main/examples/sandbox/extensions/vercel_runner.py) | @@ -113,6 +114,7 @@ Hosted sandbox clients expose provider-specific mount strategies. Choose the bac | `DaytonaSandboxClient` | Supports rclone-backed cloud storage mounts with `DaytonaCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. | | `E2BSandboxClient` | Supports rclone-backed cloud storage mounts with `E2BCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. | | `RunloopSandboxClient` | Supports rclone-backed cloud storage mounts with `RunloopCloudBucketMountStrategy`; use it with `S3Mount`, `GCSMount`, `R2Mount`, `AzureBlobMount`, and `BoxMount`. | +| `UpstashBoxSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. | | `VercelSandboxClient` | No hosted-specific mount strategy is currently exposed. Use manifest files, repos, or other workspace inputs instead. | @@ -130,6 +132,7 @@ The table below summarizes which remote storage entries each backend can mount d | `DaytonaSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - | | `E2BSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - | | `RunloopSandboxClient` | ✓ | ✓ | ✓ | ✓ | ✓ | - | +| `UpstashBoxSandboxClient` | - | - | - | - | - | - | | `VercelSandboxClient` | - | - | - | - | - | - | diff --git a/examples/run_examples.py b/examples/run_examples.py index 54038b9f48..255c663301 100644 --- a/examples/run_examples.py +++ b/examples/run_examples.py @@ -81,6 +81,7 @@ "examples/sandbox/docker/mounts/s3_mount_read_write.py", "examples/sandbox/extensions/daytona/usaspending_text2sql/setup_db.py", "examples/sandbox/extensions/temporal/temporal_sandbox_agent.py", + "examples/sandbox/extensions/upstash_box_runner.py", "examples/sandbox/extensions/vercel_runner.py", "examples/sandbox/memory_s3.py", "examples/sandbox/sandbox_agent_with_remote_snapshot.py", diff --git a/examples/sandbox/extensions/README.md b/examples/sandbox/extensions/README.md index 837d9dfa28..039cf73673 100644 --- a/examples/sandbox/extensions/README.md +++ b/examples/sandbox/extensions/README.md @@ -376,3 +376,33 @@ Blaxel sandboxes support cloud bucket mounts (S3, R2, GCS) through `BlaxelCloudBucketMountStrategy` and persistent drive mounts through `BlaxelDriveMountStrategy`. See the [Blaxel Drive docs](https://docs.blaxel.ai/Agent-drive/Overview) for details. + +## Upstash Box + +### Setup + +Install the repo extra: + +```bash +uv sync --extra upstash-box +``` + +Create an Upstash Box API key in the [Upstash Console](https://console.upstash.com) and export +the required environment variables: + +```bash +export OPENAI_API_KEY=... +export UPSTASH_BOX_API_KEY=... +``` + +Optionally set `UPSTASH_BOX_BASE_URL` to target a non-default Box API endpoint. + +### Run + +```bash +uv run python examples/sandbox/extensions/upstash_box_runner.py --stream +``` + +Upstash Box has no Python SDK, so this client talks to the Box REST API directly over HTTP. +It supports command execution, file read/write, exposed ports, keep-alive boxes, +create-from-snapshot (via `snapshot_id`), and pause/resume lifecycle. diff --git a/examples/sandbox/extensions/upstash_box_runner.py b/examples/sandbox/extensions/upstash_box_runner.py new file mode 100644 index 0000000000..70697d675d --- /dev/null +++ b/examples/sandbox/extensions/upstash_box_runner.py @@ -0,0 +1,137 @@ +""" +Upstash Box-backed sandbox example for manual validation. + +This example mirrors the other extension runners. It supports a standard agent +run (non-streaming and streaming) against an Upstash Box sandbox. + +Requires ``OPENAI_API_KEY`` and ``UPSTASH_BOX_API_KEY`` in the environment. +""" + +from __future__ import annotations + +import argparse +import asyncio +import os +import sys +from pathlib import Path + +from openai.types.responses import ResponseTextDeltaEvent + +from agents import ModelSettings, Runner, set_tracing_disabled +from agents.run import RunConfig +from agents.sandbox import Manifest, SandboxAgent, SandboxRunConfig +from agents.sandbox.capabilities import Shell + +if __package__ is None or __package__ == "": + sys.path.insert(0, str(Path(__file__).resolve().parents[3])) + +from examples.sandbox.misc.example_support import text_manifest + +try: + from agents.extensions.sandbox import ( + UpstashBoxSandboxClient, + UpstashBoxSandboxClientOptions, + ) +except Exception as exc: # pragma: no cover - import path depends on optional extras + raise SystemExit( + "Upstash Box sandbox examples require the optional repo extra.\n" + "Install it with: uv sync --extra upstash-box" + ) from exc + + +DEFAULT_MODEL = "gpt-5.5" +DEFAULT_QUESTION = "Summarize this cloud sandbox workspace in 2 sentences." + + +def _build_manifest() -> Manifest: + return text_manifest( + { + "README.md": ( + "# Upstash Box Demo Workspace\n\n" + "This workspace exists to validate the Upstash Box sandbox backend manually.\n" + ), + "launch.md": ( + "# Launch\n\n" + "- Customer: Contoso Logistics.\n" + "- Goal: validate the remote sandbox agent path.\n" + ), + "tasks.md": ( + "# Tasks\n\n" + "1. Inspect the workspace files.\n" + "2. Summarize the setup in two sentences.\n" + ), + } + ) + + +def _require_env(name: str) -> str: + value = os.environ.get(name) + if value: + return value + raise SystemExit(f"{name} must be set before running this example.") + + +async def main(*, model: str, question: str, api_key: str | None, stream: bool) -> None: + _require_env("OPENAI_API_KEY") + _require_env("UPSTASH_BOX_API_KEY") + + agent = SandboxAgent( + name="Upstash Box Sandbox Assistant", + model=model, + instructions=( + "Answer questions about the sandbox workspace. Inspect the files before answering " + "and keep the response concise. Cite the file names you inspected." + ), + default_manifest=_build_manifest(), + capabilities=[Shell()], + model_settings=ModelSettings(tool_choice="required"), + ) + + run_config = RunConfig( + sandbox=SandboxRunConfig( + client=UpstashBoxSandboxClient(), + options=UpstashBoxSandboxClientOptions(api_key=api_key), + ), + workflow_name="Upstash Box sandbox example", + ) + + if not stream: + result = await Runner.run(agent, question, run_config=run_config) + print(result.final_output) + return + + stream_result = Runner.run_streamed(agent, question, run_config=run_config) + saw_text_delta = False + async for event in stream_result.stream_events(): + if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent): + if not saw_text_delta: + print("assistant> ", end="", flush=True) + saw_text_delta = True + print(event.data.delta, end="", flush=True) + + if saw_text_delta: + print() + + +if __name__ == "__main__": + set_tracing_disabled(True) + + parser = argparse.ArgumentParser(description="Run an Upstash Box sandbox agent.") + parser.add_argument("--model", default=DEFAULT_MODEL, help="Model name to use.") + parser.add_argument("--question", default=DEFAULT_QUESTION, help="Prompt to send to the agent.") + parser.add_argument( + "--api-key", + default=os.environ.get("UPSTASH_BOX_API_KEY"), + help="Upstash Box API key. Defaults to UPSTASH_BOX_API_KEY.", + ) + parser.add_argument("--stream", action="store_true", default=False, help="Stream the response.") + args = parser.parse_args() + + asyncio.run( + main( + model=args.model, + question=args.question, + api_key=args.api_key, + stream=args.stream, + ) + ) diff --git a/pyproject.toml b/pyproject.toml index 1d4f6b4584..734752c40e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ e2b = ["e2b==2.20.0", "e2b-code-interpreter==2.4.1"] modal = ["modal==1.4.3"] runloop = ["runloop_api_client>=1.16.0,<2.0.0"] vercel = ["vercel>=0.5.6,<0.6"] +upstash-box = ["aiohttp>=3.12,<4"] s3 = ["boto3>=1.34"] temporal = [ "temporalio==1.26.0", diff --git a/src/agents/extensions/sandbox/__init__.py b/src/agents/extensions/sandbox/__init__.py index d7b082ba1f..57f81ff3e8 100644 --- a/src/agents/extensions/sandbox/__init__.py +++ b/src/agents/extensions/sandbox/__init__.py @@ -109,6 +109,18 @@ except Exception: # pragma: no cover _HAS_VERCEL = False +try: + from .upstash_box import ( + UpstashBoxSandboxClient as UpstashBoxSandboxClient, + UpstashBoxSandboxClientOptions as UpstashBoxSandboxClientOptions, + UpstashBoxSandboxSession as UpstashBoxSandboxSession, + UpstashBoxSandboxSessionState as UpstashBoxSandboxSessionState, + ) + + _HAS_UPSTASH_BOX = True +except Exception: # pragma: no cover + _HAS_UPSTASH_BOX = False + __all__: list[str] = [] if _HAS_E2B: @@ -187,6 +199,16 @@ ] ) +if _HAS_UPSTASH_BOX: + __all__.extend( + [ + "UpstashBoxSandboxClient", + "UpstashBoxSandboxClientOptions", + "UpstashBoxSandboxSession", + "UpstashBoxSandboxSessionState", + ] + ) + if _HAS_RUNLOOP: __all__.extend( [ diff --git a/src/agents/extensions/sandbox/upstash_box/__init__.py b/src/agents/extensions/sandbox/upstash_box/__init__.py new file mode 100644 index 0000000000..8494b427ec --- /dev/null +++ b/src/agents/extensions/sandbox/upstash_box/__init__.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from .sandbox import ( + UpstashBoxSandboxClient, + UpstashBoxSandboxClientOptions, + UpstashBoxSandboxSession, + UpstashBoxSandboxSessionState, +) + +__all__ = [ + "UpstashBoxSandboxClient", + "UpstashBoxSandboxClientOptions", + "UpstashBoxSandboxSession", + "UpstashBoxSandboxSessionState", +] diff --git a/src/agents/extensions/sandbox/upstash_box/sandbox.py b/src/agents/extensions/sandbox/upstash_box/sandbox.py new file mode 100644 index 0000000000..fa31ecf77c --- /dev/null +++ b/src/agents/extensions/sandbox/upstash_box/sandbox.py @@ -0,0 +1,1018 @@ +""" +Upstash Box (https://upstash.com/docs/box) sandbox implementation. + +Upstash Box does not ship a Python SDK, so this module talks to the Box REST API +(``/v2/box/...``) directly over HTTP using ``aiohttp``. The session extends +``BaseSandboxSession`` like the other hosted providers and maps the sandbox +contract onto the Box API: command execution, file read/write, exposed ports, +tar-based workspace persistence, and pause/resume lifecycle. + +Note: The ``aiohttp`` dependency is optional (installed via the ``upstash-box`` +extra), so package-level exports guard imports of this module. Within this +module we import ``aiohttp`` normally so IDEs can resolve and navigate types. +""" + +from __future__ import annotations + +import asyncio +import base64 +import io +import logging +import os +import uuid +from pathlib import Path +from typing import Any, Literal +from urllib.parse import urlsplit + +import aiohttp + +from ....sandbox.errors import ( + ConfigurationError, + ErrorCode, + ExecTimeoutError, + ExecTransportError, + ExposedPortUnavailableError, + WorkspaceArchiveReadError, + WorkspaceArchiveWriteError, + WorkspaceReadNotFoundError, + WorkspaceStartError, + WorkspaceWriteTypeError, +) +from ....sandbox.manifest import Manifest +from ....sandbox.session import SandboxSession, SandboxSessionState +from ....sandbox.session.base_sandbox_session import BaseSandboxSession +from ....sandbox.session.dependencies import Dependencies +from ....sandbox.session.manager import Instrumentation +from ....sandbox.session.runtime_helpers import RESOLVE_WORKSPACE_PATH_HELPER, RuntimeHelperScript +from ....sandbox.session.sandbox_client import BaseSandboxClient, BaseSandboxClientOptions +from ....sandbox.session.tar_workspace import shell_tar_exclude_args +from ....sandbox.snapshot import SnapshotBase, SnapshotSpec, resolve_snapshot +from ....sandbox.types import ExecResult, ExposedPortEndpoint, User +from ....sandbox.util.tar_utils import UnsafeTarMemberError, validate_tar_bytes +from ....sandbox.workspace_paths import coerce_posix_path, posix_path_as_path, sandbox_path_str + +_DEFAULT_BASE_URL = "https://us-east-1.box.upstash.com" +# Box provisions the workspace at /workspace/home, so map the core default (/workspace) onto it +# the same way the TypeScript provider does. +_DEFAULT_WORKSPACE_ROOT = "/workspace/home" +# How long to keep retrying from-snapshot creation while the snapshot is still propagating. +_SNAPSHOT_NOT_READY_MAX_WAIT_S = 60.0 +_SNAPSHOT_POLL_INTERVAL_S = 3.0 +_DEFAULT_EXEC_TIMEOUT_S = 60.0 +_DEFAULT_REQUEST_TIMEOUT_S = 120.0 +_WORKSPACE_TAR_TIMEOUT_S = 300.0 +_CREATE_POLL_INTERVAL_S = 2.0 +_CREATE_MAX_WAIT_S = 300.0 +# Box statuses that mean the sandbox is up and able to run commands. +_RUNNING_STATUSES = frozenset({"running", "idle"}) + +logger = logging.getLogger(__name__) + + +class _BoxNotFoundError(Exception): + """Raised internally when a Box file or sandbox returns HTTP 404.""" + + +class UpstashBoxSandboxClientOptions(BaseSandboxClientOptions): + """Options for ``UpstashBoxSandboxClient``.""" + + type: Literal["upstash_box"] = "upstash_box" + api_key: str | None = None + base_url: str | None = None + name: str | None = None + size: Literal["small", "medium", "large"] | None = None + runtime: str | None = None + keep_alive: bool = False + network_policy: dict[str, Any] | None = None + git_token: str | None = None + snapshot_id: str | None = None + pause_on_exit: bool = False + env_vars: dict[str, str] | None = None + exposed_ports: tuple[int, ...] = () + + def __init__( + self, + api_key: str | None = None, + base_url: str | None = None, + name: str | None = None, + size: Literal["small", "medium", "large"] | None = None, + runtime: str | None = None, + keep_alive: bool = False, + network_policy: dict[str, Any] | None = None, + git_token: str | None = None, + snapshot_id: str | None = None, + pause_on_exit: bool = False, + env_vars: dict[str, str] | None = None, + exposed_ports: tuple[int, ...] = (), + *, + type: Literal["upstash_box"] = "upstash_box", + ) -> None: + super().__init__( + type=type, + api_key=api_key, + base_url=base_url, + name=name, + size=size, + runtime=runtime, + keep_alive=keep_alive, + network_policy=network_policy, + git_token=git_token, + snapshot_id=snapshot_id, + pause_on_exit=pause_on_exit, + env_vars=env_vars, + exposed_ports=exposed_ports, + ) + + +class UpstashBoxSandboxSessionState(SandboxSessionState): + """Serializable state for an Upstash Box-backed session.""" + + type: Literal["upstash_box"] = "upstash_box" + box_id: str + base_url: str + base_env_vars: dict[str, str] = {} + keep_alive: bool = False + pause_on_exit: bool = False + snapshot_id: str | None = None + + +class UpstashBoxSandboxSession(BaseSandboxSession): + """``BaseSandboxSession`` backed by Upstash Box over HTTP.""" + + state: UpstashBoxSandboxSessionState + _api_key: str | None + _http: aiohttp.ClientSession | None + _exec_timeout_s: float | None + _request_timeout_s: float | None + + def __init__( + self, + *, + state: UpstashBoxSandboxSessionState, + api_key: str | None = None, + http: aiohttp.ClientSession | None = None, + exec_timeout_s: float | None = None, + request_timeout_s: float | None = None, + ) -> None: + self.state = state + self._api_key = api_key + self._http = http + self._exec_timeout_s = exec_timeout_s + self._request_timeout_s = request_timeout_s + + @classmethod + def from_state( + cls, + state: UpstashBoxSandboxSessionState, + *, + api_key: str | None = None, + http: aiohttp.ClientSession | None = None, + exec_timeout_s: float | None = None, + request_timeout_s: float | None = None, + ) -> UpstashBoxSandboxSession: + return cls( + state=state, + api_key=api_key, + http=http, + exec_timeout_s=exec_timeout_s, + request_timeout_s=request_timeout_s, + ) + + @property + def box_id(self) -> str: + return self.state.box_id + + # ----- HTTP plumbing ------------------------------------------------- + + def _session(self) -> aiohttp.ClientSession: + if self._http is None or self._http.closed: + headers: dict[str, str] = {} + if api_key := self._api_key or os.environ.get("UPSTASH_BOX_API_KEY"): + headers["X-Box-Api-Key"] = api_key + self._http = aiohttp.ClientSession(headers=headers) + return self._http + + def _url(self, path: str) -> str: + base = self.state.base_url.rstrip("/") + return f"{base}/v2/box/{self.state.box_id}/{path.lstrip('/')}" + + def _request_timeout(self) -> aiohttp.ClientTimeout: + total = ( + self._request_timeout_s + if self._request_timeout_s is not None + else _DEFAULT_REQUEST_TIMEOUT_S + ) + return aiohttp.ClientTimeout(total=total) + + async def _close_http(self) -> None: + if self._http is not None and not self._http.closed: + await self._http.close() + self._http = None + + def _runtime_helpers(self) -> tuple[RuntimeHelperScript, ...]: + return (RESOLVE_WORKSPACE_PATH_HELPER,) + + def _current_runtime_helper_cache_key(self) -> object | None: + return self.state.box_id + + async def _validate_path_access(self, path: Path | str, *, for_write: bool = False) -> Path: + return await self._validate_remote_path_access(path, for_write=for_write) + + # ----- exec ---------------------------------------------------------- + + async def _resolved_envs(self) -> dict[str, str]: + manifest_envs = await self.state.manifest.environment.resolve() + return {**self.state.base_env_vars, **manifest_envs} + + async def _box_exec( + self, + argv: list[str], + *, + folder: str | None, + timeout: float | None, + ) -> ExecResult: + payload: dict[str, Any] = {"command": argv} + if folder is not None: + payload["folder"] = folder + + http = self._session() + url = self._url("exec") + request_timeout = aiohttp.ClientTimeout( + total=timeout + 5.0 if timeout is not None else None + ) + try: + async with http.post(url, json=payload, timeout=request_timeout) as resp: + if resp.status != 200: + detail = await _read_error_body(resp) + message = _http_error_message("POST /exec", resp.status, detail) + raise ExecTransportError( + command=tuple(argv), + context=_error_context(status=resp.status, detail=detail), + cause=Exception(message), + message=message, + ) + data = await resp.json(content_type=None) + except asyncio.TimeoutError as e: + raise ExecTimeoutError(command=tuple(argv), timeout_s=timeout, cause=e) from e + except ExecTransportError: + raise + except aiohttp.ClientError as e: + raise _transport_error(command=tuple(argv), cause=e, operation="exec") from e + + output = data.get("output") or "" + error = data.get("error") or "" + exit_code = int(data.get("exit_code", 0) or 0) + return ExecResult( + stdout=output.encode("utf-8") if isinstance(output, str) else bytes(output), + stderr=error.encode("utf-8") if isinstance(error, str) else bytes(error), + exit_code=exit_code, + ) + + async def _exec_internal( + self, + *command: str | Path, + timeout: float | None = None, + ) -> ExecResult: + argv = [str(c) for c in command] + envs = await self._resolved_envs() + if envs: + argv = ["env", "--", *[f"{key}={value}" for key, value in sorted(envs.items())], *argv] + effective_timeout = ( + timeout + if timeout is not None + else ( + self._exec_timeout_s + if self._exec_timeout_s is not None + else _DEFAULT_EXEC_TIMEOUT_S + ) + ) + folder = sandbox_path_str(self._workspace_root_path()) + return await self._box_exec(argv, folder=folder, timeout=effective_timeout) + + async def _prepare_backend_workspace(self) -> None: + # Create the workspace root before exec calls use it as their cwd. Run with no folder so + # this does not depend on the (not-yet-created) root directory existing. + root = sandbox_path_str(self._workspace_root_path()) + try: + result = await self._box_exec( + ["mkdir", "-p", "--", root], + folder=None, + timeout=self._request_timeout_s or _DEFAULT_REQUEST_TIMEOUT_S, + ) + except Exception as e: + raise WorkspaceStartError( + path=self._workspace_root_path(), + context={"backend": "upstash_box", "reason": "prepare_workspace_failed"}, + cause=e, + ) from e + if not result.ok(): + raise WorkspaceStartError( + path=self._workspace_root_path(), + context={ + "backend": "upstash_box", + "reason": "prepare_workspace_nonzero_exit", + "exit_code": result.exit_code, + "output": result.stderr.decode("utf-8", errors="replace"), + }, + ) + + # ----- files --------------------------------------------------------- + + async def _box_read_file(self, path: str) -> bytes: + http = self._session() + params = {"path": path, "encoding": "base64"} + url = self._url("files/read") + async with http.get(url, params=params, timeout=self._request_timeout()) as resp: + if resp.status == 404: + raise _BoxNotFoundError(path) + if resp.status != 200: + detail = await _read_error_body(resp) + raise WorkspaceArchiveReadError( + path=posix_path_as_path(coerce_posix_path(path)), + context={"reason": "http_error", "http_status": resp.status, "message": detail}, + ) + data = await resp.json(content_type=None) + content = data.get("content", "") + return base64.b64decode(content) if content else b"" + + async def _box_write_file(self, path: str, payload: bytes) -> None: + http = self._session() + body = { + "path": path, + "content": base64.b64encode(payload).decode("ascii"), + "encoding": "base64", + } + url = self._url("files/write") + async with http.post(url, json=body, timeout=self._request_timeout()) as resp: + if resp.status != 200: + detail = await _read_error_body(resp) + raise WorkspaceArchiveWriteError( + path=posix_path_as_path(coerce_posix_path(path)), + context={"reason": "http_error", "http_status": resp.status, "message": detail}, + ) + + async def read(self, path: Path | str, *, user: str | User | None = None) -> io.IOBase: + error_path = posix_path_as_path(coerce_posix_path(path)) + if user is not None: + workspace_path = await self._check_read_with_exec(path, user=user) + else: + workspace_path = await self._validate_path_access(path) + + try: + data = await self._box_read_file(sandbox_path_str(workspace_path)) + except _BoxNotFoundError as e: + raise WorkspaceReadNotFoundError(path=error_path, cause=e) from e + except WorkspaceArchiveReadError: + raise + except aiohttp.ClientError as e: + raise WorkspaceArchiveReadError(path=error_path, cause=e) from e + return io.BytesIO(data) + + async def write( + self, + path: Path | str, + data: io.IOBase, + *, + user: str | User | None = None, + ) -> None: + error_path = posix_path_as_path(coerce_posix_path(path)) + if user is not None: + await self._check_write_with_exec(path, user=user) + + payload = data.read() + if isinstance(payload, str): + payload = payload.encode("utf-8") + if not isinstance(payload, bytes | bytearray): + raise WorkspaceWriteTypeError(path=error_path, actual_type=type(payload).__name__) + + workspace_path = await self._validate_path_access(path, for_write=True) + try: + await self._box_write_file(sandbox_path_str(workspace_path), bytes(payload)) + except WorkspaceArchiveWriteError: + raise + except aiohttp.ClientError as e: + raise WorkspaceArchiveWriteError(path=workspace_path, cause=e) from e + + async def running(self) -> bool: + http = self._session() + url = self._url("status") + try: + async with http.get(url, timeout=self._request_timeout()) as resp: + if resp.status != 200: + return False + data = await resp.json(content_type=None) + except Exception: + return False + return str(data.get("status", "")) in _RUNNING_STATUSES + + # ----- exposed ports ------------------------------------------------- + + async def _resolve_exposed_port(self, port: int) -> ExposedPortEndpoint: + http = self._session() + url = self._url("preview") + try: + async with http.post(url, json={"port": port}, timeout=self._request_timeout()) as resp: + if resp.status != 200: + detail = await _read_error_body(resp) + raise ExposedPortUnavailableError( + port=port, + exposed_ports=self.state.exposed_ports, + reason="backend_unavailable", + context={ + "backend": "upstash_box", + "http_status": resp.status, + "detail": detail, + }, + ) + data = await resp.json(content_type=None) + except ExposedPortUnavailableError: + raise + except Exception as e: + raise ExposedPortUnavailableError( + port=port, + exposed_ports=self.state.exposed_ports, + reason="backend_unavailable", + context={"backend": "upstash_box", "detail": "preview_request_failed"}, + cause=e, + ) from e + + url_value = data.get("url") + if not isinstance(url_value, str) or not url_value: + raise ExposedPortUnavailableError( + port=port, + exposed_ports=self.state.exposed_ports, + reason="backend_unavailable", + context={ + "backend": "upstash_box", + "detail": "invalid_preview_url", + "url": url_value, + }, + ) + split = urlsplit(url_value) + host = split.hostname + if host is None: + raise ExposedPortUnavailableError( + port=port, + exposed_ports=self.state.exposed_ports, + reason="backend_unavailable", + context={ + "backend": "upstash_box", + "detail": "invalid_preview_url", + "url": url_value, + }, + ) + resolved_port = split.port or (443 if split.scheme == "https" else 80) + return ExposedPortEndpoint( + host=host, + port=resolved_port, + tls=split.scheme == "https", + query=split.query, + ) + + # ----- lifecycle ----------------------------------------------------- + + async def _shutdown_backend(self) -> None: + # Keep-alive boxes are caller-managed and stay running. + if self.state.keep_alive: + return + http = self._session() + action = "pause" if self.state.pause_on_exit else "delete" + # Cleanup is best-effort (raising here would mask the original flow), but failures are + # logged so a box that fails to pause/delete is not silently orphaned. + try: + if self.state.pause_on_exit: + async with http.post(self._url("pause"), timeout=self._request_timeout()) as resp: + await self._log_lifecycle_failure(action, resp) + return + base = self.state.base_url.rstrip("/") + async with http.delete( + f"{base}/v2/box/{self.state.box_id}", timeout=self._request_timeout() + ) as resp: + await self._log_lifecycle_failure(action, resp) + except Exception: + logger.warning( + "Failed to %s Upstash Box %s on shutdown", action, self.state.box_id, exc_info=True + ) + + async def _log_lifecycle_failure(self, action: str, resp: aiohttp.ClientResponse) -> None: + # 404 on delete is benign (already gone); anything else >=400 is surfaced as a warning. + if resp.status < 400 or (action == "delete" and resp.status == 404): + return + detail = await _read_error_body(resp) + logger.warning( + "Failed to %s Upstash Box %s: HTTP %s%s", + action, + self.state.box_id, + resp.status, + f": {detail}" if detail else "", + ) + + async def _after_stop(self) -> None: + await self._close_http() + + async def _after_shutdown(self) -> None: + await self._close_http() + + # ----- workspace persistence (tar via exec + Box files API) ---------- + + def _tar_exclude_args(self) -> list[str]: + return shell_tar_exclude_args(self._persist_workspace_skip_relpaths()) + + async def persist_workspace(self) -> io.IOBase: + root = self._workspace_root_path() + tar_path = f"/tmp/sandbox-persist-{self.state.session_id.hex}.tar" + excludes = self._tar_exclude_args() + tar_cmd = ["tar", *excludes, "-C", root.as_posix(), "-cf", tar_path, "."] + try: + result = await self._exec_internal(*tar_cmd, timeout=_WORKSPACE_TAR_TIMEOUT_S) + if not result.ok(): + raise WorkspaceArchiveReadError( + path=root, + context={ + "reason": "tar_failed", + "output": result.stderr.decode("utf-8", errors="replace"), + }, + retryable=False, + ) + raw = await self._box_read_file(tar_path) + except WorkspaceArchiveReadError: + raise + except Exception as e: + raise WorkspaceArchiveReadError(path=root, cause=e) from e + finally: + try: + await self._exec_internal("rm", "-f", "--", tar_path) + except Exception: + pass + return io.BytesIO(raw) + + async def hydrate_workspace(self, data: io.IOBase) -> None: + root = self._workspace_root_path() + tar_path = f"/tmp/sandbox-hydrate-{self.state.session_id.hex}.tar" + payload = data.read() + if isinstance(payload, str): + payload = payload.encode("utf-8") + if not isinstance(payload, bytes | bytearray): + raise WorkspaceWriteTypeError(path=Path(tar_path), actual_type=type(payload).__name__) + + try: + validate_tar_bytes(bytes(payload), allow_external_symlink_targets=False) + except UnsafeTarMemberError as e: + raise WorkspaceArchiveWriteError( + path=root, + context={"reason": "unsafe_or_invalid_tar", "member": e.member, "detail": str(e)}, + cause=e, + ) from e + + try: + await self._exec_internal("mkdir", "-p", "--", root.as_posix()) + await self._box_write_file(tar_path, bytes(payload)) + result = await self._exec_internal( + "tar", "-C", root.as_posix(), "-xf", tar_path, timeout=_WORKSPACE_TAR_TIMEOUT_S + ) + if not result.ok(): + raise WorkspaceArchiveWriteError( + path=root, + context={ + "reason": "tar_extract_failed", + "output": result.stderr.decode("utf-8", errors="replace"), + }, + ) + except WorkspaceArchiveWriteError: + raise + except Exception as e: + raise WorkspaceArchiveWriteError(path=root, cause=e) from e + finally: + try: + await self._exec_internal("rm", "-f", "--", tar_path) + except Exception: + pass + + +class UpstashBoxSandboxClient(BaseSandboxClient[UpstashBoxSandboxClientOptions]): + """Upstash Box sandbox client managing box lifecycle over the Box REST API.""" + + backend_id = "upstash_box" + _instrumentation: Instrumentation + _exec_timeout_s: float + _request_timeout_s: float + + def __init__( + self, + *, + api_key: str | None = None, + base_url: str | None = None, + instrumentation: Instrumentation | None = None, + dependencies: Dependencies | None = None, + exec_timeout_s: float = _DEFAULT_EXEC_TIMEOUT_S, + request_timeout_s: float = _DEFAULT_REQUEST_TIMEOUT_S, + ) -> None: + super().__init__() + self._api_key = api_key + self._base_url = base_url + self._instrumentation = instrumentation or Instrumentation() + self._dependencies = dependencies + self._exec_timeout_s = exec_timeout_s + self._request_timeout_s = request_timeout_s + + def _resolve_api_key(self, options: UpstashBoxSandboxClientOptions) -> str: + api_key = options.api_key or self._api_key or os.environ.get("UPSTASH_BOX_API_KEY") + if not api_key: + raise ConfigurationError( + message=( + "Upstash Box requires an API key. Pass api_key to " + "UpstashBoxSandboxClient/options or set UPSTASH_BOX_API_KEY." + ), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id}, + ) + return api_key + + def _resolve_base_url(self, options: UpstashBoxSandboxClientOptions) -> str: + base_url = ( + options.base_url + or self._base_url + or os.environ.get("UPSTASH_BOX_BASE_URL") + or _DEFAULT_BASE_URL + ) + return base_url.rstrip("/") + + async def create( + self, + *, + snapshot: SnapshotSpec | SnapshotBase | None = None, + manifest: Manifest | None = None, + options: UpstashBoxSandboxClientOptions, + ) -> SandboxSession: + if manifest is None: + manifest = Manifest(root=_DEFAULT_WORKSPACE_ROOT) + elif manifest.root == "/workspace": + # Remap the core default root onto Box's workspace home. + manifest = manifest.model_copy(update={"root": _DEFAULT_WORKSPACE_ROOT}) + + api_key = self._resolve_api_key(options) + base_url = self._resolve_base_url(options) + box_id = await self._create_box(api_key=api_key, base_url=base_url, options=options) + + session_id = uuid.uuid4() + snapshot_instance = resolve_snapshot(snapshot, str(session_id)) + state = UpstashBoxSandboxSessionState( + session_id=session_id, + manifest=manifest, + snapshot=snapshot_instance, + box_id=box_id, + base_url=base_url, + base_env_vars=dict(options.env_vars or {}), + keep_alive=options.keep_alive, + pause_on_exit=options.pause_on_exit, + snapshot_id=options.snapshot_id, + exposed_ports=options.exposed_ports, + ) + inner = UpstashBoxSandboxSession.from_state( + state, + api_key=api_key, + exec_timeout_s=self._exec_timeout_s, + request_timeout_s=self._request_timeout_s, + ) + return self._wrap_session(inner, instrumentation=self._instrumentation) + + async def delete(self, session: SandboxSession) -> SandboxSession: + inner = session._inner + if not isinstance(inner, UpstashBoxSandboxSession): + raise TypeError("UpstashBoxSandboxClient.delete expects an UpstashBoxSandboxSession") + # Delegate to shutdown() so the configured lifecycle is honored. The runner cleanup path + # calls session.shutdown() and then client.delete(); force-deleting here would defeat + # pause_on_exit / keep_alive. shutdown() already handles the configured lifecycle. + try: + await inner.shutdown() + except Exception: + logger.warning( + "Failed to shut down Upstash Box %s during delete", + inner.state.box_id, + exc_info=True, + ) + return session + + async def resume(self, state: SandboxSessionState) -> SandboxSession: + if not isinstance(state, UpstashBoxSandboxSessionState): + raise TypeError( + "UpstashBoxSandboxClient.resume expects an UpstashBoxSandboxSessionState" + ) + # The API key is never persisted in serialized state; resolve it from the client/env. + api_key = self._api_key or os.environ.get("UPSTASH_BOX_API_KEY") + inner = UpstashBoxSandboxSession.from_state( + state, + api_key=api_key, + exec_timeout_s=self._exec_timeout_s, + request_timeout_s=self._request_timeout_s, + ) + # Reattach to the existing box. This raises on a missing box or a transient failure + # rather than silently degrading: returning "not preserved" here would make start() + # re-apply the manifest and clobber a healthy live workspace. + await self._reconnect(inner, state) + # The backend is reachable; let the base session's readiness probe decide whether the + # workspace root still exists before reusing it. + inner._set_start_state_preserved(True, system=True) + return self._wrap_session(inner, instrumentation=self._instrumentation) + + async def _reconnect( + self, inner: UpstashBoxSandboxSession, state: UpstashBoxSandboxSessionState + ) -> None: + http = inner._session() + base = state.base_url.rstrip("/") + try: + async with http.get( + f"{base}/v2/box/{state.box_id}", timeout=inner._request_timeout() + ) as resp: + if resp.status == 404: + raise ConfigurationError( + message=( + f"Upstash Box {state.box_id} no longer exists and cannot be resumed. " + "Create a new session instead." + ), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": state.box_id}, + ) + if resp.status != 200: + detail = await _read_error_body(resp) + raise ConfigurationError( + message=_http_error_message("GET /v2/box", resp.status, detail), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context=_error_context(status=resp.status, detail=detail), + ) + data = await resp.json(content_type=None) + except ConfigurationError: + raise + except aiohttp.ClientError as e: + raise ConfigurationError( + message=f"Failed to reach Upstash Box {state.box_id} during resume: {e}", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "cause_type": type(e).__name__}, + ) from e + + status = str(data.get("status", "")) + if status == "paused": + async with http.post( + f"{base}/v2/box/{state.box_id}/resume", timeout=inner._request_timeout() + ) as resp: + if resp.status >= 400: + detail = await _read_error_body(resp) + raise ConfigurationError( + message=_http_error_message("POST /resume", resp.status, detail), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context=_error_context(status=resp.status, detail=detail), + ) + await self._wait_until_runnable( + http, base, state.box_id, timeout=inner._request_timeout() + ) + return + + if status in _RUNNING_STATUSES: + return + + if status == "creating": + await self._wait_until_runnable( + http, base, state.box_id, timeout=inner._request_timeout() + ) + return + + raise ConfigurationError( + message=f"Upstash Box {state.box_id} is not runnable and cannot be resumed: {status}", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": state.box_id, "status": status}, + ) + + async def _wait_until_runnable( + self, + http: aiohttp.ClientSession, + base_url: str, + box_id: str, + *, + timeout: aiohttp.ClientTimeout, + ) -> None: + deadline = asyncio.get_event_loop().time() + _CREATE_MAX_WAIT_S + while True: + async with http.get(f"{base_url}/v2/box/{box_id}", timeout=timeout) as resp: + if resp.status != 200: + detail = await _read_error_body(resp) + raise ConfigurationError( + message=_http_error_message("GET /v2/box", resp.status, detail), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context=_error_context(status=resp.status, detail=detail), + ) + data = await resp.json(content_type=None) + + status = str(data.get("status", "")) + if status in _RUNNING_STATUSES: + return + if status == "error": + raise ConfigurationError( + message=f"Upstash Box {box_id} failed while waiting for resume", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": box_id, "status": status}, + ) + if status != "creating": + raise ConfigurationError( + message=f"Upstash Box {box_id} is not runnable and cannot be resumed: {status}", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": box_id, "status": status}, + ) + if asyncio.get_event_loop().time() >= deadline: + raise ConfigurationError( + message=f"Upstash Box {box_id} did not become runnable after resume", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": box_id, "status": status}, + ) + await asyncio.sleep(_CREATE_POLL_INTERVAL_S) + + def deserialize_session_state(self, payload: dict[str, object]) -> SandboxSessionState: + return UpstashBoxSandboxSessionState.model_validate(payload) + + async def _create_box( + self, + *, + api_key: str, + base_url: str, + options: UpstashBoxSandboxClientOptions, + ) -> str: + body: dict[str, Any] = {} + if options.name: + body["name"] = options.name + if options.size: + body["size"] = options.size + if options.runtime: + body["runtime"] = options.runtime + if options.keep_alive: + body["keep_alive"] = True + if options.network_policy: + body["network_policy"] = options.network_policy + if options.git_token: + body["github_token"] = options.git_token + if options.env_vars: + body["env_vars"] = options.env_vars + + headers = {"X-Box-Api-Key": api_key, "Content-Type": "application/json"} + timeout = aiohttp.ClientTimeout(total=self._request_timeout_s) + if options.snapshot_id: + create_url = f"{base_url}/v2/box/from-snapshot" + body["snapshot_id"] = options.snapshot_id + else: + create_url = f"{base_url}/v2/box" + + try: + async with aiohttp.ClientSession(headers=headers) as http: + data = await self._post_box_create( + http, + create_url, + body, + timeout=timeout, + is_snapshot=bool(options.snapshot_id), + ) + box_id = data.get("id") + if not isinstance(box_id, str) or not box_id: + raise ConfigurationError( + message="Box creation returned an invalid id", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id}, + ) + data = await self._poll_until_ready(http, base_url, box_id, data) + return box_id + except ConfigurationError: + raise + except aiohttp.ClientError as e: + raise ConfigurationError( + message=f"Box creation request failed: {e}", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "cause_type": type(e).__name__}, + ) from e + + async def _post_box_create( + self, + http: aiohttp.ClientSession, + url: str, + body: dict[str, Any], + *, + timeout: aiohttp.ClientTimeout, + is_snapshot: bool, + ) -> dict[str, Any]: + # Freshly created snapshots may still be propagating; the Box API answers 400 "not ready" + # for a short window, so retry from-snapshot creation like the official SDK does. + deadline = asyncio.get_event_loop().time() + _SNAPSHOT_NOT_READY_MAX_WAIT_S + while True: + async with http.post(url, json=body, timeout=timeout) as resp: + if 200 <= resp.status < 300: + data = await resp.json(content_type=None) + assert isinstance(data, dict) + return data + detail = await _read_error_body(resp) + if ( + is_snapshot + and resp.status == 400 + and detail is not None + and "not ready" in detail.lower() + and asyncio.get_event_loop().time() < deadline + ): + await asyncio.sleep(_SNAPSHOT_POLL_INTERVAL_S) + continue + raise ConfigurationError( + message=_http_error_message("POST /v2/box", resp.status, detail), + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context=_error_context(status=resp.status, detail=detail), + ) + + async def _poll_until_ready( + self, + http: aiohttp.ClientSession, + base_url: str, + box_id: str, + data: dict[str, Any], + ) -> dict[str, Any]: + deadline = asyncio.get_event_loop().time() + _CREATE_MAX_WAIT_S + while str(data.get("status", "")) == "creating": + if asyncio.get_event_loop().time() >= deadline: + raise ConfigurationError( + message="Box creation timed out", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": box_id}, + ) + await asyncio.sleep(_CREATE_POLL_INTERVAL_S) + async with http.get( + f"{base_url}/v2/box/{box_id}", + timeout=aiohttp.ClientTimeout(total=self._request_timeout_s), + ) as poll: + if poll.status == 200: + data = await poll.json(content_type=None) + if str(data.get("status", "")) == "error": + raise ConfigurationError( + message="Box creation failed", + error_code=ErrorCode.SANDBOX_CONFIG_INVALID, + op="start", + context={"backend": self.backend_id, "box_id": box_id}, + ) + return data + + +def _http_error_message(operation: str, status: int, detail: str | None) -> str: + message = f"{operation} failed: HTTP {status}" + if detail: + message += f": {detail}" + return message + + +def _error_context(*, status: int | None = None, detail: str | None = None) -> dict[str, object]: + context: dict[str, object] = {"backend": "upstash_box"} + if status is not None: + context["http_status"] = status + if detail: + context["provider_error"] = detail + return context + + +def _transport_error( + *, command: tuple[str, ...], cause: BaseException, operation: str +) -> ExecTransportError: + detail = str(cause) + provider_error = f"{type(cause).__name__}: {detail}" if detail else type(cause).__name__ + return ExecTransportError( + command=command, + context={ + "backend": "upstash_box", + "operation": operation, + "provider_error": provider_error, + }, + cause=cause, + message=f"Upstash Box {operation} transport failed: {provider_error}", + ) + + +async def _read_error_body(resp: aiohttp.ClientResponse) -> str | None: + try: + raw = await resp.read() + except Exception as e: + return f"failed to read error body: {e}" + text = raw.decode("utf-8", errors="replace").strip() + if not text: + return None + return text[:2000] + + +__all__ = [ + "UpstashBoxSandboxClient", + "UpstashBoxSandboxClientOptions", + "UpstashBoxSandboxSession", + "UpstashBoxSandboxSessionState", +] diff --git a/tests/extensions/sandbox/test_upstash_box.py b/tests/extensions/sandbox/test_upstash_box.py new file mode 100644 index 0000000000..03dbc9c8e9 --- /dev/null +++ b/tests/extensions/sandbox/test_upstash_box.py @@ -0,0 +1,430 @@ +from __future__ import annotations + +import base64 +import io +import json +from typing import Any + +import pytest + +from agents.extensions.sandbox.upstash_box import ( + UpstashBoxSandboxClient, + UpstashBoxSandboxClientOptions, + UpstashBoxSandboxSession, + UpstashBoxSandboxSessionState, +) +from agents.sandbox.errors import ConfigurationError, ExposedPortUnavailableError +from agents.sandbox.manifest import Manifest +from agents.sandbox.snapshot import resolve_snapshot +from agents.sandbox.types import ExposedPortEndpoint + + +class _FakeResponse: + def __init__(self, status: int = 200, json_body: Any = None, raw_body: bytes = b"") -> None: + self.status = status + self._json_body = json_body + self._raw_body = raw_body + + async def json(self, *, content_type: str | None = None) -> Any: + _ = content_type + if self._json_body is not None: + return self._json_body + return json.loads(self._raw_body) + + async def read(self) -> bytes: + if self._json_body is not None: + return json.dumps(self._json_body).encode() + return self._raw_body + + async def __aenter__(self) -> _FakeResponse: + return self + + async def __aexit__(self, *args: object) -> None: + _ = args + + +class _FakeHttp: + def __init__( + self, + responses: dict[str, _FakeResponse | list[_FakeResponse]] | None = None, + ) -> None: + self._responses: dict[tuple[str, str], _FakeResponse | list[_FakeResponse]] = {} + self.default_response = _FakeResponse(status=200, json_body={"ok": True}) + self.calls: list[dict[str, Any]] = [] + self.closed = False + if responses: + for key, val in responses.items(): + method, _, suffix = key.partition(" ") + self._responses[(method.upper(), suffix)] = val + + def _match(self, method: str, url: str) -> _FakeResponse: + # Prefer the most specific (longest) matching suffix. + best_key: tuple[str, str] | None = None + best: _FakeResponse | list[_FakeResponse] | None = None + best_len = -1 + for (m, suffix), resp in self._responses.items(): + if m == method and suffix in url and len(suffix) > best_len: + best_key = (m, suffix) + best, best_len = resp, len(suffix) + if best is None: + return self.default_response + if isinstance(best, list): + if not best: + return self.default_response + response = best.pop(0) + if best_key is not None and not best: + self._responses[best_key] = response + return response + return best + + def _record(self, method: str, url: str, **kwargs: Any) -> _FakeResponse: + self.calls.append({"method": method, "url": url, **kwargs}) + return self._match(method, url) + + def post(self, url: str, **kwargs: Any) -> _FakeResponse: + return self._record("POST", url, **kwargs) + + def get(self, url: str, **kwargs: Any) -> _FakeResponse: + return self._record("GET", url, **kwargs) + + def delete(self, url: str, **kwargs: Any) -> _FakeResponse: + return self._record("DELETE", url, **kwargs) + + async def close(self) -> None: + self.closed = True + + +def _make_session( + http: _FakeHttp, + *, + keep_alive: bool = False, + pause_on_exit: bool = False, +) -> UpstashBoxSandboxSession: + snapshot = resolve_snapshot(None, "00000000-0000-0000-0000-000000000000") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=snapshot, + box_id="box-test", + base_url="https://box.example.test", + keep_alive=keep_alive, + pause_on_exit=pause_on_exit, + exposed_ports=(3000,), + ) + return UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + + +@pytest.mark.asyncio +async def test_exec_internal_posts_argv_with_folder() -> None: + http = _FakeHttp({"POST exec": _FakeResponse(json_body={"exit_code": 0, "output": "hi\n"})}) + session = _make_session(http) + + result = await session._exec_internal("echo", "hi") + + assert result.exit_code == 0 + assert result.stdout == b"hi\n" + exec_call = next(c for c in http.calls if c["url"].endswith("/exec")) + assert exec_call["json"]["command"] == ["echo", "hi"] + assert exec_call["json"]["folder"] == "/workspace" + + +@pytest.mark.asyncio +async def test_exec_internal_reports_nonzero_exit() -> None: + http = _FakeHttp( + {"POST exec": _FakeResponse(json_body={"exit_code": 2, "output": "", "error": "boom"})} + ) + session = _make_session(http) + + result = await session._exec_internal("false") + + assert result.exit_code == 2 + assert result.stderr == b"boom" + assert not result.ok() + + +@pytest.mark.asyncio +async def test_box_read_file_decodes_base64() -> None: + payload = bytes([0x89, 0x50, 0x00, 0xFF]) + body = {"content": base64.b64encode(payload).decode("ascii")} + http = _FakeHttp({"GET files/read": _FakeResponse(json_body=body)}) + session = _make_session(http) + + data = await session._box_read_file("/workspace/bin.dat") + + assert data == payload + + +@pytest.mark.asyncio +async def test_box_write_file_sends_base64() -> None: + http = _FakeHttp({"POST files/write": _FakeResponse(status=200, json_body={})}) + session = _make_session(http) + + await session._box_write_file("/workspace/a.txt", b"hello") + + write_call = next(c for c in http.calls if c["url"].endswith("/files/write")) + assert write_call["json"]["path"] == "/workspace/a.txt" + assert write_call["json"]["encoding"] == "base64" + assert base64.b64decode(write_call["json"]["content"]) == b"hello" + + +@pytest.mark.asyncio +async def test_running_reflects_status() -> None: + http = _FakeHttp({"GET status": _FakeResponse(json_body={"status": "running"})}) + assert await _make_session(http).running() is True + + http2 = _FakeHttp({"GET status": _FakeResponse(json_body={"status": "paused"})}) + assert await _make_session(http2).running() is False + + +@pytest.mark.asyncio +async def test_resolve_exposed_port_parses_preview_url() -> None: + body = {"url": "https://3000-box.example.test/?token=abc", "port": 3000} + http = _FakeHttp({"POST preview": _FakeResponse(json_body=body)}) + session = _make_session(http) + + endpoint = await session.resolve_exposed_port(3000) + + assert endpoint == ExposedPortEndpoint( + host="3000-box.example.test", port=443, tls=True, query="token=abc" + ) + + +@pytest.mark.asyncio +async def test_resolve_exposed_port_rejects_unconfigured_port() -> None: + session = _make_session(_FakeHttp()) + with pytest.raises(ExposedPortUnavailableError): + await session.resolve_exposed_port(9999) + + +@pytest.mark.asyncio +async def test_shutdown_deletes_by_default() -> None: + http = _FakeHttp() + session = _make_session(http) + + await session.shutdown() + + assert any(c["method"] == "DELETE" for c in http.calls) + assert not any(c["url"].endswith("/pause") for c in http.calls) + + +@pytest.mark.asyncio +async def test_shutdown_pauses_when_pause_on_exit() -> None: + http = _FakeHttp() + session = _make_session(http, pause_on_exit=True) + + await session.shutdown() + + assert any(c["url"].endswith("/pause") for c in http.calls) + assert not any(c["method"] == "DELETE" for c in http.calls) + + +@pytest.mark.asyncio +async def test_shutdown_leaves_keep_alive_box_running() -> None: + http = _FakeHttp() + session = _make_session(http, keep_alive=True) + + await session.shutdown() + + assert not any(c["method"] == "DELETE" for c in http.calls) + assert not any(c["url"].endswith("/pause") for c in http.calls) + + +@pytest.mark.asyncio +async def test_persist_and_hydrate_roundtrip_via_tar() -> None: + tar_bytes = _make_tar_bytes() + persist_http = _FakeHttp( + { + "POST exec": _FakeResponse(json_body={"exit_code": 0, "output": ""}), + "GET files/read": _FakeResponse( + json_body={"content": base64.b64encode(tar_bytes).decode("ascii")} + ), + } + ) + session = _make_session(persist_http) + + stream = await session.persist_workspace() + assert stream.read() == tar_bytes + + hydrate_http = _FakeHttp( + { + "POST exec": _FakeResponse(json_body={"exit_code": 0, "output": ""}), + "POST files/write": _FakeResponse(status=200, json_body={}), + } + ) + hydrate_session = _make_session(hydrate_http) + await hydrate_session.hydrate_workspace(io.BytesIO(tar_bytes)) + assert any(c["url"].endswith("/files/write") for c in hydrate_http.calls) + + +@pytest.mark.asyncio +async def test_client_create_builds_box(monkeypatch: pytest.MonkeyPatch) -> None: + client = UpstashBoxSandboxClient(api_key="key") + + async def _fake_create_box(**kwargs: Any) -> str: + return "box-created" + + monkeypatch.setattr(client, "_create_box", _fake_create_box) + options = UpstashBoxSandboxClientOptions(size="medium", env_vars={"K": "V"}) + + session = await client.create(options=options) + inner = session._inner + assert isinstance(inner, UpstashBoxSandboxSession) + assert inner.state.box_id == "box-created" + assert inner.state.base_env_vars == {"K": "V"} + + +@pytest.mark.asyncio +async def test_client_post_box_create_accepts_created_response() -> None: + client = UpstashBoxSandboxClient(api_key="key") + http = _FakeHttp({"POST v2/box": _FakeResponse(status=201, json_body={"id": "box-created"})}) + + data = await client._post_box_create( + http, # type: ignore[arg-type] + "https://box.example.test/v2/box", + {}, + timeout=None, # type: ignore[arg-type] + is_snapshot=False, + ) + + assert data == {"id": "box-created"} + + +@pytest.mark.asyncio +async def test_client_resume_reconnects_to_running_box() -> None: + client = UpstashBoxSandboxClient(api_key="key") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=resolve_snapshot(None, "11111111-1111-1111-1111-111111111111"), + box_id="box-test", + base_url="https://box.example.test", + ) + http = _FakeHttp({"GET v2/box/box-test": _FakeResponse(json_body={"status": "running"})}) + + # Inject the fake transport for the reconnect probe. + state_session = UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + # Succeeds without raising when the box is reachable. + await client._reconnect(state_session, state) + + +@pytest.mark.asyncio +async def test_client_resume_resumes_paused_box() -> None: + client = UpstashBoxSandboxClient(api_key="key") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=resolve_snapshot(None, "33333333-3333-3333-3333-333333333333"), + box_id="box-test", + base_url="https://box.example.test", + ) + http = _FakeHttp( + { + "GET v2/box/box-test": [ + _FakeResponse(json_body={"status": "paused"}), + _FakeResponse(json_body={"status": "running"}), + ], + "POST v2/box/box-test/resume": _FakeResponse(status=200, json_body={}), + } + ) + state_session = UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + + await client._reconnect(state_session, state) + assert any(c["url"].endswith("/resume") for c in http.calls) + + +@pytest.mark.asyncio +async def test_client_resume_waits_for_creating_box() -> None: + client = UpstashBoxSandboxClient(api_key="key") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=resolve_snapshot(None, "55555555-5555-5555-5555-555555555555"), + box_id="box-test", + base_url="https://box.example.test", + ) + http = _FakeHttp( + { + "GET v2/box/box-test": [ + _FakeResponse(json_body={"status": "creating"}), + _FakeResponse(json_body={"status": "idle"}), + ], + } + ) + state_session = UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + + await client._reconnect(state_session, state) + assert len([c for c in http.calls if c["method"] == "GET"]) == 2 + + +@pytest.mark.asyncio +async def test_client_resume_rejects_error_status() -> None: + client = UpstashBoxSandboxClient(api_key="key") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=resolve_snapshot(None, "66666666-6666-6666-6666-666666666666"), + box_id="box-test", + base_url="https://box.example.test", + ) + http = _FakeHttp({"GET v2/box/box-test": _FakeResponse(json_body={"status": "error"})}) + state_session = UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + + with pytest.raises(ConfigurationError): + await client._reconnect(state_session, state) + + +@pytest.mark.asyncio +async def test_client_resume_raises_when_box_missing() -> None: + client = UpstashBoxSandboxClient(api_key="key") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=resolve_snapshot(None, "22222222-2222-2222-2222-222222222222"), + box_id="gone", + base_url="https://box.example.test", + ) + http = _FakeHttp({"GET v2/box/gone": _FakeResponse(status=404)}) + state_session = UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + + # A genuinely missing box is surfaced, not silently treated as a fresh workspace. + with pytest.raises(ConfigurationError): + await client._reconnect(state_session, state) + + +@pytest.mark.asyncio +async def test_client_resume_fails_fast_on_transient_error() -> None: + client = UpstashBoxSandboxClient(api_key="key") + state = UpstashBoxSandboxSessionState( + manifest=Manifest(), + snapshot=resolve_snapshot(None, "44444444-4444-4444-4444-444444444444"), + box_id="box-test", + base_url="https://box.example.test", + ) + # A transient 503 must raise (fail fast) rather than degrade to a manifest re-apply. + http = _FakeHttp({"GET v2/box/box-test": _FakeResponse(status=503)}) + state_session = UpstashBoxSandboxSession.from_state(state, api_key="key", http=http) + + with pytest.raises(ConfigurationError): + await client._reconnect(state_session, state) + + +@pytest.mark.asyncio +async def test_shutdown_failure_is_best_effort_but_not_silent( + caplog: pytest.LogCaptureFixture, +) -> None: + # A failed delete during cleanup must not raise, but must be logged (not silently swallowed). + http = _FakeHttp( + {"DELETE v2/box/box-test": _FakeResponse(status=500, json_body={"error": "boom"})} + ) + session = _make_session(http) + + with caplog.at_level("WARNING"): + await session.shutdown() + + assert any("box-test" in record.getMessage() for record in caplog.records) + + +def _make_tar_bytes() -> bytes: + import tarfile + + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w") as tar: + data = b"hello\n" + info = tarfile.TarInfo(name="hello.txt") + info.size = len(data) + tar.addfile(info, io.BytesIO(data)) + return buf.getvalue() diff --git a/tests/sandbox/test_client_options.py b/tests/sandbox/test_client_options.py index 8c71dc4028..a7f6c56f72 100644 --- a/tests/sandbox/test_client_options.py +++ b/tests/sandbox/test_client_options.py @@ -8,6 +8,7 @@ from agents.extensions.sandbox.cloudflare import CloudflareSandboxClientOptions from agents.extensions.sandbox.daytona import DaytonaSandboxClientOptions from agents.extensions.sandbox.e2b import E2BSandboxClientOptions +from agents.extensions.sandbox.upstash_box import UpstashBoxSandboxClientOptions from agents.sandbox.config import DEFAULT_PYTHON_SANDBOX_IMAGE from agents.sandbox.sandboxes import DockerSandboxClientOptions, UnixLocalSandboxClientOptions from agents.sandbox.session import BaseSandboxClientOptions @@ -69,6 +70,7 @@ def test_sandbox_client_options_exclude_unset_preserves_type_discriminator() -> E2BSandboxClientOptions(sandbox_type="e2b", template="base"), DaytonaSandboxClientOptions(image=DEFAULT_PYTHON_SANDBOX_IMAGE), CloudflareSandboxClientOptions(worker_url="https://example.com"), + UpstashBoxSandboxClientOptions(api_key="key"), ], ) def test_sandbox_client_options_roundtrip_preserves_concrete_type( diff --git a/tests/sandbox/test_compatibility_guards.py b/tests/sandbox/test_compatibility_guards.py index 5a11e5bf77..accb1a9f43 100644 --- a/tests/sandbox/test_compatibility_guards.py +++ b/tests/sandbox/test_compatibility_guards.py @@ -333,6 +333,15 @@ def test_core_sandbox_public_export_surface_is_stable() -> None: "VercelSandboxSessionState", }, ), + ( + "agents.extensions.sandbox.upstash_box", + { + "UpstashBoxSandboxClient", + "UpstashBoxSandboxClientOptions", + "UpstashBoxSandboxSession", + "UpstashBoxSandboxSessionState", + }, + ), ], ) def test_extension_sandbox_package_export_surfaces_are_stable( @@ -510,6 +519,24 @@ def test_optional_sandbox_dataclass_constructor_field_order_is_stable( "network_policy", ), ), + ( + "agents.extensions.sandbox.upstash_box", + "UpstashBoxSandboxClientOptions", + ( + "api_key", + "base_url", + "name", + "size", + "runtime", + "keep_alive", + "network_policy", + "git_token", + "snapshot_id", + "pause_on_exit", + "env_vars", + "exposed_ports", + ), + ), ], ) def test_optional_sandbox_client_options_positional_field_order_is_stable( @@ -745,6 +772,26 @@ def test_optional_sandbox_client_options_positional_field_order_is_stable( "network_policy", ), ), + ( + "agents.extensions.sandbox.upstash_box", + "UpstashBoxSandboxSessionState", + ( + "type", + "session_id", + "snapshot", + "manifest", + "exposed_ports", + "snapshot_fingerprint", + "snapshot_fingerprint_version", + "workspace_root_ready", + "box_id", + "base_url", + "base_env_vars", + "keep_alive", + "pause_on_exit", + "snapshot_id", + ), + ), ], ) def test_sandbox_session_state_field_order_is_stable( @@ -786,6 +833,12 @@ def test_sandbox_session_state_field_order_is_stable( ("agents.extensions.sandbox.daytona", "DaytonaSandboxClientOptions", (), "daytona"), ("agents.extensions.sandbox.runloop", "RunloopSandboxClientOptions", (), "runloop"), ("agents.extensions.sandbox.vercel", "VercelSandboxClientOptions", (), "vercel"), + ( + "agents.extensions.sandbox.upstash_box", + "UpstashBoxSandboxClientOptions", + (), + "upstash_box", + ), ], ) def test_optional_sandbox_client_options_json_round_trip_preserves_type( @@ -851,6 +904,11 @@ def test_optional_sandbox_client_options_json_round_trip_preserves_type( "VercelSandboxSessionState", {"sandbox_id": "sandbox-123"}, ), + ( + "agents.extensions.sandbox.upstash_box", + "UpstashBoxSandboxSessionState", + {"box_id": "box-123", "base_url": "https://box.example.test"}, + ), ], ) def test_optional_sandbox_session_state_json_round_trip_preserves_type( @@ -901,6 +959,16 @@ def test_core_discriminator_type_strings_are_stable() -> None: ("agents.sandbox.sandboxes.unix_local", "UnixLocalSandboxSessionState", "unix_local"), ("agents.sandbox.sandboxes.docker", "DockerSandboxClientOptions", "docker"), ("agents.sandbox.sandboxes.docker", "DockerSandboxSessionState", "docker"), + ( + "agents.extensions.sandbox.upstash_box", + "UpstashBoxSandboxClientOptions", + "upstash_box", + ), + ( + "agents.extensions.sandbox.upstash_box", + "UpstashBoxSandboxSessionState", + "upstash_box", + ), ], ) def test_optional_sandbox_discriminator_type_strings_are_stable( diff --git a/tests/test_run_examples_script.py b/tests/test_run_examples_script.py index 09794c4569..49fb4e7747 100644 --- a/tests/test_run_examples_script.py +++ b/tests/test_run_examples_script.py @@ -13,6 +13,7 @@ def test_default_auto_skip_excludes_prerequisite_bound_examples() -> None: "examples/sandbox/docker/mounts/s3_mount_read_write.py", "examples/sandbox/extensions/daytona/usaspending_text2sql/setup_db.py", "examples/sandbox/extensions/temporal/temporal_sandbox_agent.py", + "examples/sandbox/extensions/upstash_box_runner.py", "examples/sandbox/extensions/vercel_runner.py", "examples/sandbox/memory_s3.py", "examples/sandbox/misc/reference_policy_mcp_server.py", diff --git a/uv.lock b/uv.lock index 685cfbac6d..f522bd721c 100644 --- a/uv.lock +++ b/uv.lock @@ -9,7 +9,7 @@ resolution-markers = [ ] [options] -exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. +exclude-newer = "2026-06-03T15:18:58.611598Z" exclude-newer-span = "P7D" [[package]] @@ -2492,6 +2492,9 @@ temporal = [ { name = "temporalio" }, { name = "textual" }, ] +upstash-box = [ + { name = "aiohttp" }, +] vercel = [ { name = "vercel" }, ] @@ -2542,6 +2545,7 @@ dev = [ requires-dist = [ { name = "aiohttp", marker = "extra == 'blaxel'", specifier = ">=3.12,<4" }, { name = "aiohttp", marker = "extra == 'cloudflare'", specifier = ">=3.12,<4" }, + { name = "aiohttp", marker = "extra == 'upstash-box'", specifier = ">=3.12,<4" }, { name = "any-llm-sdk", marker = "python_full_version >= '3.11' and extra == 'any-llm'", specifier = ">=1.11.0,<2" }, { name = "asyncpg", marker = "extra == 'sqlalchemy'", specifier = ">=0.29.0" }, { name = "blaxel", marker = "extra == 'blaxel'", specifier = ">=0.2.50" }, @@ -2575,7 +2579,7 @@ requires-dist = [ { name = "websockets", marker = "extra == 'realtime'", specifier = ">=15.0,<17" }, { name = "websockets", marker = "extra == 'voice'", specifier = ">=15.0,<17" }, ] -provides-extras = ["voice", "viz", "litellm", "any-llm", "realtime", "sqlalchemy", "encrypt", "redis", "dapr", "mongodb", "docker", "blaxel", "daytona", "cloudflare", "e2b", "modal", "runloop", "vercel", "s3", "temporal"] +provides-extras = ["voice", "viz", "litellm", "any-llm", "realtime", "sqlalchemy", "encrypt", "redis", "dapr", "mongodb", "docker", "blaxel", "daytona", "cloudflare", "e2b", "modal", "runloop", "vercel", "upstash-box", "s3", "temporal"] [package.metadata.requires-dev] dev = [