From 8b50281b8f7ee4ca84d10e47a0840e5605ca1679 Mon Sep 17 00:00:00 2001 From: Sam Xu Date: Sun, 3 May 2026 19:57:47 -0700 Subject: [PATCH 1/2] feat(commonly): commonly_attach_file extension tool + path-policy plugin-sdk export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the kernel-layer file-delivery verb described in ADR-013 (Team-Commonly/commonly). Agents call commonly_attach_file with a workspace-relative path; the tool reads the file, uploads it via the agent runtime endpoint, and (optionally) posts a chat message containing the [[upload:...]] directive that the v2 inspector renders as a clickable preview pill. Three changes: 1. extensions/commonly/src/client.ts — new uploadFile() method that posts multipart/form-data to /api/agents/runtime/pods/:podId/uploads using the runtime token. Returns {_id, fileName, originalName, size, kind} which the tool embeds in the directive. 2. extensions/commonly/src/tools.ts — new commonly_attach_file tool. Validates the path stays inside /workspace/<accountId>/ via toRelativeWorkspacePath, reads up to 25 MB, detects MIME from extension (server validates the allowlist), uploads, and either posts a message with the directive or returns metadata for the caller to compose its own. 3. src/plugin-sdk/index.ts — exports toRelativeWorkspacePath / toRelativeSandboxPath / resolvePathFromInput from src/agents/path-policy. Previously these were available in core but not visible to plugins. Runtime-agnostic by construction: any runtime that produces a file in the agent workspace can deliver it to chat through this verb. No skill or runtime mechanism is hard-coded. 
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 extensions/commonly/src/client.ts |  54 +++++++++++++
 extensions/commonly/src/tools.ts  | 125 ++++++++++++++++++++++++++++++
 src/plugin-sdk/index.ts           |   9 +++
 3 files changed, 188 insertions(+)

diff --git a/extensions/commonly/src/client.ts b/extensions/commonly/src/client.ts
index fb922889c67a..237743811ca8 100644
--- a/extensions/commonly/src/client.ts
+++ b/extensions/commonly/src/client.ts
@@ -204,6 +204,60 @@ export class CommonlyClient {
     return res.json();
   }
 
+  /**
+   * Upload a file to a pod via the agent runtime endpoint.
+   *
+   * Sends a multipart/form-data POST to
+   * /api/agents/runtime/pods/:podId/uploads, authenticated with the runtime
+   * token. The returned metadata can be embedded in a [[upload:...]]
+   * directive and posted via postMessage.
+   *
+   * @param podId Pod to upload into; percent-encoded before it is placed in
+   *   the URL path so characters such as "/" or "?" cannot alter the route.
+   * @param fileBytes Raw file contents.
+   * @param originalName File name reported to the server for the form part.
+   * @param mimeType Content type for the part; falls back to
+   *   application/octet-stream when omitted or empty.
+   * @returns The parsed JSON response body.
+   * @throws Error if no runtime token is configured or the server responds
+   *   with a non-2xx status (status and response text are in the message).
+   */
+  async uploadFile(
+    podId: string,
+    fileBytes: Uint8Array,
+    originalName: string,
+    mimeType?: string,
+  ): Promise<{
+    _id: string;
+    fileName: string;
+    originalName: string;
+    size: number;
+    kind: string;
+  }> {
+    const token = this.config.runtimeToken?.trim();
+    if (!token) {
+      throw new Error('Commonly runtime token is required');
+    }
+
+    const form = new FormData();
+    const blob = new Blob([fileBytes], { type: mimeType || 'application/octet-stream' });
+    form.append('file', blob, originalName);
+
+    const res = await fetch(
+      `${this.config.baseUrl}/api/agents/runtime/pods/${encodeURIComponent(podId)}/uploads`,
+      {
+        method: 'POST',
+        headers: { Authorization: `Bearer ${token}` },
+        body: form,
+      },
+    );
+    if (!res.ok) {
+      const text = await res.text().catch(() => '');
+      throw new Error(`Failed to upload file: ${res.status} ${text}`);
+    }
+    return res.json();
+  }
+
   /**
    * Post a comment to a thread
    */
diff --git a/extensions/commonly/src/tools.ts b/extensions/commonly/src/tools.ts
index e2d74b8030c4..9d0dd619cb9d 100644
--- a/extensions/commonly/src/tools.ts
+++ b/extensions/commonly/src/tools.ts
@@ -8,11 +8,45 @@ import {
   jsonResult,
   readNumberParam,
   readStringParam,
+  toRelativeWorkspacePath,
 } from "openclaw/plugin-sdk";
 import {
parseInlineDirectives } from "./directive-tags.js";
 import type { MemorySectionName, MemoryVisibility } from "./client.js";
 
+// Best-effort extension → MIME hints for commonly_attach_file; the backend enforces the ADR-002 allowlist.
+const MIME_BY_EXT: Record<string, string> = {
+  pdf: "application/pdf",
+  docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+  xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+  pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+  doc: "application/msword",
+  xls: "application/vnd.ms-excel",
+  ppt: "application/vnd.ms-powerpoint",
+  csv: "text/csv",
+  tsv: "text/tab-separated-values",
+  txt: "text/plain",
+  md: "text/markdown",
+  json: "application/json",
+  yaml: "application/x-yaml",
+  yml: "application/x-yaml",
+  html: "text/html",
+  xml: "application/xml",
+  svg: "image/svg+xml",
+  png: "image/png",
+  jpg: "image/jpeg",
+  jpeg: "image/jpeg",
+  gif: "image/gif",
+  webp: "image/webp",
+};
+
+// Extension → MIME hint, or undefined when absent/unknown. Own-key lookup only, so "x.constructor" cannot hit Object.prototype.
+function detectMimeFromPath(filePath: string): string | undefined {
+  const dot = filePath.lastIndexOf(".");
+  const ext = dot === -1 ? "" : filePath.slice(dot + 1).toLowerCase();
+  return Object.prototype.hasOwnProperty.call(MIME_BY_EXT, ext) ? MIME_BY_EXT[ext] : undefined;
+}
+
 // ADR-003 Phase 2 section taxonomy — mirrors the backend validator in
 // backend/routes/agentsRuntime.ts validateSectionsPayload. Keep in sync.
 const ALL_SECTIONS: ReadonlyArray = [
@@ -336,6 +370,97 @@ export class CommonlyTools {
           return jsonResult({ ok: true, message: result });
         },
       },
+      {
+        name: "commonly_attach_file",
+        label: "Commonly Attach File",
+        description:
+          "Attach a file from your workspace to pod chat. Use after producing a deliverable (PDF, DOCX, XLSX, PPTX, CSV, MD, image). " +
+          "Reads the file from /workspace/<accountId>/, uploads it via the runtime upload endpoint, and posts a chat message " +
+          "with an inline [[upload:...]] directive that the recipient renders as a clickable preview pill. " +
+          "Examples: after `pandoc input.md -o report.pdf`, call commonly_attach_file({ podId, filePath: 'report.pdf', message: 'Q1 brief attached.' }). " +
+          "After `officecli create deck.pptx && officecli add ...`, call commonly_attach_file({ podId, filePath: 'deck.pptx', message: 'Stakeholder deck.' }). " +
+          "Path must stay inside the agent workspace (no '..', no symlinks pointing outside) — escape attempts are rejected. " +
+          "Max file size 25 MB. If `message` is omitted, returns file metadata so you can compose your own message.",
+        parameters: Type.Object({
+          podId: Type.String({ description: "Pod ID to post the attachment into." }),
+          filePath: Type.String({
+            description:
+              "File path relative to the agent's workspace root (e.g. 'report.pdf' or 'output/deck.pptx'). Must not escape the workspace.",
+          }),
+          message: Type.Optional(
+            Type.String({
+              description:
+                "Optional caption text. If provided, a chat message is posted with the caption followed by the upload directive. If omitted, returns file metadata for the caller to compose its own message.",
+            }),
+          ),
+          replyToId: Type.Optional(
+            Type.String({ description: "Optional message ID to reply to (creates a threaded reply)." }),
+          ),
+        }),
+        // Flow: resolve filePath inside the workspace, read it, enforce the 25 MB cap and
+        // upload it; then post caption + directive when `message` is non-empty, otherwise
+        // return metadata + directive. Throws Error on boundary violation, read failure, oversize.
+        async execute(_id: string, params: Record<string, unknown>) {
+          const podId = readStringParam(params, "podId", { required: true });
+          const filePath = readStringParam(params, "filePath", { required: true });
+          const caption = readStringParam(params, "message");
+          const replyToId = readStringParam(params, "replyToId") || undefined;
+          // Thrown values are not guaranteed to be Error instances; narrow before use.
+          const reason = (err: unknown) => (err instanceof Error ? err.message : String(err));
+
+          // Workspace boundary: validate the path stays inside /workspace/<accountId>/
+          // before reading any bytes, via the plugin-sdk path-policy helper.
+          // NOTE(review): confirm the helper also rejects symlinks that resolve outside
+          // the workspace; if it is lexical only, add a realpath check here.
+          const accountId = process.env.OPENCLAW_ACCOUNT_ID || "default";
+          const workspaceRoot = `/workspace/${accountId}`;
+          let safeRelative: string;
+          try {
+            safeRelative = toRelativeWorkspacePath(workspaceRoot, filePath);
+          } catch (err) {
+            throw new Error(
+              `commonly_attach_file: workspace boundary violation — ${reason(err)}`,
+            );
+          }
+          const absolutePath = `${workspaceRoot}/${safeRelative}`;
+
+          // Read bytes (size cap enforced before upload).
+          // NOTE(review): the file is fully buffered before the cap is checked; a
+          // stat-based pre-check would avoid loading oversized files into memory.
+          const MAX_BYTES = 25 * 1024 * 1024;
+          let bytes: Buffer;
+          try {
+            bytes = readFileSync(absolutePath);
+          } catch (err) {
+            throw new Error(
+              `commonly_attach_file: cannot read file at '${filePath}' — ${reason(err)}`,
+            );
+          }
+          if (bytes.length > MAX_BYTES) {
+            throw new Error(
+              `commonly_attach_file: file size ${bytes.length} bytes exceeds 25 MB limit`,
+            );
+          }
+
+          // Detect MIME from extension. Server validates against the allowlist.
+          const mimeType = detectMimeFromPath(safeRelative);
+          const originalName = safeRelative.split("/").pop() || safeRelative;
+
+          // Buffer is a Uint8Array subclass, so it is passed as-is (no second copy).
+          const uploaded = await client.uploadFile(podId, bytes, originalName, mimeType);
+          // Built once so the posted message and the returned value cannot diverge.
+          const directive = `[[upload:${uploaded.fileName}|${uploaded.originalName}|${uploaded.size}|${uploaded.kind}|${uploaded._id}]]`;
+
+          if (caption !== undefined && caption !== "") {
+            const content = `${caption}\n${directive}`;
+            const result = await client.postMessage(podId, content, {}, replyToId);
+            return jsonResult({ ok: true, file: uploaded, message: result });
+          }
+
+          // Caller composes its own message — return metadata + the ready-made directive.
+          return jsonResult({ ok: true, file: uploaded, directive });
+        },
+      },
       {
         name: "commonly_post_thread_comment",
         label: "Commonly Post Thread Comment",
diff --git a/src/plugin-sdk/index.ts b/src/plugin-sdk/index.ts
index ca3f54a479bc..48c5fa99119b 100644
--- a/src/plugin-sdk/index.ts
+++ b/src/plugin-sdk/index.ts
@@ -797,3 +797,12 @@ export type { ContextEngineFactory } from "../context-engine/registry.js";
 
 // Security utilities
 export { redactSensitiveText } from "../logging/redact.js";
+
+// Path-policy utilities — workspace-boundary enforcement for plugins that
+// accept caller-supplied filesystem paths. Rejects '..', absolute paths,
+// and resolved paths that escape the workspace root.
+export {
+  toRelativeWorkspacePath,
+  toRelativeSandboxPath,
+  resolvePathFromInput,
+} from "../agents/path-policy.js";
From acd8d2482e6bb38b9b17f1d23d7c8d19f5d66a69 Mon Sep 17 00:00:00 2001 From: Sam Xu Date: Sun, 3 May 2026 20:13:52 -0700 Subject: [PATCH 2/2] =?UTF-8?q?feat(gateway):=20doc-toolchain=20build-arg?= =?UTF-8?q?=20=E2=80=94=20OfficeCLI=20+=20pandoc=20+=20markitdown?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds OPENCLAW_INSTALL_DOC_TOOLCHAIN gated install of the document-generation toolchain that backs commonly_attach_file's deliverable flow per ADR-013 (Team-Commonly/commonly#287). Same opt-in pattern as the existing OPENCLAW_INSTALL_GH_CLI / OPENCLAW_INSTALL_DOCKER_CLI / OPENCLAW_INSTALL_BROWSER flags so upstream openclaw's default image stays unchanged. What gets installed (~170 MB total when enabled): * OfficeCLI (iOfficeAI, Apache-2.0): pinned static binary for DOCX/XLSX/PPTX create+edit+validate. Pinned via OPENCLAW_OFFICECLI_VERSION (default 1.0.70). 
Downloads platform-appropriate asset, verifies SHA256 against the release's published SHA256SUMS artifact, installs to /usr/local/bin/officecli. * pandoc + texlive-xetex + texlive-fonts-recommended: markdown → PDF (LaTeX engine), markdown → simple DOCX fallback. * poppler-utils: pdftoppm/pdftotext for PDF-skill workflows. * python3 + pip + markitdown + pypdf: parse direction — agent reads user-attached PDFs/DOCX/XLSX and converts to markdown for input. Build-time self-test runs `officecli --version`, `pandoc --version`, and a `python3 -c "import markitdown, pypdf"` check so a regression surfaces at build time rather than as "command not found" at agent runtime. Architecture-aware: x86_64 → officecli-linux-x64; aarch64 → officecli-linux-arm64; other arches fail the build (rather than silently skipping). Commonly's deploy invocation enables this with --build-arg OPENCLAW_INSTALL_DOC_TOOLCHAIN=1. Upstream builds unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/Dockerfile b/Dockerfile index 76a472b20a4c..e16b2f64dc8a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -202,6 +202,60 @@ RUN if [ -n "$OPENCLAW_INSTALL_GH_CLI" ]; then \ rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*; \ fi +# Optional: install document-generation toolchain for the commonly extension's +# commonly_attach_file flow. Adds ~170MB total. Build with: +# --build-arg OPENCLAW_INSTALL_DOC_TOOLCHAIN=1 +# Includes: +# - OfficeCLI (iOfficeAI, Apache-2.0): single ~30MB static binary for +# DOCX/XLSX/PPTX create + edit + validate, LLM-optimized addressing. +# Pinned to OPENCLAW_OFFICECLI_VERSION; SHA256 verified against the +# SHA256SUMS artifact published on the release. +# - pandoc + texlive-xetex + texlive-fonts-recommended (~80MB): md → PDF +# via LaTeX engine, md → simple DOCX fallback. +# - poppler-utils: pdftoppm / pdftotext for PDF-skill workflows. 
+#   - python3 + pip + markitdown + pypdf: parse direction (binary doc → md
+#     for agent input).
+ARG OPENCLAW_INSTALL_DOC_TOOLCHAIN=""
+ARG OPENCLAW_OFFICECLI_VERSION="1.0.70"
+RUN if [ -n "$OPENCLAW_INSTALL_DOC_TOOLCHAIN" ]; then \
+      apt-get update && \
+      DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates curl \
+        pandoc texlive-xetex texlive-fonts-recommended \
+        poppler-utils python3 python3-pip && \
+      apt-get clean && \
+      rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/* && \
+      \
+      # OfficeCLI: download pinned binary, verify SHA256 against the
+      # release's SHA256SUMS artifact, install to /usr/local/bin.
+      ARCH="$(uname -m)" && \
+      case "$ARCH" in \
+        x86_64) ASSET="officecli-linux-x64" ;; \
+        aarch64) ASSET="officecli-linux-arm64" ;; \
+        *) echo "Unsupported architecture for OfficeCLI: $ARCH" >&2; exit 1 ;; \
+      esac && \
+      RELEASE_URL="https://github.com/iOfficeAI/OfficeCLI/releases/download/v${OPENCLAW_OFFICECLI_VERSION}" && \
+      curl -fsSL "${RELEASE_URL}/${ASSET}" -o /usr/local/bin/officecli && \
+      curl -fsSL "${RELEASE_URL}/SHA256SUMS" -o /tmp/officecli-SHA256SUMS && \
+      ( cd /usr/local/bin && \
+        EXPECTED="$(grep " ${ASSET}\$" /tmp/officecli-SHA256SUMS | awk '{print $1}')" && \
+        if [ -z "$EXPECTED" ]; then echo "OfficeCLI SHA256 not found for ${ASSET}" >&2; exit 1; fi && \
+        echo "${EXPECTED} officecli" | sha256sum -c - ) && \
+      rm -f /tmp/officecli-SHA256SUMS && \
+      chmod +x /usr/local/bin/officecli && \
+      \
+      # Python parse-direction utilities. --break-system-packages is required
+      # on Debian Bookworm's PEP-668-protected system Python.
+      pip3 install --break-system-packages --no-cache-dir \
+        markitdown pypdf && \
+      \
+      # Self-test the toolchain so a regression surfaces at build time, not at agent
+      # runtime. pandoc runs once unpiped first: `| head` alone would mask its exit status.
+      officecli --version && \
+      pandoc --version >/dev/null && pandoc --version | head -1 && \
+      python3 -c "import markitdown, pypdf; print('parse-direction OK')"; \
+    fi
+
 # Normalize extension paths so plugin safety checks do not reject
 # world-writable directories inherited from source file modes.
 RUN for dir in /app/extensions /app/.agent /app/.agents; do \