diff --git a/.agents/skills/summon/SKILL.md b/.agents/skills/summon/SKILL.md index 7a2bbc8..fafa29b 100644 --- a/.agents/skills/summon/SKILL.md +++ b/.agents/skills/summon/SKILL.md @@ -1,6 +1,6 @@ --- name: summon -description: "Build, debug, or integrate Summon sandboxed generative UI: SurfacePlan contracts, contract-first prompts, JSONL protocol streaming, host-owned capabilities/resources, PolicyEngine grants, StreamGraph diagnostics, safety smoke tests, and adoption docs. Use when working in the Summon repo, adding capabilities/resources/workers/approval actions, debugging validation or sandbox behavior, or creating agent-authored Summon UIs." +description: "Build, debug, or integrate Summon sandboxed generative UI: SurfacePolicy contracts, Arrow JSONL artifact streaming, host-owned tools/resources, PolicyEngine grants, StreamGraph diagnostics, safety smoke tests, and adoption docs. Use when working in the Summon repo, adding tools/resources/workers/approval actions, debugging validation or sandbox behavior, or creating agent-authored Summon UIs." --- # Summon @@ -19,31 +19,30 @@ host app. native-wrapper behavior. 6. Read `docs/adoption/security.md` before changing sandbox, CSP, grants, script policy, worker, approval, or production-tier behavior. -7. Read `docs/adoption/debugging.md` before changing validation, repair, - stream graph, protocol, Devtools, or sandbox diagnostics. +7. Read `docs/adoption/debugging.md` before changing validation, stream graph, + protocol, Devtools, or sandbox diagnostics. ## Core Architecture Follow this path unless the user explicitly asks for a runtime redesign: ```txt -host capability registry +host tool registry -> SurfacePolicy: tier/grants/components/purpose/persistence -> compiled SurfacePlan: purpose/runtime/data/authority/persistence - -> createCapabilityRegistry(...).toContract() + -> createToolRegistry(...).toContract() -> compileSystemContracts() - -> protocol hardener and repair feedback - -> SectionAccumulator and StreamGraph + -> Arrow protocol hardener + -> StreamGraph artifact diagnostics -> PolicyEngine and spawnSandbox() ``` -Capabilities are host-owned. The model sees the contract; the host owns -handlers, network, credentials, state, grants, and the selected `SurfacePolicy`. +Tools are host-owned. The model sees the contract; the host owns handlers, +network, credentials, state, grants, and the selected `SurfacePolicy`. Generated artifacts must not emit or widen `/surface-policy` or `/surface-plan`. -New generation servers should prefer `runSurfaceGeneration(input, emit)` from -`@anarchitecture/summon-server`; `generateSurfaceStream()` remains available for -existing async-generator integrations. Applications should consume built public +Generation servers should use `runSurfaceGeneration(input, emit)` from +`@anarchitecture/summon-server`. Applications should consume built public package exports, not `src/*.ts` paths or `@summon-internal/*` packages. Use `defineAction` and `defineDataResource` for common host-backed @@ -54,12 +53,11 @@ host approval adapter. ## Safe Output Rules - Keep the iframe null-origin. Do not add `allow-same-origin`. -- Grant intents and capabilities from the host with `grantedIntents` and - `grantedCapabilities`; never trust artifact-declared intents or capabilities - as permission. -- Prefer declarative interactive surfaces with `scriptPolicy: "forbid"` and - `data-summon-*` bindings. Treat `scriptPolicy: "allow"` as an escalation for - hosts that intentionally permit custom artifact scripts. +- Grant tools from the host with `grantedTools`; never trust artifact-declared + tools as permission. +- Prefer Arrow-native generated artifacts with `host-bridge:summon` and + `callTool()`. Generated custom scripts, legacy runtime controls, and raw + section/fragment protocols are rejected before generation or at the parser. - Use `defineDataResource` for host-backed async data, with loading, error, and data state keys. - Resource UIs must render loading, error, and data states. @@ -70,14 +68,14 @@ host approval adapter. ## Debug Loop For generation failures, inspect `/error`, `/validation-summary`, -`/validation-blocked`, `/repair-feedback`, `/repair-summary`, -`/stream-graph-summary`, `/protocol-skip`, `/surface-policy`, -`/surface-plan`, `/shape`, `/token-overrides`, `/screen-synthesized`, and -`/mode-upgraded`. - -For client behavior, inspect Devtools events: `surface-plan`, `protocol-line`, -`protocol-parse-error`, `sandbox-ready`, `render`, `intent-emitted`, -`intent-rejected`, `intent-dispatched`, `intent-settled`, `state-pushed`, +`/validation-blocked`, `/stream-graph-summary`, `/protocol-skip`, +`/surface-policy`, `/surface-plan`, `/surface-contract`, `/agent-goal`, +`/agent-policy-resolution`, `/shape`, `/token-overrides`, and `/mode-upgraded`. + +For client behavior, inspect Devtools events: `surface-plan`, +`surface-contract`, `protocol-line`, `protocol-parse-error`, `sandbox-ready`, +`render`, `rendered`, `tool-called`, `tool-rejected`, +`tool-dispatched`, `tool-settled`, `state-pushed`, `component-sync`, `stream-graph`, and `sandbox-fatal`. Use `ContractIssue` plus `hintsForContractIssue(issue)` when feeding validation @@ -91,7 +89,10 @@ the requested grant/component exceeds the selected `SurfacePolicy` or compiled pnpm typecheck pnpm test pnpm test:safety +pnpm test:gallery pnpm build +pnpm check:public-api +pnpm smoke:public-packages pnpm pack:dry-run pnpm dev:gallery pnpm dev:workbench @@ -107,7 +108,7 @@ boot. It starts only the Vite demo app and does not require Manual smoke path: run `pnpm dev:workbench`, open `http://localhost:5173/generate`, choose the **Host-resource search** showcase scenario, keep **Free layout**, confirm the -contract cockpit shows `explore/declarative/host-resource/read/replayable` and +contract cockpit shows `explore/arrow/host-resource/read/replayable` and `Grants 1: search`, run the scenario, submit a generated search such as `chicken pasta`, inspect the Stream and Devtools drawers, replay from Saved surfaces, then open `http://localhost:5173/adversarial`. Use `/batch`, diff --git a/README.md b/README.md index aca26e6..6ee0b54 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ the canonical `.ghost/fingerprint/manifest.yml` package layout. The Surface Gallery adds a Ghost fingerprint preset for each root, and the Generate workbench adds a `Fingerprint · ` option. A fingerprint run is not a bundled visual direction: Summon consumes the Ghost relay brief as product design -direction, then applies host-owned policy, capabilities, and token CSS. +direction, then applies host-owned policy, tools, and token CSS. The full guided path lives in [docs/adoption/quickstart.md](docs/adoption/quickstart.md). @@ -98,7 +98,7 @@ pnpm dev:demos surface, and a small event strip. - `/generate` - diagnostic maintainer workbench for broker-selected surface configs, allowed host tools, trusted host components, token - overrides, validation retries, edit/replay, Ghost steering, Devtools, and + overrides, validation summaries, replay, Ghost steering, Devtools, and stream diagnostics. - `/batch` - parallel broker harness for prompt coverage, host tool wiring, direction-token visual coverage, throughput, and consistency checks. @@ -114,7 +114,8 @@ pnpm dev:demos helpers, and explicit subpaths for advanced browser, engine, host, policy, envelope, assets, and Devtools APIs. - `@anarchitecture/summon-server` - provider-neutral generation lifecycle, - validation retries, summaries, and model-provider interfaces. + Arrow protocol hardening, validation summaries, and model-provider + interfaces. - `@anarchitecture/summon-react` - `SummonSurface` and React trusted-component adapter. `react` and `react-dom` are peer dependencies. @@ -125,7 +126,7 @@ pnpm dev:demos `packages/sandbox-runtime`, `packages/server`, `packages/react` - private implementation workspaces published only through the public facades. - `apps/server` - multi-provider demo server for Anthropic, OpenAI, and Gemini, - direction loading, validation retry feedback, and demo backing routes. + direction loading, Arrow protocol diagnostics, and demo backing routes. - `apps/surface-gallery` - first-run live example app for OSS adopters. - `apps/demo` - Vite maintainer workbench for generation, batch runs, adversarial checks, strict input, Ghost steering, diagnostics, and fatal diff --git a/apps/demo/src/App.tsx b/apps/demo/src/App.tsx index 4f51fe6..7fe2d38 100644 --- a/apps/demo/src/App.tsx +++ b/apps/demo/src/App.tsx @@ -7,7 +7,6 @@ import { ThemeProvider, ThemeToggle } from './theme.js'; const AdversarialPage = lazy(() => import('./pages/AdversarialPage.js').then((module) => ({ default: module.AdversarialPage }))); const BatchPage = lazy(() => import('./pages/BatchPage.js').then((module) => ({ default: module.BatchPage }))); const FatalPage = lazy(() => import('./pages/FatalPage.js').then((module) => ({ default: module.FatalPage }))); -const FragmentComparePage = lazy(() => import('./pages/FragmentComparePage.js').then((module) => ({ default: module.FragmentComparePage }))); const GeneratePage = lazy(() => import('./pages/generate/GeneratePage.js').then((module) => ({ default: module.GeneratePage }))); const StrictPage = lazy(() => import('./pages/StrictPage.js').then((module) => ({ default: module.StrictPage }))); @@ -27,7 +26,6 @@ function AppRoutes() { } /> } /> } /> - } /> } /> } /> } /> diff --git a/apps/demo/src/adversarial-artifact.ts b/apps/demo/src/adversarial-artifact.ts index 93124e5..99fb3c6 100644 --- a/apps/demo/src/adversarial-artifact.ts +++ b/apps/demo/src/adversarial-artifact.ts @@ -1,246 +1,91 @@ -/** - * Adversarial artifact body. Each test tries to break out of the sandbox in a - * different way. All results are reported back via sandbox.emit('report', ...). - * - * PASS = reported as "blocked" - * FAIL = reported as "allowed" - * - * The host tallies results. - */ -export const ADVERSARIAL_BODY_HTML = /* html */ ` -
-
Adversarial sandbox
-
Running breakout tests and reporting back…
-
-
- - + await expectBlocked("emit-unknown-tool", () => rejectedTool("emit-unknown-tool", "exfiltrate", { data: "secret" })); + await expectBlocked("emit-declared-but-not-granted", () => rejectedTool("emit-declared-but-not-granted", "escalate", { test: "emit-declared-but-not-granted" })); + await expectBlocked("empty-tool-name", () => rejectedTool("empty-tool-name", "", {})); + await report("__DONE__", "info", ""); +} + +void runAll(); + +export default html\` +
+
Adversarial sandbox
+
Running Arrow VM boundary checks and reporting back through callTool().
+
Tests started
+
+\`; `; + +export const ADVERSARIAL_ARTIFACT: ArrowSurfaceArtifact = { + runtime: 'arrow', + source: { + 'main.ts': adversarialMain, + }, +}; diff --git a/apps/demo/src/components.tsx b/apps/demo/src/components.tsx deleted file mode 100644 index 47f8558..0000000 --- a/apps/demo/src/components.tsx +++ /dev/null @@ -1,194 +0,0 @@ -import { - createComponentRegistry, - type ComponentDefinition, - type ComponentRegistry, -} from '@anarchitecture/summon'; -import type { ComponentPack } from '@anarchitecture/summon'; -import { defineReactComponent } from '@anarchitecture/summon-react'; -import type { CSSProperties } from 'react'; -import { z } from 'zod'; - -const metricCardPropsSchema = z.object({ - label: z.string(), - value: z.string(), - delta: z.string().optional(), - tone: z.enum(['neutral', 'good', 'warn']).optional(), -}); - -const trendSparklinePropsSchema = z.object({ - label: z.string(), - points: z.array(z.number()).min(2).max(12), - caption: z.string().optional(), -}); - -const approvalStatusPropsSchema = z.object({ - status: z.enum(['pending', 'approved', 'blocked']), - title: z.string(), - detail: z.string().optional(), -}); - -type MetricCardProps = z.infer; -type TrendSparklineProps = z.infer; -type ApprovalStatusProps = z.infer; - -export function createDemoComponentRegistry(componentNames?: readonly string[]): ComponentRegistry { - const allowed = componentNames ? new Set(componentNames) : null; - const definitions = demoComponentDefinitions().filter((definition) => - allowed ? allowed.has(definition.name) : true, - ); - return createComponentRegistry(definitions); -} - -export function baseDemoComponentPack(): ComponentPack { - return createDemoComponentRegistry().toContract().pack; -} - -export function narrowComponentPack(pack: ComponentPack, componentNames: readonly string[]): ComponentPack { - const allowed = new Set(componentNames); - return { - components: pack.components.filter((component) => allowed.has(component.name)), - }; -} - -function demoComponentDefinitions(): ComponentDefinition[] { - return [ - defineReactComponent({ - name: 'MetricCard', - description: - 'Displays one compact KPI with an optional delta and tone. Use for launch metrics, readiness scores, revenue, risk, or progress numbers.', - propsSchema: metricCardPropsSchema, - sizing: { height: 'var(--space-10)', description: 'Works well in a 2-4 column metric grid.' }, - examples: [ - { - name: 'KPI placeholder', - code: `
`, - }, - ], - component: MetricCard, - }), - defineReactComponent({ - name: 'TrendSparkline', - description: - 'Displays a small trend line from numeric points. Use when a generated surface needs a compact visual trend instead of a text-only metric.', - propsSchema: trendSparklinePropsSchema, - sizing: { height: 'var(--space-11)', description: 'Needs enough height for the chart and caption.' }, - examples: [ - { - name: 'Trend placeholder', - code: `
`, - }, - ], - component: TrendSparkline, - }), - defineReactComponent({ - name: 'ApprovalStatus', - description: - 'Displays a launch or publish approval state with a strong status treatment. Use for pending, approved, or blocked readiness gates.', - propsSchema: approvalStatusPropsSchema, - sizing: { height: 'var(--space-9)', description: 'Fits a compact status row or card.' }, - examples: [ - { - name: 'Approval placeholder', - code: `
`, - }, - ], - component: ApprovalStatus, - }), - ]; -} - -function MetricCard({ label, value, delta, tone = 'neutral' }: MetricCardProps) { - const border = tone === 'warn' ? '#cc4b03' : '#e6e6e6'; - const background = tone === 'good' ? '#f2fff6' : tone === 'warn' ? '#fff7ed' : '#fff'; - const deltaColor = tone === 'warn' ? '#cc4b03' : tone === 'good' ? '#008c2e' : '#6b6b6b'; - return ( -
-
{label}
-
- {value} - {delta ? {delta} : null} -
-
- ); -} - -function TrendSparkline({ label, points, caption }: TrendSparklineProps) { - const safePoints = points.length >= 2 ? points : [0, 0]; - const min = Math.min(...safePoints); - const max = Math.max(...safePoints); - const spread = max - min || 1; - const d = safePoints.map((point, index) => { - const x = (index / Math.max(safePoints.length - 1, 1)) * 220 + 10; - const y = 74 - ((point - min) / spread) * 54 + 10; - return `${index === 0 ? 'M' : 'L'}${x.toFixed(1)} ${y.toFixed(1)}`; - }).join(' '); - return ( -
-
- {label} - {safePoints.length} pts -
- - - - - {caption ?
{caption}
: null} -
- ); -} - -function ApprovalStatus({ status, title, detail }: ApprovalStatusProps) { - const colors = { - pending: ['#fff7ed', '#cc4b03', 'Pending'], - approved: ['#f2fff6', '#008c2e', 'Approved'], - blocked: ['#fff1f2', '#cc0023', 'Blocked'], - } as const; - const [bg, fg, label] = colors[status]; - return ( -
- {label} - {title} - {detail ? {detail} : null} -
- ); -} - -const hostCardStyle: CSSProperties = { - height: '100%', - boxSizing: 'border-box', - padding: '14px 16px', - borderRadius: 14, - border: '1px solid #e6e6e6', - color: '#101010', - fontFamily: 'system-ui, -apple-system, Segoe UI, sans-serif', -}; - -const metricLabelStyle: CSSProperties = { - fontSize: 11, - textTransform: 'uppercase', - letterSpacing: '0.06em', - color: '#6b6b6b', - fontWeight: 700, -}; - -const metricValueStyle: CSSProperties = { - fontSize: 34, - lineHeight: 1, - letterSpacing: '-0.03em', -}; - -const metricDeltaStyle: CSSProperties = { - fontSize: 13, - fontWeight: 700, -}; - -const approvalBadgeStyle: CSSProperties = { - width: 'max-content', - padding: '3px 9px', - borderRadius: 999, - color: 'white', - fontSize: 11, - fontWeight: 800, - letterSpacing: '0.04em', - textTransform: 'uppercase', -}; diff --git a/apps/demo/src/components/TrustedFixtureSurface.tsx b/apps/demo/src/components/TrustedFixtureSurface.tsx deleted file mode 100644 index 73748d4..0000000 --- a/apps/demo/src/components/TrustedFixtureSurface.tsx +++ /dev/null @@ -1,231 +0,0 @@ -import { - forwardRef, - useEffect, - useImperativeHandle, - useMemo, - useRef, - type CSSProperties, -} from 'react'; -import bootstrapSource from '@anarchitecture/summon/bootstrap.js?raw'; -import tokensSource from '@anarchitecture/summon/tokens.css?raw'; - -export interface TrustedFixtureSurfaceHandle { - iframe: HTMLIFrameElement | null; - sandboxId: string | null; - pushState(state: Record): void; -} - -export interface TrustedFixtureSurfaceProps { - html: string; - grantedIntents: string[]; - initialState?: Record; - onIntent?: (intent: string, args: Record) => void; - onIntentRejected?: (reason: string, raw: unknown) => void; - onFatal?: (reason: string) => void; - id?: string; - title?: string; - className?: string; - style?: CSSProperties; -} - -function randomId(): string { - const bytes = new Uint8Array(16); - crypto.getRandomValues(bytes); - return Array.from(bytes, (b) => b.toString(16).padStart(2, '0')).join(''); -} - -function escapeHtml(s: string): string { - return s - .replaceAll('&', '&') - .replaceAll('<', '<') - .replaceAll('>', '>') - .replaceAll('"', '"'); -} - -function escapeScript(s: string): string { - return s.replace(/<\/script/gi, '<\\/script'); -} - -function escapeScriptJson(value: unknown): string { - return JSON.stringify(value).replaceAll('<', '\\u003c'); -} - -function cspForNonce(nonce: string): string { - return [ - "default-src 'none'", - `script-src 'nonce-${nonce}'`, - "style-src 'unsafe-inline'", - "img-src data:", - "font-src data:", - "connect-src 'none'", - "form-action 'none'", - "base-uri 'none'", - "frame-src 'none'", - "child-src 'none'", - "media-src 'none'", - "object-src 'none'", - "worker-src 'none'", - ].join('; '); -} - -function nonceFixtureScripts(html: string, nonce: string): string { - return html.replace(/]*\bnonce=)/gi, ` - - - -
${nonceFixtureScripts(params.html, params.nonce)}
-`; -} - -/** - * Demo-only escape hatch for trusted adversarial fixtures. It keeps the sandbox - * and host intent bridge under test without letting generated artifacts regain - * public script execution. - */ -export const TrustedFixtureSurface = forwardRef( - function TrustedFixtureSurface(props, ref) { - const iframeRef = useRef(null); - const sandboxId = useMemo(randomId, []); - const nonce = useMemo(randomId, []); - const readyRef = useRef(false); - const pendingStatesRef = useRef[]>([]); - - function postState(state: Record) { - const iframe = iframeRef.current; - if (!readyRef.current || !iframe?.contentWindow) { - pendingStatesRef.current.push(state); - return; - } - iframe.contentWindow.postMessage({ - type: 'SUMMON_STATE', - sandbox_id: sandboxId, - state, - }, '*'); - } - - useImperativeHandle(ref, () => ({ - get iframe() { - return iframeRef.current; - }, - get sandboxId() { - return sandboxId; - }, - pushState(state: Record) { - postState(state); - }, - }), [sandboxId]); - - useEffect(() => { - readyRef.current = false; - pendingStatesRef.current = props.initialState ? [props.initialState] : []; - const intentAllowlist = new Set(props.grantedIntents); - - function flushPending() { - const iframe = iframeRef.current; - if (!readyRef.current || !iframe?.contentWindow) return; - while (pendingStatesRef.current.length > 0) { - const state = pendingStatesRef.current.shift()!; - iframe.contentWindow.postMessage({ - type: 'SUMMON_STATE', - sandbox_id: sandboxId, - state, - }, '*'); - } - } - - function handleMessage(event: MessageEvent) { - const data = event.data as { - type?: string; - sandbox_id?: string; - reason?: unknown; - intent?: unknown; - args?: unknown; - } | undefined; - if (!data || typeof data !== 'object') return; - if ( - data.type !== 'SUMMON_READY' && - data.type !== 'SUMMON_FATAL' && - data.type !== 'SUMMON_INTENT' - ) { - return; - } - if (data.sandbox_id !== sandboxId) return; - - if (data.type === 'SUMMON_FATAL') { - readyRef.current = false; - props.onFatal?.(typeof data.reason === 'string' ? data.reason : 'unknown'); - return; - } - - if (data.type === 'SUMMON_READY') { - readyRef.current = true; - flushPending(); - return; - } - - const intent = data.intent; - if (typeof intent !== 'string' || !intent) { - props.onIntentRejected?.('intent not a non-empty string', data); - return; - } - if (!intentAllowlist.has(intent)) { - props.onIntentRejected?.(`intent "${intent}" not granted`, data); - return; - } - const args = data.args && typeof data.args === 'object' - ? data.args as Record - : {}; - props.onIntent?.(intent, args); - } - - window.addEventListener('message', handleMessage); - if (iframeRef.current) { - iframeRef.current.setAttribute('sandbox', 'allow-scripts'); - iframeRef.current.srcdoc = buildSrcdoc({ - sandboxId, - nonce, - html: props.html, - }); - } - - return () => { - window.removeEventListener('message', handleMessage); - readyRef.current = false; - pendingStatesRef.current = []; - if (iframeRef.current) iframeRef.current.srcdoc = ''; - }; - }, [ - nonce, - props.grantedIntents, - props.html, - props.initialState, - props.onFatal, - props.onIntent, - props.onIntentRejected, - sandboxId, - ]); - - return ( - + role="region" + aria-label="Summon surface gallery sandbox" + >