diff --git a/.env.example b/.env.example index 7c0c4c8..e03e339 100644 --- a/.env.example +++ b/.env.example @@ -2,14 +2,13 @@ # # These are consumed by docker-compose.yml. Foragent and the RockBot agent can # be pointed at different models — Foragent uses FORAGENT_LLM_* for its own -# LLM-backed capabilities (extract-structured-data and beyond), the RockBot -# agent uses LLM_* for its own reasoning. +# LLM-backed capabilities (browser-task planner, form-schema enrichment), the +# RockBot agent uses LLM_* for its own reasoning. # -# Direct curl tests of capabilities that don't need an LLM (fetch-page-title) -# still work without either set; anything LLM-backed will fail until Foragent's -# config is populated. +# Every Foragent capability needs the LLM wired; without it, the host fails +# fast at startup. -# ── Foragent's LLM (REQUIRED for extract-structured-data) ──────────────────── +# ── Foragent's LLM (REQUIRED — browser-task + form-schema enrichment) ──────── # Azure AI Foundry / OpenAI-compatible endpoints are both fine. # Foundry endpoint shape: https://-.cognitiveservices.azure.com/openai/v1/ FORAGENT_LLM_ENDPOINT=https://your-resource-region.cognitiveservices.azure.com/openai/v1/ diff --git a/CLAUDE.md b/CLAUDE.md index bf6d232..70bd521 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Status -Foragent is at **milestone 8 shipped, step 9 next**. Five capabilities are advertised (`browser-task`, `learn-form-schema`, `execute-form-batch`, `fetch-page-title`, `extract-structured-data`); the A2A loop is wired end-to-end against RockBot via the `docker-compose.yml` harness (image pinned to `rockylhotka/rockbot-agent:0.9.11`, which brings the structured-data `invoke_agent` surface from PR #291 so RockBot can consume Foragent's FormSchema JSON natively rather than as prose). RockBot framework refs are on `0.9.*` (bumped from `0.8.*` for step 8's multi-file skill API). Step 6 shipped the generalist `browser-task` planner (LLM-in-the-loop over ref-annotated aria snapshots + `aria-ref=eN` locator resolution, built on `Microsoft.Playwright` 1.59 — bumped from 1.50 for the Ai aria-snapshot mode; see Appendix A #16). Tiered chat clients are wired via `AddRockBotTieredChatClients` with one model aliased across Low/Balanced/High per spec §3.7. Step 7 wired the learning substrate: `ISkillStore` + `ILongTermMemory` via `WithSkills()` + `WithLongTermMemory()`, `BrowserTaskPriming` injects retrieved skill + memory content into the planner prompt, successful tasks write a learned skill at `sites/{host}/learned/{slug}`, and `BskySeedSkillService` seeds `sites/bsky.app/login` on first start (idempotent — only writes when absent). Step 8 added `learn-form-schema` + `execute-form-batch` (spec §5.5 multi-phase flow): learn returns a typed `FormSchema` and persists it as a `Skill` plus a `SkillResourceType.JsonSchema` resource at `sites/{host}/forms/{slug}`; execute resolves schemas via `ISkillStore.GetResourceAsync("schema.json")` (or accepts them inline), submits each row, and streams per-row progress via `AgentTaskContext.PublishStatus`. Default mode is **abort-on-first** (spec open-question #8 resolution); callers opt into `"continue"` for known-messy batches. Spec open-question #6 (typed JSON alongside markdown skills) is resolved by the RockBot 0.9 multi-file skill API — no parallel Foragent-local store was needed. Embeddings are optional and configured separately under `ForagentEmbeddings` so they can live on a different Azure Foundry subscription than the chat model; missing embeddings downgrade retrieval to BM25-only with a single startup warning. `post-to-site` has been removed from both the advertised skill list and the codebase (greenfield deletion — `browser-task` + the learned bsky skill cover the use case). The governing spec is `docs/foragent-specification.md` **v0.2**. Storage-state persistence, 2FA input-required flow, k8s-secrets broker, and per-tenant credential namespaces remain deferred — tracked in `docs/framework-feedback.md`. Framework-level observations from each milestone are captured in `docs/framework-feedback.md`. +Foragent is at **milestone 9 shipped — v0.2 complete**. Three capabilities are advertised (`browser-task`, `learn-form-schema`, `execute-form-batch`); the A2A loop is wired end-to-end against RockBot via the `docker-compose.yml` harness (image pinned to `rockylhotka/rockbot-agent:0.9.15`, which brings the structured-data `invoke_agent` surface from PR #291 (0.9.11), a softened `data`-parameter description in 0.9.14 that encourages the caller LLM to use DataParts when the target skill description advertises them, and the A2A cancel-capability publisher in 0.9.15 that actually sends `agent.task.cancel.{agentName}` messages when a wisp's local state goes wrong after an A2A dispatch — the message Foragent's `ForagentCancelHandler` now consumes). Step 9 removed the v0.1 `fetch-page-title` and `extract-structured-data` specialists: both are reachable via `browser-task` (the planner's `done(result=...)` channel carries JSON when the intent asks for it), at roughly 2–3× the tokens per call — acceptable given zero deterministic high-volume callers today. `extract-structured-data` was also out of spec on §7.1 mandatory allowlists, which the generalist enforces by design. Session-level `FetchPageTitleAsync` / `CapturePageSnapshotAsync` were removed from `IBrowserSession` alongside the capabilities that used them. RockBot framework refs are on `0.9.*` (bumped from `0.8.*` for step 8's multi-file skill API). Step 6 shipped the generalist `browser-task` planner (LLM-in-the-loop over ref-annotated aria snapshots + `aria-ref=eN` locator resolution, built on `Microsoft.Playwright` 1.59 — bumped from 1.50 for the Ai aria-snapshot mode; see Appendix A #16). Tiered chat clients are wired via `AddRockBotTieredChatClients` with one model aliased across Low/Balanced/High per spec §3.7. Step 7 wired the learning substrate: `ISkillStore` + `ILongTermMemory` via `WithSkills()` + `WithLongTermMemory()`, `BrowserTaskPriming` injects retrieved skill + memory content into the planner prompt, successful tasks write a learned skill at `sites/{host}/learned/{slug}`, and `BskySeedSkillService` seeds `sites/bsky.app/login` on first start (idempotent — only writes when absent). Step 8 added `learn-form-schema` + `execute-form-batch` (spec §5.5 multi-phase flow): learn returns a typed `FormSchema` and persists it as a `Skill` plus a `SkillResourceType.JsonSchema` resource at `sites/{host}/forms/{slug}`; execute resolves schemas via `ISkillStore.GetResourceAsync("schema.json")` (or accepts them inline), submits each row, and streams per-row progress via `AgentTaskContext.PublishStatus`. Default mode is **abort-on-first** (spec open-question #8 resolution); callers opt into `"continue"` for known-messy batches. Spec open-question #6 (typed JSON alongside markdown skills) is resolved by the RockBot 0.9 multi-file skill API — no parallel Foragent-local store was needed. Embeddings are optional and configured separately under `ForagentEmbeddings` so they can live on a different Azure Foundry subscription than the chat model; missing embeddings downgrade retrieval to BM25-only with a single startup warning. `post-to-site` has been removed from both the advertised skill list and the codebase (greenfield deletion — `browser-task` + the learned bsky skill cover the use case). The governing spec is `docs/foragent-specification.md` **v0.2**. Storage-state persistence, 2FA input-required flow, k8s-secrets broker, and per-tenant credential namespaces remain deferred — tracked in `docs/framework-feedback.md`. Framework-level observations from each milestone are captured in `docs/framework-feedback.md`. ## Build / test @@ -74,7 +74,7 @@ Foragent requires an LLM. Config lives under `ForagentLlm` — separate from any ## Browser -`Foragent.Browser` wraps Playwright. `AddForagentBrowser()` in `Foragent.Agent/Program.cs` registers `PlaywrightBrowserHost` (`IHostedService` owning one shared Chromium per process) and `IBrowserSessionFactory` (hands out a fresh `IBrowserContext` per A2A task — isolation guarantee from spec §3.5). `IBrowserSession` exposes `FetchPageTitleAsync` / `CapturePageSnapshotAsync` for one-shot reads, `OpenPageAsync` → `IBrowserPage` (navigate / fill / click / wait / read / `SelectOptionAsync` / `SetCheckedAsync` / `ScanFormAsync`) for multi-step flows like login + post and form submission, and `OpenAgentPageAsync` → `IBrowserAgentPage` for LLM-in-the-loop planners (ref-annotated aria snapshots + `aria-ref=eN` locator resolution). `ScanFormAsync` (added in step 8) runs a single JS pass in the page to collect inputs/selects/textareas with labels, validation attributes, and select/radio options — returns a typed `FormScan` that the form capabilities lift into `FormSchema`. The snapshot uses Chromium's aria-snapshot (via `Locator.AriaSnapshotAsync`; `Mode = AriaSnapshotMode.Ai` gets the ref-annotated form) and falls back to `` inner text when the tree is empty. Selectors passed to `IBrowserPage` use Playwright's string-selector dialect (CSS + `role=role[name="..."]`); **regex is not accepted in string form**, use exact attribute matches. `Foragent.Browser` has `InternalsVisibleTo("Foragent.Browser.Tests")` so tests drive the real `PlaywrightBrowserSessionFactory` without promoting its implementation types to public. +`Foragent.Browser` wraps Playwright. `AddForagentBrowser()` in `Foragent.Agent/Program.cs` registers `PlaywrightBrowserHost` (`IHostedService` owning one shared Chromium per process) and `IBrowserSessionFactory` (hands out a fresh `IBrowserContext` per A2A task — isolation guarantee from spec §3.5). `IBrowserSession` exposes `OpenPageAsync` → `IBrowserPage` (navigate / fill / click / wait / read / `SelectOptionAsync` / `SetCheckedAsync` / `ScanFormAsync`) for multi-step flows like login + post and form submission, and `OpenAgentPageAsync` → `IBrowserAgentPage` for LLM-in-the-loop planners (ref-annotated aria snapshots + `aria-ref=eN` locator resolution). The one-shot `FetchPageTitleAsync` / `CapturePageSnapshotAsync` + `PageSnapshot` record were removed in step 9 along with the specialists that used them — `browser-task`'s `snapshot` tool covers the same ground. `ScanFormAsync` (added in step 8) runs a single JS pass in the page to collect inputs/selects/textareas with labels, validation attributes, and select/radio options — returns a typed `FormScan` that the form capabilities lift into `FormSchema`. The snapshot uses Chromium's aria-snapshot (via `Locator.AriaSnapshotAsync`; `Mode = AriaSnapshotMode.Ai` gets the ref-annotated form) and falls back to `` inner text when the tree is empty. Selectors passed to `IBrowserPage` use Playwright's string-selector dialect (CSS + `role=role[name="..."]`); **regex is not accepted in string form**, use exact attribute matches. `Foragent.Browser` has `InternalsVisibleTo("Foragent.Browser.Tests")` so tests drive the real `PlaywrightBrowserSessionFactory` without promoting its implementation types to public. `CreateSessionAsync(Func allowedHost, ...)` is the step-6 entry point for allowlist-scoped sessions. The factory installs a context-wide `RouteAsync("**/*", ...)` that aborts off-list document/subframe navigations before Playwright issues the request (spec §7.1). The no-argument overload accepts any host and stays available for specialists that enforce narrower rules elsewhere. @@ -85,7 +85,9 @@ Foragent requires an LLM. Config lives under `ForagentLlm` — separate from any - Each capability implements `ICapability` — owns its own `AgentSkill` metadata (exposed as a static `SkillDefinition`) and its own `ExecuteAsync` logic. - `ForagentTaskHandler` is a pure dispatcher that resolves `IEnumerable` from DI and routes on `SkillId`. **Do not add skill-specific logic to the handler.** New capabilities go in new `ICapability` classes. - `ForagentCapabilities.Skills` (static array) is the single source of truth for advertised skills — both the bus-side `AgentCard.Skills` and the HTTP gateway's `opts.Skills` read from it. -- `CapabilityInput.Parse` is the shared URL + description shim used by `fetch-page-title` and `extract-structured-data`. Capabilities with different input shapes parse their own input near the capability (e.g. `BrowserTaskInput` in `BrowserTask/`). Don't overload `CapabilityInput` for unrelated shapes. +- Capabilities parse their own input near the capability (`BrowserTaskInput` in `BrowserTask/`, the `*Input` classes in `Forms/`). The `CapabilityInput.Parse` shared URL+description shim was removed in step 9 with its only consumers. +- **Cancel messages are honored.** `ForagentTaskHandler` registers every in-flight task with `InFlightTaskRegistry` (singleton, a `ConcurrentDictionary`) and wraps the capability's `AgentTaskContext` so the CT it observes is linked to both the framework's message CT and an external cancel trigger. `ForagentCancelHandler` (registered via `agent.HandleMessage()` in Program.cs — last `AddScoped` wins, overriding the framework's default stateless handler) looks up the task by id and calls `TryCancel`. Successful cancel publishes nothing extra (the running task's own terminal result is the acknowledgment); a cancel for an unknown task publishes `AgentTaskError{Code=TaskNotFound}`. This matters because RockBot's wisp/A2A coordination can publish cancel messages when a local wisp fails after already dispatching an A2A task — without this handler, Foragent would orphan the browser run. +- **Structured input arrives on an A2A DataPart** (`AgentMessagePart { Kind = "data", Data = , MimeType = "application/json" }`) — the shape RockBot's `invoke_agent` tool produces when the caller populates its `data` parameter. All three Foragent parsers look for a data part first, then fall back to JSON in the text part (for curl callers), then metadata overrides either source. Every `SkillDefinition.Description` tells the LLM to pass inputs via the `data` parameter, not in the text message — otherwise the LLM writes prose like `allowedHosts: ['*']` that strict JSON parsers reject. Text parts remain the home for human-readable prose (used as `intent` fallback in `browser-task` when the data part doesn't carry one). - `browser-task` (in `BrowserTask/`) is the generalist planner (spec §5.2). `BrowserTaskInput` parses intent + mandatory `allowedHosts` + optional `url` / `credentialId` / `maxSteps` (default 60, ceiling 150) / `maxSeconds` (default 120, ceiling 600). `BrowserTaskTools` wraps `snapshot` / `navigate` / `click` / `type` / `wait_for` / `done` / `fail` as `AIFunction`s via `AIFunctionFactory.Create` and passes them in `ChatOptions.Tools`; the RockBot-wrapped function-invoking `IChatClient` runs the full model ↔ tool loop inside one `GetResponseAsync` call. Budget is enforced tool-side (each tool checks `BrowserTaskState.BudgetExhausted`) because Microsoft.Extensions.AI does not surface per-request iteration caps through `ChatOptions`; wall-clock is a linked `CancellationTokenSource`. **Never log tool arguments verbatim** — `type` carries user-supplied values that may be sensitive (log length only). Refs from a snapshot are valid only until the next mutating call; the system prompt and tool descriptions both state this, but don't code anything that assumes cross-snapshot ref stability. - `learn-form-schema` and `execute-form-batch` (both in `Forms/`) are the step-8 phase-1 / phase-3 pair (spec §5.5). `FormSchema` / `FormField` are the wire contract — stable JSON shape, stored via `FormSchema.SerializerOptions`. `LearnFormSchemaCapability` navigates, calls `IBrowserPage.ScanFormAsync`, maps the raw scan to `FormSchema` via the deterministic `FormSchemaMapper` (pure — no LLM), then runs `FormSchemaEnricher` for one LLM turn to infer `dependsOn` and a note (skipped when there are no select/radio fields). The schema is persisted as a `Skill` bundle at `sites/{host}/forms/{slug}` with a `SkillResourceType.JsonSchema` resource named `schema.json`. Only the enricher can add `dependsOn` / `notes` — structural fields (type, selector, required, options) come only from the DOM scan, so the LLM cannot fabricate fields or rewrite selectors. `ExecuteFormBatchCapability` accepts `schemaRef` (resolves via `ISkillStore.GetResourceAsync(name, "schema.json")`) or an inline `schema`, and submits each row with `FillAsync` / `SelectOptionAsync` / `SetCheckedAsync` per field type. Per-row progress is published via `AgentTaskContext.PublishStatus(new AgentTaskStatusUpdate { State = Working, Message = … })`. Default `mode` is `"abort-on-first"` (spec open-question #8 resolution: a failed submit on a mutating form usually indicates a real problem, so continuing would just generate more bad submissions); callers opt into `"continue"` for known-messy batches. Success detection: an optional `successIndicator` CSS selector is the preferred signal; the fallback is URL change after submit, which fails (correctly) for forms that submit in place. File uploads and multi-step wizards are **out of scope** for v0.2 — `ScanFormAsync` explicitly skips `type=file`, and there's no flow control for wizards. @@ -102,7 +104,7 @@ Embeddings are optional. `ForagentEmbeddings:Endpoint` / `ModelId` / `ApiKey` ar On successful completion (`state.IsDone`), `BrowserTaskCapability.TryWriteLearnedSkillAsync` runs one extra synthesizer LLM turn (same `IChatClient`, no tools) to author a reusable skill at `sites/{primaryHost}/learned/{intent-slug}`. The synthesizer prompt forbids including credential values or typed field contents. Writes are skipped when the task was trivial (≤1 navigation) or the primary host can't be determined. Errors are logged but never fail the completed task. -`BskySeedSkillService` (IHostedService) seeds `sites/bsky.app/login` on first start by calling `ISkillStore.GetAsync` and only writing if absent — docker volume recreation reseeds cleanly; operator edits to the skill through other channels are preserved. +`BskySeedSkillService` (IHostedService) seeds `sites/bsky-app/login` on first start by calling `ISkillStore.GetAsync` and only writing if absent — docker volume recreation reseeds cleanly; operator edits to the skill through other channels are preserved. **Skill names sanitize host segments via `SkillNaming.SanitizeHost` (replace `.` with `-`) because RockBot 0.9's `FileSkillStore.ValidateName` rejects dots.** `bsky.app` → `bsky-app`, `apple.com` → `apple-com`. Allowlist patterns and memory categories stay unsanitized — only skill names need this. Skill naming follows spec §5.6: `sites/{host}/{intent}` for human-authored primers, `sites/{host}/learned/{slug}` for agent-generated, `sites/{host}/forms/{slug}` for learned form schemas (step 8). `Skill.SeeAlso` cross-references related skills to surface clusters rather than single entries. **Note:** `Skill` (from `RockBot.Host 0.9.x`) carries `Manifest: IReadOnlyList?` for multi-file bundles but still doesn't carry tags, metadata, or importance — the `agent-learned` vs `human-authored` vs `form-schema` distinction is encoded in the name prefix only. The dream loop (below) keeps the distinction from mattering at retrieval time: skills get improved, merged, and deduped across origins on a daily cadence; `SaveAsync(skill)` with null resources preserves the existing manifest so dream-loop markdown edits don't orphan attached resource files. @@ -113,7 +115,7 @@ Foragent runs a daily RockBot dream pass to consolidate accumulated skills and m - **Enabled:** main orchestrator (`dream.md`), skill-optimize (merge/dedup), skill-gap (detect missing coverage), sequence-skill (detect repeated tool patterns), memory-mining (promote durable observations to `ILongTermMemory`). - **Disabled:** preference inference, episode extraction, tier-routing review, entity extraction, graph consolidation, identity reflection, DLQ review, Wisp failure analysis. All personality-agent territory. -`ProtectedSkillPrefixes = []` — empty on purpose. Operator primers like `sites/bsky.app/login` are *improved in place* by the dream, not frozen; the seed service only writes on a cold boot, so later dream-authored improvements survive restarts. Operators who need to reset a primer can delete the stored skill file and bounce the host. +`ProtectedSkillPrefixes = []` — empty on purpose. Operator primers like `sites/bsky-app/login` are *improved in place* by the dream, not frozen; the seed service only writes on a cold boot, so later dream-authored improvements survive restarts. Operators who need to reset a primer can delete the stored skill file and bounce the host. Directive files live at `src/Foragent.Agent/directives/*.md` and ship with the binary via ``. `DreamService` resolves each `DreamOptions.*DirectivePath` relative to `AgentProfileOptions.BasePath` (confirmed by IL inspection — relative base paths combine against `AppContext.BaseDirectory`, which is the binary output dir). Program.cs configures `AgentProfileOptions.BasePath = "directives"`; no `WithProfile()` call, Foragent doesn't need the personality-profile doc set. diff --git a/Directory.Build.props b/Directory.Build.props index 74c6bcf..a1616ee 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -6,7 +6,7 @@ enable true true - 0.2.0-alpha.8 + 0.2.0-alpha.9 Marimer LLC Marimer LLC Copyright (c) Marimer LLC diff --git a/deploy/rockbot-seed/agent-trust.json b/deploy/rockbot-seed/agent-trust.json index 05a69b7..1deca74 100644 --- a/deploy/rockbot-seed/agent-trust.json +++ b/deploy/rockbot-seed/agent-trust.json @@ -2,7 +2,7 @@ { "agentId": "Foragent", "level": 4, - "approvedSkills": ["browser-task", "fetch-page-title", "extract-structured-data"], + "approvedSkills": ["browser-task", "learn-form-schema", "execute-form-batch"], "firstSeen": "2026-04-21T00:00:00+00:00", "lastInteraction": "2026-04-21T00:00:00+00:00", "interactionCount": 0 diff --git a/deploy/rockbot-seed/well-known-agents.json b/deploy/rockbot-seed/well-known-agents.json index c973a20..2f5f05d 100644 --- a/deploy/rockbot-seed/well-known-agents.json +++ b/deploy/rockbot-seed/well-known-agents.json @@ -1,7 +1,7 @@ [ { "agentName": "Foragent", - "description": "Browser agent — navigates pages with Chromium and exposes task-level skills over HTTP A2A.", + "description": "Browser agent — navigates pages with Chromium and exposes task-level skills over HTTP A2A. All skills consume structured input on the A2A DataPart (invoke_agent 'data' parameter); the text 'message' is only for human-readable summaries.", "version": "1.0", "url": "http://foragent:8080", "protocolVersion": "1.0", @@ -11,17 +11,17 @@ { "id": "browser-task", "name": "Browser Task (generalist)", - "description": "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. Input JSON {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. allowedHosts is required and empty rejects. Returns a structured JSON result with status (done/failed/incomplete), summary, optional result, step count, and navigations." + "description": "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. 'intent' and 'allowedHosts' are required; an empty allowlist rejects the task. Use [\"*\"] explicitly when any host is acceptable. Returns a structured JSON result with status (done/failed/incomplete), summary, optional result, step count, and navigations." }, { - "id": "fetch-page-title", - "name": "Fetch Page Title", - "description": "Navigate to a URL with a real browser and return the contents of its element." + "id": "learn-form-schema", + "name": "Learn Form Schema", + "description": "Navigate to a web form, extract its structure (fields, types, options, validation), and persist it as a reusable skill. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"url\":\"https://...\",\"allowedHosts\":[\"host\"],\"formSelector\":\"optional\",\"credentialId\":\"optional\",\"skillName\":\"optional override\",\"intent\":\"optional prose\"}. 'url' and 'allowedHosts' are required. Returns the typed form schema plus the skill name it was persisted under." }, { - "id": "extract-structured-data", - "name": "Extract Structured Data", - "description": "Navigate to a URL and extract data matching a natural-language description, returning JSON. Input the target URL and a description of what to extract." + "id": "execute-form-batch", + "name": "Execute Form Batch", + "description": "Submit a batch of rows against a learned form schema. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"schemaRef\":\"sites/host/forms/name\" OR \"schema\":{...FormSchema...},\"rows\":[{fieldName:value,...}],\"allowedHosts\":[\"host\"],\"credentialId\":\"optional\",\"mode\":\"abort-on-first\"|\"continue\",\"successIndicator\":\"optional CSS selector\"}. 'rows', 'allowedHosts', and exactly one of schemaRef/schema are required. Streams per-row progress. Default mode aborts on first failure." } ] } diff --git a/docker-compose.yml b/docker-compose.yml index 4c8a7dc..ae4c62b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,7 +5,7 @@ # - foragent — this project; exposes HTTP A2A on port 5210 # - rockbot-init — seeds /data/agent with RockBot profile + well-known-agents.json # pointing at foragent -# - rockbot — rockylhotka/rockbot-agent:0.9.11, configured to know Foragent +# - rockbot — rockylhotka/rockbot-agent:0.9.15, configured to know Foragent # as an A2A peer it can delegate tasks to. 0.9.11 brings # the structured-data invoke_agent surface (PR #291) so # RockBot can consume Foragent's FormSchema JSON results @@ -21,7 +21,7 @@ # curl -X POST http://localhost:5210/ \ # -H "X-Api-Key: rockbot-calls-foragent" \ # -H "Content-Type: application/json" \ -# -d '{"jsonrpc":"2.0","id":1,"method":"message/send","params":{"message":{"role":"ROLE_USER","messageId":"m1","parts":[{"text":"https://example.com"}]},"metadata":{"skill":"fetch-page-title"}}}' +# -d '{"jsonrpc":"2.0","id":1,"method":"message/send","params":{"message":{"role":"ROLE_USER","messageId":"m1","parts":[{"text":"{\"intent\":\"fetch the page title\",\"url\":\"https://example.com\",\"allowedHosts\":[\"example.com\"]}"}]},"metadata":{"skill":"browser-task"}}}' # Note: the A2A v1-preview schema uses protobuf-style enum values (ROLE_USER, not "user") # and parts are bare {"text":"..."} objects — no "kind" field. # @@ -47,12 +47,27 @@ services: timeout: 3s retries: 15 + foragent-init: + # One-shot ownership fix for the foragent-data named volume. The Foragent + # Dockerfile chowns /data to the non-root `foragent` user at build time, but + # Docker mounts a fresh named volume root-owned and masks the chown — + # FileSkillStore then hits UnauthorizedAccessException on first boot. This + # init container runs as root once per volume creation and mirrors the + # rockbot-init pattern. Subsequent boots skip the mkdir+chown if already set. + image: busybox:latest + user: root + command: ["sh", "-c", "mkdir -p /data/foragent/skills /data/foragent/memory && chmod -R 777 /data/foragent"] + volumes: + - foragent-data:/data/foragent + foragent: build: context: . depends_on: rabbitmq: condition: service_healthy + foragent-init: + condition: service_completed_successfully ports: - "5210:8080" environment: @@ -64,12 +79,12 @@ services: RabbitMq__VirtualHost: / Gateway__AgentName: Foragent Gateway__InternalAgentName: Foragent - Gateway__Description: "Browser agent — browser-task (generalist), learn-form-schema, execute-form-batch, fetch-page-title, extract-structured-data" + Gateway__Description: "Browser agent — browser-task (generalist), learn-form-schema, execute-form-batch" # RockBot will call Foragent with header X-Api-Key: rockbot-calls-foragent ApiKeys__rockbot-calls-foragent__AgentId: RockBot ApiKeys__rockbot-calls-foragent__DisplayName: RockBot - # LLM required for the extract-structured-data capability. Namespaced so - # Foragent can point at a different model than the RockBot side. + # LLM required for the browser-task planner and form-schema enrichment. + # Namespaced so Foragent can point at a different model than the RockBot side. ForagentLlm__Endpoint: ${FORAGENT_LLM_ENDPOINT:?FORAGENT_LLM_ENDPOINT is required} ForagentLlm__ModelId: ${FORAGENT_LLM_MODEL_ID:?FORAGENT_LLM_MODEL_ID is required} ForagentLlm__ApiKey: ${FORAGENT_LLM_API_KEY:?FORAGENT_LLM_API_KEY is required} @@ -100,7 +115,7 @@ services: - foragent-data:/data/foragent rockbot-init: - image: rockylhotka/rockbot-agent:0.9.11 + image: rockylhotka/rockbot-agent:0.9.15 user: root entrypoint: ["/bin/sh", "-c"] command: @@ -136,7 +151,7 @@ services: - ./deploy/rockbot-seed:/seed:ro rockbot: - image: rockylhotka/rockbot-agent:0.9.11 + image: rockylhotka/rockbot-agent:0.9.15 depends_on: rockbot-init: condition: service_completed_successfully diff --git a/docs/capabilities.md b/docs/capabilities.md index e8ac2e3..768eee7 100644 --- a/docs/capabilities.md +++ b/docs/capabilities.md @@ -8,13 +8,22 @@ invoke capabilities by name; Foragent handles the browser mechanics. - `browser-task` — **generalist**, spec §5.2. LLM-in-the-loop planner that drives a real browser to accomplish a free-form intent. Shipped in step 6; step 7 added skills + memory priming (spec §5.6). -- `fetch-page-title` — specialist. Inherited from step 1/2. -- `extract-structured-data` — specialist. Inherited from step 3. - -The step-4 `post-to-site` capability was removed in step 7 — the -generalist `browser-task` plus the seeded `sites/bsky.app/login` skill -subsume its function, and the project is still pre-public so no consumer -needed a deprecation path. +- `learn-form-schema` — specialist (phase-1, spec §5.5). Introspects a + form and returns a typed `FormSchema` persisted at `sites/{host}/forms/{slug}`. +- `execute-form-batch` — specialist (phase-3, spec §5.5). Submits rows + against a learned schema, streaming per-row progress over A2A. + +Three v0.1/v0.2 specialists have been removed as `browser-task` subsumes +them. The project is pre-public so no deprecation path was required: + +- `post-to-site` — removed in step 7. `browser-task` + the seeded + `sites/bsky-app/login` skill cover the use case. +- `fetch-page-title` — removed in step 9. Was a milestone-1 smoke + target; `browser-task` with a simple intent produces the same result. +- `extract-structured-data` — removed in step 9. `browser-task` with a + "return JSON: {…}" intent produces the same result. Its typed input + shape also lacked the mandatory allowlist required by spec §7.1; the + generalist enforces that by design. ## `browser-task` input shape @@ -60,7 +69,12 @@ A JSON object in a single text part: ``` `incomplete` means the budget was exhausted before `done`/`fail` was -called. +called. For extraction-style tasks, instruct the planner to return JSON +via the `result` field — e.g. intent `"Open https://shop.example/p/42 +and return {\"name\":..., \"price_usd\":...} as JSON in the result +field."`. The planner is not schema-enforced the way +`extract-structured-data` used to be, so keep the target shape explicit +in the intent. ## `browser-task` tool surface diff --git a/docs/foragent-specification.md b/docs/foragent-specification.md index 4d105d2..33082d2 100644 --- a/docs/foragent-specification.md +++ b/docs/foragent-specification.md @@ -311,15 +311,15 @@ callers cheap — not to proliferate. | `browser-task` | Generalist | Given intent + optional URL, credential id, and allowed-hosts list, plan and drive the browser to fulfill the intent. Uses RockBot skills + memory as priming. Returns a result or a structured intermediate artifact (e.g. a learned form schema). | | `learn-form-schema` | Specialist (phase-1) | Given a URL and optional credential, introspect a form and return its schema — fields, types, dropdown dependencies, validation rules. Persists the schema as a skill (§5.6). Returns the schema to the caller for review. | | `execute-form-batch` | Specialist (phase-2) | Given a learned schema (by id or inline) and a batch of row data, submit the form once per row. Streams A2A progress updates. Handles partial failure. | -| `fetch-page-title` | Specialist | Return the `<title>` of a URL. Inherited from milestone 2. | -| `extract-structured-data` | Specialist | Extract structured data from a page matching a natural-language description. Inherited from milestone 3. | -The v0.1 `post-to-site` capability ships in the main codebase as a -regression test for credential handling. After step 7 it is removed -from the advertised skill list; `browser-task` subsumes its function. - -The v0.1 `monitor-page` and `fill-form` capabilities fold into -`browser-task` and do not ship as separate advertised skills. +After step 9 the v0.2 surface is three skills. The v0.1 `post-to-site` +capability was removed in step 7 once the seeded `bsky.app` skill + +`browser-task` covered it; the v0.1 `fetch-page-title` and v0.1 +`extract-structured-data` specialists were removed in step 9 — the +generalist subsumes both at the cost of 2–3× tokens per call, which +is acceptable given zero deterministic high-volume callers today. The +v0.1 `monitor-page` and `fill-form` capabilities fold into `browser-task` +and do not ship as separate advertised skills either. ### 5.3 Capabilities explicitly out of scope (v1) @@ -393,10 +393,10 @@ site knowledge, rather than building a Foragent-local store. `RockBot.Host.Abstractions` + `RockBot.Host.AgentMemoryExtensions.WithSkills()`). Stores site knowledge as markdown skills. Two origin categories: - **Human-authored skills** — operator-written primers for a site - (e.g. `sites/bsky.app/overview`). Treated as priming hints for the + (e.g. `sites/bsky-app/overview`). Treated as priming hints for the generalist planner. - **Agent-learned skills** — written by the generalist on successful - task completion (e.g. `sites/bsky.app/learned/login-flow`). Tagged + task completion (e.g. `sites/bsky-app/learned/login-flow`). Tagged with `metadata.source = "agent-learned"` and an importance score. - **`ILongTermMemory`** (file-backed, BM25 + semantic — `WithLongTermMemory()`). Declarative observations that don't fit the @@ -404,7 +404,10 @@ site knowledge, rather than building a Foragent-local store. facts. **Skill naming:** `sites/{host}/{phase-or-intent}` — e.g. -`sites/bsky.app/login`, `sites/bsky.app/compose-post`. Hierarchical `/` +`sites/bsky-app/login`, `sites/bsky-app/compose-post`. Host segments are +sanitized (`.` → `-`) because RockBot 0.9's `FileSkillStore.ValidateName` +rejects dots; `bsky.app` becomes `bsky-app`. Allowlists and memory +categories keep the original dotted host. Hierarchical `/` nesting is supported by the store. `seeAlso` links cross-reference skills for the same site so retrieval surfaces a small knowledge cluster, not one skill at a time. @@ -714,10 +717,15 @@ hard design questions until usage forces them. Resolve open question #6 (how to persist typed JSON alongside markdown skills) in the deliverable. -9. **Deprecate subsumed specialists.** Review whether `fetch-page-title` - / `extract-structured-data` still pay their way or fold into - `browser-task` with equivalent cost. Land on the minimum advertised - capability set v0.2 actually needs. +9. **Deprecate subsumed specialists.** Reviewed whether `fetch-page-title` + / `extract-structured-data` still paid their way vs. `browser-task` at + equivalent cost. Both removed: `fetch-page-title` was a milestone-1 + smoke-test relic that `browser-task` subsumes trivially; + `extract-structured-data` was functionally equivalent to a `browser-task` + intent that asks for JSON in the `done.result` channel (cost delta + ~2–3× tokens per call, zero deterministic high-volume callers today), + and was out of spec on §7.1 mandatory allowlists. Advertised surface + lands at `browser-task` + `learn-form-schema` + `execute-form-batch`. Each milestone produces framework feedback. Capture it in `docs/framework-feedback.md` — some will be small ergonomic fixes; some diff --git a/docs/framework-feedback.md b/docs/framework-feedback.md index 0481e9c..0e3a8e2 100644 --- a/docs/framework-feedback.md +++ b/docs/framework-feedback.md @@ -573,3 +573,152 @@ the operator turns the harness on for a sustained session. `FORAGENT_LLM_*`. - Existing step-6 benchmark still 3/3 — framework bump didn't regress anything else. + +## Step 9 — Deprecate subsumed specialists + +Step 9 is a decision milestone, not a feature ship, so the framework +observations are few. Advertised surface lands at three skills: +`browser-task`, `learn-form-schema`, `execute-form-batch`. + +### What was deleted and why + +- **`fetch-page-title`** — milestone-1 smoke-test relic. Pure + specialist wrapping `<title>` reads. `browser-task` with intent + `"fetch the page title of <url>"` and `done.result` produces the + same value for ~2× the tokens, which is fine given no deterministic + high-volume caller actually exists. +- **`extract-structured-data`** — single-turn typed extraction with + `ResponseFormat = Json`. Two deciding factors on top of the "no + caller" argument: (1) it was out of spec on §7.1 — it called the + no-argument `CreateSessionAsync` overload and accepted any host, + which the generalist refuses by design; (2) bringing it into spec + would have added mandatory `allowedHosts` to its input shape and + erased its simplicity advantage. +- **`IBrowserSession.FetchPageTitleAsync` / + `CapturePageSnapshotAsync` / `PageSnapshot` / `PageSnapshotSource`** + — orphaned once the two capabilities went. Deleted from the + `Foragent.Browser` surface rather than left as dead code. The + `snapshot` tool inside `browser-task` uses `IBrowserAgentPage` and + never touched this API path. +- **`CapabilityInput.Parse`** — the shared URL+description shim had + only the deleted specialists as consumers. `BrowserTaskInput` + handles its own shape; `Forms/*Input.cs` handles theirs. Deleted. +- **Integration test `ExtractStructuredDataIntegrationTests`** — the + only `[SkippableFact]` in the browser-tests project that needed + `FORAGENT_LLM_*`. Removed; `BrowserTaskIntegrationTests` remains the + real-LLM benchmark. + +### Framework observations + +- **Capability-surface evolution is painless.** Removing + `ICapability` implementations is a three-line edit to + `ForagentCapabilitiesServiceCollectionExtensions` + a one-line edit + to the static `Skills` array. No framework API made the deletion + harder than it had to be — `IAgentTaskHandler.HandleTaskAsync` + + DI-resolved capabilities remain the right shape for a fast-moving + pre-1.0 product surface. Confirms that foragent#5 / + [rockbot#283](https://github.com/MarimerLLC/rockbot/issues/283) (per-skill + handler registration) is a quality-of-life improvement, not a + blocker. +- **`AgentCard` and `Gateway:Skills` share one source of truth now.** + Post-step-1 refactor landed the `ForagentCapabilities.Skills` static + that both the A2A card and the HTTP gateway read from — step 9 + touched exactly that one array and both sides updated. That worked + exactly as intended; the step-1 feedback entry about duplicate card + declarations is effectively closed on the Foragent side via the + local static. An upstream generalization — framework reads + `A2AOptions.Card.Skills` directly in the gateway — would remove the + small local static but isn't urgent. +- **No spec open-questions closed or opened.** Open items #3, #4, #5, + #7 remain as written; #6 and #8 closed in step 8; #1 and #2 closed + in v0.2 spec adoption. v0.2 is the shipped minimum surface. + +### Follow-up fixes surfaced while validating step 9 + +Running RockBot → Foragent end-to-end (MacBook-price search across +apple.com + bestbuy.com) surfaced three pre-existing issues. All fixed +on the step-9 branch since step 9's test plan claims end-to-end +validation that didn't actually work without them. + +- **`BrowserTaskPriming` required `IEmbeddingGenerator`** — the + primary-constructor parameter was already annotated nullable + (`IEmbeddingGenerator<string, Embedding<float>>?`), but MSDI ignores + C# nullable annotations; it only honors default parameter values. + Reordered to put `embeddingGenerator` last with `= null` so MSDI + treats it as optional. Spec §5.6 says missing embeddings should + downgrade to BM25-only — this made that claim actually true. + Framework observation: MSDI's "nullable means optional" footgun is + well-documented but still catches people; worth a sentence in the + RockBot host-wiring docs if they exist. +- **Skill names with dotted hosts fail silently** — RockBot 0.9's + `FileSkillStore.ValidateName` rejects `.` (only alphanumeric, + hyphens, underscores, `/`). `sites/bsky.app/login`, + `sites/apple.com/learned/…`, `sites/example.com/forms/…` — all + common real hosts — threw `ArgumentException` on save, which + `BskySeedSkillService` swallowed as a warning and + `TryWriteLearnedSkillAsync` swallowed on the error path. Added + `SkillNaming.SanitizeHost(host)` that replaces `.` → `-` + (`bsky.app` → `bsky-app`). Three call sites updated: + `BskySeedSkillService`, `BrowserTaskCapability.TryWriteLearnedSkillAsync`, + `LearnFormSchemaCapability.DeriveSkillName`. Framework observation: + the validator's error is informative but the fact that *every real + host fails validation* suggests either `.` should be allowed in + skill names (it's fine on filesystems) or the framework should + offer a canonical sanitizer so every consumer doesn't reinvent one. + Allowlist matching and memory-search categories keep the original + dotted host. +- **Named-volume permissions on fresh compose boot** — the Foragent + Dockerfile chowns `/data` to the non-root `foragent` user (uid 1655) + at image-build time, but Docker mounts a fresh named volume + root-owned and masks the build-time chown. Added a `foragent-init` + busybox one-shot (mirroring the `rockbot-init` pattern) that + `chmod -R 777 /data/foragent` on volume creation. Harness issue, + not framework — noting here because it's the kind of thing that + could bite any RockBot consumer that mounts persistent state as a + named volume. +- **Cancel handler override pattern** — RockBot ships a default + `AgentTaskCancelHandler` inside `AddA2A` that assumes stateless agents + and always replies `TaskNotCancelable`. Foragent is stateful (browser + task per request, potentially minutes long), and RockBot's + wisp/A2A coordination can send cancel messages when a local wisp + fails after already dispatching an A2A task. Replacing the default + is simple — call `agent.HandleMessage<AgentTaskCancelRequest, + ForagentCancelHandler>()` after `AddA2A` and the later `AddScoped` + wins — but worth calling out as the canonical pattern for any + long-running agent. Foragent's impl uses a singleton + `InFlightTaskRegistry` (concurrent dict of taskId → linked CTS); + the task handler registers on entry and removes in `finally`, the + cancel handler calls `TryCancel`. Publishing nothing on successful + cancel is deliberate — the task's own terminal reply is the + acknowledgment. Framework-level observation: a `WithTaskCancellation()` + opt-in on `AgentHostBuilder` (or a non-internal registry type the + framework ships so consumers don't roll their own concurrent dict) + would save every stateful-agent consumer ~50 lines. Filed as a + candidate enhancement; non-blocking. +- **Structured A2A input requires a DataPart, not JSON-in-text** — + running RockBot → Foragent through the Blazor UI, the caller LLM + kept looping on `"Missing 'allowedHosts'"` rejections. Root cause: + Foragent's three input parsers (`BrowserTaskInput`, + `LearnFormSchemaInput`, `ExecuteFormBatchInput`) only consumed the + first `Kind = "text"` part, requiring JSON-as-a-string. RockBot + 0.9.11's `invoke_agent` tool instead sends structured fields as a + separate `AgentMessagePart { Kind = "data", Data = <json>, + MimeType = "application/json" }` when its `data` parameter is + filled — the proper A2A DataPart shape. But Foragent never + advertised that it consumed data parts, and `invoke_agent`'s tool + description steered the LLM away from sending them unless the + target was "known to consume data." The LLM saw Foragent's skill + description say "Input: JSON {…}" and interpreted that as + "stringify the JSON into message," which strict parsers reject, + which looped. **Fix**: taught all three parsers to read a + `Kind = "data"` part first; taught the skill descriptions to + explicitly say "PASS INPUT AS AN A2A DATA PART — populate + invoke_agent's 'data' parameter with …"; updated the seed card + identically. Framework-level observation: the A2A protocol cleanly + supports typed payloads via DataPart, but the interoperability + chain (sender's tool description ↔ target's skill description ↔ + target's parser) all need to agree that the data part is the + canonical shape. RockBot's `invoke_agent` tool description was + softened upstream in 0.9.14 (removing the "most agents only + understand text" steer) to help with this — consumers should + upgrade when available. diff --git a/src/Foragent.Agent/BskySeedSkillService.cs b/src/Foragent.Agent/BskySeedSkillService.cs index 4b2947c..8995064 100644 --- a/src/Foragent.Agent/BskySeedSkillService.cs +++ b/src/Foragent.Agent/BskySeedSkillService.cs @@ -5,7 +5,7 @@ namespace Foragent.Agent; /// <summary> -/// Seeds the operator-authored <c>sites/bsky.app/login</c> skill into the +/// Seeds the operator-authored <c>sites/bsky-app/login</c> skill into the /// configured <see cref="ISkillStore"/> on startup if it is not already /// present. Idempotent — subsequent starts are no-ops, so recreating the /// persistence volume does not wipe human-authored priming, but editing the @@ -14,12 +14,17 @@ namespace Foragent.Agent; /// Human-authored skills seed the generalist planner with site-specific /// primers (spec §5.6). Agent-learned skills are written by the capability /// itself on successful task completion. +/// +/// Note: the host segment is <c>bsky-app</c> (not <c>bsky.app</c>) because +/// RockBot 0.9's <c>FileSkillStore.ValidateName</c> rejects dots — all +/// skill names with hosts use the <see cref="SkillNaming.SanitizeHost"/> +/// substitution. /// </summary> internal sealed class BskySeedSkillService( ISkillStore skillStore, ILogger<BskySeedSkillService> logger) : IHostedService { - private const string SkillName = "sites/bsky.app/login"; + private const string SkillName = "sites/bsky-app/login"; private const string SkillSummary = "Log in to bsky.app with an app password; look for 2FA challenges."; @@ -80,7 +85,7 @@ before assuming it worked. ## See also - - `sites/bsky.app/compose-post` once that skill is learned. + - `sites/bsky-app/compose-post` once that skill is learned. """; public async Task StartAsync(CancellationToken cancellationToken) @@ -101,7 +106,7 @@ public async Task StartAsync(CancellationToken cancellationToken) CreatedAt: DateTimeOffset.UtcNow, UpdatedAt: null, LastUsedAt: null, - SeeAlso: ["sites/bsky.app/compose-post"]); + SeeAlso: ["sites/bsky-app/compose-post"]); await skillStore.SaveAsync(skill); logger.LogInformation("Seeded human-authored skill '{Name}'.", SkillName); diff --git a/src/Foragent.Agent/Program.cs b/src/Foragent.Agent/Program.cs index a0fceef..60e6779 100644 --- a/src/Foragent.Agent/Program.cs +++ b/src/Foragent.Agent/Program.cs @@ -16,9 +16,9 @@ builder.Configuration.AddUserSecrets<Program>(optional: true); -// ── LLM (required — capabilities like extract-structured-data reason over -// page content via Microsoft.Extensions.AI). Config is namespaced under -// ForagentLlm so it can differ from the host's RockBot LLM config. ─────── +// ── LLM (required — browser-task's planner and form-schema enrichment reason +// over page content via Microsoft.Extensions.AI). Config is namespaced +// under ForagentLlm so it can differ from the host's RockBot LLM config. ─ var llmSection = builder.Configuration.GetSection("ForagentLlm"); var llmEndpoint = llmSection["Endpoint"] @@ -155,6 +155,13 @@ agent.Services.AddForagentCapabilities(); agent.Services.AddHostedService<BskySeedSkillService>(); + + // Replace the framework's default stateless AgentTaskCancelHandler + // (which always returns TaskNotCancelable) with Foragent's stateful + // one that actually cancels the running browser task via + // InFlightTaskRegistry. Last AddScoped wins, so this call after + // AddA2A above shadows the framework default. + agent.HandleMessage<AgentTaskCancelRequest, ForagentCancelHandler>(); }); builder.Services.AddForagentBrowser(); diff --git a/src/Foragent.Agent/directives/skill-optimize.md b/src/Foragent.Agent/directives/skill-optimize.md index 079aaf7..e0eda6b 100644 --- a/src/Foragent.Agent/directives/skill-optimize.md +++ b/src/Foragent.Agent/directives/skill-optimize.md @@ -34,7 +34,7 @@ Merge two skills when **all three** are true: - The combined skill would still fit comfortably inside ~400 words. Do not merge skills that happen to target the same site but different -intents (e.g. `sites/bsky.app/login` vs `sites/bsky.app/compose-post`). +intents (e.g. `sites/bsky-app/login` vs `sites/bsky-app/compose-post`). Those stay separate — different retrieval contexts. ## Output format @@ -48,9 +48,9 @@ For each change you want to make, emit one of: Example: ``` -DELETE sites/bsky.app/learned/sign-in-with-app-password -DELETE sites/bsky.app/learned/log-into-bluesky -UPSERT sites/bsky.app/login | Log in to bsky.app with an app password; watch for 2FA challenges. +DELETE sites/bsky-app/learned/sign-in-with-app-password +DELETE sites/bsky-app/learned/log-into-bluesky +UPSERT sites/bsky-app/login | Log in to bsky.app with an app password; watch for 2FA challenges. Bluesky's public web app is at https://bsky.app… (full markdown body) --- diff --git a/src/Foragent.Browser/IBrowserSession.cs b/src/Foragent.Browser/IBrowserSession.cs index 65629a4..bceca44 100644 --- a/src/Foragent.Browser/IBrowserSession.cs +++ b/src/Foragent.Browser/IBrowserSession.cs @@ -8,23 +8,6 @@ namespace Foragent.Browser; /// </summary> public interface IBrowserSession : IAsyncDisposable { - /// <summary> - /// Navigates to <paramref name="url"/> and returns the contents of the - /// rendered <c><title></c> element, or <c>null</c> if the page - /// does not expose one. - /// </summary> - Task<string?> FetchPageTitleAsync(Uri url, CancellationToken cancellationToken = default); - - /// <summary> - /// Navigates to <paramref name="url"/> and returns a compact, - /// LLM-consumable representation of the page. Uses the Chromium - /// accessibility tree when available (spec §3.2 — the right grain for an - /// extraction prompt); falls back to <c><body></c> inner text when - /// the accessibility snapshot is empty or unavailable. Includes the page - /// title at the top so the LLM has enough context to reason. - /// </summary> - Task<PageSnapshot> CapturePageSnapshotAsync(Uri url, CancellationToken cancellationToken = default); - /// <summary> /// Opens a page for a multi-step flow (login, fill form, navigate, read /// back confirmation). The caller drives the page with the methods on @@ -210,21 +193,6 @@ public sealed record FormScanField( /// <summary>An option entry for a <c><select></c> or radio group.</summary> public sealed record FormScanOption(string Value, string? Label); -/// <summary> -/// A compact rendering of a page suitable for LLM prompting. -/// </summary> -/// <param name="Url">The final URL after any redirects.</param> -/// <param name="Title">Page title, or <c>null</c>.</param> -/// <param name="Content">Accessibility-tree rendering or inner text.</param> -/// <param name="Source">Whether the content came from the accessibility tree or from a text fallback.</param> -public sealed record PageSnapshot(Uri Url, string? Title, string Content, PageSnapshotSource Source); - -public enum PageSnapshotSource -{ - Accessibility, - InnerText -} - /// <summary> /// Creates fresh <see cref="IBrowserSession"/> instances against the shared /// long-lived browser. Each session wraps a new browser context; disposing diff --git a/src/Foragent.Browser/PlaywrightBrowserSessionFactory.cs b/src/Foragent.Browser/PlaywrightBrowserSessionFactory.cs index 7dcfb30..703f429 100644 --- a/src/Foragent.Browser/PlaywrightBrowserSessionFactory.cs +++ b/src/Foragent.Browser/PlaywrightBrowserSessionFactory.cs @@ -49,66 +49,6 @@ internal sealed class PlaywrightBrowserSession( IBrowserContext context, Func<Uri, bool> allowedHost) : IBrowserSession { - public async Task<string?> FetchPageTitleAsync(Uri url, CancellationToken cancellationToken = default) - { - cancellationToken.ThrowIfCancellationRequested(); - EnsureAllowed(url); - var page = await context.NewPageAsync(); - try - { - var response = await page.GotoAsync(url.ToString(), new PageGotoOptions - { - WaitUntil = WaitUntilState.DOMContentLoaded - }); - if (response is null || !response.Ok) - throw new InvalidOperationException( - $"Navigation to {url} returned status {response?.Status.ToString() ?? "no response"}."); - - var title = await page.TitleAsync(); - return string.IsNullOrEmpty(title) ? null : title; - } - finally - { - await page.CloseAsync(); - } - } - - public async Task<PageSnapshot> CapturePageSnapshotAsync(Uri url, CancellationToken cancellationToken = default) - { - cancellationToken.ThrowIfCancellationRequested(); - EnsureAllowed(url); - var page = await context.NewPageAsync(); - try - { - var response = await page.GotoAsync(url.ToString(), new PageGotoOptions - { - WaitUntil = WaitUntilState.DOMContentLoaded - }); - if (response is null || !response.Ok) - throw new InvalidOperationException( - $"Navigation to {url} returned status {response?.Status.ToString() ?? "no response"}."); - - var finalUrl = new Uri(page.Url); - var title = await page.TitleAsync(); - title = string.IsNullOrEmpty(title) ? null : title; - - // Locator.AriaSnapshotAsync returns a compact YAML-like rendering of the - // accessibility tree (roles + accessible names + values), which is what - // Playwright's own assertion helpers use. Replaces the deprecated - // IPage.Accessibility.SnapshotAsync() API. - var ariaSnapshot = await page.Locator("body").AriaSnapshotAsync(); - if (!string.IsNullOrWhiteSpace(ariaSnapshot)) - return new PageSnapshot(finalUrl, title, ariaSnapshot, PageSnapshotSource.Accessibility); - - var body = await page.Locator("body").InnerTextAsync(); - return new PageSnapshot(finalUrl, title, body ?? string.Empty, PageSnapshotSource.InnerText); - } - finally - { - await page.CloseAsync(); - } - } - public async Task<IBrowserPage> OpenPageAsync(Uri url, CancellationToken cancellationToken = default) { cancellationToken.ThrowIfCancellationRequested(); diff --git a/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs b/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs index c1ca8a9..483831c 100644 --- a/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs +++ b/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs @@ -34,7 +34,11 @@ public sealed class BrowserTaskCapability( Id = "browser-task", Name = "Browser Task (generalist)", Description = "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. " - + "Input: JSON {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. " + + "PASS INPUT AS AN A2A DATA PART (a structured JSON object), not as prose inside the text message. " + + "When calling via RockBot's invoke_agent, populate the 'data' parameter with this object; " + + "the text 'message' should only be a human-readable summary. " + + "Fields: {\"intent\":\"what to accomplish\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. " + + "'intent' and 'allowedHosts' are REQUIRED — an empty allowlist rejects the task (spec §7.1). Use [\"*\"] explicitly when any host is acceptable. " + "Returns a short summary plus optional structured result string." }; @@ -185,8 +189,9 @@ private async Task TryWriteLearnedSkillAsync( if (string.IsNullOrEmpty(primaryHost)) return; + var hostSegment = SkillNaming.SanitizeHost(primaryHost); var slug = Slugify(input.Intent!); - var name = $"sites/{primaryHost}/learned/{slug}"; + var name = $"sites/{hostSegment}/learned/{slug}"; try { @@ -213,7 +218,7 @@ private async Task TryWriteLearnedSkillAsync( CreatedAt: existing?.CreatedAt ?? DateTimeOffset.UtcNow, UpdatedAt: existing is null ? null : DateTimeOffset.UtcNow, LastUsedAt: DateTimeOffset.UtcNow, - SeeAlso: [$"sites/{primaryHost}/login"]); + SeeAlso: [$"sites/{hostSegment}/login"]); await skillStore.SaveAsync(skill); logger.LogInformation("Wrote learned skill '{Name}' ({Nav} navigations, {Steps} steps).", name, state.Navigations.Count, state.Steps); diff --git a/src/Foragent.Capabilities/BrowserTask/BrowserTaskInput.cs b/src/Foragent.Capabilities/BrowserTask/BrowserTaskInput.cs index cb49508..e81e066 100644 --- a/src/Foragent.Capabilities/BrowserTask/BrowserTaskInput.cs +++ b/src/Foragent.Capabilities/BrowserTask/BrowserTaskInput.cs @@ -6,9 +6,18 @@ namespace Foragent.Capabilities.BrowserTask; /// <summary> /// Parses the <c>browser-task</c> input shape (spec §5.2). /// -/// Accepts either a JSON object in the first text part or field-by-field -/// metadata on the message/request. Metadata overrides JSON when both are -/// present. Shape: +/// Accepts structured input in three forms, checked in order: +/// <list type="number"> +/// <item>An A2A <c>DataPart</c> (message part with <c>Kind = "data"</c>, +/// <c>MimeType = "application/json"</c>) carrying the JSON object. This +/// is the preferred shape and the one RockBot's <c>invoke_agent</c> tool +/// produces when the caller fills its <c>data</c> parameter.</item> +/// <item>A JSON object in the first text part (curl callers, agents that +/// don't use data parts).</item> +/// <item>Field-by-field metadata on the message/request. Metadata +/// overrides fields from the JSON source when both are present.</item> +/// </list> +/// Shape: /// <list type="bullet"> /// <item><c>intent</c> — required. Free-form description of what to do.</item> /// <item><c>allowedHosts</c> — required. Array of host patterns. Empty rejects.</item> @@ -41,18 +50,30 @@ public static BrowserTaskInput Parse(AgentTaskRequest request) int? maxSteps = null; int? maxSeconds = null; + // Preferred shape: RockBot's invoke_agent sends structured fields as + // a DataPart alongside the prose text part. Prefer it when present so + // the text part can stay human-readable. + var dataJson = request.Message.Parts + .Where(p => p.Kind == "data" && !string.IsNullOrWhiteSpace(p.Data)) + .Select(p => p.Data) + .FirstOrDefault(); + var text = request.Message.Parts .Where(p => p.Kind == "text") .Select(p => p.Text) .FirstOrDefault(t => !string.IsNullOrWhiteSpace(t)) ?.Trim(); - if (!string.IsNullOrEmpty(text) && text.StartsWith('{')) + string? jsonPayload = dataJson ?? (!string.IsNullOrEmpty(text) && text.StartsWith('{') ? text : null); + + if (jsonPayload is not null) { try { - using var doc = JsonDocument.Parse(text); + using var doc = JsonDocument.Parse(jsonPayload); var root = doc.RootElement; + if (root.ValueKind != JsonValueKind.Object) + return Fail("Structured input must be a JSON object."); if (root.TryGetProperty("intent", out var i)) intent = i.GetString(); if (root.TryGetProperty("url", out var u)) url = u.GetString(); if (root.TryGetProperty("credentialId", out var c)) credentialId = c.GetString(); @@ -67,6 +88,12 @@ public static BrowserTaskInput Parse(AgentTaskRequest request) { return Fail("Input must be a JSON object with intent, allowedHosts, and optional url/credentialId/maxSteps/maxSeconds."); } + + // When a DataPart carried the structured fields, the text part is + // the human-readable prose from the caller — use it as a fallback + // intent if the data part didn't supply one. + if (string.IsNullOrWhiteSpace(intent) && !string.IsNullOrEmpty(text) && !text.StartsWith('{')) + intent = text; } else if (!string.IsNullOrEmpty(text)) { @@ -81,10 +108,12 @@ public static BrowserTaskInput Parse(AgentTaskRequest request) allowedHosts = [.. hostsCsv.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries)]; if (string.IsNullOrWhiteSpace(intent)) - return Fail("Missing 'intent' — a natural-language description of what to do."); + return Fail(HintedError("Missing 'intent' — a natural-language description of what to do.")); if (allowedHosts is null || allowedHosts.Count == 0) - return Fail("Missing 'allowedHosts' — browser-task requires an explicit allowlist (spec §7.1). Use ['*'] to accept any host."); + return Fail(HintedError( + "Missing 'allowedHosts' — browser-task requires an explicit host allowlist (spec §7.1). " + + "Use [\"*\"] to accept any host.")); HostAllowlist allowlist; try @@ -115,6 +144,16 @@ public static BrowserTaskInput Parse(AgentTaskRequest request) private static BrowserTaskInput Fail(string message) => new(null, null, null, null, DefaultMaxSteps, DefaultMaxSeconds, message); + /// <summary> + /// Appends the DataPart usage hint to an error message so the caller LLM + /// (which likely never read the skill description) learns the correct + /// invocation pattern from the rejection itself. The hint tells it *how* + /// to fix the call, not just *what* was missing. + /// </summary> + private static string HintedError(string reason) => reason + " " + + "Pass inputs as a JSON object on the A2A DataPart — in RockBot's invoke_agent tool, that means filling the 'data' parameter, NOT adding fields to the 'message' text. " + + "Example data: {\"intent\":\"...\",\"allowedHosts\":[\"*\"],\"url\":\"optional\"}."; + private static int Clamp(int value, int min, int max) => value < min ? min : value > max ? max : value; diff --git a/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs b/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs index c47bda6..aa99be3 100644 --- a/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs +++ b/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs @@ -14,11 +14,18 @@ namespace Foragent.Capabilities.BrowserTask; /// Isolated from <see cref="BrowserTaskCapability"/> so tests can inject /// fake stores without going through the capability's full execute path. /// </summary> +/// <remarks> +/// <paramref name="embeddingGenerator"/> is placed last with a default of +/// <c>null</c> so MSDI resolves the type even when no embedding generator is +/// registered — the C# nullable annotation alone is not enough; MSDI only +/// honors optional parameters via default values (spec §5.6 — "missing +/// embeddings downgrade retrieval to BM25-only"). +/// </remarks> public sealed class BrowserTaskPriming( ISkillStore skillStore, ILongTermMemory longTermMemory, - IEmbeddingGenerator<string, Embedding<float>>? embeddingGenerator, - ILogger<BrowserTaskPriming> logger) + ILogger<BrowserTaskPriming> logger, + IEmbeddingGenerator<string, Embedding<float>>? embeddingGenerator = null) { public const int MaxSkills = 5; public const int MaxMemories = 5; diff --git a/src/Foragent.Capabilities/CapabilityInput.cs b/src/Foragent.Capabilities/CapabilityInput.cs deleted file mode 100644 index c4bcdaf..0000000 --- a/src/Foragent.Capabilities/CapabilityInput.cs +++ /dev/null @@ -1,124 +0,0 @@ -using System.Text.Json; -using System.Text.RegularExpressions; -using RockBot.A2A; - -namespace Foragent.Capabilities; - -/// <summary> -/// Helpers for pulling structured input out of an <see cref="AgentTaskRequest"/>. -/// Foragent's capability contract (spec §5.3 — capabilities own their input shape): -/// <list type="bullet"> -/// <item>Target URL in <c>url</c> metadata</item> -/// <item>Natural-language description in the first text part, or <c>description</c> metadata</item> -/// </list> -/// Parse order, most-structured-first: -/// <list type="number"> -/// <item>Message or request metadata <c>url</c>/<c>description</c> (rockbot 0.8.5+)</item> -/// <item>JSON blob in a text part: <c>{"url":"...","description":"..."}</c></item> -/// <item>Bare URL text part (step 1/2 callers)</item> -/// <item>First <c>http(s)://…</c> URL embedded in free-form text (handles -/// LLM-wrapped prompts like "please fetch the title of https://example.com")</item> -/// </list> -/// </summary> -internal static partial class CapabilityInput -{ - // Matches the first http(s) URL inside a larger string. Stops at whitespace - // or common terminators so trailing punctuation doesn't poison the URL. - [GeneratedRegex(@"https?://[^\s<>""'\)\]\}]+", RegexOptions.IgnoreCase)] - private static partial Regex EmbeddedUrlRegex(); - - /// <summary> - /// Parses a capability request into (url, description). Either field may be - /// null if the caller didn't supply it — capabilities decide which are - /// required. Parse order: (1) JSON shim, (2) bare URL, (3) first embedded - /// URL in free-form text — so LLM-authored requests like "please fetch - /// the title of https://example.com" are accepted. If (3) is used and the - /// text carries more than just the URL, the surrounding text becomes the - /// description. - /// </summary> - public static (Uri? Url, string? Description) Parse(AgentTaskRequest request) - { - // 1. Metadata (rockbot 0.8.5+). Message-level wins over request-level - // when both are present — the URL is about the message content. - var metaUrl = ReadMetadata(request, "url"); - var metaDescription = ReadMetadata(request, "description"); - var urlFromMetadata = TryParseAbsoluteHttpUri(metaUrl); - if (urlFromMetadata is not null) - return (urlFromMetadata, string.IsNullOrWhiteSpace(metaDescription) ? null : metaDescription); - - var text = request.Message.Parts - .Where(p => p.Kind == "text") - .Select(p => p.Text) - .FirstOrDefault(t => !string.IsNullOrWhiteSpace(t)) - ?.Trim(); - - if (string.IsNullOrEmpty(text)) - return (null, string.IsNullOrWhiteSpace(metaDescription) ? null : metaDescription); - - // JSON shim: {"url": "...", "description": "..."} - if (text.StartsWith('{')) - { - try - { - using var doc = JsonDocument.Parse(text); - var root = doc.RootElement; - var urlStr = root.TryGetProperty("url", out var u) ? u.GetString() : null; - var description = root.TryGetProperty("description", out var d) ? d.GetString() : null; - return (TryParseAbsoluteHttpUri(urlStr), description); - } - catch (JsonException) - { - // Not valid JSON — fall through to bare-URL handling. - } - } - - // Bare URL — covers step 1/2 callers and simple single-input skills. - var bare = TryParseAbsoluteHttpUri(text); - if (bare is not null) - return (bare, null); - - // Fall back to extracting the first http(s) URL embedded in free-form - // text. Anything around it becomes the description for capabilities - // that want one. - var match = EmbeddedUrlRegex().Match(text); - if (match.Success) - { - var embedded = TryParseAbsoluteHttpUri(TrimUrlTerminators(match.Value)); - if (embedded is not null) - { - var remainder = (text[..match.Index] + text[(match.Index + match.Length)..]).Trim(); - return (embedded, string.IsNullOrWhiteSpace(remainder) ? null : remainder); - } - } - - return (null, null); - } - - private static string TrimUrlTerminators(string value) => - value.TrimEnd('.', ',', ';', ':', '!', '?'); - - private static string? ReadMetadata(AgentTaskRequest request, string key) - { - if (request.Message.Metadata is not null - && request.Message.Metadata.TryGetValue(key, out var msgValue) - && !string.IsNullOrWhiteSpace(msgValue)) - { - return msgValue; - } - if (request.Metadata is not null - && request.Metadata.TryGetValue(key, out var reqValue) - && !string.IsNullOrWhiteSpace(reqValue)) - { - return reqValue; - } - return null; - } - - private static Uri? TryParseAbsoluteHttpUri(string? value) - { - if (string.IsNullOrWhiteSpace(value)) return null; - if (!Uri.TryCreate(value.Trim(), UriKind.Absolute, out var uri)) return null; - if (uri.Scheme != Uri.UriSchemeHttp && uri.Scheme != Uri.UriSchemeHttps) return null; - return uri; - } -} diff --git a/src/Foragent.Capabilities/ExtractStructuredDataCapability.cs b/src/Foragent.Capabilities/ExtractStructuredDataCapability.cs deleted file mode 100644 index 68e5872..0000000 --- a/src/Foragent.Capabilities/ExtractStructuredDataCapability.cs +++ /dev/null @@ -1,112 +0,0 @@ -using System.Text; -using Foragent.Browser; -using Microsoft.Extensions.AI; -using Microsoft.Extensions.Logging; -using RockBot.A2A; - -namespace Foragent.Capabilities; - -public sealed class ExtractStructuredDataCapability( - IBrowserSessionFactory browserFactory, - IChatClient chatClient, - ILogger<ExtractStructuredDataCapability> logger) : ICapability -{ - public static AgentSkill SkillDefinition { get; } = new() - { - Id = "extract-structured-data", - Name = "Extract Structured Data", - Description = "Navigate to a URL and extract data matching a natural-language description, returning JSON." - }; - - // Keep the prompt short — the page content already dominates the token budget. - private const string SystemPrompt = """ - You extract structured data from web pages on behalf of other agents. - The user gives you (1) a description of what to extract and (2) a - compact accessibility-tree or text rendering of a page. Respond with a - single JSON object containing only the fields that answer the request. - Use null for fields that are not present on the page. Do not wrap the - JSON in code fences or prose. - """; - - // Extraction calls are usually a few seconds. Cap snapshot size so we don't - // blow past the model's context window on a page dump. - private const int MaxSnapshotChars = 40_000; - - public string SkillId => SkillDefinition.Id; - public AgentSkill Skill => SkillDefinition; - - public async Task<AgentTaskResult> ExecuteAsync(AgentTaskRequest request, AgentTaskContext context) - { - var ct = context.MessageContext.CancellationToken; - var (url, description) = CapabilityInput.Parse(request); - - if (url is null) - return CapabilityResult.Error( - request, - "Provide the target URL (as 'url' in the request payload)."); - - if (string.IsNullOrWhiteSpace(description)) - return CapabilityResult.Error( - request, - "Provide a natural-language description of the data to extract."); - - PageSnapshot snapshot; - try - { - await using var session = await browserFactory.CreateSessionAsync(ct); - snapshot = await session.CapturePageSnapshotAsync(url, ct); - logger.LogInformation( - "Captured {Source} snapshot of {Url} ({Chars} chars)", - snapshot.Source, snapshot.Url, snapshot.Content.Length); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - logger.LogWarning(ex, "Snapshot failed for {Url}", url); - return CapabilityResult.Error(request, $"Fetch failed: {ex.Message}"); - } - - var prompt = BuildPrompt(snapshot, description!); - - try - { - var response = await chatClient.GetResponseAsync( - [ - new ChatMessage(ChatRole.System, SystemPrompt), - new ChatMessage(ChatRole.User, prompt) - ], - new ChatOptions { ResponseFormat = ChatResponseFormat.Json }, - ct); - - var text = response.Text?.Trim() ?? string.Empty; - if (string.IsNullOrEmpty(text)) - return CapabilityResult.Error(request, "LLM returned an empty response."); - - return CapabilityResult.Completed(request, text); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - logger.LogWarning(ex, "LLM extraction failed for {Url}", url); - return CapabilityResult.Error(request, $"Extraction failed: {ex.Message}"); - } - } - - private static string BuildPrompt(PageSnapshot snapshot, string description) - { - var content = snapshot.Content.Length > MaxSnapshotChars - ? snapshot.Content[..MaxSnapshotChars] + "\n…(truncated)" - : snapshot.Content; - - var sb = new StringBuilder(); - sb.Append("Page URL: ").AppendLine(snapshot.Url.ToString()); - if (snapshot.Title is not null) - sb.Append("Page title: ").AppendLine(snapshot.Title); - sb.Append("Snapshot source: ").AppendLine(snapshot.Source.ToString()); - sb.AppendLine(); - sb.AppendLine("Description of data to extract:"); - sb.AppendLine(description); - sb.AppendLine(); - sb.AppendLine("Page content:"); - sb.AppendLine(content); - return sb.ToString(); - } -} diff --git a/src/Foragent.Capabilities/FetchPageTitleCapability.cs b/src/Foragent.Capabilities/FetchPageTitleCapability.cs deleted file mode 100644 index 150258e..0000000 --- a/src/Foragent.Capabilities/FetchPageTitleCapability.cs +++ /dev/null @@ -1,44 +0,0 @@ -using Foragent.Browser; -using Microsoft.Extensions.Logging; -using RockBot.A2A; - -namespace Foragent.Capabilities; - -public sealed class FetchPageTitleCapability( - IBrowserSessionFactory browserFactory, - ILogger<FetchPageTitleCapability> logger) : ICapability -{ - public static AgentSkill SkillDefinition { get; } = new() - { - Id = "fetch-page-title", - Name = "Fetch Page Title", - Description = "Navigate to a URL with a real browser and return the contents of its <title> element." - }; - - public string SkillId => SkillDefinition.Id; - public AgentSkill Skill => SkillDefinition; - - public async Task<AgentTaskResult> ExecuteAsync(AgentTaskRequest request, AgentTaskContext context) - { - var ct = context.MessageContext.CancellationToken; - var (url, _) = CapabilityInput.Parse(request); - - if (url is null) - return CapabilityResult.Error(request, "Provide an absolute http(s) URL as the task message."); - - try - { - await using var session = await browserFactory.CreateSessionAsync(ct); - var title = await session.FetchPageTitleAsync(url, ct); - var text = string.IsNullOrEmpty(title) ? "(no title)" : title; - - logger.LogInformation("Fetched title from {Url}: {Title}", url, text); - return CapabilityResult.Completed(request, text); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - logger.LogWarning(ex, "Fetch failed for {Url}", url); - return CapabilityResult.Error(request, $"Fetch failed: {ex.Message}"); - } - } -} diff --git a/src/Foragent.Capabilities/ForagentCancelHandler.cs b/src/Foragent.Capabilities/ForagentCancelHandler.cs new file mode 100644 index 0000000..6f6b768 --- /dev/null +++ b/src/Foragent.Capabilities/ForagentCancelHandler.cs @@ -0,0 +1,54 @@ +using Microsoft.Extensions.Logging; +using RockBot.A2A; +using RockBot.Host; +using RockBot.Messaging; + +namespace Foragent.Capabilities; + +/// <summary> +/// Replaces the framework's default <c>AgentTaskCancelHandler</c> (which +/// always replies <see cref="AgentTaskError.Codes.TaskNotCancelable"/> +/// because stateless agents can't do anything useful). Foragent is stateful +/// — a browser task is long-running and must respond to cancel — so this +/// handler cancels the linked <see cref="CancellationToken"/> held by +/// <see cref="InFlightTaskRegistry"/>. +/// +/// Reply semantics: on a successful cancel we publish nothing here. The +/// running task's own terminal result (an <see cref="AgentTaskResult"/> in +/// an error/cancelled shape) will reach the caller's ReplyTo topic shortly +/// afterward as the task unwinds. On a missed cancel (no matching task), +/// we publish <see cref="AgentTaskError.Codes.TaskNotFound"/> so the caller +/// knows the cancel was a no-op. +/// </summary> +public sealed class ForagentCancelHandler( + InFlightTaskRegistry inFlight, + IMessagePublisher publisher, + A2AOptions options, + AgentIdentity agent, + ILogger<ForagentCancelHandler> logger) : IMessageHandler<AgentTaskCancelRequest> +{ + public async Task HandleAsync(AgentTaskCancelRequest request, MessageHandlerContext context) + { + if (inFlight.TryCancel(request.TaskId)) + { + logger.LogInformation("Cancel requested for in-flight task {TaskId} — cancellation issued.", request.TaskId); + return; + } + + logger.LogDebug("Cancel requested for task {TaskId}, but no in-flight registration exists.", request.TaskId); + + var replyTo = context.Envelope.ReplyTo ?? options.DefaultResultTopic; + var error = new AgentTaskError + { + TaskId = request.TaskId, + ContextId = request.ContextId, + Code = AgentTaskError.Codes.TaskNotFound, + Message = "No in-flight task with that id; nothing to cancel.", + IsRetryable = false + }; + var envelope = error.ToEnvelope<AgentTaskError>( + source: agent.Name, + correlationId: context.Envelope.CorrelationId); + await publisher.PublishAsync(replyTo, envelope, context.CancellationToken); + } +} diff --git a/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs b/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs index 169de19..0149038 100644 --- a/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs +++ b/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs @@ -14,14 +14,16 @@ public static class ForagentCapabilitiesServiceCollectionExtensions /// </summary> public static IServiceCollection AddForagentCapabilities(this IServiceCollection services) { - services.AddScoped<ICapability, FetchPageTitleCapability>(); - services.AddScoped<ICapability, ExtractStructuredDataCapability>(); services.AddScoped<ICapability, BrowserTaskCapability>(); services.AddScoped<ICapability, LearnFormSchemaCapability>(); services.AddScoped<ICapability, ExecuteFormBatchCapability>(); services.AddScoped<BrowserTaskPriming>(); services.AddScoped<FormSchemaEnricher>(); services.AddScoped<IAgentTaskHandler, ForagentTaskHandler>(); + // Process-wide registry: the cancel message is dispatched in a + // different DI scope than the task request, so the registry must + // outlive scopes. + services.AddSingleton<InFlightTaskRegistry>(); return services; } } @@ -38,8 +40,6 @@ public static class ForagentCapabilities [ BrowserTaskCapability.SkillDefinition, LearnFormSchemaCapability.SkillDefinition, - ExecuteFormBatchCapability.SkillDefinition, - FetchPageTitleCapability.SkillDefinition, - ExtractStructuredDataCapability.SkillDefinition + ExecuteFormBatchCapability.SkillDefinition ]; } diff --git a/src/Foragent.Capabilities/ForagentTaskHandler.cs b/src/Foragent.Capabilities/ForagentTaskHandler.cs index ff524f1..590b3bb 100644 --- a/src/Foragent.Capabilities/ForagentTaskHandler.cs +++ b/src/Foragent.Capabilities/ForagentTaskHandler.cs @@ -1,5 +1,6 @@ using Microsoft.Extensions.Logging; using RockBot.A2A; +using RockBot.Host; namespace Foragent.Capabilities; @@ -8,9 +9,16 @@ namespace Foragent.Capabilities; /// instances and routes an incoming task to the one whose <see cref="ICapability.SkillId"/> /// matches <see cref="AgentTaskRequest.Skill"/>. Unknown skills return a user- /// facing error rather than throwing, so the A2A bridge still sees a result. +/// +/// Registers each in-flight task with <see cref="InFlightTaskRegistry"/> and +/// wraps the context so the capability's <see cref="CancellationToken"/> is +/// linked to both the framework's message CT and the registry's external +/// cancel trigger — letting <c>agent.task.cancel.Foragent</c> messages stop +/// long-running browser work mid-flight. /// </summary> public sealed class ForagentTaskHandler( IEnumerable<ICapability> capabilities, + InFlightTaskRegistry inFlight, ILogger<ForagentTaskHandler> logger) : IAgentTaskHandler { private readonly Dictionary<string, ICapability> _bySkill = @@ -19,26 +27,49 @@ public sealed class ForagentTaskHandler( public async Task<AgentTaskResult> HandleTaskAsync( AgentTaskRequest request, AgentTaskContext context) { - var ct = context.MessageContext.CancellationToken; + var parentCt = context.MessageContext.CancellationToken; + var linkedCt = inFlight.Register(request.TaskId, parentCt); - logger.LogInformation("Handling task {TaskId} (skill={Skill})", - request.TaskId, request.Skill); - - await context.PublishStatus(new AgentTaskStatusUpdate + try { - TaskId = request.TaskId, - ContextId = request.ContextId, - State = AgentTaskState.Working - }, ct); + logger.LogInformation("Handling task {TaskId} (skill={Skill})", + request.TaskId, request.Skill); + + await context.PublishStatus(new AgentTaskStatusUpdate + { + TaskId = request.TaskId, + ContextId = request.ContextId, + State = AgentTaskState.Working + }, linkedCt); + + if (!_bySkill.TryGetValue(request.Skill, out var capability)) + { + var known = string.Join(", ", _bySkill.Keys.OrderBy(k => k)); + return CapabilityResult.Error( + request, + $"Unknown skill '{request.Skill}'. Known skills: {known}"); + } - if (!_bySkill.TryGetValue(request.Skill, out var capability)) + // Wrap the context so the capability observes the linked token + // instead of the raw message CT. Capabilities keep reading + // context.MessageContext.CancellationToken — no signature change. + var cancellableContext = new AgentTaskContext + { + MessageContext = new MessageHandlerContext + { + Envelope = context.MessageContext.Envelope, + Agent = context.MessageContext.Agent, + Services = context.MessageContext.Services, + CancellationToken = linkedCt + }, + PublishStatus = context.PublishStatus + }; + + return await capability.ExecuteAsync(request, cancellableContext); + } + finally { - var known = string.Join(", ", _bySkill.Keys.OrderBy(k => k)); - return CapabilityResult.Error( - request, - $"Unknown skill '{request.Skill}'. Known skills: {known}"); + inFlight.Remove(request.TaskId); } - - return await capability.ExecuteAsync(request, context); } } diff --git a/src/Foragent.Capabilities/Forms/ExecuteFormBatchCapability.cs b/src/Foragent.Capabilities/Forms/ExecuteFormBatchCapability.cs index 3b83aee..08df635 100644 --- a/src/Foragent.Capabilities/Forms/ExecuteFormBatchCapability.cs +++ b/src/Foragent.Capabilities/Forms/ExecuteFormBatchCapability.cs @@ -30,7 +30,10 @@ public sealed class ExecuteFormBatchCapability( Id = "execute-form-batch", Name = "Execute Form Batch", Description = "Submit a batch of rows against a learned form schema. " - + "Input: JSON {\"schemaRef\":\"sites/host/forms/name\" OR \"schema\":{...FormSchema...},\"rows\":[{field:value,...}],\"allowedHosts\":[\"host\"],\"credentialId\":\"optional\",\"mode\":\"abort-on-first\"|\"continue\",\"successIndicator\":\"optional CSS selector\"}. " + + "PASS INPUT AS AN A2A DATA PART (a structured JSON object), not as prose inside the text message. " + + "When calling via RockBot's invoke_agent, populate the 'data' parameter with this object — the multi-row shape doesn't fit in plain text. " + + "Fields: {\"schemaRef\":\"sites/host/forms/name\" OR \"schema\":{...FormSchema...},\"rows\":[{fieldName:value,...}],\"allowedHosts\":[\"host\"],\"credentialId\":\"optional\",\"mode\":\"abort-on-first\"|\"continue\",\"successIndicator\":\"optional CSS selector\"}. " + + "'rows', 'allowedHosts', and exactly one of 'schemaRef'/'schema' are REQUIRED. " + "Streams per-row progress. Default mode aborts on first failure." }; diff --git a/src/Foragent.Capabilities/Forms/ExecuteFormBatchInput.cs b/src/Foragent.Capabilities/Forms/ExecuteFormBatchInput.cs index 82d9fdd..9fdf225 100644 --- a/src/Foragent.Capabilities/Forms/ExecuteFormBatchInput.cs +++ b/src/Foragent.Capabilities/Forms/ExecuteFormBatchInput.cs @@ -7,8 +7,13 @@ namespace Foragent.Capabilities.Forms; /// <summary> /// Parses input for <c>execute-form-batch</c> (spec §5.2, phase-3 of §5.5). /// -/// Shape (JSON in the first text part — metadata pass-through doesn't fit -/// multi-row shapes): +/// Accepts input on either an A2A <c>DataPart</c> (preferred — the shape +/// RockBot's <c>invoke_agent</c> produces when its <c>data</c> parameter is +/// filled) or a JSON object in the first text part (curl callers). Metadata +/// pass-through is not supported — the multi-row shape doesn't fit in +/// string-valued metadata. +/// +/// Shape: /// <list type="bullet"> /// <item><c>schemaRef</c> — optional. Skill name produced by <c>learn-form-schema</c>.</item> /// <item><c>schema</c> — optional. Inline <see cref="FormSchema"/> JSON.</item> @@ -32,14 +37,21 @@ internal readonly record struct ExecuteFormBatchInput( { public static ExecuteFormBatchInput Parse(AgentTaskRequest request) { + var dataJson = request.Message.Parts + .Where(p => p.Kind == "data" && !string.IsNullOrWhiteSpace(p.Data)) + .Select(p => p.Data) + .FirstOrDefault(); + var text = request.Message.Parts .Where(p => p.Kind == "text") .Select(p => p.Text) .FirstOrDefault(t => !string.IsNullOrWhiteSpace(t)) ?.Trim(); - if (string.IsNullOrEmpty(text) || !text.StartsWith('{')) - return Fail("Input must be a JSON object with rows, allowedHosts, and either schemaRef or schema."); + string? jsonPayload = dataJson ?? (!string.IsNullOrEmpty(text) && text.StartsWith('{') ? text : null); + + if (jsonPayload is null) + return Fail(HintedError("No structured input found.")); string? schemaRef; FormSchema? inlineSchema = null; @@ -51,8 +63,10 @@ public static ExecuteFormBatchInput Parse(AgentTaskRequest request) try { - using var doc = JsonDocument.Parse(text); + using var doc = JsonDocument.Parse(jsonPayload); var root = doc.RootElement; + if (root.ValueKind != JsonValueKind.Object) + return Fail("Structured input must be a JSON object."); schemaRef = root.TryGetProperty("schemaRef", out var sr) ? sr.GetString() : null; if (root.TryGetProperty("schema", out var s) && s.ValueKind == JsonValueKind.Object) { @@ -104,13 +118,13 @@ public static ExecuteFormBatchInput Parse(AgentTaskRequest request) } if (schemaRef is null && inlineSchema is null) - return Fail("Provide either 'schemaRef' (a skill name) or inline 'schema'."); + return Fail(HintedError("Provide either 'schemaRef' (a skill name) or inline 'schema'.")); if (schemaRef is not null && inlineSchema is not null) return Fail("Provide only one of 'schemaRef' or 'schema', not both."); if (rows is null || rows.Count == 0) - return Fail("Missing 'rows' — must be a non-empty array of objects."); + return Fail(HintedError("Missing 'rows' — must be a non-empty array of objects.")); if (allowedHosts is null || allowedHosts.Count == 0) - return Fail("Missing 'allowedHosts' — execute-form-batch requires an explicit allowlist (spec §7.1)."); + return Fail(HintedError("Missing 'allowedHosts' — execute-form-batch requires an explicit allowlist (spec §7.1).")); HostAllowlist allowlist; try @@ -143,6 +157,10 @@ public static ExecuteFormBatchInput Parse(AgentTaskRequest request) private static ExecuteFormBatchInput Fail(string message) => new(null, null, null, null, null, ExecuteFormBatchMode.AbortOnFirst, null, message); + + private static string HintedError(string reason) => reason + " " + + "Pass inputs as a JSON object on the A2A DataPart — in RockBot's invoke_agent tool, that means filling the 'data' parameter with the object, NOT adding fields to the 'message' text. " + + "Example data: {\"schemaRef\":\"sites/host/forms/name\",\"rows\":[{\"field\":\"value\"}],\"allowedHosts\":[\"host\"]}."; } internal enum ExecuteFormBatchMode diff --git a/src/Foragent.Capabilities/Forms/LearnFormSchemaCapability.cs b/src/Foragent.Capabilities/Forms/LearnFormSchemaCapability.cs index 83769cd..db918f1 100644 --- a/src/Foragent.Capabilities/Forms/LearnFormSchemaCapability.cs +++ b/src/Foragent.Capabilities/Forms/LearnFormSchemaCapability.cs @@ -29,7 +29,10 @@ public sealed class LearnFormSchemaCapability( Id = "learn-form-schema", Name = "Learn Form Schema", Description = "Navigate to a web form, extract its structure (fields, types, options, validation), and persist it as a reusable skill. " - + "Input: JSON {\"url\":\"https://...\",\"allowedHosts\":[\"host\"],\"formSelector\":\"optional\",\"credentialId\":\"optional\",\"skillName\":\"optional override\",\"intent\":\"optional prose\"}. " + + "PASS INPUT AS AN A2A DATA PART (a structured JSON object), not as prose inside the text message. " + + "When calling via RockBot's invoke_agent, populate the 'data' parameter with this object. " + + "Fields: {\"url\":\"https://...\",\"allowedHosts\":[\"host\"],\"formSelector\":\"optional CSS selector\",\"credentialId\":\"optional\",\"skillName\":\"optional override\",\"intent\":\"optional prose\"}. " + + "'url' and 'allowedHosts' are REQUIRED (spec §7.1). " + "Returns the typed form schema plus the skill name it was persisted under." }; @@ -197,7 +200,7 @@ private static string BuildPrimer(FormSchema schema, string? intent) private static string DeriveSkillName(Uri url, string? intent) { - var host = url.Host.ToLowerInvariant(); + var host = SkillNaming.SanitizeHost(url.Host.ToLowerInvariant()); var slug = string.IsNullOrWhiteSpace(intent) ? SlugFromPath(url.AbsolutePath) : Slugify(intent!); diff --git a/src/Foragent.Capabilities/Forms/LearnFormSchemaInput.cs b/src/Foragent.Capabilities/Forms/LearnFormSchemaInput.cs index 1a487ec..1ec7598 100644 --- a/src/Foragent.Capabilities/Forms/LearnFormSchemaInput.cs +++ b/src/Foragent.Capabilities/Forms/LearnFormSchemaInput.cs @@ -8,7 +8,13 @@ namespace Foragent.Capabilities.Forms; /// Parses input for the <c>learn-form-schema</c> capability (spec §5.2, /// phase-1 of the learn→review→execute pattern in §5.5). /// -/// Shape (JSON in the first text part, field-by-field metadata overrides): +/// Accepts the structured input in three forms: preferred is an A2A +/// <c>DataPart</c> (<c>Kind = "data"</c>, <c>MimeType = "application/json"</c>) +/// — the shape RockBot's <c>invoke_agent</c> produces when the caller fills +/// its <c>data</c> parameter. Falls back to a JSON object in the first text +/// part for curl callers. Field-by-field metadata overrides either source. +/// +/// Shape: /// <list type="bullet"> /// <item><c>url</c> — required. Absolute http(s) URL of the page hosting the form.</item> /// <item><c>allowedHosts</c> — required. Host allowlist (spec §7.1). Empty rejects.</item> @@ -36,18 +42,27 @@ public static LearnFormSchemaInput Parse(AgentTaskRequest request) string? intent = null; List<string>? allowedHosts = null; + var dataJson = request.Message.Parts + .Where(p => p.Kind == "data" && !string.IsNullOrWhiteSpace(p.Data)) + .Select(p => p.Data) + .FirstOrDefault(); + var text = request.Message.Parts .Where(p => p.Kind == "text") .Select(p => p.Text) .FirstOrDefault(t => !string.IsNullOrWhiteSpace(t)) ?.Trim(); - if (!string.IsNullOrEmpty(text) && text.StartsWith('{')) + string? jsonPayload = dataJson ?? (!string.IsNullOrEmpty(text) && text.StartsWith('{') ? text : null); + + if (jsonPayload is not null) { try { - using var doc = JsonDocument.Parse(text); + using var doc = JsonDocument.Parse(jsonPayload); var root = doc.RootElement; + if (root.ValueKind != JsonValueKind.Object) + return Fail("Structured input must be a JSON object."); if (root.TryGetProperty("url", out var u)) url = u.GetString(); if (root.TryGetProperty("formSelector", out var fs)) formSelector = fs.GetString(); if (root.TryGetProperty("credentialId", out var c)) credentialId = c.GetString(); @@ -72,10 +87,10 @@ public static LearnFormSchemaInput Parse(AgentTaskRequest request) allowedHosts = [.. hostsCsv.Split(',', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries)]; if (string.IsNullOrWhiteSpace(url)) - return Fail("Missing 'url' — the page hosting the form."); + return Fail(HintedError("Missing 'url' — the page hosting the form.")); if (allowedHosts is null || allowedHosts.Count == 0) - return Fail("Missing 'allowedHosts' — learn-form-schema requires an explicit allowlist (spec §7.1)."); + return Fail(HintedError("Missing 'allowedHosts' — learn-form-schema requires an explicit allowlist (spec §7.1).")); if (!Uri.TryCreate(url, UriKind.Absolute, out var parsedUrl) || (parsedUrl.Scheme != Uri.UriSchemeHttp && parsedUrl.Scheme != Uri.UriSchemeHttps)) @@ -100,6 +115,10 @@ public static LearnFormSchemaInput Parse(AgentTaskRequest request) private static LearnFormSchemaInput Fail(string message) => new(null, null, null, null, null, null, message); + private static string HintedError(string reason) => reason + " " + + "Pass inputs as a JSON object on the A2A DataPart — in RockBot's invoke_agent tool, that means filling the 'data' parameter, NOT adding fields to the 'message' text. " + + "Example data: {\"url\":\"https://...\",\"allowedHosts\":[\"host\"]}."; + private static string? ReadMetadata(AgentTaskRequest request, string key) { if (request.Message.Metadata is not null diff --git a/src/Foragent.Capabilities/InFlightTaskRegistry.cs b/src/Foragent.Capabilities/InFlightTaskRegistry.cs new file mode 100644 index 0000000..aa3a313 --- /dev/null +++ b/src/Foragent.Capabilities/InFlightTaskRegistry.cs @@ -0,0 +1,78 @@ +using System.Collections.Concurrent; + +namespace Foragent.Capabilities; + +/// <summary> +/// Tracks in-flight A2A tasks so an out-of-band cancel message can reach the +/// running task's <see cref="CancellationToken"/>. The framework's default +/// <c>AgentTaskCancelHandler</c> assumes stateless agents and always replies +/// <c>TaskNotCancelable</c>; Foragent is stateful (long-running browser work +/// per task), so we replace it with a registry-aware handler. +/// +/// One instance per process — the registry crosses DI scopes because the +/// cancel message is handled in a different scope than the task request. +/// </summary> +public sealed class InFlightTaskRegistry +{ + private readonly ConcurrentDictionary<string, CancellationTokenSource> _tokens = new(StringComparer.Ordinal); + + /// <summary> + /// Creates a linked <see cref="CancellationTokenSource"/> for + /// <paramref name="taskId"/>, registers it, and returns the linked token + /// that the running task should observe. If the task id is already + /// registered — the framework redelivered a message — the prior + /// registration is cancelled first so the stale task unwinds before the + /// new one starts. + /// </summary> + public CancellationToken Register(string taskId, CancellationToken parentToken) + { + var cts = CancellationTokenSource.CreateLinkedTokenSource(parentToken); + _tokens.AddOrUpdate( + taskId, + _ => cts, + (_, existing) => + { + // A redelivered task id should not keep the prior CTS alive — + // cancel it so any residual work stops, then replace. + try { existing.Cancel(); } catch (ObjectDisposedException) { } + existing.Dispose(); + return cts; + }); + return cts.Token; + } + + /// <summary> + /// Cancels the task if registered. Returns true when a cancel was issued, + /// false when no task with that id is in flight. + /// </summary> + public bool TryCancel(string taskId) + { + if (!_tokens.TryGetValue(taskId, out var cts)) + return false; + try + { + cts.Cancel(); + return true; + } + catch (ObjectDisposedException) + { + // Race: the task finished and disposed its CTS between our + // TryGetValue and Cancel. Equivalent to not-found for the caller. + return false; + } + } + + /// <summary> + /// Removes and disposes the registration for <paramref name="taskId"/>. + /// Called from the task handler's <c>finally</c> so both normal completion + /// and failure clean up. + /// </summary> + public void Remove(string taskId) + { + if (_tokens.TryRemove(taskId, out var cts)) + cts.Dispose(); + } + + /// <summary>Count of in-flight registrations. Exposed for tests and diagnostics.</summary> + public int Count => _tokens.Count; +} diff --git a/src/Foragent.Capabilities/SkillNaming.cs b/src/Foragent.Capabilities/SkillNaming.cs new file mode 100644 index 0000000..7fc4b6b --- /dev/null +++ b/src/Foragent.Capabilities/SkillNaming.cs @@ -0,0 +1,19 @@ +namespace Foragent.Capabilities; + +/// <summary> +/// Shared helpers for constructing skill names that pass RockBot 0.9's +/// <c>FileSkillStore.ValidateName</c> — only alphanumeric, hyphens, +/// underscores, and <c>/</c> are allowed. Real hosts (<c>bsky.app</c>, +/// <c>apple.com</c>) contain dots, so anything that embeds a host inside a +/// skill name must go through <see cref="SanitizeHost"/>. +/// </summary> +internal static class SkillNaming +{ + /// <summary> + /// Replaces characters the skill-store validator rejects. Dots become + /// hyphens (<c>bsky.app</c> → <c>bsky-app</c>) — readable, reversible + /// by humans, and keeps the host recognizable in the stored skill path. + /// </summary> + public static string SanitizeHost(string host) => + host.Replace('.', '-'); +} diff --git a/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs b/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs index e93b3b0..6b91eb0 100644 --- a/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs +++ b/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs @@ -64,6 +64,55 @@ public async Task DonePayload_ReturnsStructuredJson() Assert.Contains("snapshot", page.Actions); } + [Fact] + public async Task AcceptsStructuredInput_OnDataPart() + { + // Simulates the shape RockBot's invoke_agent produces when the caller + // fills its 'data' parameter: a text part with human prose plus a + // DataPart with the structured JSON. The data part wins over any + // JSON embedded in text. + var (capability, page, _) = Build( + ScriptedChatClient.ToolCall("snapshot"), + ScriptedChatClient.ToolCall("done", new { summary = "picked up from DataPart" }), + ScriptedChatClient.Text("stopping")); + var (ctx, _) = TestContext.Build(); + + var result = await capability.ExecuteAsync( + TestContext.RequestWithData( + "browser-task", + dataJson: """{"intent":"read the page title","url":"https://example.com/","allowedHosts":["example.com"]}""", + text: "Please read the page title for me."), + ctx); + + Assert.Equal(AgentTaskState.Completed, result.State); + using var doc = JsonDocument.Parse(TestContext.TextOf(result)); + Assert.Equal("done", doc.RootElement.GetProperty("status").GetString()); + Assert.Contains("snapshot", page.Actions); + } + + [Fact] + public async Task DataPart_UsesProseTextAsIntentFallback() + { + // If the DataPart doesn't carry 'intent' but the text part is prose, + // the text fills in for intent. Lets callers keep 'intent' in the + // data payload without duplicating it in the message. + var (capability, _, _) = Build( + ScriptedChatClient.ToolCall("done", new { summary = "ok" }), + ScriptedChatClient.Text("stopping")); + var (ctx, _) = TestContext.Build(); + + var result = await capability.ExecuteAsync( + TestContext.RequestWithData( + "browser-task", + dataJson: """{"allowedHosts":["example.com"],"url":"https://example.com/"}""", + text: "Find the title of the page."), + ctx); + + Assert.Equal(AgentTaskState.Completed, result.State); + using var doc = JsonDocument.Parse(TestContext.TextOf(result)); + Assert.Equal("done", doc.RootElement.GetProperty("status").GetString()); + } + [Fact] public async Task Fail_ReturnsFailedStatus() { @@ -171,7 +220,6 @@ internal static ( var priming = new BrowserTaskPriming( skillStore, memory, - embeddingGenerator: null, NullLogger<BrowserTaskPriming>.Instance); var capability = new BrowserTaskCapability( @@ -192,7 +240,7 @@ public async Task Priming_InjectsRetrievedSkillIntoUserPrompt() { var skills = new FakeSkillStore(); await skills.SaveAsync(new Skill( - Name: "sites/example.com/login", + Name: "sites/example-com/login", Summary: "Use the app password, not the account password.", Content: "Click 'Sign in', enter handle, then password.", CreatedAt: DateTimeOffset.UtcNow, @@ -214,7 +262,7 @@ await capability.ExecuteAsync( var userMessage = scripted.FirstMessages.Single(m => m.Role == ChatRole.User).Text ?? string.Empty; Assert.Contains("Known site knowledge", userMessage); - Assert.Contains("sites/example.com/login", userMessage); + Assert.Contains("sites/example-com/login", userMessage); Assert.Contains("app password", userMessage); } @@ -243,7 +291,7 @@ public async Task LearnedSkill_IsWrittenOnSuccess_WhenMultipleNavigations() ctx); Assert.Equal(AgentTaskState.Completed, result.State); - var learned = skills.Saved.Keys.SingleOrDefault(k => k.StartsWith("sites/example.com/learned/")); + var learned = skills.Saved.Keys.SingleOrDefault(k => k.StartsWith("sites/example-com/learned/")); Assert.NotNull(learned); var skill = skills.Saved[learned!]; Assert.Equal("Navigate home then click the details link.", skill.Summary); @@ -268,7 +316,7 @@ await capability.ExecuteAsync( """{"intent":"read one page","url":"https://example.com/","allowedHosts":["example.com"]}"""), ctx); - Assert.DoesNotContain(skills.Saved.Keys, k => k.StartsWith("sites/example.com/learned/")); + Assert.DoesNotContain(skills.Saved.Keys, k => k.StartsWith("sites/example-com/learned/")); } private sealed class StubCredentialBroker : ICredentialBroker diff --git a/tests/Foragent.Agent.Tests/BrowserTask/FakeBrowserAgentPage.cs b/tests/Foragent.Agent.Tests/BrowserTask/FakeBrowserAgentPage.cs index 8027593..327abe4 100644 --- a/tests/Foragent.Agent.Tests/BrowserTask/FakeBrowserAgentPage.cs +++ b/tests/Foragent.Agent.Tests/BrowserTask/FakeBrowserAgentPage.cs @@ -52,12 +52,6 @@ public ValueTask DisposeAsync() internal sealed class FakeAgentBrowserSession(FakeBrowserAgentPage page) : IBrowserSession { - public Task<string?> FetchPageTitleAsync(Uri url, CancellationToken ct = default) => - throw new NotSupportedException(); - - public Task<PageSnapshot> CapturePageSnapshotAsync(Uri url, CancellationToken ct = default) => - throw new NotSupportedException(); - public Task<IBrowserPage> OpenPageAsync(Uri url, CancellationToken ct = default) => throw new NotSupportedException(); diff --git a/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs b/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs index 5a8c542..7719cb3 100644 --- a/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs +++ b/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs @@ -18,10 +18,10 @@ public async Task Seed_IsWritten_WhenSkillMissing() await service.StartAsync(CancellationToken.None); - Assert.True(store.Saved.ContainsKey("sites/bsky.app/login")); - var skill = store.Saved["sites/bsky.app/login"]; + Assert.True(store.Saved.ContainsKey("sites/bsky-app/login")); + var skill = store.Saved["sites/bsky-app/login"]; Assert.Contains("app password", skill.Summary, StringComparison.OrdinalIgnoreCase); - Assert.Contains("sites/bsky.app/compose-post", skill.SeeAlso!); + Assert.Contains("sites/bsky-app/compose-post", skill.SeeAlso!); } [Fact] @@ -29,7 +29,7 @@ public async Task Seed_LeavesExistingSkillIntact() { var store = new FakeSkillStore(); var existing = new Skill( - Name: "sites/bsky.app/login", + Name: "sites/bsky-app/login", Summary: "operator-edited summary", Content: "operator-edited content", CreatedAt: DateTimeOffset.UtcNow.AddDays(-7), @@ -41,7 +41,7 @@ public async Task Seed_LeavesExistingSkillIntact() var service = new BskySeedSkillService(store, NullLogger<BskySeedSkillService>.Instance); await service.StartAsync(CancellationToken.None); - var after = store.Saved["sites/bsky.app/login"]; + var after = store.Saved["sites/bsky-app/login"]; Assert.Equal("operator-edited summary", after.Summary); Assert.Equal("operator-edited content", after.Content); } @@ -53,9 +53,9 @@ public async Task Seed_IsNoop_OnSecondStart() var service = new BskySeedSkillService(store, NullLogger<BskySeedSkillService>.Instance); await service.StartAsync(CancellationToken.None); - var firstContent = store.Saved["sites/bsky.app/login"].Content; + var firstContent = store.Saved["sites/bsky-app/login"].Content; await service.StartAsync(CancellationToken.None); - var secondContent = store.Saved["sites/bsky.app/login"].Content; + var secondContent = store.Saved["sites/bsky-app/login"].Content; Assert.Same(firstContent, secondContent); } diff --git a/tests/Foragent.Agent.Tests/ExtractStructuredDataCapabilityTests.cs b/tests/Foragent.Agent.Tests/ExtractStructuredDataCapabilityTests.cs deleted file mode 100644 index da38a95..0000000 --- a/tests/Foragent.Agent.Tests/ExtractStructuredDataCapabilityTests.cs +++ /dev/null @@ -1,210 +0,0 @@ -using Foragent.Browser; -using Foragent.Capabilities; -using Microsoft.Extensions.AI; -using Microsoft.Extensions.Logging.Abstractions; -using RockBot.A2A; -using Xunit; - -namespace Foragent.Agent.Tests; - -public class ExtractStructuredDataCapabilityTests -{ - private const string ValidInput = """ - {"url":"https://example.com","description":"the main heading"} - """; - - [Fact] - public async Task ReturnsLlmText_OnSuccess() - { - var factory = new StubBrowserSessionFactory(); - var chat = new StubChatClient((_, _) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, """{"heading":"Example"}""")))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", ValidInput), - context); - - Assert.Equal(AgentTaskState.Completed, result.State); - Assert.Equal("""{"heading":"Example"}""", TestContext.TextOf(result)); - } - - [Fact] - public async Task PromptIncludesUrl_Description_AndSnapshot() - { - var factory = new StubBrowserSessionFactory - { - SnapshotResponder = (url, _) => - Task.FromResult(new PageSnapshot(url, "Page Title", "body - Hello", PageSnapshotSource.Accessibility)) - }; - var chat = new StubChatClient((_, _) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{}")))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", ValidInput), - context); - - Assert.Equal(1, chat.CallCount); - var userMessage = chat.LastMessages.First(m => m.Role == ChatRole.User).Text ?? string.Empty; - Assert.Contains("https://example.com", userMessage); - Assert.Contains("the main heading", userMessage); - Assert.Contains("body - Hello", userMessage); - Assert.Contains("Page Title", userMessage); - } - - [Fact] - public async Task RequestsJsonResponseFormat() - { - ChatOptions? capturedOptions = null; - var factory = new StubBrowserSessionFactory(); - var chat = new StubChatClient((_, opts) => - { - capturedOptions = opts; - return Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{}"))); - }); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", ValidInput), - context); - - Assert.NotNull(capturedOptions?.ResponseFormat); - Assert.IsType<ChatResponseFormatJson>(capturedOptions!.ResponseFormat); - } - - [Fact] - public async Task RejectsMissingUrl() - { - var factory = new StubBrowserSessionFactory(); - var chat = new StubChatClient((_, _) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{}")))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", """{"description":"no url"}"""), - context); - - Assert.Equal(0, factory.SessionsCreated); - Assert.Equal(0, chat.CallCount); - Assert.Contains("URL", TestContext.TextOf(result)); - } - - [Fact] - public async Task RejectsMissingDescription() - { - var factory = new StubBrowserSessionFactory(); - var chat = new StubChatClient((_, _) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{}")))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", """{"url":"https://example.com"}"""), - context); - - Assert.Equal(0, factory.SessionsCreated); - Assert.Equal(0, chat.CallCount); - Assert.Contains("description", TestContext.TextOf(result)); - } - - [Fact] - public async Task ReportsError_WhenBrowserThrows() - { - var factory = new StubBrowserSessionFactory - { - SnapshotResponder = (_, _) => Task.FromException<PageSnapshot>(new InvalidOperationException("nav failed")) - }; - var chat = new StubChatClient((_, _) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{}")))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", ValidInput), - context); - - Assert.Contains("Fetch failed: nav failed", TestContext.TextOf(result)); - Assert.Equal(0, chat.CallCount); - } - - [Fact] - public async Task ReportsError_WhenLlmThrows() - { - var factory = new StubBrowserSessionFactory(); - var chat = new StubChatClient((_, _) => - Task.FromException<ChatResponse>(new InvalidOperationException("llm boom"))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", ValidInput), - context); - - Assert.Contains("Extraction failed: llm boom", TestContext.TextOf(result)); - } - - [Fact] - public async Task AcceptsInputs_FromMetadata() - { - Uri? capturedUrl = null; - var factory = new StubBrowserSessionFactory - { - SnapshotResponder = (url, _) => - { - capturedUrl = url; - return Task.FromResult(new PageSnapshot(url, null, "content", PageSnapshotSource.Accessibility)); - } - }; - string? descriptionInPrompt = null; - var chat = new StubChatClient((msgs, _) => - { - descriptionInPrompt = msgs.First(m => m.Role == ChatRole.User).Text; - return Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "{}"))); - }); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - var request = TestContext.RequestWithMetadata( - "extract-structured-data", - messageMetadata: new Dictionary<string, string> - { - ["url"] = "https://metadata.example", - ["description"] = "the shipping address" - }); - - var result = await capability.ExecuteAsync(request, context); - - Assert.Equal(AgentTaskState.Completed, result.State); - Assert.Equal(new Uri("https://metadata.example"), capturedUrl); - Assert.Contains("the shipping address", descriptionInPrompt); - } - - [Fact] - public async Task ReportsError_OnEmptyLlmResponse() - { - var factory = new StubBrowserSessionFactory(); - var chat = new StubChatClient((_, _) => - Task.FromResult(new ChatResponse(new ChatMessage(ChatRole.Assistant, "")))); - var capability = new ExtractStructuredDataCapability( - factory, chat, NullLogger<ExtractStructuredDataCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("extract-structured-data", ValidInput), - context); - - Assert.Contains("empty response", TestContext.TextOf(result)); - } -} diff --git a/tests/Foragent.Agent.Tests/FetchPageTitleCapabilityTests.cs b/tests/Foragent.Agent.Tests/FetchPageTitleCapabilityTests.cs deleted file mode 100644 index 83931bc..0000000 --- a/tests/Foragent.Agent.Tests/FetchPageTitleCapabilityTests.cs +++ /dev/null @@ -1,197 +0,0 @@ -using Foragent.Capabilities; -using Microsoft.Extensions.Logging.Abstractions; -using RockBot.A2A; -using Xunit; - -namespace Foragent.Agent.Tests; - -public class FetchPageTitleCapabilityTests -{ - [Fact] - public async Task ReturnsTitle_FromBrowser() - { - var factory = new StubBrowserSessionFactory - { - TitleResponder = (_, _) => Task.FromResult<string?>("Hello World") - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", "https://example.com"), - context); - - Assert.Equal(AgentTaskState.Completed, result.State); - Assert.Equal("Hello World", TestContext.TextOf(result)); - } - - [Fact] - public async Task ReportsNoTitle_WhenBrowserReturnsNull() - { - var factory = new StubBrowserSessionFactory - { - TitleResponder = (_, _) => Task.FromResult<string?>(null) - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", "https://example.com"), - context); - - Assert.Equal("(no title)", TestContext.TextOf(result)); - } - - [Fact] - public async Task ReportsError_WhenBrowserThrows() - { - var factory = new StubBrowserSessionFactory - { - TitleResponder = (_, _) => Task.FromException<string?>(new InvalidOperationException("boom")) - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", "https://example.com"), - context); - - Assert.Contains("Fetch failed: boom", TestContext.TextOf(result)); - } - - [Fact] - public async Task RejectsNonAbsoluteUrl_WithoutCreatingSession() - { - var factory = new StubBrowserSessionFactory(); - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", "not a url"), - context); - - Assert.Equal(0, factory.SessionsCreated); - Assert.Contains("absolute http(s) URL", TestContext.TextOf(result)); - } - - [Fact] - public async Task DisposesSession_AfterUse() - { - var factory = new StubBrowserSessionFactory - { - TitleResponder = (_, _) => Task.FromResult<string?>("t") - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", "https://example.com"), - context); - - Assert.Equal(1, factory.SessionsCreated); - Assert.Equal(1, factory.SessionsDisposed); - } - - [Fact] - public async Task ExtractsUrlFromFreeFormText() - { - // Simulates what RockBot's LLM actually sends — conversational wrapping - // around a URL. The capability should still find it. - var factory = new StubBrowserSessionFactory - { - TitleResponder = (url, _) => - Task.FromResult<string?>(url.ToString() == "https://lhotka.net/" ? "Rockford Lhotka" : null) - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", "Please fetch the title of https://lhotka.net."), - context); - - Assert.Equal("Rockford Lhotka", TestContext.TextOf(result)); - } - - [Fact] - public async Task AcceptsUrlFromJsonShim() - { - var factory = new StubBrowserSessionFactory - { - TitleResponder = (_, _) => Task.FromResult<string?>("Hello") - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("fetch-page-title", """{"url":"https://example.com"}"""), - context); - - Assert.Equal("Hello", TestContext.TextOf(result)); - } - - [Fact] - public async Task AcceptsUrl_FromMessageMetadata() - { - Uri? capturedUrl = null; - var factory = new StubBrowserSessionFactory - { - TitleResponder = (url, _) => - { - capturedUrl = url; - return Task.FromResult<string?>("From Metadata"); - } - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - var request = TestContext.RequestWithMetadata( - "fetch-page-title", - messageMetadata: new Dictionary<string, string> { ["url"] = "https://example.com" }); - - var result = await capability.ExecuteAsync(request, context); - - Assert.Equal("From Metadata", TestContext.TextOf(result)); - Assert.Equal(new Uri("https://example.com"), capturedUrl); - } - - [Fact] - public async Task AcceptsUrl_FromRequestMetadata() - { - var factory = new StubBrowserSessionFactory - { - TitleResponder = (_, _) => Task.FromResult<string?>("Request Metadata") - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - var request = TestContext.RequestWithMetadata( - "fetch-page-title", - requestMetadata: new Dictionary<string, string> { ["url"] = "https://example.com" }); - - var result = await capability.ExecuteAsync(request, context); - - Assert.Equal("Request Metadata", TestContext.TextOf(result)); - } - - [Fact] - public async Task MessageMetadata_WinsOverTextPart() - { - Uri? capturedUrl = null; - var factory = new StubBrowserSessionFactory - { - TitleResponder = (url, _) => - { - capturedUrl = url; - return Task.FromResult<string?>("ok"); - } - }; - var capability = new FetchPageTitleCapability(factory, NullLogger<FetchPageTitleCapability>.Instance); - var (context, _) = TestContext.Build(); - var request = TestContext.RequestWithMetadata( - "fetch-page-title", - text: "some conversational text with https://ignore-me.example", - messageMetadata: new Dictionary<string, string> { ["url"] = "https://authoritative.example" }); - - await capability.ExecuteAsync(request, context); - - Assert.Equal(new Uri("https://authoritative.example"), capturedUrl); - } -} diff --git a/tests/Foragent.Agent.Tests/ForagentCancelHandlerTests.cs b/tests/Foragent.Agent.Tests/ForagentCancelHandlerTests.cs new file mode 100644 index 0000000..526a718 --- /dev/null +++ b/tests/Foragent.Agent.Tests/ForagentCancelHandlerTests.cs @@ -0,0 +1,112 @@ +using System.Text.Json; +using Foragent.Capabilities; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using RockBot.A2A; +using RockBot.Host; +using RockBot.Messaging; +using Xunit; + +namespace Foragent.Agent.Tests; + +public class ForagentCancelHandlerTests +{ + [Fact] + public async Task CancelsInFlightTask_WithoutPublishingReply() + { + var registry = new InFlightTaskRegistry(); + using var parent = new CancellationTokenSource(); + var token = registry.Register("task-1", parent.Token); + + var publisher = new CapturingPublisher(); + var handler = BuildHandler(registry, publisher); + + await handler.HandleAsync( + new AgentTaskCancelRequest { TaskId = "task-1" }, + BuildContext()); + + Assert.True(token.IsCancellationRequested); + // On a successful cancel we deliberately don't publish a reply — + // the task's own terminal result is the acknowledgment. + Assert.Empty(publisher.Published); + } + + [Fact] + public async Task PublishesTaskNotFound_WhenNoInFlightRegistration() + { + var registry = new InFlightTaskRegistry(); + var publisher = new CapturingPublisher(); + var handler = BuildHandler(registry, publisher); + + await handler.HandleAsync( + new AgentTaskCancelRequest { TaskId = "nope", ContextId = "ctx-7" }, + BuildContext(replyTo: "caller.replies")); + + var (topic, envelope) = Assert.Single(publisher.Published); + Assert.Equal("caller.replies", topic); + // Use the framework's GetPayload extension so the camelCase naming + // policy matches what ToEnvelope used on the write side. + var error = envelope.GetPayload<AgentTaskError>()!; + Assert.Equal("nope", error.TaskId); + Assert.Equal("ctx-7", error.ContextId); + Assert.Equal(AgentTaskError.Codes.TaskNotFound, error.Code); + } + + [Fact] + public async Task FallsBackToDefaultResultTopic_WhenEnvelopeHasNoReplyTo() + { + var registry = new InFlightTaskRegistry(); + var publisher = new CapturingPublisher(); + var handler = BuildHandler(registry, publisher); + + await handler.HandleAsync( + new AgentTaskCancelRequest { TaskId = "missing" }, + BuildContext(replyTo: null)); + + var (topic, _) = Assert.Single(publisher.Published); + Assert.Equal("agent.response", topic); // A2AOptions.DefaultResultTopic default + } + + // ── helpers ───────────────────────────────────────────────────────────── + + private static ForagentCancelHandler BuildHandler( + InFlightTaskRegistry registry, + IMessagePublisher publisher) + { + var options = new A2AOptions(); + var identity = new AgentIdentity("Foragent"); + return new ForagentCancelHandler( + registry, publisher, options, identity, + NullLogger<ForagentCancelHandler>.Instance); + } + + private static MessageHandlerContext BuildContext(string? replyTo = null) + { + var envelope = MessageEnvelope.Create( + messageType: typeof(AgentTaskCancelRequest).FullName!, + body: ReadOnlyMemory<byte>.Empty, + source: "test", + replyTo: replyTo); + return new MessageHandlerContext + { + Envelope = envelope, + Agent = new AgentIdentity("Foragent"), + Services = new ServiceCollection().BuildServiceProvider(), + CancellationToken = CancellationToken.None + }; + } + + private sealed class CapturingPublisher : IMessagePublisher + { + public List<(string Topic, MessageEnvelope Envelope)> Published { get; } = []; + + public Task PublishAsync(string topic, MessageEnvelope envelope, CancellationToken cancellationToken = default) + { + Published.Add((topic, envelope)); + return Task.CompletedTask; + } + + public ValueTask DisposeAsync() => ValueTask.CompletedTask; + } +} diff --git a/tests/Foragent.Agent.Tests/ForagentTaskHandlerTests.cs b/tests/Foragent.Agent.Tests/ForagentTaskHandlerTests.cs index 970fc16..d33be92 100644 --- a/tests/Foragent.Agent.Tests/ForagentTaskHandlerTests.cs +++ b/tests/Foragent.Agent.Tests/ForagentTaskHandlerTests.cs @@ -13,7 +13,7 @@ public async Task DispatchesToMatchingCapability() var executed = new List<string>(); ICapability alpha = new TrackingCapability("alpha", executed); ICapability beta = new TrackingCapability("beta", executed); - var handler = new ForagentTaskHandler([alpha, beta], NullLogger<ForagentTaskHandler>.Instance); + var handler = new ForagentTaskHandler([alpha, beta], new InFlightTaskRegistry(), NullLogger<ForagentTaskHandler>.Instance); var (context, _) = TestContext.Build(); await handler.HandleTaskAsync(TestContext.Request("beta", "in"), context); @@ -25,7 +25,7 @@ public async Task DispatchesToMatchingCapability() public async Task PublishesWorkingStatus_BeforeDispatch() { ICapability alpha = new TrackingCapability("alpha", []); - var handler = new ForagentTaskHandler([alpha], NullLogger<ForagentTaskHandler>.Instance); + var handler = new ForagentTaskHandler([alpha], new InFlightTaskRegistry(), NullLogger<ForagentTaskHandler>.Instance); var (context, capture) = TestContext.Build(); await handler.HandleTaskAsync(TestContext.Request("alpha", "in"), context); @@ -38,7 +38,7 @@ public async Task PublishesWorkingStatus_BeforeDispatch() public async Task ReturnsErrorResult_ForUnknownSkill() { ICapability alpha = new TrackingCapability("alpha", []); - var handler = new ForagentTaskHandler([alpha], NullLogger<ForagentTaskHandler>.Instance); + var handler = new ForagentTaskHandler([alpha], new InFlightTaskRegistry(), NullLogger<ForagentTaskHandler>.Instance); var (context, _) = TestContext.Build(); var result = await handler.HandleTaskAsync(TestContext.Request("nope", "in"), context); @@ -48,12 +48,80 @@ public async Task ReturnsErrorResult_ForUnknownSkill() Assert.Contains("alpha", TestContext.TextOf(result)); } + [Fact] + public async Task RegistersTask_ForDurationOfExecution_ThenRemovesOnCompletion() + { + var registry = new InFlightTaskRegistry(); + CancellationToken? observedCt = null; + int countDuringExecute = 0; + var capability = new CallbackCapability("probe", (req, ctx) => + { + countDuringExecute = registry.Count; + observedCt = ctx.MessageContext.CancellationToken; + return Task.FromResult(CapabilityResultFactory.Completed(req, "ok")); + }); + var handler = new ForagentTaskHandler([capability], registry, NullLogger<ForagentTaskHandler>.Instance); + var (context, _) = TestContext.Build(); + + await handler.HandleTaskAsync(TestContext.Request("probe", "in"), context); + + Assert.Equal(1, countDuringExecute); + Assert.Equal(0, registry.Count); + Assert.NotNull(observedCt); + } + + [Fact] + public async Task ExternalCancel_CancelsTheTaskToken() + { + // Simulates the cancel handler firing mid-execution: the capability's + // observed CT should be cancelled when TryCancel is called, even + // though the parent message CT is still live. + var registry = new InFlightTaskRegistry(); + var observedCt = new TaskCompletionSource<CancellationToken>(); + var released = new TaskCompletionSource(); + var capability = new CallbackCapability("probe", async (req, ctx) => + { + observedCt.TrySetResult(ctx.MessageContext.CancellationToken); + await released.Task; + return CapabilityResultFactory.Completed(req, "ok"); + }); + var handler = new ForagentTaskHandler([capability], registry, NullLogger<ForagentTaskHandler>.Instance); + var (context, _) = TestContext.Build(); + var request = TestContext.Request("probe", "in"); + + var task = handler.HandleTaskAsync(request, context); + var ct = await observedCt.Task; + + Assert.False(ct.IsCancellationRequested); + Assert.True(registry.TryCancel(request.TaskId)); + Assert.True(ct.IsCancellationRequested); + + released.SetResult(); + await task; + Assert.Equal(0, registry.Count); + } + + [Fact] + public async Task Registry_IsClearedEven_WhenCapabilityThrows() + { + var registry = new InFlightTaskRegistry(); + var capability = new CallbackCapability("probe", (_, _) => + throw new InvalidOperationException("boom")); + var handler = new ForagentTaskHandler([capability], registry, NullLogger<ForagentTaskHandler>.Instance); + var (context, _) = TestContext.Build(); + + await Assert.ThrowsAsync<InvalidOperationException>(() => + handler.HandleTaskAsync(TestContext.Request("probe", "in"), context)); + + Assert.Equal(0, registry.Count); + } + [Fact] public async Task SkillLookup_IsCaseInsensitive() { var executed = new List<string>(); ICapability alpha = new TrackingCapability("alpha", executed); - var handler = new ForagentTaskHandler([alpha], NullLogger<ForagentTaskHandler>.Instance); + var handler = new ForagentTaskHandler([alpha], new InFlightTaskRegistry(), NullLogger<ForagentTaskHandler>.Instance); var (context, _) = TestContext.Build(); await handler.HandleTaskAsync(TestContext.Request("Alpha", "in"), context); @@ -61,6 +129,17 @@ public async Task SkillLookup_IsCaseInsensitive() Assert.Equal(["alpha"], executed); } + private sealed class CallbackCapability( + string skillId, + Func<AgentTaskRequest, AgentTaskContext, Task<AgentTaskResult>> callback) : ICapability + { + public string SkillId => skillId; + public AgentSkill Skill => new() { Id = skillId, Name = skillId, Description = skillId }; + + public Task<AgentTaskResult> ExecuteAsync(AgentTaskRequest request, AgentTaskContext context) => + callback(request, context); + } + private sealed class TrackingCapability(string skillId, List<string> executed) : ICapability { public string SkillId => skillId; diff --git a/tests/Foragent.Agent.Tests/Forms/ExecuteFormBatchCapabilityTests.cs b/tests/Foragent.Agent.Tests/Forms/ExecuteFormBatchCapabilityTests.cs index 828f773..b4237d8 100644 --- a/tests/Foragent.Agent.Tests/Forms/ExecuteFormBatchCapabilityTests.cs +++ b/tests/Foragent.Agent.Tests/Forms/ExecuteFormBatchCapabilityTests.cs @@ -39,6 +39,40 @@ public async Task RejectsInput_WhenRowsMissing() Assert.Contains("rows", TestContext.TextOf(result)); } + [Fact] + public async Task AcceptsStructuredInput_OnDataPart() + { + var (cap, factory, skills) = Build(); + await SeedSchemaAsync(skills, "sites/example.com/forms/contact", SampleSchema()); + var page = SubmittingPage(); + factory.PageResponder = (_, _) => + { + page.CurrentUrl = new Uri("https://example.com/form"); + return Task.FromResult<IBrowserPage>(page); + }; + + var (ctx, _) = TestContext.Build(); + + var result = await cap.ExecuteAsync( + TestContext.RequestWithData( + "execute-form-batch", + dataJson: """ + { + "schemaRef": "sites/example.com/forms/contact", + "allowedHosts": ["example.com"], + "rows": [ + {"email":"a@example.com","message":"hi"} + ] + } + """, + text: "Submit one contact form row."), + ctx); + + using var doc = JsonDocument.Parse(TestContext.TextOf(result)); + Assert.Equal("done", doc.RootElement.GetProperty("status").GetString()); + Assert.Equal(1, doc.RootElement.GetProperty("successCount").GetInt32()); + } + [Fact] public async Task ResolvesSchemaFromRef_AndSubmitsRows() { diff --git a/tests/Foragent.Agent.Tests/Forms/LearnFormSchemaCapabilityTests.cs b/tests/Foragent.Agent.Tests/Forms/LearnFormSchemaCapabilityTests.cs index 489b798..abe76a9 100644 --- a/tests/Foragent.Agent.Tests/Forms/LearnFormSchemaCapabilityTests.cs +++ b/tests/Foragent.Agent.Tests/Forms/LearnFormSchemaCapabilityTests.cs @@ -38,6 +38,29 @@ public async Task RejectsInput_WhenAllowlistMissing() Assert.Contains("allowedHosts", TestContext.TextOf(result)); } + [Fact] + public async Task AcceptsStructuredInput_OnDataPart() + { + var scan = SampleScan(); + var (cap, factory, skills) = Build(); + var page = new StubBrowserPage { FormScan = scan }; + factory.PageResponder = (_, _) => Task.FromResult<IBrowserPage>(page); + var (ctx, _) = TestContext.Build(); + + var result = await cap.ExecuteAsync( + TestContext.RequestWithData( + "learn-form-schema", + dataJson: """{"url":"https://example.com/contact","allowedHosts":["example.com"]}""", + text: "Learn the schema of the contact form."), + ctx); + + Assert.Equal(AgentTaskState.Completed, result.State); + using var doc = JsonDocument.Parse(TestContext.TextOf(result)); + Assert.Equal("done", doc.RootElement.GetProperty("status").GetString()); + var skillName = doc.RootElement.GetProperty("skillName").GetString()!; + Assert.StartsWith("sites/example-com/forms/", skillName); + } + [Fact] public async Task RejectsInput_WhenUrlOffAllowlist() { @@ -70,7 +93,7 @@ public async Task ReturnsSchema_AndPersistsSkillWithResource() using var doc = JsonDocument.Parse(TestContext.TextOf(result)); Assert.Equal("done", doc.RootElement.GetProperty("status").GetString()); var skillName = doc.RootElement.GetProperty("skillName").GetString()!; - Assert.StartsWith("sites/example.com/forms/", skillName); + Assert.StartsWith("sites/example-com/forms/", skillName); var saved = skills.Saved[skillName]; Assert.NotNull(saved.Manifest); diff --git a/tests/Foragent.Agent.Tests/InFlightTaskRegistryTests.cs b/tests/Foragent.Agent.Tests/InFlightTaskRegistryTests.cs new file mode 100644 index 0000000..76cbe5d --- /dev/null +++ b/tests/Foragent.Agent.Tests/InFlightTaskRegistryTests.cs @@ -0,0 +1,71 @@ +using Foragent.Capabilities; +using Xunit; + +namespace Foragent.Agent.Tests; + +public class InFlightTaskRegistryTests +{ + [Fact] + public void Register_ReturnsLinkedToken_ThatFiresWhenCancelled() + { + var registry = new InFlightTaskRegistry(); + using var parent = new CancellationTokenSource(); + + var token = registry.Register("task-1", parent.Token); + + Assert.False(token.IsCancellationRequested); + Assert.True(registry.TryCancel("task-1")); + Assert.True(token.IsCancellationRequested); + } + + [Fact] + public void Register_ReturnsLinkedToken_ThatFiresWhenParentCancels() + { + var registry = new InFlightTaskRegistry(); + using var parent = new CancellationTokenSource(); + + var token = registry.Register("task-1", parent.Token); + + parent.Cancel(); + Assert.True(token.IsCancellationRequested); + } + + [Fact] + public void TryCancel_ReturnsFalse_ForUnknownTask() + { + var registry = new InFlightTaskRegistry(); + + Assert.False(registry.TryCancel("nope")); + } + + [Fact] + public void Remove_DropsRegistration() + { + var registry = new InFlightTaskRegistry(); + using var parent = new CancellationTokenSource(); + registry.Register("task-1", parent.Token); + Assert.Equal(1, registry.Count); + + registry.Remove("task-1"); + + Assert.Equal(0, registry.Count); + Assert.False(registry.TryCancel("task-1")); + } + + [Fact] + public void Register_ReplacesPriorRegistration_AndCancelsIt() + { + // A redelivered message with the same task id should not orphan the + // previous registration's CTS. The first CTS is cancelled so any + // residual work unwinds before the new execution begins. + var registry = new InFlightTaskRegistry(); + using var parent = new CancellationTokenSource(); + + var firstToken = registry.Register("task-1", parent.Token); + var secondToken = registry.Register("task-1", parent.Token); + + Assert.True(firstToken.IsCancellationRequested); + Assert.False(secondToken.IsCancellationRequested); + Assert.Equal(1, registry.Count); + } +} diff --git a/tests/Foragent.Agent.Tests/TestDoubles.cs b/tests/Foragent.Agent.Tests/TestDoubles.cs index 67c7d2f..5c1cc8e 100644 --- a/tests/Foragent.Agent.Tests/TestDoubles.cs +++ b/tests/Foragent.Agent.Tests/TestDoubles.cs @@ -27,6 +27,35 @@ public static AgentTaskRequest Request(string skill, string text) }; } + /// <summary> + /// Builds a request that carries a structured payload on an A2A DataPart + /// (<c>Kind = "data"</c>), matching what RockBot's <c>invoke_agent</c> tool + /// produces when the caller populates its <c>data</c> parameter. An + /// optional prose text part can be supplied for the human-readable + /// message; omit for pure data-only requests. + /// </summary> + public static AgentTaskRequest RequestWithData( + string skill, + string dataJson, + string? text = null) + { + var parts = new List<AgentMessagePart>(); + if (text is not null) + parts.Add(new AgentMessagePart { Kind = "text", Text = text }); + parts.Add(new AgentMessagePart { Kind = "data", Data = dataJson, MimeType = "application/json" }); + return new AgentTaskRequest + { + TaskId = Guid.NewGuid().ToString(), + ContextId = "ctx", + Skill = skill, + Message = new AgentMessage + { + Role = "user", + Parts = parts + } + }; + } + public static AgentTaskRequest RequestWithMetadata( string skill, string? text = null, @@ -88,12 +117,6 @@ internal sealed class StatusCapture internal sealed class StubBrowserSessionFactory : IBrowserSessionFactory { - public Func<Uri, CancellationToken, Task<string?>> TitleResponder { get; set; } = - (_, _) => Task.FromResult<string?>(null); - - public Func<Uri, CancellationToken, Task<PageSnapshot>> SnapshotResponder { get; set; } = - (url, _) => Task.FromResult(new PageSnapshot(url, "stub", "stub content", PageSnapshotSource.Accessibility)); - public Func<Uri, CancellationToken, Task<IBrowserPage>> PageResponder { get; set; } = (_, _) => Task.FromResult<IBrowserPage>(new StubBrowserPage()); @@ -114,12 +137,6 @@ public Task<IBrowserSession> CreateSessionAsync(Func<Uri, bool> allowedHost, Can private sealed class StubSession(StubBrowserSessionFactory owner) : IBrowserSession { - public Task<string?> FetchPageTitleAsync(Uri url, CancellationToken ct = default) => - owner.TitleResponder(url, ct); - - public Task<PageSnapshot> CapturePageSnapshotAsync(Uri url, CancellationToken ct = default) => - owner.SnapshotResponder(url, ct); - public Task<IBrowserPage> OpenPageAsync(Uri url, CancellationToken ct = default) => owner.PageResponder(url, ct); diff --git a/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs b/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs index 82c6bb1..1f11674 100644 --- a/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs +++ b/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs @@ -172,7 +172,6 @@ private BrowserTaskCapability BuildCapability(LlmConfig config) var priming = new BrowserTaskPriming( skillStore, memory, - embeddingGenerator: null, NullLogger<BrowserTaskPriming>.Instance); return new BrowserTaskCapability( diff --git a/tests/Foragent.Browser.Tests/ExtractStructuredDataIntegrationTests.cs b/tests/Foragent.Browser.Tests/ExtractStructuredDataIntegrationTests.cs deleted file mode 100644 index 4f60dd2..0000000 --- a/tests/Foragent.Browser.Tests/ExtractStructuredDataIntegrationTests.cs +++ /dev/null @@ -1,157 +0,0 @@ -using System.ClientModel; -using System.Text.Json; -using Foragent.Capabilities; -using Microsoft.AspNetCore.Builder; -using Microsoft.AspNetCore.Http; -using Microsoft.Extensions.AI; -using Microsoft.Extensions.Logging.Abstractions; -using OpenAI; -using RockBot.A2A; -using RockBot.Host; -using RockBot.Messaging; -using Microsoft.Extensions.DependencyInjection; -using Xunit; - -namespace Foragent.Browser.Tests; - -/// <summary> -/// End-to-end test that runs the real <see cref="ExtractStructuredDataCapability"/> -/// against a local Kestrel page, driving real Playwright Chromium and a real -/// LLM. Skipped automatically when <c>FORAGENT_LLM_*</c> env vars aren't set — -/// so the unit + browser test suites still run in CI without credentials. -/// </summary> -[Collection("Playwright")] -public class ExtractStructuredDataIntegrationTests(TestPageServerFixture fixture) -{ - [SkippableFact] - public async Task ExtractsProductPrice_FromFakeShopPage() - { - var config = LlmConfig.FromEnvironment(); - Skip.If(config is null, "FORAGENT_LLM_* env vars not set — skipping real-LLM test."); - - await using var shop = await StartFakeShopAsync(); - var capability = BuildCapability(config!); - var request = BuildRequest( - $"{shop.BaseUrl}/product", - "the product name and price as fields called 'name' and 'price_usd'"); - - var result = await capability.ExecuteAsync(request, BuildContext()); - - Assert.Equal(AgentTaskState.Completed, result.State); - var text = result.Message?.Parts.FirstOrDefault(p => p.Kind == "text")?.Text ?? string.Empty; - - using var doc = JsonDocument.Parse(text); - var name = doc.RootElement.GetProperty("name").GetString(); - Assert.Equal("Premium Widget", name, ignoreCase: true); - - // price_usd may come back as a number or string — accept either. - var priceElement = doc.RootElement.GetProperty("price_usd"); - var price = priceElement.ValueKind == JsonValueKind.Number - ? priceElement.GetDecimal() - : decimal.Parse(priceElement.GetString() ?? "0", System.Globalization.CultureInfo.InvariantCulture); - Assert.Equal(49.99m, price); - } - - // ── helpers ────────────────────────────────────────────────────────────── - - private ExtractStructuredDataCapability BuildCapability(LlmConfig config) - { - var openAi = new OpenAIClient( - new ApiKeyCredential(config.ApiKey), - new OpenAIClientOptions { Endpoint = new Uri(config.Endpoint) }); - var chatClient = openAi.GetChatClient(config.ModelId).AsIChatClient(); - return new ExtractStructuredDataCapability( - fixture.Factory, chatClient, NullLogger<ExtractStructuredDataCapability>.Instance); - } - - private static AgentTaskRequest BuildRequest(string url, string description) => new() - { - TaskId = Guid.NewGuid().ToString(), - Skill = ExtractStructuredDataCapability.SkillDefinition.Id, - Message = new AgentMessage - { - Role = "user", - Parts = - [ - new AgentMessagePart - { - Kind = "text", - Text = JsonSerializer.Serialize(new { url, description }) - } - ] - } - }; - - private static AgentTaskContext BuildContext() - { - var envelope = MessageEnvelope.Create( - messageType: typeof(AgentTaskRequest).FullName!, - body: ReadOnlyMemory<byte>.Empty, - source: "test"); - var messageContext = new MessageHandlerContext - { - Envelope = envelope, - Agent = new AgentIdentity("Foragent"), - Services = new ServiceCollection().BuildServiceProvider(), - CancellationToken = CancellationToken.None - }; - return new AgentTaskContext - { - MessageContext = messageContext, - PublishStatus = (_, _) => Task.CompletedTask - }; - } - - private static async Task<FakeShop> StartFakeShopAsync() - { - var builder = WebApplication.CreateEmptyBuilder(new WebApplicationOptions()); - builder.WebHost.UseKestrelCore(); - builder.WebHost.UseUrls("http://127.0.0.1:0"); - builder.Services.AddRoutingCore(); - builder.Logging.ClearProviders(); - - var app = builder.Build(); - app.UseRouting(); - app.MapGet("/product", () => Results.Content(""" - <!doctype html> - <html> - <head><title>Premium Widget | The Shop - -

Premium Widget

-

A top-of-the-line widget for demanding applications.

-

USD 49.99

- - - - """, "text/html")); - - await app.StartAsync(); - var addresses = app.Services - .GetRequiredService() - .Features.Get()! - .Addresses; - return new FakeShop(app, addresses.First().TrimEnd('/')); - } - - private sealed record FakeShop(WebApplication App, string BaseUrl) : IAsyncDisposable - { - public async ValueTask DisposeAsync() => await App.DisposeAsync(); - } - - private sealed record LlmConfig(string Endpoint, string ModelId, string ApiKey) - { - public static LlmConfig? FromEnvironment() - { - var endpoint = Environment.GetEnvironmentVariable("FORAGENT_LLM_ENDPOINT"); - var model = Environment.GetEnvironmentVariable("FORAGENT_LLM_MODEL_ID"); - var key = Environment.GetEnvironmentVariable("FORAGENT_LLM_API_KEY"); - if (string.IsNullOrWhiteSpace(endpoint) - || string.IsNullOrWhiteSpace(model) - || string.IsNullOrWhiteSpace(key)) - { - return null; - } - return new LlmConfig(endpoint, model, key); - } - } -} diff --git a/tests/Foragent.Browser.Tests/PageSnapshotTests.cs b/tests/Foragent.Browser.Tests/PageSnapshotTests.cs deleted file mode 100644 index 914a0ad..0000000 --- a/tests/Foragent.Browser.Tests/PageSnapshotTests.cs +++ /dev/null @@ -1,41 +0,0 @@ -using Xunit; - -namespace Foragent.Browser.Tests; - -[Collection("Playwright")] -public class PageSnapshotTests(TestPageServerFixture fixture) -{ - [Fact] - public async Task CapturePageSnapshotAsync_ReturnsAccessibilityRendering_ForPlainHtml() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var snapshot = await session.CapturePageSnapshotAsync(fixture.Url("/plain")); - - Assert.Equal("Hello World", snapshot.Title); - Assert.Equal(PageSnapshotSource.Accessibility, snapshot.Source); - Assert.False(string.IsNullOrWhiteSpace(snapshot.Content)); - // The body text "hi" should appear somewhere in the accessibility tree. - Assert.Contains("hi", snapshot.Content); - } - - [Fact] - public async Task CapturePageSnapshotAsync_FollowsRedirect_AndReportsFinalUrl() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var snapshot = await session.CapturePageSnapshotAsync(fixture.Url("/redirect")); - - Assert.EndsWith("/plain", snapshot.Url.AbsolutePath); - Assert.Equal("Hello World", snapshot.Title); - } - - [Fact] - public async Task CapturePageSnapshotAsync_ThrowsOn404() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - await Assert.ThrowsAsync(() => - session.CapturePageSnapshotAsync(fixture.Url("/not-found"))); - } -} diff --git a/tests/Foragent.Browser.Tests/PlaywrightBrowserSessionTests.cs b/tests/Foragent.Browser.Tests/PlaywrightBrowserSessionTests.cs deleted file mode 100644 index 2450d2a..0000000 --- a/tests/Foragent.Browser.Tests/PlaywrightBrowserSessionTests.cs +++ /dev/null @@ -1,83 +0,0 @@ -using Xunit; - -namespace Foragent.Browser.Tests; - -[Collection("Playwright")] -public class PlaywrightBrowserSessionTests(TestPageServerFixture fixture) -{ - [Fact] - public async Task FetchPageTitleAsync_ReturnsTitle_FromPlainHtml() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var title = await session.FetchPageTitleAsync(fixture.Url("/plain")); - - Assert.Equal("Hello World", title); - } - - [Fact] - public async Task FetchPageTitleAsync_ReturnsNull_WhenTitleMissing() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var title = await session.FetchPageTitleAsync(fixture.Url("/no-title")); - - Assert.Null(title); - } - - [Fact] - public async Task FetchPageTitleAsync_DecodesHtmlEntities() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var title = await session.FetchPageTitleAsync(fixture.Url("/entities")); - - Assert.Equal("AT&T \u2014 Home", title); - } - - [Fact] - public async Task FetchPageTitleAsync_ReturnsJsUpdatedTitle() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var title = await session.FetchPageTitleAsync(fixture.Url("/js-title")); - - Assert.Equal("updated by js", title); - } - - [Fact] - public async Task FetchPageTitleAsync_FollowsRedirects() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - var title = await session.FetchPageTitleAsync(fixture.Url("/redirect")); - - Assert.Equal("Hello World", title); - } - - [Fact] - public async Task FetchPageTitleAsync_ThrowsOn404() - { - await using var session = await fixture.Factory.CreateSessionAsync(); - - await Assert.ThrowsAsync(() => - session.FetchPageTitleAsync(fixture.Url("/not-found"))); - } - - [Fact] - public async Task Sessions_AreIndependent() - { - // Each session should get a fresh context per spec §3.5. A smoke check that - // the factory doesn't hand back the same session twice or leak state. - await using var sessionA = await fixture.Factory.CreateSessionAsync(); - await using var sessionB = await fixture.Factory.CreateSessionAsync(); - - Assert.NotSame(sessionA, sessionB); - - var titleA = await sessionA.FetchPageTitleAsync(fixture.Url("/plain")); - var titleB = await sessionB.FetchPageTitleAsync(fixture.Url("/entities")); - - Assert.Equal("Hello World", titleA); - Assert.Equal("AT&T \u2014 Home", titleB); - } -}