From d1952c23d651e0dea576140afb287d1eb0b94823 Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 17:51:33 -0700
Subject: [PATCH 1/8] refactor(ai): extract shared chat/completions adapter
 from ollama driver

---
 .../ai/drivers/http/chatCompletions.test.ts   |  51 +++
 server/ai/drivers/http/chatCompletions.ts     | 350 +++++++++++++++++
 server/ai/drivers/ollama.ts                   | 367 +-----------------
 .../ai-driver-shared-helpers.test.ts          |   5 +-
 4 files changed, 419 insertions(+), 354 deletions(-)
 create mode 100644 server/ai/drivers/http/chatCompletions.test.ts
 create mode 100644 server/ai/drivers/http/chatCompletions.ts
diff --git a/server/ai/drivers/http/chatCompletions.test.ts b/server/ai/drivers/http/chatCompletions.test.ts
new file mode 100644
index 000000000..f8c3ba40d
--- /dev/null
+++ b/server/ai/drivers/http/chatCompletions.test.ts
@@ -0,0 +1,51 @@
+import { describe, it, expect } from 'bun:test'
+import {
+  mapChatHistory,
+  ChatCompletionsTurnTranslator,
+  trimSlash,
+} from './chatCompletions'
+import type { SseFrame } from './sse'
+
+function frame(obj: unknown): SseFrame {
+  return { data: JSON.stringify(obj), event: null } // fixture: JSON payload, `event` left null
+}
+
+describe('chatCompletions shared adapter', () => {
+  it('trimSlash strips trailing slashes', () => {
+    expect(trimSlash('http://x/v1/')).toBe('http://x/v1')
+    expect(trimSlash('http://x/v1')).toBe('http://x/v1') // nothing to strip: returned unchanged
+  })
+
+  it('mapChatHistory prepends the system prompt as a system message', () => {
+    const turns = mapChatHistory(['be terse'], [
+      { role: 'user', content: [{ kind: 'text', text: 'hi' }] },
+    ])
+    expect(turns[0]).toEqual([{ role: 'system', content: 'be terse' }]) // each turn is an array of chat messages
+    expect(turns[1]).toEqual([{ role: 'user', content: 'hi' }]) // text-only user content is a plain string
+  })
+
+  it('translator accumulates streamed text and finishes with stop=true when no tool calls', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    const events = t.translate(frame({ choices: [{ delta: { content: 'Hello' } }] }))
+    expect(events).toEqual([{ type: 'text', text: 'Hello' }])
+    const result = t.finish()
+    expect(result.stop).toBe(true)
+    expect(result.toolCalls).toEqual([])
+  })
+
+  it('translator emits one toolCall event per accumulated call at finish_reason', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    // First fragment: id, name and the opening half of the JSON arguments.
+    t.translate(frame({ choices: [{ delta: { tool_calls: [
+      { index: 0, id: 'c1', function: { name: 'insertHtml', arguments: '{"ht' } },
+    ] } }] }))
+    const ev = t.translate(frame({ choices: [{ delta: { tool_calls: [
+      { index: 0, function: { arguments: 'ml":"<p/>"}' } },
+    ] }, finish_reason: 'tool_calls' }] }))
+    const toolEvent = ev.find((e) => e.type === 'toolCall')
+    expect(toolEvent).toBeTruthy()
+    expect(toolEvent).toMatchObject({ type: 'toolCall', toolName: 'insertHtml', toolCallId: 'c1' })
+    const result = t.finish()
+    expect(result.stop).toBe(false) // a pending tool call means the loop must continue
+    expect(result.toolCalls[0]).toMatchObject({ name: 'insertHtml' })
+  })
+})
diff --git a/server/ai/drivers/http/chatCompletions.ts b/server/ai/drivers/http/chatCompletions.ts
new file mode 100644
index 000000000..c4a006cc9
--- /dev/null
+++ b/server/ai/drivers/http/chatCompletions.ts
@@ -0,0 +1,350 @@
+/**
+ * Shared chat/completions adapter for OpenAI-compatible providers.
+ *
+ * Extracted from ollama.ts — all drivers using the OpenAI chat/completions
+ * wire protocol share this module. The factory `makeChatCompletionsAdapter`
+ * wires the per-provider options (baseUrl, apiKey, label) into the generic
+ * adapter shape consumed by `runToolLoop`.
+ */
+
+import { Type, parseValue, type Static } from '@core/utils/typeboxHelpers'
+import {
+  SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
+  type AiContentBlock,
+  type AiMessage,
+  type AiStreamEvent,
+  type AiToolOutput,
+} from '../../runtime/types'
+import {
+  type ProviderAdapter,
+  type TurnResult,
+  type TurnToolCall,
+  type TurnToolResult,
+  type TurnTranslator,
+  type TurnUsage,
+} from './toolLoop'
+import type { SseFrame } from './sse'
+import { parseToolArguments } from './toolArgs'
+import { nanoid } from 'nanoid'
+
+// ---------------------------------------------------------------------------
+// Provider-native chat/completions message shapes (request side)
+// ---------------------------------------------------------------------------
+
+export type ChatContentPart =
+  | { type: 'text'; text: string }
+  | { type: 'image_url'; image_url: { url: string } } // userContent fills `url` with a base64 data URL
+
+export type ChatToolCall = {
+  id: string
+  type: 'function'
+  function: { name: string; arguments: string } // `arguments` is JSON text, not a parsed object
+}
+
+export type ChatMessage =
+  | { role: 'system'; content: string }
+  | { role: 'user'; content: string | ChatContentPart[] } // parts array is used once an image is present
+  | { role: 'assistant'; content: string; tool_calls?: ChatToolCall[] }
+  | { role: 'tool'; tool_call_id: string; content: string } // text only; paired to its call by tool_call_id
+
+// Each canonical `AiMessage` maps to one or more chat messages (an assistant
+// turn carries text + tool_calls in one message, but several tool results fan
+// out into several `role:'tool'` messages), so the loop's `TMessage` is a
+// message *array* and the request body flattens before sending.
+export type ChatTurn = ChatMessage[]
+
+// ---------------------------------------------------------------------------
+// AiMessage[] → chat/completions messages[]
+// ---------------------------------------------------------------------------
+
+/**
+ * Convert the canonical message log into per-turn chat/completions messages.
+ * chat/completions has no separate `instructions` field, so a non-empty system
+ * prompt becomes the leading `role:'system'` turn. Assistant `toolCall` blocks
+ * travel as `tool_calls` on the assistant message, and every `role:'tool'`
+ * entry becomes a `role:'tool'` message keyed by its `tool_call_id`.
+ */
+export function mapChatHistory(systemPrompt: string[], messages: AiMessage[]): ChatTurn[] {
+  const systemText = joinSystemPrompt(systemPrompt)
+  const turns: ChatTurn[] = systemText ? [[{ role: 'system', content: systemText }]] : []
+  for (const entry of messages) {
+    switch (entry.role) {
+      case 'user':
+        turns.push([{ role: 'user', content: userContent(entry.content) }])
+        break
+      case 'assistant':
+        turns.push([assistantMessage(entry.content)])
+        break
+      case 'tool':
+        turns.push([{ role: 'tool', tool_call_id: entry.toolCallId, content: toolOutputToString(entry.output) }])
+    } // a logged `role:'system'` entry matches no case — system is the prepended turn
+  }
+  return turns
+}
+
+function joinSystemPrompt(systemPrompt: string[]): string {
+  return systemPrompt.filter((part) => part !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY).join('\n\n') // marker entries are not sent
+}
+
+function userContent(blocks: AiContentBlock[]): string | ChatContentPart[] {
+  // Text-only input collapses to a single space-joined string of the non-empty texts.
+  if (blocks.every((block) => block.kind !== 'image')) {
+    const texts: string[] = []
+    for (const block of blocks) {
+      if (block.kind === 'text' && block.text.length > 0) texts.push(block.text)
+    }
+    return texts.join(' ')
+  }
+  // With any image present, emit ordered parts; other block kinds are left out.
+  return blocks.flatMap((block): ChatContentPart[] => {
+    if (block.kind === 'text') return [{ type: 'text', text: block.text }]
+    if (block.kind !== 'image') return []
+    // Base64 data URL — the OpenAI-compatible image_url part.
+    const url = `data:${block.mimeType};base64,${block.data}`
+    return [{ type: 'image_url', image_url: { url } }]
+  })
+}
+
+function assistantMessage(blocks: AiContentBlock[]): ChatMessage {
+  // Text blocks are concatenated as-is, with no separator between them.
+  const content = blocks.reduce((acc, block) => (block.kind === 'text' ? acc + block.text : acc), '')
+  const calls: ChatToolCall[] = []
+  for (const block of blocks) {
+    if (block.kind !== 'toolCall') continue
+    // Arguments travel as JSON text; a null/undefined input is sent as `{}`.
+    calls.push({
+      id: block.toolCallId,
+      type: 'function',
+      function: { name: block.toolName, arguments: JSON.stringify(block.input ?? {}) },
+    })
+  }
+  // `tool_calls` is left off entirely (not sent as []) when the turn made no calls.
+  if (calls.length === 0) return { role: 'assistant', content }
+  return { role: 'assistant', content, tool_calls: calls }
+}
+
+function toolOutputToString(output: AiToolOutput): string {
+  // A failed call yields its error text, or a generic message when none was given.
+  if (!output.ok) return output.error ?? 'Tool call failed.'
+  const payload = JSON.stringify(output.data ?? { ok: true })
+  const imageCount = output.images?.length ?? 0
+  if (imageCount === 0) return payload
+  // The OpenAI-compatible `role:'tool'` message is text-only, so images cannot
+  // be attached here. Append a note instead, so the model knows visual evidence
+  // exists but was not delivered through this channel.
+  return `${payload}\n\n[${imageCount} screenshot(s) omitted: this provider delivers tool results as text only.]`
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+export function trimSlash(url: string): string {
+  return url.replace(/\/+$/, '') // removes the whole run of trailing '/' characters, if any
+}
+
+// ---------------------------------------------------------------------------
+// SSE event schema (boundary validation — no `as` on parsed JSON)
+// ---------------------------------------------------------------------------
+
+const ChatToolCallDeltaSchema = Type.Object(
+  {
+    index: Type.Optional(Type.Number()), // the translator treats a missing index as 0
+    id: Type.Optional(Type.String()),
+    function: Type.Optional(
+      Type.Object(
+        { name: Type.Optional(Type.String()), arguments: Type.Optional(Type.String()) }, // arguments: a fragment of JSON text
+        { additionalProperties: true },
+      ),
+    ),
+  },
+  { additionalProperties: true }, // extra fields are allowed by the schema
+)
+
+const ChatChunkSchema = Type.Object(
+  {
+    choices: Type.Optional( // the translator reads only choices[0]
+      Type.Array(
+        Type.Object(
+          {
+            delta: Type.Optional(
+              Type.Object(
+                {
+                  content: Type.Optional(Type.Union([Type.String(), Type.Null()])), // string, explicit null, or absent
+                  tool_calls: Type.Optional(Type.Array(ChatToolCallDeltaSchema)),
+                },
+                { additionalProperties: true },
+              ),
+            ),
+            finish_reason: Type.Optional(Type.Union([Type.String(), Type.Null()])),
+          },
+          { additionalProperties: true },
+        ),
+      ),
+    ),
+    usage: Type.Optional( // optional like `choices`, so a chunk may carry either, both or neither
+      Type.Object(
+        { prompt_tokens: Type.Optional(Type.Number()), completion_tokens: Type.Optional(Type.Number()) },
+        { additionalProperties: true },
+      ),
+    ),
+  },
+  { additionalProperties: true },
+)
+
+// ---------------------------------------------------------------------------
+// SSE translator — one per API call in the loop
+// ---------------------------------------------------------------------------
+
+interface MutableToolCall {
+  id: string // provider-supplied id, or a generated `tool-…` id when the first fragment has none
+  name: string // '' until a fragment supplies function.name
+  arguments: string // raw JSON text, concatenated across fragments
+}
+
+export class ChatCompletionsTurnTranslator implements TurnTranslator<ChatTurn> {
+  private text = ''
+  // Tool calls accumulate by their streamed `index`; fragments arrive across
+  // chunks (id + name on the first, arguments piecemeal after).
+  private readonly toolsByIndex = new Map<number, MutableToolCall>()
+  private readonly order: number[] = []
+  // Set once toolCall events have gone out, so a later finish chunk cannot repeat them.
+  private emitted = false
+  private usage: TurnUsage | null = null
+
+  /** Consume one SSE frame: record usage, accumulate text / tool-call fragments, return UI events. */
+  translate(frame: SseFrame): AiStreamEvent[] {
+    let chunk: Static<typeof ChatChunkSchema>
+    try {
+      chunk = parseValue(ChatChunkSchema, JSON.parse(frame.data))
+    } catch {
+      // Keep-alive / unparseable frame — not fatal.
+      return []
+    }
+
+    if (chunk.usage) {
+      const { prompt_tokens, completion_tokens } = chunk.usage
+      this.usage = { promptTokens: prompt_tokens ?? 0, completionTokens: completion_tokens ?? 0 }
+    }
+
+    const choice = chunk.choices?.[0]
+    if (!choice) return []
+
+    const events: AiStreamEvent[] = []
+    const delta = choice.delta
+    if (delta) {
+      if (typeof delta.content === 'string' && delta.content.length > 0) {
+        this.text += delta.content
+        events.push({ type: 'text', text: delta.content })
+      }
+      if (delta.tool_calls) {
+        for (const tc of delta.tool_calls) {
+          const index = tc.index ?? 0
+          let acc = this.toolsByIndex.get(index)
+          if (!acc) {
+            // `||`, not `??`: an empty-string id must fall back to a generated one too.
+            acc = { id: tc.id || `tool-${nanoid()}`, name: '', arguments: '' }
+            this.toolsByIndex.set(index, acc)
+            this.order.push(index)
+          }
+          if (tc.id) acc.id = tc.id
+          if (tc.function?.name) acc.name = tc.function.name
+          if (typeof tc.function?.arguments === 'string') acc.arguments += tc.function.arguments
+        }
+      }
+    }
+
+    // The finish chunk signals all tool-call fragments are in; emit one
+    // canonical toolCall event per accumulated call (we don't stream partial
+    // arguments to the UI — see plan §11).
+    if (choice.finish_reason && this.toolsByIndex.size > 0 && !this.emitted) {
+      this.emitted = true
+      for (const index of this.order) {
+        const acc = this.toolsByIndex.get(index)!
+        events.push({
+          type: 'toolCall',
+          toolCallId: acc.id,
+          toolName: acc.name || 'tool',
+          input: parseToolArguments(acc.arguments),
+          status: 'pending',
+        })
+      }
+    }
+
+    return events
+  }
+
+  /** Fold everything accumulated so far into the loop's turn result. */
+  finish(): TurnResult<ChatTurn> {
+    const toolCalls: TurnToolCall[] = []
+    const chatToolCalls: ChatToolCall[] = []
+    for (const index of this.order) {
+      const acc = this.toolsByIndex.get(index)!
+      const name = acc.name || 'tool'
+      toolCalls.push({ id: acc.id, name, input: parseToolArguments(acc.arguments) })
+      // The provider gets the raw argument text back; `{}` stands in when none was streamed.
+      chatToolCalls.push({ id: acc.id, type: 'function', function: { name, arguments: acc.arguments || '{}' } })
+    }
+
+    const assistant: ChatMessage =
+      chatToolCalls.length > 0
+        ? { role: 'assistant', content: this.text, tool_calls: chatToolCalls }
+        : { role: 'assistant', content: this.text }
+
+    return {
+      stop: toolCalls.length === 0,
+      toolCalls,
+      assistantMessage: this.text || chatToolCalls.length > 0 ? [assistant] : null,
+      usage: this.usage,
+    }
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Generalized adapter factory
+// ---------------------------------------------------------------------------
+
+export function makeChatCompletionsAdapter(opts: {
+  baseUrl: string
+  apiKey: string | null
+  label: string
+}): ProviderAdapter<ChatTurn> {
+  const { baseUrl, apiKey, label } = opts
+  return {
+    label,
+    // The route is appended to the base URL after its trailing slashes are trimmed.
+    endpoint: `${trimSlash(baseUrl)}/v1/chat/completions`,
+    buildHeaders() {
+      // The bearer header is sent only when an API key was supplied.
+      const bearer: Record<string, string> = apiKey ? { Authorization: `Bearer ${apiKey}` } : {}
+      return { 'content-type': 'application/json', ...bearer }
+    },
+    mapHistory: (req) => mapChatHistory(req.systemPrompt, req.messages),
+    buildRequestBody(messages, req) {
+      // Tool declarations are attached only when the request offers tools.
+      const tools = req.tools.map((tool) => ({
+        type: 'function',
+        function: { name: tool.name, description: tool.description, parameters: tool.inputSchema },
+      }))
+      const body: Record<string, unknown> = {
+        model: req.modelId,
+        // Turns are message arrays; the request carries them as one flat list.
+        messages: messages.flat(),
+        stream: true,
+        stream_options: { include_usage: true },
+        ...(tools.length > 0 ? { tools } : {}),
+      }
+      return body
+    },
+    buildToolResultMessage(results: TurnToolResult[]): ChatTurn {
+      // One `role:'tool'` message per result, in the order received.
+      const turn: ChatTurn = []
+      for (const { id, output } of results) {
+        turn.push({ role: 'tool', tool_call_id: id, content: toolOutputToString(output) })
+      }
+      return turn
+    },
+    // Each call returns a new translator instance.
+    createTurnTranslator: () => new ChatCompletionsTurnTranslator(),
+  }
+}
diff --git a/server/ai/drivers/ollama.ts b/server/ai/drivers/ollama.ts
index 8c45c9b76..3e4767020 100644
--- a/server/ai/drivers/ollama.ts
+++ b/server/ai/drivers/ollama.ts
@@ -1,11 +1,10 @@
 /**
  * Ollama driver — direct HTTP against an OpenAI-compatible local endpoint.
  *
- * Ollama speaks the OpenAI **chat/completions** wire protocol (NOT the
- * Responses protocol the OpenAI/OpenRouter drivers use), so it carries its own
- * message mapping + SSE translation here; the shared `http/` layer still owns
- * SSE framing, the multi-turn tool loop, tool execution, and error
- * classification.
+ * Ollama speaks the OpenAI **chat/completions** wire protocol; the shared
+ * `http/chatCompletions.ts` module owns the message mapping + SSE translation.
+ * This file owns only Ollama-specific concerns: credential validation, live
+ * model catalogue (`/api/tags`), and fallback models.
  *
  * Auth: `baseUrl` mode. The endpoint is the credential's `baseUrl`; an optional
  * stored API key is sent as a bearer (some Ollama deployments sit behind a
@@ -16,15 +15,11 @@
  *   - listModels(): GET `${baseUrl}/api/tags` (native Ollama catalogue).
  */
 
-import { Type, parseValue, type Static } from '@core/utils/typeboxHelpers'
+import { Type, parseValue } from '@core/utils/typeboxHelpers'
 import {
-  SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
   type AiAuthMode,
-  type AiContentBlock,
-  type AiMessage,
   type AiProviderId,
   type AiStreamEvent,
-  type AiToolOutput,
 } from '../runtime/types'
 import type {
   AiProvider,
@@ -32,18 +27,8 @@ import type {
   AiResolvedCredential,
   AiStreamRequest,
 } from './types'
-import {
-  runToolLoop,
-  type ProviderAdapter,
-  type TurnResult,
-  type TurnToolCall,
-  type TurnToolResult,
-  type TurnTranslator,
-  type TurnUsage,
-} from './http/toolLoop'
-import type { SseFrame } from './http/sse'
-import { parseToolArguments } from './http/toolArgs'
-import { nanoid } from 'nanoid'
+import { runToolLoop } from './http/toolLoop'
+import { makeChatCompletionsAdapter, trimSlash } from './http/chatCompletions'
 
 const SUPPORTED_AUTH_MODES: AiAuthMode[] = ['baseUrl']
 
@@ -104,7 +89,14 @@ export const ollamaDriver: AiProvider = {
       }
       return
     }
-    yield* runToolLoop(makeOllamaAdapter(req.credentials.baseUrl, req.credentials.apiKey), req)
+    yield* runToolLoop(
+      makeChatCompletionsAdapter({
+        baseUrl: req.credentials.baseUrl,
+        apiKey: req.credentials.apiKey,
+        label: 'Ollama',
+      }),
+      req,
+    )
   },
 }
 
@@ -140,332 +132,3 @@ async function fetchOllamaModels(baseUrl: string): Promise<AiProviderModel[]> {
     return FALLBACK_MODELS
   }
 }
-
-// ---------------------------------------------------------------------------
-// Provider-native chat/completions message shapes (request side)
-// ---------------------------------------------------------------------------
-
-type ChatContentPart =
-  | { type: 'text'; text: string }
-  | { type: 'image_url'; image_url: { url: string } }
-
-interface ChatToolCall {
-  id: string
-  type: 'function'
-  function: { name: string; arguments: string }
-}
-
-export type ChatMessage =
-  | { role: 'system'; content: string }
-  | { role: 'user'; content: string | ChatContentPart[] }
-  | { role: 'assistant'; content: string; tool_calls?: ChatToolCall[] }
-  | { role: 'tool'; tool_call_id: string; content: string }
-
-// Each canonical `AiMessage` maps to one or more chat messages (an assistant
-// turn carries text + tool_calls in one message, but several tool results fan
-// out into several `role:'tool'` messages), so the loop's `TMessage` is a
-// message *array* and the request body flattens before sending.
-type ChatTurn = ChatMessage[]
-
-// ---------------------------------------------------------------------------
-// AiMessage[] → chat/completions messages[]
-// ---------------------------------------------------------------------------
-
-/**
- * Map the canonical log into chat/completions messages. The system prompt is
- * prepended as a `role:'system'` message (chat/completions has no separate
- * `instructions` field). Assistant `toolCall` blocks ride on the assistant
- * message as `tool_calls`; `role:'tool'` results become `role:'tool'` messages
- * paired by `tool_call_id`.
- */
-export function mapChatHistory(systemPrompt: string[], messages: AiMessage[]): ChatTurn[] {
-  const out: ChatTurn[] = []
-  const system = joinSystemPrompt(systemPrompt)
-  if (system) out.push([{ role: 'system', content: system }])
-
-  for (const msg of messages) {
-    if (msg.role === 'user') {
-      out.push([{ role: 'user', content: userContent(msg.content) }])
-    } else if (msg.role === 'assistant') {
-      out.push([assistantMessage(msg.content)])
-    } else if (msg.role === 'tool') {
-      out.push([{ role: 'tool', tool_call_id: msg.toolCallId, content: toolOutputToString(msg.output) }])
-    }
-    // role:'system' from the log is ignored — system is the prepended block.
-  }
-  return out
-}
-
-function joinSystemPrompt(systemPrompt: string[]): string {
-  return systemPrompt.filter((s) => s !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY).join('\n\n')
-}
-
-function userContent(blocks: AiContentBlock[]): string | ChatContentPart[] {
-  const hasImage = blocks.some((b) => b.kind === 'image')
-  if (!hasImage) {
-    return blocks
-      .map((b) => (b.kind === 'text' ? b.text : ''))
-      .filter((s) => s.length > 0)
-      .join(' ')
-  }
-  const parts: ChatContentPart[] = []
-  for (const block of blocks) {
-    if (block.kind === 'text') parts.push({ type: 'text', text: block.text })
-    else if (block.kind === 'image') {
-      // Base64 data URL — the OpenAI-compatible image_url part.
-      parts.push({ type: 'image_url', image_url: { url: `data:${block.mimeType};base64,${block.data}` } })
-    }
-  }
-  return parts
-}
-
-function assistantMessage(blocks: AiContentBlock[]): ChatMessage {
-  let text = ''
-  const toolCalls: ChatToolCall[] = []
-  for (const block of blocks) {
-    if (block.kind === 'text') text += block.text
-    else if (block.kind === 'toolCall') {
-      toolCalls.push({
-        id: block.toolCallId,
-        type: 'function',
-        function: { name: block.toolName, arguments: JSON.stringify(block.input ?? {}) },
-      })
-    }
-  }
-  return toolCalls.length > 0
-    ? { role: 'assistant', content: text, tool_calls: toolCalls }
-    : { role: 'assistant', content: text }
-}
-
-function toolOutputToString(output: AiToolOutput): string {
-  if (!output.ok) return output.error ?? 'Tool call failed.'
-  const text = JSON.stringify(output.data ?? { ok: true })
-  // The OpenAI-compatible `role:'tool'` message is text-only — an image can't
-  // ride in a tool result here. Drop it with a note so the model knows visual
-  // evidence exists but wasn't delivered through this channel.
-  if (output.images && output.images.length > 0) {
-    return `${text}\n\n[${output.images.length} screenshot(s) omitted: this provider delivers tool results as text only.]`
-  }
-  return text
-}
-
-// ---------------------------------------------------------------------------
-// Adapter
-// ---------------------------------------------------------------------------
-
-function makeOllamaAdapter(baseUrl: string, apiKey: string | null): ProviderAdapter<ChatTurn> {
-  return {
-    label: 'Ollama',
-    endpoint: `${trimSlash(baseUrl)}/v1/chat/completions`,
-
-    buildHeaders() {
-      const headers: Record<string, string> = { 'content-type': 'application/json' }
-      // Optional bearer — some Ollama deployments sit behind an auth proxy.
-      if (apiKey) headers.Authorization = `Bearer ${apiKey}`
-      return headers
-    },
-
-    mapHistory(req) {
-      return mapChatHistory(req.systemPrompt, req.messages)
-    },
-
-    buildRequestBody(messages, req) {
-      const body: Record<string, unknown> = {
-        model: req.modelId,
-        messages: messages.flat(),
-        stream: true,
-        // Ollama emits a final usage-only chunk when asked.
-        stream_options: { include_usage: true },
-      }
-      if (req.tools.length > 0) {
-        body.tools = req.tools.map((t) => ({
-          type: 'function',
-          function: {
-            name: t.name,
-            description: t.description,
-            // TypeBox schema IS JSON Schema — pass it straight through.
-            parameters: t.inputSchema,
-          },
-        }))
-      }
-      return body
-    },
-
-    buildToolResultMessage(results: TurnToolResult[]): ChatTurn {
-      return results.map((r) => ({
-        role: 'tool' as const,
-        tool_call_id: r.id,
-        content: toolOutputToString(r.output),
-      }))
-    },
-
-    createTurnTranslator() {
-      return new ChatCompletionsTurnTranslator()
-    },
-  }
-}
-
-// ---------------------------------------------------------------------------
-// SSE event schema (boundary validation — no `as` on parsed JSON)
-// ---------------------------------------------------------------------------
-
-const ChatToolCallDeltaSchema = Type.Object(
-  {
-    index: Type.Optional(Type.Number()),
-    id: Type.Optional(Type.String()),
-    function: Type.Optional(
-      Type.Object(
-        { name: Type.Optional(Type.String()), arguments: Type.Optional(Type.String()) },
-        { additionalProperties: true },
-      ),
-    ),
-  },
-  { additionalProperties: true },
-)
-
-const ChatChunkSchema = Type.Object(
-  {
-    choices: Type.Optional(
-      Type.Array(
-        Type.Object(
-          {
-            delta: Type.Optional(
-              Type.Object(
-                {
-                  content: Type.Optional(Type.Union([Type.String(), Type.Null()])),
-                  tool_calls: Type.Optional(Type.Array(ChatToolCallDeltaSchema)),
-                },
-                { additionalProperties: true },
-              ),
-            ),
-            finish_reason: Type.Optional(Type.Union([Type.String(), Type.Null()])),
-          },
-          { additionalProperties: true },
-        ),
-      ),
-    ),
-    usage: Type.Optional(
-      Type.Object(
-        { prompt_tokens: Type.Optional(Type.Number()), completion_tokens: Type.Optional(Type.Number()) },
-        { additionalProperties: true },
-      ),
-    ),
-  },
-  { additionalProperties: true },
-)
-
-// ---------------------------------------------------------------------------
-// SSE translator — one per API call in the loop
-// ---------------------------------------------------------------------------
-
-interface MutableToolCall {
-  id: string
-  name: string
-  arguments: string
-}
-
-export class ChatCompletionsTurnTranslator implements TurnTranslator<ChatTurn> {
-  private text = ''
-  // Tool calls accumulate by their streamed `index`; fragments arrive across
-  // chunks (id + name on the first, arguments piecemeal after).
-  private readonly toolsByIndex = new Map<number, MutableToolCall>()
-  private readonly order: number[] = []
-  private emitted = false
-  private usage: TurnUsage | null = null
-
-  translate(frame: SseFrame): AiStreamEvent[] {
-    let chunk: Static<typeof ChatChunkSchema>
-    try {
-      chunk = parseValue(ChatChunkSchema, JSON.parse(frame.data))
-    } catch {
-      // Keep-alive / unparseable frame — not fatal.
-      return []
-    }
-
-    if (chunk.usage) {
-      this.usage = {
-        promptTokens: chunk.usage.prompt_tokens ?? 0,
-        completionTokens: chunk.usage.completion_tokens ?? 0,
-      }
-    }
-
-    const choice = chunk.choices?.[0]
-    if (!choice) return []
-
-    const events: AiStreamEvent[] = []
-    const delta = choice.delta
-    if (delta) {
-      if (typeof delta.content === 'string' && delta.content.length > 0) {
-        this.text += delta.content
-        events.push({ type: 'text', text: delta.content })
-      }
-      if (delta.tool_calls) {
-        for (const tc of delta.tool_calls) {
-          const index = tc.index ?? 0
-          let acc = this.toolsByIndex.get(index)
-          if (!acc) {
-            acc = { id: tc.id ?? `tool-${nanoid()}`, name: '', arguments: '' }
-            this.toolsByIndex.set(index, acc)
-            this.order.push(index)
-          }
-          if (tc.id) acc.id = tc.id
-          if (tc.function?.name) acc.name = tc.function.name
-          if (typeof tc.function?.arguments === 'string') acc.arguments += tc.function.arguments
-        }
-      }
-    }
-
-    // The finish chunk signals all tool-call fragments are in; emit one
-    // canonical toolCall event per accumulated call (we don't stream partial
-    // arguments to the UI — see plan §11).
-    if (choice.finish_reason && this.toolsByIndex.size > 0 && !this.emitted) {
-      this.emitted = true
-      for (const index of this.order) {
-        const acc = this.toolsByIndex.get(index)!
-        events.push({
-          type: 'toolCall',
-          toolCallId: acc.id,
-          toolName: acc.name || 'tool',
-          input: parseToolArguments(acc.arguments),
-          status: 'pending',
-        })
-      }
-    }
-
-    return events
-  }
-
-  finish(): TurnResult<ChatTurn> {
-    const toolCalls: TurnToolCall[] = []
-    const chatToolCalls: ChatToolCall[] = []
-    for (const index of this.order) {
-      const acc = this.toolsByIndex.get(index)!
-      toolCalls.push({ id: acc.id, name: acc.name || 'tool', input: parseToolArguments(acc.arguments) })
-      chatToolCalls.push({
-        id: acc.id,
-        type: 'function',
-        function: { name: acc.name || 'tool', arguments: acc.arguments || '{}' },
-      })
-    }
-
-    const assistant: ChatMessage =
-      chatToolCalls.length > 0
-        ? { role: 'assistant', content: this.text, tool_calls: chatToolCalls }
-        : { role: 'assistant', content: this.text }
-
-    return {
-      stop: toolCalls.length === 0,
-      toolCalls,
-      assistantMessage: this.text || chatToolCalls.length > 0 ? [assistant] : null,
-      usage: this.usage,
-    }
-  }
-}
-
-// ---------------------------------------------------------------------------
-// Helpers
-// ---------------------------------------------------------------------------
-
-function trimSlash(url: string): string {
-  return url.replace(/\/+$/, '')
-}
diff --git a/src/__tests__/architecture/ai-driver-shared-helpers.test.ts b/src/__tests__/architecture/ai-driver-shared-helpers.test.ts
index 474572525..e13628a59 100644
--- a/src/__tests__/architecture/ai-driver-shared-helpers.test.ts
+++ b/src/__tests__/architecture/ai-driver-shared-helpers.test.ts
@@ -30,7 +30,7 @@ const read = (rel: string) => readFileSync(join(REPO_ROOT, rel), 'utf8')
 const DRIVER_FILES = [
   'server/ai/drivers/anthropic.ts',
   'server/ai/drivers/responses-shared.ts',
-  'server/ai/drivers/ollama.ts',
+  'server/ai/drivers/http/chatCompletions.ts',
 ]
 
 describe('ai-driver-shared-helpers gate', () => {
@@ -42,7 +42,7 @@ describe('ai-driver-shared-helpers gate', () => {
   it('every driver imports parseToolArguments from the shared module', () => {
     for (const file of DRIVER_FILES) {
       const src = read(file)
-      expect(src).toContain("from './http/toolArgs'")
+      expect(src).toMatch(/from\s+'(\.\/|\.\.\/)+(http\/)?toolArgs'/)
       // No private copy of the parser may shadow the shared one.
       expect(src).not.toMatch(/function\s+parseJsonOrEmpty\b/)
       expect(src).not.toMatch(/function\s+parseToolArguments\b/)
@@ -58,6 +58,7 @@ describe('ai-driver-shared-helpers gate', () => {
   it('SYSTEM_PROMPT_DYNAMIC_BOUNDARY is declared exactly once', () => {
     const SCAN = [
       ...DRIVER_FILES,
+      'server/ai/drivers/ollama.ts',
       'server/ai/drivers/types.ts',
       'server/ai/runtime/types.ts',
       'server/ai/tools/site/systemPrompt.ts',

From 19d02f55925d6e20f967dd837cf42ec0e7942f66 Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 18:11:49 -0700
Subject: [PATCH 2/8] feat(ai): add openai-compatible provider driver (custom
 base URL)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 server/ai/drivers/index.ts                 |   2 +
 server/ai/drivers/openaiCompatible.test.ts |  39 ++++++++
 server/ai/drivers/openaiCompatible.ts      | 106 +++++++++++++++++++++
 server/ai/handlers/credentials.ts          |   1 +
 server/ai/handlers/models.ts               |   7 +-
 server/ai/runtime/types.ts                 |   2 +-
 6 files changed, 154 insertions(+), 3 deletions(-)
 create mode 100644 server/ai/drivers/openaiCompatible.test.ts
 create mode 100644 server/ai/drivers/openaiCompatible.ts

diff --git a/server/ai/drivers/index.ts b/server/ai/drivers/index.ts
index fc2097e61..ae02458cb 100644
--- a/server/ai/drivers/index.ts
+++ b/server/ai/drivers/index.ts
@@ -12,12 +12,14 @@ import { anthropicDriver } from './anthropic'
 import { openaiDriver } from './openai'
 import { ollamaDriver } from './ollama'
 import { openrouterDriver } from './openrouter'
+import { openaiCompatibleDriver } from './openaiCompatible'
 
 const DRIVERS: Record<AiProviderId, AiProvider> = {
   anthropic: anthropicDriver,
   openai: openaiDriver,
   ollama: ollamaDriver,
   openrouter: openrouterDriver,
+  'openai-compatible': openaiCompatibleDriver,
 }
 
 /** Returns the driver for a provider id, or throws if unknown. */
diff --git a/server/ai/drivers/openaiCompatible.test.ts b/server/ai/drivers/openaiCompatible.test.ts
new file mode 100644
index 000000000..1bc0408c6
--- /dev/null
+++ b/server/ai/drivers/openaiCompatible.test.ts
@@ -0,0 +1,39 @@
+import { describe, it, expect, afterEach } from 'bun:test'
+import { openaiCompatibleDriver } from './openaiCompatible'
+import type { AiResolvedCredential } from './types'
+
+const realFetch = globalThis.fetch
+afterEach(() => { globalThis.fetch = realFetch })
+
+function creds(baseUrl: string | null): AiResolvedCredential {
+  return { id: 'c1', providerId: 'openai-compatible', authMode: 'baseUrl', apiKey: 'k', baseUrl }
+}
+
+describe('openai-compatible driver', () => {
+  it('reports baseUrl as its only auth mode', () => {
+    expect(openaiCompatibleDriver.supportedAuthModes).toEqual(['baseUrl'])
+  })
+
+  it('listModels maps /v1/models data[].id into picker models', async () => {
+    globalThis.fetch = (async (url: string) => {
+      expect(String(url)).toBe('https://api.groq.com/openai/v1/models')
+      return new Response(JSON.stringify({ data: [{ id: 'llama-3.3-70b' }, { id: 'mixtral-8x7b' }] }), { status: 200 })
+    }) as unknown as typeof fetch
+    const models = await openaiCompatibleDriver.listModels(creds('https://api.groq.com/openai'))
+    expect(models.map((m) => m.id)).toEqual(['llama-3.3-70b', 'mixtral-8x7b'])
+    expect(models[0]).toMatchObject({ label: 'llama-3.3-70b', catalogueSource: 'live' })
+  })
+
+  it('listModels returns [] when the endpoint is unreachable or non-OK', async () => {
+    globalThis.fetch = (async () => new Response('nope', { status: 500 })) as unknown as typeof fetch
+    expect(await openaiCompatibleDriver.listModels(creds('https://x/v1'))).toEqual([])
+  })
+
+  it('listModels returns [] with no base URL', async () => {
+    expect(await openaiCompatibleDriver.listModels(creds(null))).toEqual([])
+  })
+
+  it('capabilities default to tool-calling + streaming', () => {
+    expect(openaiCompatibleDriver.capabilities('anything')).toMatchObject({ toolCalling: true, streaming: true })
+  })
+})
diff --git a/server/ai/drivers/openaiCompatible.ts b/server/ai/drivers/openaiCompatible.ts
new file mode 100644
index 000000000..da29c0387
--- /dev/null
+++ b/server/ai/drivers/openaiCompatible.ts
@@ -0,0 +1,106 @@
+/**
+ * OpenAI-Compatible driver — direct HTTP against any endpoint that speaks the
+ * OpenAI `/v1/chat/completions` wire protocol (Groq, Together, DeepSeek,
+ * Mistral, Fireworks, self-hosted vLLM / LM Studio, …).
+ *
+ * Auth: `baseUrl` mode. The endpoint is the credential's `baseUrl`; an optional
+ * stored API key is sent as a bearer (hosted services need one; local servers
+ * often don't). The chat/completions machinery is shared with the Ollama driver
+ * via `http/chatCompletions.ts`; this file owns only model discovery
+ * (`GET ${baseUrl}/v1/models`) and the generic capability defaults.
+ */
+
+import { Type, parseValue } from '@core/utils/typeboxHelpers'
+import type { AiAuthMode, AiProviderId, AiStreamEvent } from '../runtime/types'
+import type {
+  AiProvider,
+  AiProviderModel,
+  AiResolvedCredential,
+  AiStreamRequest,
+} from './types'
+import { runToolLoop } from './http/toolLoop'
+import { makeChatCompletionsAdapter, trimSlash } from './http/chatCompletions'
+
+const SUPPORTED_AUTH_MODES: AiAuthMode[] = ['baseUrl']
+
+const GENERIC_CAPABILITIES = {
+  toolCalling: true,
+  visionInput: false,
+  promptCache: false,
+  streaming: true,
+} as const
+
+export const openaiCompatibleDriver: AiProvider = {
+  id: 'openai-compatible' as AiProviderId,
+  label: 'OpenAI-Compatible',
+  supportedAuthModes: SUPPORTED_AUTH_MODES,
+
+  capabilities(_modelId: string) {
+    // Arbitrary endpoints report no per-model capability flags. Tool-calling
+    // must default true — the site/content agents require it; picking a model
+    // that lacks it is the operator's choice.
+    return { ...GENERIC_CAPABILITIES }
+  },
+
+  async listModels(creds: AiResolvedCredential) {
+    if (creds.authMode !== 'baseUrl' || !creds.baseUrl) return []
+    return fetchOpenAiCompatibleModels(creds.baseUrl, creds.apiKey)
+  },
+
+  async *stream(req: AiStreamRequest): AsyncIterable<AiStreamEvent> {
+    if (req.credentials.authMode !== 'baseUrl' || !req.credentials.baseUrl) {
+      yield {
+        type: 'error',
+        message:
+          'This provider requires a base URL. Add a base-URL credential in /admin/ai/providers and pick it for the site default.',
+      }
+      return
+    }
+    yield* runToolLoop(
+      makeChatCompletionsAdapter({
+        baseUrl: req.credentials.baseUrl,
+        apiKey: req.credentials.apiKey,
+        label: 'OpenAI-Compatible',
+      }),
+      req,
+    )
+  },
+}
+
+// ---------------------------------------------------------------------------
+// Live model catalogue — GET /v1/models (standard OpenAI list shape)
+// ---------------------------------------------------------------------------
+
+const ModelsResponseSchema = Type.Object(
+  { data: Type.Array(Type.Object({ id: Type.String() }, { additionalProperties: true })) },
+  { additionalProperties: true },
+)
+
+/**
+ * Fetch the model catalogue from `GET ${baseUrl}/v1/models`. Unlike the OpenAI
+ * driver we do NOT filter by family or derive tiers — the endpoint is arbitrary,
+ * so the id is the label and capabilities are the generic defaults. Any failure
+ * (offline, non-OK, unparseable) returns [] so the picker stays empty and the
+ * credential Test button surfaces the underlying error.
+ */
+async function fetchOpenAiCompatibleModels(
+  baseUrl: string,
+  apiKey: string | null,
+): Promise<AiProviderModel[]> {
+  try {
+    const headers: Record<string, string> = {}
+    if (apiKey) headers.Authorization = `Bearer ${apiKey}`
+    const res = await fetch(`${trimSlash(baseUrl)}/v1/models`, { headers })
+    if (!res.ok) return []
+    const parsed = parseValue(ModelsResponseSchema, await res.json())
+    return parsed.data.map((m) => ({
+      id: m.id,
+      label: m.id,
+      catalogueSource: 'live' as const,
+      capabilities: { ...GENERIC_CAPABILITIES },
+    }))
+  } catch (err) {
+    console.error('[ai/openai-compatible] models request failed:', err)
+    return []
+  }
+}
diff --git a/server/ai/handlers/credentials.ts b/server/ai/handlers/credentials.ts
index d105541d0..faf500eff 100644
--- a/server/ai/handlers/credentials.ts
+++ b/server/ai/handlers/credentials.ts
@@ -35,6 +35,7 @@ const ProviderId = Type.Union([
   Type.Literal('openai'),
   Type.Literal('ollama'),
   Type.Literal('openrouter'),
+  Type.Literal('openai-compatible'),
 ])
 
 const CreateBodySchema = Type.Union([
diff --git a/server/ai/handlers/models.ts b/server/ai/handlers/models.ts
index 11384e100..f94915424 100644
--- a/server/ai/handlers/models.ts
+++ b/server/ai/handlers/models.ts
@@ -19,7 +19,7 @@ import { getModelCatalogue, pricingKey } from '../pricing'
 import type { AiProviderModel } from '../drivers/types'
 import type { AiProviderId } from '../runtime/types'
 
-const VALID_PROVIDERS: AiProviderId[] = ['anthropic', 'openai', 'ollama', 'openrouter']
+const VALID_PROVIDERS: AiProviderId[] = ['anthropic', 'openai', 'ollama', 'openrouter', 'openai-compatible']
 
 export function tryHandleAiModels(
   req: Request,
@@ -75,7 +75,10 @@ async function handleModels(
     resolved = {
       id: '',
       providerId,
-      authMode: providerId === 'ollama' ? ('baseUrl' as const) : ('apiKey' as const),
+      authMode:
+        providerId === 'ollama' || providerId === 'openai-compatible'
+          ? ('baseUrl' as const)
+          : ('apiKey' as const),
       apiKey: null,
       baseUrl: null,
     }
diff --git a/server/ai/runtime/types.ts b/server/ai/runtime/types.ts
index 29f90acca..f8aca9ebd 100644
--- a/server/ai/runtime/types.ts
+++ b/server/ai/runtime/types.ts
@@ -24,7 +24,7 @@ export type { AiContentBlock, AiToolImage, AiToolOutput } from '@core/ai'
 // Provider identity + auth modes
 // ---------------------------------------------------------------------------
 
-export type AiProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter'
+export type AiProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible'
 /**
  * Credential auth modes.
  *

From 533c2935da9fcfbaa6a81f7d0b456d49edc2def6 Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 18:24:58 -0700
Subject: [PATCH 3/8] feat(ai): expose openai-compatible provider in the admin
 providers UI

---
 src/admin/ai/api.ts                      |  7 ++++---
 src/admin/pages/ai/tabs/ProvidersTab.tsx | 15 +++++++++++----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/admin/ai/api.ts b/src/admin/ai/api.ts
index 13b77655f..3fec25640 100644
--- a/src/admin/ai/api.ts
+++ b/src/admin/ai/api.ts
@@ -27,6 +27,7 @@ const ProviderId = Type.Union([
   Type.Literal('openai'),
   Type.Literal('ollama'),
   Type.Literal('openrouter'),
+  Type.Literal('openai-compatible'),
 ])
 
 const AuthMode = Type.Union([
@@ -170,13 +171,13 @@ export async function listCredentials(signal?: AbortSignal): Promise<CredentialV
 
 export type CreateCredentialBody =
   | {
-      providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter'
+      providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible'
       authMode: 'apiKey'
       displayLabel: string
       apiKey: string
     }
   | {
-      providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter'
+      providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible'
       authMode: 'baseUrl'
       displayLabel: string
       baseUrl: string
@@ -224,7 +225,7 @@ export async function testCredential(id: string): Promise<TestResult> {
 // ---------------------------------------------------------------------------
 
 export async function listModels(
-  providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter',
+  providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible',
   credentialId?: string,
 ): Promise<AiModel[]> {
   const body = await apiRequest(`/admin/api/ai/providers/${providerId}/models`, {
diff --git a/src/admin/pages/ai/tabs/ProvidersTab.tsx b/src/admin/pages/ai/tabs/ProvidersTab.tsx
index fa15b3eb8..e980632ce 100644
--- a/src/admin/pages/ai/tabs/ProvidersTab.tsx
+++ b/src/admin/pages/ai/tabs/ProvidersTab.tsx
@@ -27,7 +27,7 @@ import { ApiError } from '@core/http'
 import styles from '../AiPage.module.css'
 import { getErrorMessage } from '@core/utils/errorMessage'
 
-type ProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter'
+type ProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible'
 type AuthMode = 'apiKey' | 'baseUrl'
 
 // Each provider has exactly one credential shape; the UI derives it instead
@@ -37,6 +37,7 @@ const PROVIDERS: Array<{ id: ProviderId; label: string; authMode: AuthMode }> =
   { id: 'openai', label: 'OpenAI', authMode: 'apiKey' },
   { id: 'openrouter', label: 'OpenRouter', authMode: 'apiKey' },
   { id: 'ollama', label: 'Ollama (local)', authMode: 'baseUrl' },
+  { id: 'openai-compatible', label: 'OpenAI-Compatible', authMode: 'baseUrl' },
 ]
 
 const AUTH_MODE_LABEL: Record<AuthMode, string> = {
@@ -49,12 +50,14 @@ const PROVIDER_LABEL: Record<ProviderId, string> = {
   openai: 'OpenAI',
   openrouter: 'OpenRouter',
   ollama: 'Ollama',
+  'openai-compatible': 'OpenAI-Compatible',
 }
 
 // Hint text for the API-key field, per provider key prefix.
 const API_KEY_PLACEHOLDER: Partial<Record<ProviderId, string>> = {
   anthropic: 'sk-ant-...',
   openrouter: 'sk-or-...',
+  'openai-compatible': 'sk-... (optional)',
 }
 
 async function deleteCredentialAction(
@@ -278,12 +281,14 @@ function AddCredentialDialog({
   const [providerId, setProviderId] = useState<ProviderId>('anthropic')
   const [displayLabel, setDisplayLabel] = useState('')
   const [apiKey, setApiKey] = useState('')
-  const [baseUrl, setBaseUrl] = useState('http://localhost:11434')
+  const [baseUrl, setBaseUrl] = useState('')
   const [busy, setBusy] = useState(false)
   const [error, setError] = useState<string | null>(null)
 
   const providerSpec = PROVIDERS.find((p) => p.id === providerId)!
   const effectiveAuthMode = providerSpec.authMode
+  const baseUrlPlaceholder =
+    providerId === 'ollama' ? 'http://localhost:11434' : 'https://api.your-provider.com/v1'
 
   async function handleSubmit(e: React.FormEvent) {
     e.preventDefault()
@@ -360,12 +365,14 @@ function AddCredentialDialog({
                 id={baseUrlInputId}
                 value={baseUrl}
                 onChange={(e) => setBaseUrl(e.currentTarget.value)}
-                placeholder="http://localhost:11434"
+                placeholder={baseUrlPlaceholder}
                 required
               />
             </div>
             <div className={styles.dialogField}>
-              <label htmlFor={apiKeyInputId} className={styles.dialogFieldLabel}>Bearer token (optional)</label>
+              <label htmlFor={apiKeyInputId} className={styles.dialogFieldLabel}>
+                {providerId === 'ollama' ? 'Bearer token (optional)' : 'API key (optional)'}
+              </label>
               <Input
                 id={apiKeyInputId}
                 type="password"

From c57ee7431339f4e5dc017ff03e8c959599709e6e Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 18:32:03 -0700
Subject: [PATCH 4/8] docs(ai): document the openai-compatible provider

---
 docs/features/agent.md | 44 +++++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/docs/features/agent.md b/docs/features/agent.md
index 9b1c3c93c..1fbaa3a36 100644
--- a/docs/features/agent.md
+++ b/docs/features/agent.md
@@ -2,7 +2,7 @@
 
 The AI Agent is a model-powered assistant integrated into the visual editor. The user types a request in the Agent Panel; the agent reads the current page snapshot, plans a sequence of edits, and executes them by calling tools. Structure is written as semantic HTML (`insertHtml` / `replaceNodeHtml`); styling is written as CSS — a `<style>` block and/or `class=` attributes inside the insert, or the dedicated `applyCss` tool for authoring/editing any CSS on its own. There is one CSS path and it accepts every selector; `assignClass` / `removeClass` attach existing classes to nodes.
 
-The agent runs on a provider-agnostic AI runtime (`server/ai/`) that can drive any supported model (Anthropic Claude, OpenAI, OpenRouter, Ollama). Every driver talks directly to its provider's REST API over HTTP/SSE — no provider SDKs. All four share one multi-turn tool loop (`drivers/http/toolLoop.ts`); each supplies only a small `ProviderAdapter` of pure mapping functions. The plain `@anthropic-ai/sdk` (and any provider SDK) is banned repo-wide. Gated by `ai-driver-isolation.test.ts`.
+The agent runs on a provider-agnostic AI runtime (`server/ai/`) that can drive any supported model (Anthropic Claude, OpenAI, OpenRouter, Ollama, or any OpenAI-compatible endpoint). Every driver talks directly to its provider's REST API over HTTP/SSE — no provider SDKs. All drivers share one multi-turn tool loop (`drivers/http/toolLoop.ts`); each supplies only a small `ProviderAdapter` of pure mapping functions. The plain `@anthropic-ai/sdk` (and any provider SDK) is banned repo-wide. Gated by `ai-driver-isolation.test.ts`.
 
 ---
 
@@ -12,7 +12,7 @@ The agent runs on a provider-agnostic AI runtime (`server/ai/`) that can drive a
 - **Styling via CSS.** The agent emits CSS the same way a human pastes it: a `<style>` block and/or `class=` attributes inside the `insertHtml`/`replaceNodeHtml` payload, or the standalone `applyCss` tool. The importer (`cssToStyleRules`) classifies every selector — a bare `.foo {}` rule becomes a reusable Selectors-panel class bound to `class="foo"`; any other selector (`.hero a`, `a:hover`, `nav > li`) becomes an ambient rule; `style=` attributes land on the node's inline styles. There is no structured `classes` parameter — the agent never hand-builds classes node-by-node at insert time. `applyCss` is the single tool for authoring/editing CSS on its own; it **upserts**, so re-applying a selector edits the existing rule (the way descendant/pseudo rules get restyled).
 - **35 tools total.** 6 server-side catalog read tools (resolved server-side from the posted snapshot / DB) + 29 browser-bridged tools.
 - **Two-endpoint bridge.** `POST /admin/api/ai/chat/site` opens an NDJSON stream. When the model calls a browser-bridged tool, the server emits `toolRequest`; the browser executor reads or mutates the editor store and POSTs the `AiToolOutput` result to `POST /admin/api/ai/tool-result`.
-- **Provider-agnostic.** The runtime selects a driver (Anthropic, OpenAI, OpenRouter, Ollama) from the conversation's configured credential.
+- **Provider-agnostic.** The runtime selects a driver (Anthropic, OpenAI, OpenRouter, Ollama, OpenAI-Compatible) from the conversation's configured credential.
 - **Tool input schemas are a single source of truth** in `@core/ai` (`src/core/ai/toolSchemas.ts`). The server tool registry (`server/ai/tools/site/writeTools.ts`) and the browser executor (`executor.ts` + `tokenRunners.ts`) import the exact same schema objects — a constraint added once is enforced on both sides at build time. Gated by `ai-tool-schema-ssot.test.ts` and `ai-tools-typebox-only.test.ts`.
 - **Capabilities.** `ai.chat` required to stream; `ai.tools.write` required for write tools. Gated by `ai-handlers-capability-gated.test.ts`.
 
@@ -56,16 +56,18 @@ server/ai/
 │   └── content/            — content-workspace tools (separate scope)
 ├── drivers/
 │   ├── http/
-│   │   ├── sse.ts          — parseSseStream(res): reassemble SSE frames across chunks
-│   │   ├── execTool.ts     — executeAiTool(): server-handler vs browser-bridge dispatch; normaliseToolOutput(): wraps raw handler results in the canonical AiToolOutput envelope, validated via TypeBox (not duck-typed)
-│   │   ├── toolLoop.ts     — runToolLoop(): provider-agnostic multi-turn loop
-│   │   ├── toolArgs.ts     — parseToolArguments(json): shared tool-argument JSON parsing (one copy for all drivers)
-│   │   └── errors.ts       — isAbortError / classifyHttpError
-│   ├── responses-shared.ts — OpenAI-Responses mapping + SSE translator + adapter factory (openai + openrouter)
-│   ├── anthropic.ts        — Anthropic driver: direct POST /v1/messages (no SDK)
-│   ├── openai.ts           — OpenAI driver: direct POST /v1/responses (no SDK)
-│   ├── openrouter.ts       — OpenRouter driver: direct POST /v1/responses (shared Responses path; live /models; native cost)
-│   └── ollama.ts           — Ollama driver: direct POST /v1/chat/completions (no SDK)
+│   │   ├── sse.ts             — parseSseStream(res): reassemble SSE frames across chunks
+│   │   ├── execTool.ts        — executeAiTool(): server-handler vs browser-bridge dispatch; normaliseToolOutput(): wraps raw handler results in the canonical AiToolOutput envelope, validated via TypeBox (not duck-typed)
+│   │   ├── toolLoop.ts        — runToolLoop(): provider-agnostic multi-turn loop
+│   │   ├── toolArgs.ts        — parseToolArguments(json): shared tool-argument JSON parsing (one copy for all drivers)
+│   │   ├── chatCompletions.ts — shared /v1/chat/completions SSE adapter (makeChatCompletionsAdapter); used by ollama + openai-compatible
+│   │   └── errors.ts          — isAbortError / classifyHttpError
+│   ├── responses-shared.ts    — OpenAI-Responses mapping + SSE translator + adapter factory (openai + openrouter)
+│   ├── anthropic.ts           — Anthropic driver: direct POST /v1/messages (no SDK)
+│   ├── openai.ts              — OpenAI driver: direct POST /v1/responses (no SDK)
+│   ├── openrouter.ts          — OpenRouter driver: direct POST /v1/responses (shared Responses path; live /models; native cost)
+│   ├── ollama.ts              — Ollama driver: POST /v1/chat/completions via shared chatCompletions adapter; live /api/tags catalogue
+│   └── openaiCompatible.ts    — OpenAI-Compatible driver: any /v1/chat/completions endpoint; live GET /v1/models catalogue
 └── runtime/
     ├── runner.ts           — runChat(): drives a driver, emits stream events
     ├── persister.ts        — ConversationsPersister: messages + usage to DB; writes contextTokens snapshot
@@ -129,6 +131,22 @@ The composer area includes a `<ContextMeter>` that shows "context used / window"
 
 ---
 
+## Providers
+
+Each entry in **Settings → AI → Providers** stores one credential for one provider. Each provider has exactly one credential shape: its auth mode and input fields are derived from the provider id, so the UI never asks you to choose an auth mode.
+
+| Provider | Label in UI | Auth mode | Required field | Optional field | Model discovery |
+|---|---|---|---|---|---|
+| `anthropic` | Anthropic (Claude) | `apiKey` | API key (`sk-ant-…`) | — | Static `claude-*` catalogue enriched with OpenRouter prices + context windows |
+| `openai` | OpenAI | `apiKey` | API key (`sk-…`) | — | Static `gpt-*` / `o*` catalogue enriched with OpenRouter prices + context windows |
+| `openrouter` | OpenRouter | `apiKey` | API key (`sk-or-…`) | — | Live `GET /api/v1/models` (cross-provider; native cost reporting) |
+| `ollama` | Ollama (local) | `baseUrl` | Base URL (e.g. `http://localhost:11434`) | API key (bearer, for proxied deployments) | Live `GET {baseUrl}/api/tags`; static fallback list when unreachable |
+| `openai-compatible` | OpenAI-Compatible | `baseUrl` | Base URL — any host serving the OpenAI `/v1/chat/completions` wire protocol | API key (bearer; cloud services need one, local servers often don't) | Live `GET {baseUrl}/v1/models` (standard OpenAI list shape); model `id` used as label |
+
+**OpenAI-Compatible** is the generic adapter for any endpoint that speaks the OpenAI chat/completions wire protocol — Groq (`https://api.groq.com/openai`), Together, DeepSeek, Mistral, Fireworks, self-hosted vLLM, LM Studio, and others. Capabilities default to `{ toolCalling: true, visionInput: false, promptCache: false, streaming: true }`; the operator is responsible for selecting a model that actually supports tool calling. Because arbitrary endpoints are not in the OpenRouter catalogue, no context-window enrichment is available and the context meter stays hidden for these models.
+
+---
+
 ## Flow
 
 ```text
@@ -561,7 +579,7 @@ The `<ContextMeter>` shows how much of the active model's context window the cur
 - **Window** (`windowTokens` prop from `AgentPanel`): the model's max total tokens, resolved once from `GET /admin/api/ai/providers/:id/models?credentialId=…`. The models endpoint enriches Anthropic and OpenAI models with `contextWindow` from the live OpenRouter catalogue (`server/ai/pricing/`); OpenRouter populates it from its own native fetch. Ollama models and uncatalogued models have no window — the meter hides.
 - **Used** (`agentContextTokens` in the store): the provider-normalised "context used" — the CURRENT context size, computed by `normalizeContextTokens(providerId, buckets)` in `server/ai/contextTokens.ts`:
   - Anthropic reports `input_tokens` excluding cache buckets, so the true total is `promptTokens + cacheReadTokens + cacheCreationTokens`.
-  - OpenAI / OpenRouter / Ollama report `input_tokens` as the full input; `promptTokens` alone is the total.
+  - OpenAI / OpenRouter / Ollama / OpenAI-Compatible report `input_tokens` as the full input; `promptTokens` alone is the total.
 
 **Live, per-round, not summed.** A turn makes one provider round-trip per tool batch. The toolLoop emits a `context` event **each round** carrying THAT round's input buckets; the chat handler injects the normalised `contextTokens` and the browser updates the meter on every round — so it climbs *during* a long tool loop instead of only at the end. The meter is the LATEST round's input (the current window fill), never the sum across rounds (which would over-count, since each round re-sends the growing context). The terminal `usage` event is **billing only** — its `promptTokens` stays summed across rounds (you pay input per round). The persister keeps the latest `context` value in memory (`recordContext`) and writes it once to `ai_conversations.context_tokens` with the final `usage` (overwritten per turn), so `loadAgentConversation` restores the true context on reload.
 

From 691689e10ea66e62a6c814efa4a013ba95ae09c2 Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 19:00:08 -0700
Subject: [PATCH 5/8] fix(ai): normalize openai-compatible base URL /v1 suffix
 + doc/test polish

- Add normalizeOpenAiBaseUrl() to chatCompletions.ts that strips trailing
  slashes and an optional trailing /v1 segment, preventing the /v1/v1/
  double-append footgun when users paste provider-documented URLs.
- Use normalizeOpenAiBaseUrl in makeChatCompletionsAdapter (endpoint) and
  fetchOpenAiCompatibleModels (/v1/models fetch); drop the now-unused
  trimSlash import from openaiCompatible.ts.
- Remove redundant 'as AiProviderId' cast (M4); drop the unused import.
- Add normalizeOpenAiBaseUrl test coverage in chatCompletions.test.ts and
  a /v1-suffixed base-URL normalization case in openaiCompatible.test.ts.
- Update AiAuthMode baseUrl JSDoc to reflect Ollama + openai-compatible (M1).
- Add OpenAI-Compatible to contextTokens.ts comment for parity (M3).
- Update ProvidersTab base-URL placeholder to https://api.groq.com/openai/v1
  so the UI matches the now-correct /v1-inclusive provider-documented form.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_0115W5vEDNwsWeaeS5PyTFgG
---
 server/ai/contextTokens.ts                     |  5 +++--
 server/ai/drivers/http/chatCompletions.test.ts | 12 ++++++++++++
 server/ai/drivers/http/chatCompletions.ts      | 14 +++++++++++++-
 server/ai/drivers/openaiCompatible.test.ts     | 10 ++++++++++
 server/ai/drivers/openaiCompatible.ts          |  8 ++++----
 server/ai/runtime/types.ts                     |  3 ++-
 src/admin/pages/ai/tabs/ProvidersTab.tsx       |  2 +-
 7 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/server/ai/contextTokens.ts b/server/ai/contextTokens.ts
index 63fd36d18..e6814cf1f 100644
--- a/server/ai/contextTokens.ts
+++ b/server/ai/contextTokens.ts
@@ -5,8 +5,9 @@
  *
  *   - Anthropic reports `input_tokens` EXCLUDING the cache buckets, so the true
  *     total is prompt + cacheRead + cacheCreation.
- *   - OpenAI / OpenRouter / Ollama report `input_tokens` as the full input (any
- *     cached tokens are already a subset), so prompt alone is the total.
+ *   - OpenAI / OpenRouter / Ollama / OpenAI-Compatible report `input_tokens` as
+ *     the full input (any cached tokens are already a subset), so prompt alone
+ *     is the total.
  *
  * Two callers share this: the chat handler injects the value onto the wire
  * `usage` event for the live meter, and the persister writes it to the
diff --git a/server/ai/drivers/http/chatCompletions.test.ts b/server/ai/drivers/http/chatCompletions.test.ts
index f8c3ba40d..2635154c5 100644
--- a/server/ai/drivers/http/chatCompletions.test.ts
+++ b/server/ai/drivers/http/chatCompletions.test.ts
@@ -3,6 +3,7 @@ import {
   mapChatHistory,
   ChatCompletionsTurnTranslator,
   trimSlash,
+  normalizeOpenAiBaseUrl,
 } from './chatCompletions'
 import type { SseFrame } from './sse'
 
@@ -16,6 +17,17 @@ describe('chatCompletions shared adapter', () => {
     expect(trimSlash('http://x/v1')).toBe('http://x/v1')
   })
 
+  it('normalizeOpenAiBaseUrl strips trailing /v1 so it is not doubled when building the endpoint', () => {
+    // With /v1 suffix — should strip it so appending /v1/... is correct.
+    expect(normalizeOpenAiBaseUrl('https://api.groq.com/openai/v1')).toBe('https://api.groq.com/openai')
+    expect(normalizeOpenAiBaseUrl('https://api.groq.com/openai/v1/')).toBe('https://api.groq.com/openai')
+    // Without /v1 suffix — no-op.
+    expect(normalizeOpenAiBaseUrl('https://api.groq.com/openai')).toBe('https://api.groq.com/openai')
+    // Ollama-style URL with no path — only a trailing slash, if present, is removed.
+    expect(normalizeOpenAiBaseUrl('http://localhost:11434')).toBe('http://localhost:11434')
+    expect(normalizeOpenAiBaseUrl('http://localhost:11434/')).toBe('http://localhost:11434')
+  })
+
   it('mapChatHistory prepends the system prompt as a system message', () => {
     const turns = mapChatHistory(['be terse'], [
       { role: 'user', content: [{ kind: 'text', text: 'hi' }] },
diff --git a/server/ai/drivers/http/chatCompletions.ts b/server/ai/drivers/http/chatCompletions.ts
index c4a006cc9..c8da06759 100644
--- a/server/ai/drivers/http/chatCompletions.ts
+++ b/server/ai/drivers/http/chatCompletions.ts
@@ -143,6 +143,18 @@ export function trimSlash(url: string): string {
   return url.replace(/\/+$/, '')
 }
 
+/**
+ * Normalize an OpenAI-style base URL: strip trailing slashes and an optional
+ * trailing `/v1` segment, so both `https://x/openai` and `https://x/openai/v1`
+ * resolve to the same endpoint when `/v1/...` is appended.
+ *
+ * Ollama-style base URLs without a trailing `/v1` (e.g. `http://localhost:11434`)
+ * only have their trailing slashes removed; the rest of the URL is untouched.
+ */
+export function normalizeOpenAiBaseUrl(url: string): string {
+  return trimSlash(url).replace(/\/v1$/, '')
+}
+
 // ---------------------------------------------------------------------------
 // SSE event schema (boundary validation — no `as` on parsed JSON)
 // ---------------------------------------------------------------------------
@@ -312,7 +324,7 @@ export function makeChatCompletionsAdapter(opts: {
   const { baseUrl, apiKey, label } = opts
   return {
     label,
-    endpoint: `${trimSlash(baseUrl)}/v1/chat/completions`,
+    endpoint: `${normalizeOpenAiBaseUrl(baseUrl)}/v1/chat/completions`,
     buildHeaders() {
       const headers: Record<string, string> = { 'content-type': 'application/json' }
       if (apiKey) headers.Authorization = `Bearer ${apiKey}`
diff --git a/server/ai/drivers/openaiCompatible.test.ts b/server/ai/drivers/openaiCompatible.test.ts
index 1bc0408c6..65e67d276 100644
--- a/server/ai/drivers/openaiCompatible.test.ts
+++ b/server/ai/drivers/openaiCompatible.test.ts
@@ -24,6 +24,16 @@ describe('openai-compatible driver', () => {
     expect(models[0]).toMatchObject({ label: 'llama-3.3-70b', catalogueSource: 'live' })
   })
 
+  it('listModels normalizes a /v1-suffixed base URL (no double /v1)', async () => {
+    // A user pastes the provider-documented URL including /v1 — must not produce /v1/v1/models.
+    globalThis.fetch = (async (url: string) => {
+      expect(String(url)).toBe('https://api.groq.com/openai/v1/models')
+      return new Response(JSON.stringify({ data: [{ id: 'llama-3.3-70b' }] }), { status: 200 })
+    }) as unknown as typeof fetch
+    const models = await openaiCompatibleDriver.listModels(creds('https://api.groq.com/openai/v1'))
+    expect(models.map((m) => m.id)).toEqual(['llama-3.3-70b'])
+  })
+
   it('listModels returns [] when the endpoint is unreachable or non-OK', async () => {
     globalThis.fetch = (async () => new Response('nope', { status: 500 })) as unknown as typeof fetch
     expect(await openaiCompatibleDriver.listModels(creds('https://x/v1'))).toEqual([])
diff --git a/server/ai/drivers/openaiCompatible.ts b/server/ai/drivers/openaiCompatible.ts
index da29c0387..db2ea1b83 100644
--- a/server/ai/drivers/openaiCompatible.ts
+++ b/server/ai/drivers/openaiCompatible.ts
@@ -11,7 +11,7 @@
  */
 
 import { Type, parseValue } from '@core/utils/typeboxHelpers'
-import type { AiAuthMode, AiProviderId, AiStreamEvent } from '../runtime/types'
+import type { AiAuthMode, AiStreamEvent } from '../runtime/types'
 import type {
   AiProvider,
   AiProviderModel,
@@ -19,7 +19,7 @@ import type {
   AiStreamRequest,
 } from './types'
 import { runToolLoop } from './http/toolLoop'
-import { makeChatCompletionsAdapter, trimSlash } from './http/chatCompletions'
+import { makeChatCompletionsAdapter, normalizeOpenAiBaseUrl } from './http/chatCompletions'
 
 const SUPPORTED_AUTH_MODES: AiAuthMode[] = ['baseUrl']
 
@@ -31,7 +31,7 @@ const GENERIC_CAPABILITIES = {
 } as const
 
 export const openaiCompatibleDriver: AiProvider = {
-  id: 'openai-compatible' as AiProviderId,
+  id: 'openai-compatible',
   label: 'OpenAI-Compatible',
   supportedAuthModes: SUPPORTED_AUTH_MODES,
 
@@ -90,7 +90,7 @@ async function fetchOpenAiCompatibleModels(
   try {
     const headers: Record<string, string> = {}
     if (apiKey) headers.Authorization = `Bearer ${apiKey}`
-    const res = await fetch(`${trimSlash(baseUrl)}/v1/models`, { headers })
+    const res = await fetch(`${normalizeOpenAiBaseUrl(baseUrl)}/v1/models`, { headers })
     if (!res.ok) return []
     const parsed = parseValue(ModelsResponseSchema, await res.json())
     return parsed.data.map((m) => ({
diff --git a/server/ai/runtime/types.ts b/server/ai/runtime/types.ts
index f8aca9ebd..b75e9b4b7 100644
--- a/server/ai/runtime/types.ts
+++ b/server/ai/runtime/types.ts
@@ -29,7 +29,8 @@ export type AiProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'o
  * Credential auth modes.
  *
  *   - `apiKey`   — encrypted user-supplied key (Anthropic, OpenAI, OpenRouter).
- *   - `baseUrl`  — OpenAI-compatible local endpoint (Ollama). Optional
+ *   - `baseUrl`  — OpenAI-compatible endpoint (Ollama, or any openai-compatible
+ *                  provider such as Groq, DeepSeek, Mistral, vLLM…). Optional
  *                  bearer token may be stored alongside the URL.
  */
 export type AiAuthMode = 'apiKey' | 'baseUrl'
diff --git a/src/admin/pages/ai/tabs/ProvidersTab.tsx b/src/admin/pages/ai/tabs/ProvidersTab.tsx
index e980632ce..238554786 100644
--- a/src/admin/pages/ai/tabs/ProvidersTab.tsx
+++ b/src/admin/pages/ai/tabs/ProvidersTab.tsx
@@ -288,7 +288,7 @@ function AddCredentialDialog({
   const providerSpec = PROVIDERS.find((p) => p.id === providerId)!
   const effectiveAuthMode = providerSpec.authMode
   const baseUrlPlaceholder =
-    providerId === 'ollama' ? 'http://localhost:11434' : 'https://api.your-provider.com/v1'
+    providerId === 'ollama' ? 'http://localhost:11434' : 'https://api.groq.com/openai/v1'
 
   async function handleSubmit(e: React.FormEvent) {
     e.preventDefault()

From 8aac0242a13af78cf2ae5895f78a459e3056ebf8 Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 20:28:36 -0700
Subject: [PATCH 6/8] feat(ai): rename custom provider display label to 'Custom
 Provider'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The id stays 'openai-compatible' (stable registry/DB identifier); only the
user-facing display label changes — dropdown, credential card, driver label,
and docs. Protocol descriptions and the filename are unchanged.
---
 docs/features/agent.md                   | 10 +++++-----
 server/ai/contextTokens.ts               |  2 +-
 server/ai/drivers/openaiCompatible.ts    |  6 +++---
 src/admin/pages/ai/tabs/ProvidersTab.tsx |  4 ++--
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/features/agent.md b/docs/features/agent.md
index 1fbaa3a36..b4ad6e5f9 100644
--- a/docs/features/agent.md
+++ b/docs/features/agent.md
@@ -12,7 +12,7 @@ The agent runs on a provider-agnostic AI runtime (`server/ai/`) that can drive a
 - **Styling via CSS.** The agent emits CSS the same way a human pastes it: a `<style>` block and/or `class=` attributes inside the `insertHtml`/`replaceNodeHtml` payload, or the standalone `applyCss` tool. The importer (`cssToStyleRules`) classifies every selector — a bare `.foo {}` rule becomes a reusable Selectors-panel class bound to `class="foo"`; any other selector (`.hero a`, `a:hover`, `nav > li`) becomes an ambient rule; `style=` attributes land on the node's inline styles. There is no structured `classes` parameter — the agent never hand-builds classes node-by-node at insert time. `applyCss` is the single tool for authoring/editing CSS on its own; it **upserts**, so re-applying a selector edits the existing rule (the way descendant/pseudo rules get restyled).
 - **35 tools total.** 6 server-side catalog read tools (resolved server-side from the posted snapshot / DB) + 29 browser-bridged tools.
 - **Two-endpoint bridge.** `POST /admin/api/ai/chat/site` opens an NDJSON stream. When the model calls a browser-bridged tool, the server emits `toolRequest`; the browser executor reads or mutates the editor store and POSTs the `AiToolOutput` result to `POST /admin/api/ai/tool-result`.
-- **Provider-agnostic.** The runtime selects a driver (Anthropic, OpenAI, OpenRouter, Ollama, OpenAI-Compatible) from the conversation's configured credential.
+- **Provider-agnostic.** The runtime selects a driver (Anthropic, OpenAI, OpenRouter, Ollama, Custom Provider) from the conversation's configured credential.
 - **Tool input schemas are a single source of truth** in `@core/ai` (`src/core/ai/toolSchemas.ts`). The server tool registry (`server/ai/tools/site/writeTools.ts`) and the browser executor (`executor.ts` + `tokenRunners.ts`) import the exact same schema objects — a constraint added once is enforced on both sides at build time. Gated by `ai-tool-schema-ssot.test.ts` and `ai-tools-typebox-only.test.ts`.
 - **Capabilities.** `ai.chat` required to stream; `ai.tools.write` required for write tools. Gated by `ai-handlers-capability-gated.test.ts`.
 
@@ -67,7 +67,7 @@ server/ai/
 │   ├── openai.ts              — OpenAI driver: direct POST /v1/responses (no SDK)
 │   ├── openrouter.ts          — OpenRouter driver: direct POST /v1/responses (shared Responses path; live /models; native cost)
 │   ├── ollama.ts              — Ollama driver: POST /v1/chat/completions via shared chatCompletions adapter; live /api/tags catalogue
-│   └── openaiCompatible.ts    — OpenAI-Compatible driver: any /v1/chat/completions endpoint; live GET /v1/models catalogue
+│   └── openaiCompatible.ts    — Custom Provider driver: any /v1/chat/completions endpoint; live GET /v1/models catalogue
 └── runtime/
     ├── runner.ts           — runChat(): drives a driver, emits stream events
     ├── persister.ts        — ConversationsPersister: messages + usage to DB; writes contextTokens snapshot
@@ -141,9 +141,9 @@ Each entry in **Settings → AI → Providers** stores one credential. The provi
 | `openai` | OpenAI | `apiKey` | API key (`sk-…`) | — | Static `gpt-*` / `o*` catalogue enriched with OpenRouter prices + context windows |
 | `openrouter` | OpenRouter | `apiKey` | API key (`sk-or-…`) | — | Live `GET /api/v1/models` (cross-provider; native cost reporting) |
 | `ollama` | Ollama (local) | `baseUrl` | Base URL (e.g. `http://localhost:11434`) | API key (bearer, for proxied deployments) | Live `GET {baseUrl}/api/tags`; static fallback list when unreachable |
-| `openai-compatible` | OpenAI-Compatible | `baseUrl` | Base URL — any host serving the OpenAI `/v1/chat/completions` wire protocol | API key (bearer; cloud services need one, local servers often don't) | Live `GET {baseUrl}/v1/models` (standard OpenAI list shape); model `id` used as label |
+| `openai-compatible` | Custom Provider | `baseUrl` | Base URL — any host serving the OpenAI `/v1/chat/completions` wire protocol | API key (bearer; cloud services need one, local servers often don't) | Live `GET {baseUrl}/v1/models` (standard OpenAI list shape); model `id` used as label |
 
-**OpenAI-Compatible** is the generic adapter for any endpoint that speaks the OpenAI chat/completions wire protocol — Groq (`https://api.groq.com/openai`), Together, DeepSeek, Mistral, Fireworks, self-hosted vLLM, LM Studio, and others. Capabilities default to `{ toolCalling: true, visionInput: false, promptCache: false, streaming: true }`; the operator is responsible for selecting a model that actually supports tool calling. Because arbitrary endpoints are not in the OpenRouter catalogue, no context-window enrichment is available and the context meter stays hidden for these models.
+**Custom Provider** (id `openai-compatible`) is the generic adapter for any endpoint that speaks the OpenAI chat/completions wire protocol — Groq (`https://api.groq.com/openai`), Together, DeepSeek, Mistral, Fireworks, self-hosted vLLM, LM Studio, and others. Capabilities default to `{ toolCalling: true, visionInput: false, promptCache: false, streaming: true }`; the operator is responsible for selecting a model that actually supports tool calling. Because arbitrary endpoints are not in the OpenRouter catalogue, no context-window enrichment is available and the context meter stays hidden for these models.
 
 ---
 
@@ -579,7 +579,7 @@ The `<ContextMeter>` shows how much of the active model's context window the cur
 - **Window** (`windowTokens` prop from `AgentPanel`): the model's max total tokens, resolved once from `GET /admin/api/ai/providers/:id/models?credentialId=…`. The models endpoint enriches Anthropic and OpenAI models with `contextWindow` from the live OpenRouter catalogue (`server/ai/pricing/`); OpenRouter populates it from its own native fetch. Ollama models and uncatalogued models have no window — the meter hides.
 - **Used** (`agentContextTokens` in the store): the provider-normalised "context used" — the CURRENT context size, computed by `normalizeContextTokens(providerId, buckets)` in `server/ai/contextTokens.ts`:
   - Anthropic reports `input_tokens` excluding cache buckets, so the true total is `promptTokens + cacheReadTokens + cacheCreationTokens`.
-  - OpenAI / OpenRouter / Ollama / OpenAI-Compatible report `input_tokens` as the full input; `promptTokens` alone is the total.
+  - OpenAI / OpenRouter / Ollama / Custom Provider report `input_tokens` as the full input; `promptTokens` alone is the total.
 
 **Live, per-round, not summed.** A turn makes one provider round-trip per tool batch. The toolLoop emits a `context` event **each round** carrying THAT round's input buckets; the chat handler injects the normalised `contextTokens` and the browser updates the meter on every round — so it climbs *during* a long tool loop instead of only at the end. The meter is the LATEST round's input (the current window fill), never the sum across rounds (which would over-count, since each round re-sends the growing context). The terminal `usage` event is **billing only** — its `promptTokens` stays summed across rounds (you pay input per round). The persister keeps the latest `context` value in memory (`recordContext`) and writes it once to `ai_conversations.context_tokens` with the final `usage` (overwritten per turn), so `loadAgentConversation` restores the true context on reload.
 
diff --git a/server/ai/contextTokens.ts b/server/ai/contextTokens.ts
index e6814cf1f..0dbfaa9e4 100644
--- a/server/ai/contextTokens.ts
+++ b/server/ai/contextTokens.ts
@@ -5,7 +5,7 @@
  *
  *   - Anthropic reports `input_tokens` EXCLUDING the cache buckets, so the true
  *     total is prompt + cacheRead + cacheCreation.
- *   - OpenAI / OpenRouter / Ollama / OpenAI-Compatible report `input_tokens` as
+ *   - OpenAI / OpenRouter / Ollama / Custom Provider report `input_tokens` as
  *     the full input (any cached tokens are already a subset), so prompt alone
  *     is the total.
  *
diff --git a/server/ai/drivers/openaiCompatible.ts b/server/ai/drivers/openaiCompatible.ts
index db2ea1b83..a2e4a8fcc 100644
--- a/server/ai/drivers/openaiCompatible.ts
+++ b/server/ai/drivers/openaiCompatible.ts
@@ -1,5 +1,5 @@
 /**
- * OpenAI-Compatible driver — direct HTTP against any endpoint that speaks the
+ * Custom Provider driver — direct HTTP against any endpoint that speaks the
  * OpenAI `/v1/chat/completions` wire protocol (Groq, Together, DeepSeek,
  * Mistral, Fireworks, self-hosted vLLM / LM Studio, …).
  *
@@ -32,7 +32,7 @@ const GENERIC_CAPABILITIES = {
 
 export const openaiCompatibleDriver: AiProvider = {
   id: 'openai-compatible',
-  label: 'OpenAI-Compatible',
+  label: 'Custom Provider',
   supportedAuthModes: SUPPORTED_AUTH_MODES,
 
   capabilities(_modelId: string) {
@@ -60,7 +60,7 @@ export const openaiCompatibleDriver: AiProvider = {
       makeChatCompletionsAdapter({
         baseUrl: req.credentials.baseUrl,
         apiKey: req.credentials.apiKey,
-        label: 'OpenAI-Compatible',
+        label: 'Custom Provider',
       }),
       req,
     )
diff --git a/src/admin/pages/ai/tabs/ProvidersTab.tsx b/src/admin/pages/ai/tabs/ProvidersTab.tsx
index 238554786..0e379cf0e 100644
--- a/src/admin/pages/ai/tabs/ProvidersTab.tsx
+++ b/src/admin/pages/ai/tabs/ProvidersTab.tsx
@@ -37,7 +37,7 @@ const PROVIDERS: Array<{ id: ProviderId; label: string; authMode: AuthMode }> =
   { id: 'openai', label: 'OpenAI', authMode: 'apiKey' },
   { id: 'openrouter', label: 'OpenRouter', authMode: 'apiKey' },
   { id: 'ollama', label: 'Ollama (local)', authMode: 'baseUrl' },
-  { id: 'openai-compatible', label: 'OpenAI-Compatible', authMode: 'baseUrl' },
+  { id: 'openai-compatible', label: 'Custom Provider', authMode: 'baseUrl' },
 ]
 
 const AUTH_MODE_LABEL: Record<AuthMode, string> = {
@@ -50,7 +50,7 @@ const PROVIDER_LABEL: Record<ProviderId, string> = {
   openai: 'OpenAI',
   openrouter: 'OpenRouter',
   ollama: 'Ollama',
-  'openai-compatible': 'OpenAI-Compatible',
+  'openai-compatible': 'Custom Provider',
 }
 
 // Hint text for the API-key field, per provider key prefix.

From 55a37df8cc3b58173ef50505647937f22754cfd3 Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 21:11:10 -0700
Subject: [PATCH 7/8] fix(ai): tolerate explicit null in chat/completions
 stream chunks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Real OpenAI-compatible gateways (OpenCode Zen, OpenRouter, vLLM, …) send
explicit `null` for optional per-chunk fields (`usage: null`,
`tool_calls: null`, `delta.content: null`) on every chunk. The chunk schema
used Type.Optional, which accepts absent-or-value but not null, so parseValue
threw, the frame was dropped in translate()'s catch, and the model's entire
reply silently vanished — reasoning models (GLM, DeepSeek, Qwen, MiniMax)
appeared to 'not reply'. Wrap the optional fields in a nullable() helper so
both absent and null validate. Verified against real gateway frames.
---
 .../ai/drivers/http/chatCompletions.test.ts   | 33 +++++++++++++++++++
 server/ai/drivers/http/chatCompletions.ts     | 33 ++++++++++++-------
 2 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/server/ai/drivers/http/chatCompletions.test.ts b/server/ai/drivers/http/chatCompletions.test.ts
index 2635154c5..551e1cc43 100644
--- a/server/ai/drivers/http/chatCompletions.test.ts
+++ b/server/ai/drivers/http/chatCompletions.test.ts
@@ -45,6 +45,39 @@ describe('chatCompletions shared adapter', () => {
     expect(result.toolCalls).toEqual([])
   })
 
+  // Real OpenAI-compatible gateways (OpenCode Zen, OpenRouter, …) send explicit
+  // `null` for optional per-chunk fields rather than omitting them. The chunk
+  // schema must tolerate these, or `parseValue` throws, the frame is dropped,
+  // and the model's entire reply silently vanishes ("no reply").
+  it('still emits text when the chunk carries usage:null', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    const events = t.translate(frame({ choices: [{ delta: { content: 'Hi' } }], usage: null }))
+    expect(events).toEqual([{ type: 'text', text: 'Hi' }])
+  })
+
+  it('still emits text when delta.tool_calls is null', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    const events = t.translate(
+      frame({ choices: [{ delta: { content: 'Hi', reasoning_content: null, tool_calls: null }, finish_reason: 'stop' }], usage: null }),
+    )
+    expect(events).toEqual([{ type: 'text', text: 'Hi' }])
+  })
+
+  it('captures the final content of a reasoning model (content empty during reasoning, filled at the end)', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    // Reasoning phase: content is "" (or null), chain-of-thought lives in reasoning_content; tool_calls/usage are null.
+    t.translate(frame({ choices: [{ delta: { content: '', reasoning_content: 'thinking…', tool_calls: null } }], usage: null }))
+    t.translate(frame({ choices: [{ delta: { content: null, reasoning_content: ' more' } }], usage: null }))
+    // Final answer arrives in content.
+    const last = t.translate(
+      frame({ choices: [{ delta: { content: 'Hello there!', tool_calls: null }, finish_reason: 'stop' }], usage: null }),
+    )
+    expect(last).toEqual([{ type: 'text', text: 'Hello there!' }])
+    const result = t.finish()
+    expect(result.stop).toBe(true)
+    expect(result.assistantMessage?.[0]).toMatchObject({ role: 'assistant', content: 'Hello there!' })
+  })
+
   it('translator emits one toolCall event per accumulated call at finish_reason', () => {
     const t = new ChatCompletionsTurnTranslator()
     t.translate(frame({ choices: [{ delta: { tool_calls: [
diff --git a/server/ai/drivers/http/chatCompletions.ts b/server/ai/drivers/http/chatCompletions.ts
index c8da06759..f668c2d5f 100644
--- a/server/ai/drivers/http/chatCompletions.ts
+++ b/server/ai/drivers/http/chatCompletions.ts
@@ -7,7 +7,7 @@
  * adapter shape consumed by `runToolLoop`.
  */
 
-import { Type, parseValue, type Static } from '@core/utils/typeboxHelpers'
+import { Type, parseValue, type Static, type TSchema } from '@core/utils/typeboxHelpers'
 import {
   SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
   type AiContentBlock,
@@ -159,13 +159,22 @@ export function normalizeOpenAiBaseUrl(url: string): string {
 // SSE event schema (boundary validation — no `as` on parsed JSON)
 // ---------------------------------------------------------------------------
 
+// Real OpenAI-compatible gateways (OpenCode Zen, OpenRouter, vLLM, …) routinely
+// send explicit `null` for optional per-chunk fields (e.g. `usage: null`,
+// `tool_calls: null`, `delta.content: null`) on every chunk rather than omitting
+// them. `Type.Optional(T)` accepts absent-or-T but NOT null, so a stray null
+// fails validation, the frame is dropped in `translate()`'s catch, and the
+// model's entire reply silently vanishes ("no reply"). `nullable()` tolerates
+// both an absent field and an explicit null.
+const nullable = <T extends TSchema>(schema: T) => Type.Optional(Type.Union([schema, Type.Null()]))
+
 const ChatToolCallDeltaSchema = Type.Object(
   {
-    index: Type.Optional(Type.Number()),
-    id: Type.Optional(Type.String()),
-    function: Type.Optional(
+    index: nullable(Type.Number()),
+    id: nullable(Type.String()),
+    function: nullable(
       Type.Object(
-        { name: Type.Optional(Type.String()), arguments: Type.Optional(Type.String()) },
+        { name: nullable(Type.String()), arguments: nullable(Type.String()) },
         { additionalProperties: true },
       ),
     ),
@@ -175,28 +184,28 @@ const ChatToolCallDeltaSchema = Type.Object(
 
 const ChatChunkSchema = Type.Object(
   {
-    choices: Type.Optional(
+    choices: nullable(
       Type.Array(
         Type.Object(
           {
-            delta: Type.Optional(
+            delta: nullable(
               Type.Object(
                 {
-                  content: Type.Optional(Type.Union([Type.String(), Type.Null()])),
-                  tool_calls: Type.Optional(Type.Array(ChatToolCallDeltaSchema)),
+                  content: nullable(Type.String()),
+                  tool_calls: nullable(Type.Array(ChatToolCallDeltaSchema)),
                 },
                 { additionalProperties: true },
               ),
             ),
-            finish_reason: Type.Optional(Type.Union([Type.String(), Type.Null()])),
+            finish_reason: nullable(Type.String()),
           },
           { additionalProperties: true },
         ),
       ),
     ),
-    usage: Type.Optional(
+    usage: nullable(
       Type.Object(
-        { prompt_tokens: Type.Optional(Type.Number()), completion_tokens: Type.Optional(Type.Number()) },
+        { prompt_tokens: nullable(Type.Number()), completion_tokens: nullable(Type.Number()) },
         { additionalProperties: true },
       ),
     ),

From b004c08f303c8ccec5f534d9bfab0033151be49c Mon Sep 17 00:00:00 2001
From: Mario <mario.marquez@monkeywebs.com>
Date: Sat, 27 Jun 2026 22:11:26 -0700
Subject: [PATCH 8/8] =?UTF-8?q?feat(agent):=20live=20'Thinking=E2=80=A6'?=
 =?UTF-8?q?=20indicator=20for=20reasoning=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reasoning models reached over the chat/completions wire stream their
chain-of-thought in delta.reasoning_content / delta.reasoning while the answer
stays empty — the panel looked frozen for seconds. Add an ephemeral reasoning
stream event: the chat/completions translator emits { type: 'reasoning' }
(never added to the assistant message, so it is not persisted or replayed); the
runner forwards it; the agent store accumulates it (rAF-batched, session-only)
into AgentMessage.reasoning; the panel shows an animated 'Thinking…' indicator
while reasoning streams with no answer yet, then an on-demand 'Show reasoning'
expander. No DB/schema changes. Reasoning UI extracted to MessageReasoning to
keep AgentPanel under the module-size ceiling.
---
 .../ai/drivers/http/chatCompletions.test.ts   | 21 +++++++
 server/ai/drivers/http/chatCompletions.ts     | 12 ++++
 server/ai/runtime/runner.ts                   |  6 ++
 server/ai/runtime/types.ts                    |  7 +++
 src/admin/pages/site/agent/agentSlice.ts      | 41 ++++++++-----
 .../pages/site/agent/streamEvents.test.ts     | 57 +++++++++++++++++++
 src/admin/pages/site/agent/streamEvents.ts    | 13 ++++-
 src/admin/pages/site/agent/types.ts           | 31 ++++++++--
 .../panels/AgentPanel/AgentPanel.module.css   | 40 +++++++++++++
 .../site/panels/AgentPanel/AgentPanel.tsx     | 25 +++++++-
 .../panels/AgentPanel/MessageReasoning.tsx    | 44 ++++++++++++++
 11 files changed, 275 insertions(+), 22 deletions(-)
 create mode 100644 src/admin/pages/site/agent/streamEvents.test.ts
 create mode 100644 src/admin/pages/site/panels/AgentPanel/MessageReasoning.tsx

diff --git a/server/ai/drivers/http/chatCompletions.test.ts b/server/ai/drivers/http/chatCompletions.test.ts
index 551e1cc43..30164df6c 100644
--- a/server/ai/drivers/http/chatCompletions.test.ts
+++ b/server/ai/drivers/http/chatCompletions.test.ts
@@ -78,6 +78,27 @@ describe('chatCompletions shared adapter', () => {
     expect(result.assistantMessage?.[0]).toMatchObject({ role: 'assistant', content: 'Hello there!' })
   })
 
+  // Reasoning models stream their chain-of-thought in a separate delta field
+  // (`reasoning_content`, or `reasoning` on OpenRouter-style gateways). We surface
+  // it as ephemeral `reasoning` events for a live "Thinking…" indicator — but it
+  // must NEVER enter the assistant message (not persisted, not replayed).
+  it('emits reasoning events for delta.reasoning_content without polluting the answer', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    const r = t.translate(frame({ choices: [{ delta: { content: '', reasoning_content: 'Let me think…' } }] }))
+    expect(r).toEqual([{ type: 'reasoning', text: 'Let me think…' }])
+    const a = t.translate(frame({ choices: [{ delta: { content: 'Hello!' }, finish_reason: 'stop' }] }))
+    expect(a).toEqual([{ type: 'text', text: 'Hello!' }])
+    const result = t.finish()
+    // Only the answer is in the assistant message — reasoning is gone.
+    expect(result.assistantMessage?.[0]).toMatchObject({ role: 'assistant', content: 'Hello!' })
+  })
+
+  it('also emits reasoning events for the alternate delta.reasoning field', () => {
+    const t = new ChatCompletionsTurnTranslator()
+    const r = t.translate(frame({ choices: [{ delta: { content: '', reasoning: 'thinking' } }] }))
+    expect(r).toEqual([{ type: 'reasoning', text: 'thinking' }])
+  })
+
   it('translator emits one toolCall event per accumulated call at finish_reason', () => {
     const t = new ChatCompletionsTurnTranslator()
     t.translate(frame({ choices: [{ delta: { tool_calls: [
diff --git a/server/ai/drivers/http/chatCompletions.ts b/server/ai/drivers/http/chatCompletions.ts
index f668c2d5f..1bafc764a 100644
--- a/server/ai/drivers/http/chatCompletions.ts
+++ b/server/ai/drivers/http/chatCompletions.ts
@@ -192,6 +192,11 @@ const ChatChunkSchema = Type.Object(
               Type.Object(
                 {
                   content: nullable(Type.String()),
+                  // Reasoning models stream chain-of-thought separately from the
+                  // answer: `reasoning_content` (GLM/DeepSeek/Qwen/MiniMax) or
+                  // `reasoning` (OpenRouter-style gateways).
+                  reasoning_content: nullable(Type.String()),
+                  reasoning: nullable(Type.String()),
                   tool_calls: nullable(Type.Array(ChatToolCallDeltaSchema)),
                 },
                 { additionalProperties: true },
@@ -258,6 +263,13 @@ export class ChatCompletionsTurnTranslator implements TurnTranslator<ChatTurn> {
         this.text += delta.content
         events.push({ type: 'text', text: delta.content })
       }
+      // Ephemeral reasoning delta — forwarded for the live "Thinking…" indicator
+      // but deliberately NOT added to `this.text`, so it never lands in the
+      // assistant message (not persisted, not replayed to the provider).
+      const reasoning = delta.reasoning_content ?? delta.reasoning
+      if (typeof reasoning === 'string' && reasoning.length > 0) {
+        events.push({ type: 'reasoning', text: reasoning })
+      }
       if (delta.tool_calls) {
         for (const tc of delta.tool_calls) {
           const index = tc.index ?? 0
diff --git a/server/ai/runtime/runner.ts b/server/ai/runtime/runner.ts
index 207c5d7c0..42f8f695e 100644
--- a/server/ai/runtime/runner.ts
+++ b/server/ai/runtime/runner.ts
@@ -128,6 +128,12 @@ export async function runChat(args: RunChatArgs): Promise<void> {
           await flushPendingAssistantText()
           return
         }
+        case 'reasoning': {
+          // Ephemeral thinking delta — already forwarded to the browser above
+          // (drives the live "Thinking…" indicator). Never persisted, so the
+          // reasoning is not replayed to the provider on the next turn.
+          break
+        }
         // `bridgeReady`, `toolRequest`, `done`: nothing to persist.
         default:
           break
diff --git a/server/ai/runtime/types.ts b/server/ai/runtime/types.ts
index b75e9b4b7..cbe00a991 100644
--- a/server/ai/runtime/types.ts
+++ b/server/ai/runtime/types.ts
@@ -153,6 +153,13 @@ export type AiStreamEvent =
   | { type: 'bridgeReady'; bridgeId: string }
   /** Streaming text delta from the assistant. */
   | { type: 'text'; text: string }
+  /**
+   * Streaming reasoning/thinking delta (e.g. `delta.reasoning_content` from
+   * chat/completions reasoning models). Ephemeral: forwarded to the browser to
+   * drive a live "Thinking…" indicator, but never persisted to conversation
+   * history and never replayed back to the provider.
+   */
+  | { type: 'reasoning'; text: string }
   /** A tool call has been issued by the model. `status: 'pending'` until completion. */
   | { type: 'toolCall'; toolCallId: string; toolName: string; input: unknown; status: 'pending' }
   /** A tool call has completed (server-resolved or browser-bridged). */
diff --git a/src/admin/pages/site/agent/agentSlice.ts b/src/admin/pages/site/agent/agentSlice.ts
index 262e22ab4..992869077 100644
--- a/src/admin/pages/site/agent/agentSlice.ts
+++ b/src/admin/pages/site/agent/agentSlice.ts
@@ -42,7 +42,7 @@ import type {
   AgentBridgeRuntime,
   AgentMessage,
   AgentRequestBody,
-  AgentTextStreamSink,
+  AgentStreamSink,
 } from './types'
 import { getErrorMessage } from '@core/utils/errorMessage'
 
@@ -179,6 +179,7 @@ export function createAgentSlice(
   // flushed once per animation frame, OR explicitly before any tool-call
   // block is added so chronological ordering is preserved.
   let _pendingText = ''
+  let _pendingReasoning = ''
   let _pendingAssistantId = ''
   let _rafHandle = 0
 
@@ -197,21 +198,28 @@ export function createAgentSlice(
     }
   }
 
-  function flushPendingText() {
+  function flushPending() {
     _rafHandle = 0
-    if (!_pendingText || !_pendingAssistantId) return
+    if (!_pendingAssistantId) return
+    if (!_pendingText && !_pendingReasoning) return
     const text = _pendingText
+    const reasoning = _pendingReasoning
     const id = _pendingAssistantId
     _pendingText = ''
+    _pendingReasoning = ''
     set((state) => {
       const msg = state.agentMessages.find((m) => m.id === id)
-      if (msg) appendTextToBlocks(msg, text)
+      if (!msg) return
+      if (text) appendTextToBlocks(msg, text)
+      // Reasoning is ephemeral and lives on its own buffer (not a block), so it
+      // never interleaves with text/tool blocks and never reaches the answer.
+      if (reasoning) msg.reasoning = (msg.reasoning ?? '') + reasoning
     })
   }
 
   function scheduleFlush() {
     if (_rafHandle === 0) {
-      _rafHandle = requestAnimationFrame(flushPendingText)
+      _rafHandle = requestAnimationFrame(flushPending)
     }
   }
 
@@ -221,13 +229,20 @@ export function createAgentSlice(
     scheduleFlush()
   }
 
-  // Single text-stream sink passed into processStreamEvent. The sink's
-  // `flush()` is called from the toolCall/toolResult handlers to drain any
-  // pending text deltas BEFORE a tool-call block is added — that's what keeps
-  // the visual order in the panel chronologically correct.
-  const textSink: AgentTextStreamSink = {
+  function appendReasoningDelta(assistantId: string, text: string) {
+    _pendingAssistantId = assistantId
+    _pendingReasoning += text
+    scheduleFlush()
+  }
+
+  // Single streaming sink passed into processStreamEvent. The sink's `flush()`
+  // is called from the toolCall/toolResult handlers to drain pending deltas (text
+  // plus any buffered reasoning) BEFORE a tool-call block is added — that's what
+  // keeps the visual order in the panel chronologically correct.
+  const textSink: AgentStreamSink = {
     append: appendTextDelta,
-    flush: flushPendingText,
+    appendReasoning: appendReasoningDelta,
+    flush: flushPending,
   }
 
   return {
@@ -445,7 +460,7 @@ export function createAgentSlice(
           )
         }
 
-        flushPendingText()
+        flushPending()
       } catch (err) {
         // Abort the fetch so any in-flight MCP tool handler on the server
         // rejects cleanly (via destroyBridge in the stream's finally block)
@@ -453,7 +468,7 @@ export function createAgentSlice(
         _abortController?.abort()
 
         if (err instanceof Error && err.name === 'AbortError') {
-          flushPendingText()
+          flushPending()
         } else {
           // Admin-only surface (capability gated) — show the actual
           // failure cause so the operator can act. Network / unexpected
diff --git a/src/admin/pages/site/agent/streamEvents.test.ts b/src/admin/pages/site/agent/streamEvents.test.ts
new file mode 100644
index 000000000..d9cf420da
--- /dev/null
+++ b/src/admin/pages/site/agent/streamEvents.test.ts
@@ -0,0 +1,57 @@
+import { describe, it, expect } from 'bun:test'
+import { processStreamEvent } from './streamEvents'
+import type { AgentStreamSink, AgentBridgeRuntime } from './types'
+import type { EditorStoreSet } from './agentSliceTypes'
+
+// The reasoning case only ever talks to the sink; `set`, the bridge, the abort
+// signal and the tool dispatcher are not reached, so inert placeholders suffice.
+const inertSet = ((): void => undefined) as EditorStoreSet
+const idleBridge: AgentBridgeRuntime = { bridgeId: null }
+async function failOnDispatch(): Promise<never> {
+  throw new Error('dispatchTool must not be called for a reasoning event')
+}
+
+describe('processStreamEvent — reasoning routing', () => {
+  it('routes a reasoning delta to appendReasoning and never to the text path', async () => {
+    const seen: string[] = []
+    const recorder: AgentStreamSink = {
+      append: (_assistantId, delta) => seen.push(`text:${delta}`),
+      appendReasoning: (assistantId, delta) => seen.push(`reasoning:${assistantId}:${delta}`),
+      flush: () => seen.push('flush'),
+    }
+
+    await processStreamEvent(
+      { type: 'reasoning', text: 'let me think…' },
+      'assistant-1',
+      recorder,
+      inertSet,
+      idleBridge,
+      null,
+      failOnDispatch,
+    )
+
+    // Exactly one entry is expected; a text append or a flush would add another.
+    expect(seen).toEqual(['reasoning:assistant-1:let me think…'])
+  })
+
+  it('a text delta still routes to the text path (regression guard)', async () => {
+    const seen: string[] = []
+    const recorder: AgentStreamSink = {
+      append: (assistantId, delta) => seen.push(`text:${assistantId}:${delta}`),
+      appendReasoning: (_assistantId, delta) => seen.push(`reasoning:${delta}`),
+      flush: () => seen.push('flush'),
+    }
+
+    await processStreamEvent(
+      { type: 'text', text: 'hello' },
+      'assistant-1',
+      recorder,
+      inertSet,
+      idleBridge,
+      null,
+      failOnDispatch,
+    )
+
+    expect(seen).toEqual(['text:assistant-1:hello'])
+  })
+})
diff --git a/src/admin/pages/site/agent/streamEvents.ts b/src/admin/pages/site/agent/streamEvents.ts
index cf510155d..020642c17 100644
--- a/src/admin/pages/site/agent/streamEvents.ts
+++ b/src/admin/pages/site/agent/streamEvents.ts
@@ -29,7 +29,7 @@ import { postToolResult } from './agentApi'
 import type { EditorStoreSet } from './agentSliceTypes'
 import type {
   AgentBridgeRuntime,
-  AgentTextStreamSink,
+  AgentStreamSink,
   AgentToolCall,
   ServerStreamEvent,
 } from './types'
@@ -47,6 +47,7 @@ import { getErrorMessage } from '@core/utils/errorMessage'
 
 export const ServerStreamEventSchema = Type.Union([
   Type.Object({ type: Type.Literal('text'), text: Type.String() }),
+  Type.Object({ type: Type.Literal('reasoning'), text: Type.String() }),
   Type.Object({
     type: Type.Literal('bridgeReady'),
     bridgeId: Type.String(),
@@ -94,7 +95,7 @@ export const ServerStreamEventSchema = Type.Union([
 export async function processStreamEvent(
   event: ServerStreamEvent,
   assistantId: string,
-  textSink: AgentTextStreamSink,
+  textSink: AgentStreamSink,
   set: EditorStoreSet,
   bridge: AgentBridgeRuntime,
   signal: AbortSignal | null,
@@ -112,6 +113,14 @@ export async function processStreamEvent(
       break
     }
 
+    case 'reasoning': {
+      // Ephemeral thinking delta — accumulated into the message's `reasoning`
+      // buffer (NOT through the text path, so it never becomes message text).
+      // Drives the live "Thinking…" indicator + on-demand expander.
+      textSink.appendReasoning(assistantId, event.text)
+      break
+    }
+
     case 'bridgeReady': {
       bridge.bridgeId = event.bridgeId
       break
diff --git a/src/admin/pages/site/agent/types.ts b/src/admin/pages/site/agent/types.ts
index bda671739..6a901b359 100644
--- a/src/admin/pages/site/agent/types.ts
+++ b/src/admin/pages/site/agent/types.ts
@@ -52,6 +52,16 @@ interface TextEvent {
   text: string
 }
 
+/**
+ * One streamed chunk of a reasoning model's thinking (chat/completions
+ * `delta.reasoning_content`). Ephemeral: it drives the live "Thinking…"
+ * indicator and the on-demand expander, and is never persisted or replayed.
+ */
+interface ReasoningEvent {
+  type: 'reasoning' // discriminant; handled by the 'reasoning' case in processStreamEvent
+  text: string // a delta, not the running total — the client appends it to its buffer
+}
+
 /**
  * Bridge handshake: the server has accepted the request and assigned a bridge
  * id. The browser uses this id when POSTing tool-result responses to
@@ -130,6 +140,7 @@ interface ContextEvent {
 
 export type ServerStreamEvent =
   | TextEvent
+  | ReasoningEvent
   | BridgeReadyEvent
   | ToolRequestEvent
   | ToolCallEvent
@@ -170,6 +181,13 @@ export interface AgentMessage {
   role: 'user' | 'assistant'
   blocks: AgentMessageBlock[]
   timestamp: number
+  /**
+   * Accumulated reasoning/thinking text for this turn (chat/completions
+   * reasoning models). Ephemeral and session-only: it powers the live
+   * "Thinking…" indicator and an on-demand expander, but is never persisted to
+   * conversation history, so rehydrated past messages never carry it.
+   */
+  reasoning?: string
 }
 
 // ---------------------------------------------------------------------------
@@ -274,13 +292,16 @@ export interface AgentBridgeRuntime {
 }
 
 /**
- * Sink for assistant text deltas. `append` accumulates a delta; `flush`
- * drains accumulated text into the message's blocks immediately. The slice's
- * implementation rAF-batches `append` calls; the toolCall/toolResult handlers
- * call `flush` so any pending text lands BEFORE a tool-call block is appended,
+ * Sink for assistant streaming deltas. `append` accumulates a text delta;
+ * `appendReasoning` accumulates an (ephemeral) reasoning delta into the
+ * message's `reasoning` buffer; `flush` drains accumulated text into the
+ * message's blocks immediately. The slice's implementation rAF-batches the
+ * `append`/`appendReasoning` calls; the toolCall/toolResult handlers call
+ * `flush` so any pending text lands BEFORE a tool-call block is appended,
  * preserving chronological order in the UI.
  */
-export interface AgentTextStreamSink {
+export interface AgentStreamSink {
   append(assistantId: string, text: string): void
+  appendReasoning(assistantId: string, text: string): void
   flush(): void
 }
diff --git a/src/admin/pages/site/panels/AgentPanel/AgentPanel.module.css b/src/admin/pages/site/panels/AgentPanel/AgentPanel.module.css
index 238a4b6cd..8ac1e7e4d 100644
--- a/src/admin/pages/site/panels/AgentPanel/AgentPanel.module.css
+++ b/src/admin/pages/site/panels/AgentPanel/AgentPanel.module.css
@@ -76,6 +76,46 @@
   50% { opacity: 0.4; }
 }
 
+/* ── Reasoning ("Thinking…") ─────────────────────────────────────────────── */
+.thinkingIndicator {
+  display: inline-flex;
+  gap: var(--space-2xs);
+  align-items: center; /* centre the dot against the label */
+  margin-bottom: var(--space-2xs);
+  color: var(--text-muted);
+  font-style: italic;
+  font-size: var(--text-s);
+}
+
+.thinkingDot {
+  height: 6px;
+  width: 6px;
+  background: var(--text-muted); /* same tone as the label text */
+  border-radius: 50%;
+  animation: pulse 1.4s cubic-bezier(0.4, 0, 0.6, 1) infinite;
+}
+
+@media (prefers-reduced-motion: reduce) {
+  .thinkingDot { animation: none; } /* keep the dot, drop the pulse */
+}
+
+.reasoningDisclosure {
+  margin-bottom: var(--space-2xs);
+}
+
+.reasoningContent {
+  max-height: 240px; /* cap the box height… */
+  overflow-y: auto; /* …and scroll when the reasoning is longer */
+  margin-top: var(--space-2xs);
+  padding: var(--space-2xs) var(--space-xs);
+  border-radius: var(--radius);
+  background: var(--bg-surface);
+  color: var(--text-muted);
+  font-size: var(--text-2xs);
+  line-height: 1.5;
+  white-space: pre-wrap; /* keep the reasoning text's own line breaks */
+}
+
 /* ── Message thread ──────────────────────────────────────────────────────── */
 .thread {
   flex: 1;
diff --git a/src/admin/pages/site/panels/AgentPanel/AgentPanel.tsx b/src/admin/pages/site/panels/AgentPanel/AgentPanel.tsx
index 0e2c6608d..7f6180511 100644
--- a/src/admin/pages/site/panels/AgentPanel/AgentPanel.tsx
+++ b/src/admin/pages/site/panels/AgentPanel/AgentPanel.tsx
@@ -42,6 +42,7 @@ import { Button } from '@ui/components/Button'
 import { EmptyState } from '@ui/components/EmptyState'
 import { Textarea } from '@ui/components/Input'
 import { useDraggablePanel } from '@site/hooks/useDraggablePanel'
+import { MessageReasoning } from './MessageReasoning'
 import { cn } from '@ui/cn'
 import { ModelPicker } from './ModelPicker'
 import { ConversationHistory } from './ConversationHistory'
@@ -69,6 +70,19 @@ export function AgentPanel({ variant = 'floating' }: { variant?: PanelVariant })
   const isStreaming = useAgentStore((s) => s.isAgentStreaming)
   const messages = useAgentStore((s) => s.agentMessages)
   const agentError = useAgentStore((s) => s.agentError)
+
+  // A turn is "thinking" while the active assistant message has streamed
+  // reasoning but no visible answer (text/tool block) yet. The compiler
+  // memoizes this derivation; once the answer's first block lands, the id no
+  // longer matches and the indicator gives way to the reasoning expander.
+  const lastMessage = messages[messages.length - 1]
+  const thinkingMessageId =
+    isStreaming &&
+    lastMessage?.role === 'assistant' &&
+    (lastMessage.reasoning?.length ?? 0) > 0 &&
+    lastMessage.blocks.length === 0
+      ? lastMessage.id
+      : null
   const closeAgent = useAgentStore((s) => s.closeAgent)
   const sendAgentMessage = useAgentStore((s) => s.sendAgentMessage)
   const abortAgent = useAgentStore((s) => s.abortAgent)
@@ -278,7 +292,9 @@ export function AgentPanel({ variant = 'floating' }: { variant?: PanelVariant })
         ) : (
           <>
             {lockReason && <AgentCredentialAlert mode={lockReason} />}
-            {messages.map((msg) => <MessageBubble key={msg.id} msg={msg} />)}
+            {messages.map((msg) => (
+              <MessageBubble key={msg.id} msg={msg} isThinking={msg.id === thinkingMessageId} />
+            ))}
           </>
         )}
 
@@ -371,11 +387,13 @@ export function AgentPanel({ variant = 'floating' }: { variant?: PanelVariant })
 
 interface MessageBubbleProps {
   msg: AgentMessage
+  /** True while this assistant turn has streamed reasoning but no answer yet. */
+  isThinking: boolean
 }
 
 // Exception #2: React.memo re-render bailout on a hot, list-rendered component
 // (one per message in messages.map).
-const MessageBubble = memo(function MessageBubble({ msg }: MessageBubbleProps) {
+const MessageBubble = memo(function MessageBubble({ msg, isThinking }: MessageBubbleProps) {
   const isUser = msg.role === 'user'
 
   return (
@@ -385,6 +403,9 @@ const MessageBubble = memo(function MessageBubble({ msg }: MessageBubbleProps) {
         {isUser ? 'You' : 'Assistant'}
       </div>
 
+      {/* Reasoning affordance: live "Thinking…" indicator → on-demand expander. */}
+      {!isUser && <MessageReasoning isThinking={isThinking} reasoning={msg.reasoning} />}
+
       {/* Chronological blocks — text and tool calls render in the order
           Claude actually emitted them, so a "text → tool → text" sequence
           shows two separate text bubbles around the tool badges. Text is
diff --git a/src/admin/pages/site/panels/AgentPanel/MessageReasoning.tsx b/src/admin/pages/site/panels/AgentPanel/MessageReasoning.tsx
new file mode 100644
index 000000000..18cdf7873
--- /dev/null
+++ b/src/admin/pages/site/panels/AgentPanel/MessageReasoning.tsx
@@ -0,0 +1,44 @@
+/**
+ * MessageReasoning — per-turn affordance for a reasoning model's thinking.
+ *
+ * The parent picks the state. While `isThinking` is true (reasoning is
+ * streaming, no answer yet) this renders a live "Thinking…" status row.
+ * Otherwise, if any reasoning was captured, it renders a toggle — collapsed by
+ * default — that reveals the text. Reasoning is ephemeral (session-only, never
+ * persisted), so rehydrated past turns have none and render nothing here.
+ */
+
+import { useState } from 'react'
+import { Button } from '@ui/components/Button'
+import styles from './AgentPanel.module.css'
+
+interface MessageReasoningProps {
+  /** Set by the parent while reasoning has streamed but no answer exists yet. */
+  isThinking: boolean
+  /** Reasoning text accumulated for this turn; absent when none was captured. */
+  reasoning?: string
+}
+
+export function MessageReasoning({ isThinking, reasoning }: MessageReasoningProps) {
+  const [expanded, setExpanded] = useState(false)
+  const toggleExpanded = () => setExpanded((wasExpanded) => !wasExpanded)
+
+  if (isThinking) {
+    return (
+      <div className={styles.thinkingIndicator} role="status">
+        <span className={styles.thinkingDot} aria-hidden="true" />
+        Thinking…
+      </div>
+    )
+  }
+  if (!reasoning) return null
+
+  return (
+    <div className={styles.reasoningDisclosure}>
+      <Button variant="ghost" size="micro" onClick={toggleExpanded} aria-expanded={expanded}>
+        {expanded ? 'Hide reasoning' : 'Show reasoning'}
+      </Button>
+      {expanded ? <div className={styles.reasoningContent}>{reasoning}</div> : null}
+    </div>
+  )
+}