From d1952c23d651e0dea576140afb287d1eb0b94823 Mon Sep 17 00:00:00 2001 From: Mario Date: Sat, 27 Jun 2026 17:51:33 -0700 Subject: [PATCH 1/8] refactor(ai): extract shared chat/completions adapter from ollama driver --- .../ai/drivers/http/chatCompletions.test.ts | 51 +++ server/ai/drivers/http/chatCompletions.ts | 350 +++++++++++++++++ server/ai/drivers/ollama.ts | 367 +----------------- .../ai-driver-shared-helpers.test.ts | 5 +- 4 files changed, 419 insertions(+), 354 deletions(-) create mode 100644 server/ai/drivers/http/chatCompletions.test.ts create mode 100644 server/ai/drivers/http/chatCompletions.ts diff --git a/server/ai/drivers/http/chatCompletions.test.ts b/server/ai/drivers/http/chatCompletions.test.ts new file mode 100644 index 000000000..f8c3ba40d --- /dev/null +++ b/server/ai/drivers/http/chatCompletions.test.ts @@ -0,0 +1,51 @@ +import { describe, it, expect } from 'bun:test' +import { + mapChatHistory, + ChatCompletionsTurnTranslator, + trimSlash, +} from './chatCompletions' +import type { SseFrame } from './sse' + +function frame(obj: unknown): SseFrame { + return { event: null, data: JSON.stringify(obj) } +} + +describe('chatCompletions shared adapter', () => { + it('trimSlash strips trailing slashes', () => { + expect(trimSlash('http://x/v1/')).toBe('http://x/v1') + expect(trimSlash('http://x/v1')).toBe('http://x/v1') + }) + + it('mapChatHistory prepends the system prompt as a system message', () => { + const turns = mapChatHistory(['be terse'], [ + { role: 'user', content: [{ kind: 'text', text: 'hi' }] }, + ]) + expect(turns[0]).toEqual([{ role: 'system', content: 'be terse' }]) + expect(turns[1]).toEqual([{ role: 'user', content: 'hi' }]) + }) + + it('translator accumulates streamed text and finishes with stop=true when no tool calls', () => { + const t = new ChatCompletionsTurnTranslator() + const events = t.translate(frame({ choices: [{ delta: { content: 'Hello' } }] })) + expect(events).toEqual([{ type: 'text', text: 'Hello' }]) + const 
result = t.finish() + expect(result.stop).toBe(true) + expect(result.toolCalls).toEqual([]) + }) + + it('translator emits one toolCall event per accumulated call at finish_reason', () => { + const t = new ChatCompletionsTurnTranslator() + t.translate(frame({ choices: [{ delta: { tool_calls: [ + { index: 0, id: 'c1', function: { name: 'insertHtml', arguments: '{"ht' } }, + ] } }] })) + const ev = t.translate(frame({ choices: [{ delta: { tool_calls: [ + { index: 0, function: { arguments: 'ml":"

"}' } }, + ] }, finish_reason: 'tool_calls' }] })) + const toolEvent = ev.find((e) => e.type === 'toolCall') + expect(toolEvent).toBeTruthy() + expect(toolEvent).toMatchObject({ type: 'toolCall', toolName: 'insertHtml', toolCallId: 'c1' }) + const result = t.finish() + expect(result.stop).toBe(false) + expect(result.toolCalls[0]).toMatchObject({ name: 'insertHtml' }) + }) +}) diff --git a/server/ai/drivers/http/chatCompletions.ts b/server/ai/drivers/http/chatCompletions.ts new file mode 100644 index 000000000..c4a006cc9 --- /dev/null +++ b/server/ai/drivers/http/chatCompletions.ts @@ -0,0 +1,350 @@ +/** + * Shared chat/completions adapter for OpenAI-compatible providers. + * + * Extracted from ollama.ts — all drivers using the OpenAI chat/completions + * wire protocol share this module. The factory `makeChatCompletionsAdapter` + * wires the per-provider options (baseUrl, apiKey, label) into the generic + * adapter shape consumed by `runToolLoop`. + */ + +import { Type, parseValue, type Static } from '@core/utils/typeboxHelpers' +import { + SYSTEM_PROMPT_DYNAMIC_BOUNDARY, + type AiContentBlock, + type AiMessage, + type AiStreamEvent, + type AiToolOutput, +} from '../../runtime/types' +import { + type ProviderAdapter, + type TurnResult, + type TurnToolCall, + type TurnToolResult, + type TurnTranslator, + type TurnUsage, +} from './toolLoop' +import type { SseFrame } from './sse' +import { parseToolArguments } from './toolArgs' +import { nanoid } from 'nanoid' + +// --------------------------------------------------------------------------- +// Provider-native chat/completions message shapes (request side) +// --------------------------------------------------------------------------- + +export type ChatContentPart = + | { type: 'text'; text: string } + | { type: 'image_url'; image_url: { url: string } } + +export type ChatToolCall = { + id: string + type: 'function' + function: { name: string; arguments: string } +} + +export type ChatMessage = + | { role: 'system'; 
content: string } + | { role: 'user'; content: string | ChatContentPart[] } + | { role: 'assistant'; content: string; tool_calls?: ChatToolCall[] } + | { role: 'tool'; tool_call_id: string; content: string } + +// Each canonical `AiMessage` maps to one or more chat messages (an assistant +// turn carries text + tool_calls in one message, but several tool results fan +// out into several `role:'tool'` messages), so the loop's `TMessage` is a +// message *array* and the request body flattens before sending. +export type ChatTurn = ChatMessage[] + +// --------------------------------------------------------------------------- +// AiMessage[] → chat/completions messages[] +// --------------------------------------------------------------------------- + +/** + * Map the canonical log into chat/completions messages. The system prompt is + * prepended as a `role:'system'` message (chat/completions has no separate + * `instructions` field). Assistant `toolCall` blocks ride on the assistant + * message as `tool_calls`; `role:'tool'` results become `role:'tool'` messages + * paired by `tool_call_id`. + */ +export function mapChatHistory(systemPrompt: string[], messages: AiMessage[]): ChatTurn[] { + const out: ChatTurn[] = [] + const system = joinSystemPrompt(systemPrompt) + if (system) out.push([{ role: 'system', content: system }]) + + for (const msg of messages) { + if (msg.role === 'user') { + out.push([{ role: 'user', content: userContent(msg.content) }]) + } else if (msg.role === 'assistant') { + out.push([assistantMessage(msg.content)]) + } else if (msg.role === 'tool') { + out.push([{ role: 'tool', tool_call_id: msg.toolCallId, content: toolOutputToString(msg.output) }]) + } + // role:'system' from the log is ignored — system is the prepended block. 
+ } + return out +} + +function joinSystemPrompt(systemPrompt: string[]): string { + return systemPrompt.filter((s) => s !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY).join('\n\n') +} + +function userContent(blocks: AiContentBlock[]): string | ChatContentPart[] { + const hasImage = blocks.some((b) => b.kind === 'image') + if (!hasImage) { + return blocks + .map((b) => (b.kind === 'text' ? b.text : '')) + .filter((s) => s.length > 0) + .join(' ') + } + const parts: ChatContentPart[] = [] + for (const block of blocks) { + if (block.kind === 'text') parts.push({ type: 'text', text: block.text }) + else if (block.kind === 'image') { + // Base64 data URL — the OpenAI-compatible image_url part. + parts.push({ type: 'image_url', image_url: { url: `data:${block.mimeType};base64,${block.data}` } }) + } + } + return parts +} + +function assistantMessage(blocks: AiContentBlock[]): ChatMessage { + let text = '' + const toolCalls: ChatToolCall[] = [] + for (const block of blocks) { + if (block.kind === 'text') text += block.text + else if (block.kind === 'toolCall') { + toolCalls.push({ + id: block.toolCallId, + type: 'function', + function: { name: block.toolName, arguments: JSON.stringify(block.input ?? {}) }, + }) + } + } + return toolCalls.length > 0 + ? { role: 'assistant', content: text, tool_calls: toolCalls } + : { role: 'assistant', content: text } +} + +function toolOutputToString(output: AiToolOutput): string { + if (!output.ok) return output.error ?? 'Tool call failed.' + const text = JSON.stringify(output.data ?? { ok: true }) + // The OpenAI-compatible `role:'tool'` message is text-only — an image can't + // ride in a tool result here. Drop it with a note so the model knows visual + // evidence exists but wasn't delivered through this channel. 
+ if (output.images && output.images.length > 0) { + return `${text}\n\n[${output.images.length} screenshot(s) omitted: this provider delivers tool results as text only.]` + } + return text +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +export function trimSlash(url: string): string { + return url.replace(/\/+$/, '') +} + +// --------------------------------------------------------------------------- +// SSE event schema (boundary validation — no `as` on parsed JSON) +// --------------------------------------------------------------------------- + +const ChatToolCallDeltaSchema = Type.Object( + { + index: Type.Optional(Type.Number()), + id: Type.Optional(Type.String()), + function: Type.Optional( + Type.Object( + { name: Type.Optional(Type.String()), arguments: Type.Optional(Type.String()) }, + { additionalProperties: true }, + ), + ), + }, + { additionalProperties: true }, +) + +const ChatChunkSchema = Type.Object( + { + choices: Type.Optional( + Type.Array( + Type.Object( + { + delta: Type.Optional( + Type.Object( + { + content: Type.Optional(Type.Union([Type.String(), Type.Null()])), + tool_calls: Type.Optional(Type.Array(ChatToolCallDeltaSchema)), + }, + { additionalProperties: true }, + ), + ), + finish_reason: Type.Optional(Type.Union([Type.String(), Type.Null()])), + }, + { additionalProperties: true }, + ), + ), + ), + usage: Type.Optional( + Type.Object( + { prompt_tokens: Type.Optional(Type.Number()), completion_tokens: Type.Optional(Type.Number()) }, + { additionalProperties: true }, + ), + ), + }, + { additionalProperties: true }, +) + +// --------------------------------------------------------------------------- +// SSE translator — one per API call in the loop +// --------------------------------------------------------------------------- + +interface MutableToolCall { + id: string + name: string + arguments: string +} 
+ +export class ChatCompletionsTurnTranslator implements TurnTranslator { + private text = '' + // Tool calls accumulate by their streamed `index`; fragments arrive across + // chunks (id + name on the first, arguments piecemeal after). + private readonly toolsByIndex = new Map() + private readonly order: number[] = [] + private emitted = false + private usage: TurnUsage | null = null + + translate(frame: SseFrame): AiStreamEvent[] { + let chunk: Static + try { + chunk = parseValue(ChatChunkSchema, JSON.parse(frame.data)) + } catch { + // Keep-alive / unparseable frame — not fatal. + return [] + } + + if (chunk.usage) { + this.usage = { + promptTokens: chunk.usage.prompt_tokens ?? 0, + completionTokens: chunk.usage.completion_tokens ?? 0, + } + } + + const choice = chunk.choices?.[0] + if (!choice) return [] + + const events: AiStreamEvent[] = [] + const delta = choice.delta + if (delta) { + if (typeof delta.content === 'string' && delta.content.length > 0) { + this.text += delta.content + events.push({ type: 'text', text: delta.content }) + } + if (delta.tool_calls) { + for (const tc of delta.tool_calls) { + const index = tc.index ?? 0 + let acc = this.toolsByIndex.get(index) + if (!acc) { + acc = { id: tc.id ?? `tool-${nanoid()}`, name: '', arguments: '' } + this.toolsByIndex.set(index, acc) + this.order.push(index) + } + if (tc.id) acc.id = tc.id + if (tc.function?.name) acc.name = tc.function.name + if (typeof tc.function?.arguments === 'string') acc.arguments += tc.function.arguments + } + } + } + + // The finish chunk signals all tool-call fragments are in; emit one + // canonical toolCall event per accumulated call (we don't stream partial + // arguments to the UI — see plan §11). + if (choice.finish_reason && this.toolsByIndex.size > 0 && !this.emitted) { + this.emitted = true + for (const index of this.order) { + const acc = this.toolsByIndex.get(index)! 
+ events.push({ + type: 'toolCall', + toolCallId: acc.id, + toolName: acc.name || 'tool', + input: parseToolArguments(acc.arguments), + status: 'pending', + }) + } + } + + return events + } + + finish(): TurnResult { + const toolCalls: TurnToolCall[] = [] + const chatToolCalls: ChatToolCall[] = [] + for (const index of this.order) { + const acc = this.toolsByIndex.get(index)! + toolCalls.push({ id: acc.id, name: acc.name || 'tool', input: parseToolArguments(acc.arguments) }) + chatToolCalls.push({ + id: acc.id, + type: 'function', + function: { name: acc.name || 'tool', arguments: acc.arguments || '{}' }, + }) + } + + const assistant: ChatMessage = + chatToolCalls.length > 0 + ? { role: 'assistant', content: this.text, tool_calls: chatToolCalls } + : { role: 'assistant', content: this.text } + + return { + stop: toolCalls.length === 0, + toolCalls, + assistantMessage: this.text || chatToolCalls.length > 0 ? [assistant] : null, + usage: this.usage, + } + } +} + +// --------------------------------------------------------------------------- +// Generalized adapter factory +// --------------------------------------------------------------------------- + +export function makeChatCompletionsAdapter(opts: { + baseUrl: string + apiKey: string | null + label: string +}): ProviderAdapter { + const { baseUrl, apiKey, label } = opts + return { + label, + endpoint: `${trimSlash(baseUrl)}/v1/chat/completions`, + buildHeaders() { + const headers: Record = { 'content-type': 'application/json' } + if (apiKey) headers.Authorization = `Bearer ${apiKey}` + return headers + }, + mapHistory(req) { + return mapChatHistory(req.systemPrompt, req.messages) + }, + buildRequestBody(messages, req) { + const body: Record = { + model: req.modelId, + messages: messages.flat(), + stream: true, + stream_options: { include_usage: true }, + } + if (req.tools.length > 0) { + body.tools = req.tools.map((t) => ({ + type: 'function', + function: { name: t.name, description: t.description, parameters: 
t.inputSchema }, + })) + } + return body + }, + buildToolResultMessage(results: TurnToolResult[]): ChatTurn { + return results.map((r) => ({ + role: 'tool' as const, + tool_call_id: r.id, + content: toolOutputToString(r.output), + })) + }, + createTurnTranslator() { + return new ChatCompletionsTurnTranslator() + }, + } +} diff --git a/server/ai/drivers/ollama.ts b/server/ai/drivers/ollama.ts index 8c45c9b76..3e4767020 100644 --- a/server/ai/drivers/ollama.ts +++ b/server/ai/drivers/ollama.ts @@ -1,11 +1,10 @@ /** * Ollama driver — direct HTTP against an OpenAI-compatible local endpoint. * - * Ollama speaks the OpenAI **chat/completions** wire protocol (NOT the - * Responses protocol the OpenAI/OpenRouter drivers use), so it carries its own - * message mapping + SSE translation here; the shared `http/` layer still owns - * SSE framing, the multi-turn tool loop, tool execution, and error - * classification. + * Ollama speaks the OpenAI **chat/completions** wire protocol; the shared + * `http/chatCompletions.ts` module owns the message mapping + SSE translation. + * This file owns only Ollama-specific concerns: credential validation, live + * model catalogue (`/api/tags`), and fallback models. * * Auth: `baseUrl` mode. The endpoint is the credential's `baseUrl`; an optional * stored API key is sent as a bearer (some Ollama deployments sit behind a @@ -16,15 +15,11 @@ * - listModels(): GET `${baseUrl}/api/tags` (native Ollama catalogue). 
*/ -import { Type, parseValue, type Static } from '@core/utils/typeboxHelpers' +import { Type, parseValue } from '@core/utils/typeboxHelpers' import { - SYSTEM_PROMPT_DYNAMIC_BOUNDARY, type AiAuthMode, - type AiContentBlock, - type AiMessage, type AiProviderId, type AiStreamEvent, - type AiToolOutput, } from '../runtime/types' import type { AiProvider, @@ -32,18 +27,8 @@ import type { AiResolvedCredential, AiStreamRequest, } from './types' -import { - runToolLoop, - type ProviderAdapter, - type TurnResult, - type TurnToolCall, - type TurnToolResult, - type TurnTranslator, - type TurnUsage, -} from './http/toolLoop' -import type { SseFrame } from './http/sse' -import { parseToolArguments } from './http/toolArgs' -import { nanoid } from 'nanoid' +import { runToolLoop } from './http/toolLoop' +import { makeChatCompletionsAdapter, trimSlash } from './http/chatCompletions' const SUPPORTED_AUTH_MODES: AiAuthMode[] = ['baseUrl'] @@ -104,7 +89,14 @@ export const ollamaDriver: AiProvider = { } return } - yield* runToolLoop(makeOllamaAdapter(req.credentials.baseUrl, req.credentials.apiKey), req) + yield* runToolLoop( + makeChatCompletionsAdapter({ + baseUrl: req.credentials.baseUrl, + apiKey: req.credentials.apiKey, + label: 'Ollama', + }), + req, + ) }, } @@ -140,332 +132,3 @@ async function fetchOllamaModels(baseUrl: string): Promise { return FALLBACK_MODELS } } - -// --------------------------------------------------------------------------- -// Provider-native chat/completions message shapes (request side) -// --------------------------------------------------------------------------- - -type ChatContentPart = - | { type: 'text'; text: string } - | { type: 'image_url'; image_url: { url: string } } - -interface ChatToolCall { - id: string - type: 'function' - function: { name: string; arguments: string } -} - -export type ChatMessage = - | { role: 'system'; content: string } - | { role: 'user'; content: string | ChatContentPart[] } - | { role: 'assistant'; content: 
string; tool_calls?: ChatToolCall[] } - | { role: 'tool'; tool_call_id: string; content: string } - -// Each canonical `AiMessage` maps to one or more chat messages (an assistant -// turn carries text + tool_calls in one message, but several tool results fan -// out into several `role:'tool'` messages), so the loop's `TMessage` is a -// message *array* and the request body flattens before sending. -type ChatTurn = ChatMessage[] - -// --------------------------------------------------------------------------- -// AiMessage[] → chat/completions messages[] -// --------------------------------------------------------------------------- - -/** - * Map the canonical log into chat/completions messages. The system prompt is - * prepended as a `role:'system'` message (chat/completions has no separate - * `instructions` field). Assistant `toolCall` blocks ride on the assistant - * message as `tool_calls`; `role:'tool'` results become `role:'tool'` messages - * paired by `tool_call_id`. - */ -export function mapChatHistory(systemPrompt: string[], messages: AiMessage[]): ChatTurn[] { - const out: ChatTurn[] = [] - const system = joinSystemPrompt(systemPrompt) - if (system) out.push([{ role: 'system', content: system }]) - - for (const msg of messages) { - if (msg.role === 'user') { - out.push([{ role: 'user', content: userContent(msg.content) }]) - } else if (msg.role === 'assistant') { - out.push([assistantMessage(msg.content)]) - } else if (msg.role === 'tool') { - out.push([{ role: 'tool', tool_call_id: msg.toolCallId, content: toolOutputToString(msg.output) }]) - } - // role:'system' from the log is ignored — system is the prepended block. 
- } - return out -} - -function joinSystemPrompt(systemPrompt: string[]): string { - return systemPrompt.filter((s) => s !== SYSTEM_PROMPT_DYNAMIC_BOUNDARY).join('\n\n') -} - -function userContent(blocks: AiContentBlock[]): string | ChatContentPart[] { - const hasImage = blocks.some((b) => b.kind === 'image') - if (!hasImage) { - return blocks - .map((b) => (b.kind === 'text' ? b.text : '')) - .filter((s) => s.length > 0) - .join(' ') - } - const parts: ChatContentPart[] = [] - for (const block of blocks) { - if (block.kind === 'text') parts.push({ type: 'text', text: block.text }) - else if (block.kind === 'image') { - // Base64 data URL — the OpenAI-compatible image_url part. - parts.push({ type: 'image_url', image_url: { url: `data:${block.mimeType};base64,${block.data}` } }) - } - } - return parts -} - -function assistantMessage(blocks: AiContentBlock[]): ChatMessage { - let text = '' - const toolCalls: ChatToolCall[] = [] - for (const block of blocks) { - if (block.kind === 'text') text += block.text - else if (block.kind === 'toolCall') { - toolCalls.push({ - id: block.toolCallId, - type: 'function', - function: { name: block.toolName, arguments: JSON.stringify(block.input ?? {}) }, - }) - } - } - return toolCalls.length > 0 - ? { role: 'assistant', content: text, tool_calls: toolCalls } - : { role: 'assistant', content: text } -} - -function toolOutputToString(output: AiToolOutput): string { - if (!output.ok) return output.error ?? 'Tool call failed.' - const text = JSON.stringify(output.data ?? { ok: true }) - // The OpenAI-compatible `role:'tool'` message is text-only — an image can't - // ride in a tool result here. Drop it with a note so the model knows visual - // evidence exists but wasn't delivered through this channel. 
- if (output.images && output.images.length > 0) { - return `${text}\n\n[${output.images.length} screenshot(s) omitted: this provider delivers tool results as text only.]` - } - return text -} - -// --------------------------------------------------------------------------- -// Adapter -// --------------------------------------------------------------------------- - -function makeOllamaAdapter(baseUrl: string, apiKey: string | null): ProviderAdapter { - return { - label: 'Ollama', - endpoint: `${trimSlash(baseUrl)}/v1/chat/completions`, - - buildHeaders() { - const headers: Record = { 'content-type': 'application/json' } - // Optional bearer — some Ollama deployments sit behind an auth proxy. - if (apiKey) headers.Authorization = `Bearer ${apiKey}` - return headers - }, - - mapHistory(req) { - return mapChatHistory(req.systemPrompt, req.messages) - }, - - buildRequestBody(messages, req) { - const body: Record = { - model: req.modelId, - messages: messages.flat(), - stream: true, - // Ollama emits a final usage-only chunk when asked. - stream_options: { include_usage: true }, - } - if (req.tools.length > 0) { - body.tools = req.tools.map((t) => ({ - type: 'function', - function: { - name: t.name, - description: t.description, - // TypeBox schema IS JSON Schema — pass it straight through. 
- parameters: t.inputSchema, - }, - })) - } - return body - }, - - buildToolResultMessage(results: TurnToolResult[]): ChatTurn { - return results.map((r) => ({ - role: 'tool' as const, - tool_call_id: r.id, - content: toolOutputToString(r.output), - })) - }, - - createTurnTranslator() { - return new ChatCompletionsTurnTranslator() - }, - } -} - -// --------------------------------------------------------------------------- -// SSE event schema (boundary validation — no `as` on parsed JSON) -// --------------------------------------------------------------------------- - -const ChatToolCallDeltaSchema = Type.Object( - { - index: Type.Optional(Type.Number()), - id: Type.Optional(Type.String()), - function: Type.Optional( - Type.Object( - { name: Type.Optional(Type.String()), arguments: Type.Optional(Type.String()) }, - { additionalProperties: true }, - ), - ), - }, - { additionalProperties: true }, -) - -const ChatChunkSchema = Type.Object( - { - choices: Type.Optional( - Type.Array( - Type.Object( - { - delta: Type.Optional( - Type.Object( - { - content: Type.Optional(Type.Union([Type.String(), Type.Null()])), - tool_calls: Type.Optional(Type.Array(ChatToolCallDeltaSchema)), - }, - { additionalProperties: true }, - ), - ), - finish_reason: Type.Optional(Type.Union([Type.String(), Type.Null()])), - }, - { additionalProperties: true }, - ), - ), - ), - usage: Type.Optional( - Type.Object( - { prompt_tokens: Type.Optional(Type.Number()), completion_tokens: Type.Optional(Type.Number()) }, - { additionalProperties: true }, - ), - ), - }, - { additionalProperties: true }, -) - -// --------------------------------------------------------------------------- -// SSE translator — one per API call in the loop -// --------------------------------------------------------------------------- - -interface MutableToolCall { - id: string - name: string - arguments: string -} - -export class ChatCompletionsTurnTranslator implements TurnTranslator { - private text = '' - // Tool calls 
accumulate by their streamed `index`; fragments arrive across - // chunks (id + name on the first, arguments piecemeal after). - private readonly toolsByIndex = new Map() - private readonly order: number[] = [] - private emitted = false - private usage: TurnUsage | null = null - - translate(frame: SseFrame): AiStreamEvent[] { - let chunk: Static - try { - chunk = parseValue(ChatChunkSchema, JSON.parse(frame.data)) - } catch { - // Keep-alive / unparseable frame — not fatal. - return [] - } - - if (chunk.usage) { - this.usage = { - promptTokens: chunk.usage.prompt_tokens ?? 0, - completionTokens: chunk.usage.completion_tokens ?? 0, - } - } - - const choice = chunk.choices?.[0] - if (!choice) return [] - - const events: AiStreamEvent[] = [] - const delta = choice.delta - if (delta) { - if (typeof delta.content === 'string' && delta.content.length > 0) { - this.text += delta.content - events.push({ type: 'text', text: delta.content }) - } - if (delta.tool_calls) { - for (const tc of delta.tool_calls) { - const index = tc.index ?? 0 - let acc = this.toolsByIndex.get(index) - if (!acc) { - acc = { id: tc.id ?? `tool-${nanoid()}`, name: '', arguments: '' } - this.toolsByIndex.set(index, acc) - this.order.push(index) - } - if (tc.id) acc.id = tc.id - if (tc.function?.name) acc.name = tc.function.name - if (typeof tc.function?.arguments === 'string') acc.arguments += tc.function.arguments - } - } - } - - // The finish chunk signals all tool-call fragments are in; emit one - // canonical toolCall event per accumulated call (we don't stream partial - // arguments to the UI — see plan §11). - if (choice.finish_reason && this.toolsByIndex.size > 0 && !this.emitted) { - this.emitted = true - for (const index of this.order) { - const acc = this.toolsByIndex.get(index)! 
- events.push({ - type: 'toolCall', - toolCallId: acc.id, - toolName: acc.name || 'tool', - input: parseToolArguments(acc.arguments), - status: 'pending', - }) - } - } - - return events - } - - finish(): TurnResult { - const toolCalls: TurnToolCall[] = [] - const chatToolCalls: ChatToolCall[] = [] - for (const index of this.order) { - const acc = this.toolsByIndex.get(index)! - toolCalls.push({ id: acc.id, name: acc.name || 'tool', input: parseToolArguments(acc.arguments) }) - chatToolCalls.push({ - id: acc.id, - type: 'function', - function: { name: acc.name || 'tool', arguments: acc.arguments || '{}' }, - }) - } - - const assistant: ChatMessage = - chatToolCalls.length > 0 - ? { role: 'assistant', content: this.text, tool_calls: chatToolCalls } - : { role: 'assistant', content: this.text } - - return { - stop: toolCalls.length === 0, - toolCalls, - assistantMessage: this.text || chatToolCalls.length > 0 ? [assistant] : null, - usage: this.usage, - } - } -} - -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -function trimSlash(url: string): string { - return url.replace(/\/+$/, '') -} diff --git a/src/__tests__/architecture/ai-driver-shared-helpers.test.ts b/src/__tests__/architecture/ai-driver-shared-helpers.test.ts index 474572525..e13628a59 100644 --- a/src/__tests__/architecture/ai-driver-shared-helpers.test.ts +++ b/src/__tests__/architecture/ai-driver-shared-helpers.test.ts @@ -30,7 +30,7 @@ const read = (rel: string) => readFileSync(join(REPO_ROOT, rel), 'utf8') const DRIVER_FILES = [ 'server/ai/drivers/anthropic.ts', 'server/ai/drivers/responses-shared.ts', - 'server/ai/drivers/ollama.ts', + 'server/ai/drivers/http/chatCompletions.ts', ] describe('ai-driver-shared-helpers gate', () => { @@ -42,7 +42,7 @@ describe('ai-driver-shared-helpers gate', () => { it('every driver imports parseToolArguments from the shared module', () => { for 
(const file of DRIVER_FILES) { const src = read(file) - expect(src).toContain("from './http/toolArgs'") + expect(src).toMatch(/from\s+'(\.\/|\.\.\/)+(http\/)?toolArgs'/) // No private copy of the parser may shadow the shared one. expect(src).not.toMatch(/function\s+parseJsonOrEmpty\b/) expect(src).not.toMatch(/function\s+parseToolArguments\b/) @@ -58,6 +58,7 @@ describe('ai-driver-shared-helpers gate', () => { it('SYSTEM_PROMPT_DYNAMIC_BOUNDARY is declared exactly once', () => { const SCAN = [ ...DRIVER_FILES, + 'server/ai/drivers/ollama.ts', 'server/ai/drivers/types.ts', 'server/ai/runtime/types.ts', 'server/ai/tools/site/systemPrompt.ts', From 19d02f55925d6e20f967dd837cf42ec0e7942f66 Mon Sep 17 00:00:00 2001 From: Mario Date: Sat, 27 Jun 2026 18:11:49 -0700 Subject: [PATCH 2/8] feat(ai): add openai-compatible provider driver (custom base URL) Co-Authored-By: Claude Sonnet 4.6 --- server/ai/drivers/index.ts | 2 + server/ai/drivers/openaiCompatible.test.ts | 39 ++++++++ server/ai/drivers/openaiCompatible.ts | 106 +++++++++++++++++++++ server/ai/handlers/credentials.ts | 1 + server/ai/handlers/models.ts | 7 +- server/ai/runtime/types.ts | 2 +- 6 files changed, 154 insertions(+), 3 deletions(-) create mode 100644 server/ai/drivers/openaiCompatible.test.ts create mode 100644 server/ai/drivers/openaiCompatible.ts diff --git a/server/ai/drivers/index.ts b/server/ai/drivers/index.ts index fc2097e61..ae02458cb 100644 --- a/server/ai/drivers/index.ts +++ b/server/ai/drivers/index.ts @@ -12,12 +12,14 @@ import { anthropicDriver } from './anthropic' import { openaiDriver } from './openai' import { ollamaDriver } from './ollama' import { openrouterDriver } from './openrouter' +import { openaiCompatibleDriver } from './openaiCompatible' const DRIVERS: Record = { anthropic: anthropicDriver, openai: openaiDriver, ollama: ollamaDriver, openrouter: openrouterDriver, + 'openai-compatible': openaiCompatibleDriver, } /** Returns the driver for a provider id, or throws if unknown. 
*/
diff --git a/server/ai/drivers/openaiCompatible.test.ts b/server/ai/drivers/openaiCompatible.test.ts
new file mode 100644
index 000000000..1bc0408c6
--- /dev/null
+++ b/server/ai/drivers/openaiCompatible.test.ts
@@ -0,0 +1,39 @@
+import { describe, it, expect, afterEach } from 'bun:test'
+import { openaiCompatibleDriver } from './openaiCompatible'
+import type { AiResolvedCredential } from './types'
+
+const realFetch = globalThis.fetch
+afterEach(() => { globalThis.fetch = realFetch })
+
+function creds(baseUrl: string | null): AiResolvedCredential {
+  return { id: 'c1', providerId: 'openai-compatible', authMode: 'baseUrl', apiKey: 'k', baseUrl }
+}
+
+describe('openai-compatible driver', () => {
+  it('reports baseUrl as its only auth mode', () => {
+    expect(openaiCompatibleDriver.supportedAuthModes).toEqual(['baseUrl'])
+  })
+
+  it('listModels maps /v1/models data[].id into picker models', async () => {
+    globalThis.fetch = (async (url: string) => {
+      expect(String(url)).toBe('https://api.groq.com/openai/v1/models')
+      return new Response(JSON.stringify({ data: [{ id: 'llama-3.3-70b' }, { id: 'mixtral-8x7b' }] }), { status: 200 })
+    }) as unknown as typeof fetch
+    const models = await openaiCompatibleDriver.listModels(creds('https://api.groq.com/openai'))
+    expect(models.map((m) => m.id)).toEqual(['llama-3.3-70b', 'mixtral-8x7b'])
+    expect(models[0]).toMatchObject({ label: 'llama-3.3-70b', catalogueSource: 'live' })
+  })
+
+  it('listModels returns [] when the endpoint is unreachable or non-OK', async () => {
+    globalThis.fetch = (async () => new Response('nope', { status: 500 })) as unknown as typeof fetch
+    expect(await openaiCompatibleDriver.listModels(creds('https://x/v1'))).toEqual([])
+  })
+
+  it('listModels returns [] with no base URL', async () => {
+    expect(await openaiCompatibleDriver.listModels(creds(null))).toEqual([])
+  })
+
+  it('capabilities default to tool-calling + streaming', () => {
+    expect(openaiCompatibleDriver.capabilities('anything')).toMatchObject({ toolCalling: true, streaming: true })
+  })
+})
diff --git a/server/ai/drivers/openaiCompatible.ts b/server/ai/drivers/openaiCompatible.ts
new file mode 100644
index 000000000..da29c0387
--- /dev/null
+++ b/server/ai/drivers/openaiCompatible.ts
@@ -0,0 +1,106 @@
+/**
+ * OpenAI-Compatible driver — direct HTTP against any endpoint that speaks the
+ * OpenAI `/v1/chat/completions` wire protocol (Groq, Together, DeepSeek,
+ * Mistral, Fireworks, self-hosted vLLM / LM Studio, …).
+ *
+ * Auth: `baseUrl` mode. The endpoint is the credential's `baseUrl`; an optional
+ * stored API key is sent as a bearer (hosted services need one; local servers
+ * often don't). The chat/completions machinery is shared with the Ollama driver
+ * via `http/chatCompletions.ts`; this file owns only model discovery
+ * (`GET ${baseUrl}/v1/models`) and the generic capability defaults.
+ */
+
+import { Type, parseValue } from '@core/utils/typeboxHelpers'
+import type { AiAuthMode, AiProviderId, AiStreamEvent } from '../runtime/types'
+import type {
+  AiProvider,
+  AiProviderModel,
+  AiResolvedCredential,
+  AiStreamRequest,
+} from './types'
+import { runToolLoop } from './http/toolLoop'
+import { makeChatCompletionsAdapter, trimSlash } from './http/chatCompletions'
+
+const SUPPORTED_AUTH_MODES: AiAuthMode[] = ['baseUrl']
+
+const GENERIC_CAPABILITIES = {
+  toolCalling: true,
+  visionInput: false,
+  promptCache: false,
+  streaming: true,
+} as const
+
+export const openaiCompatibleDriver: AiProvider = {
+  id: 'openai-compatible' as AiProviderId,
+  label: 'OpenAI-Compatible',
+  supportedAuthModes: SUPPORTED_AUTH_MODES,
+
+  capabilities(_modelId: string) {
+    // Arbitrary endpoints report no per-model capability flags. Tool-calling
+    // must default true — the site/content agents require it; picking a model
+    // that lacks it is the operator's choice.
+    return { ...GENERIC_CAPABILITIES }
+  },
+
+  async listModels(creds: AiResolvedCredential) {
+    if (creds.authMode !== 'baseUrl' || !creds.baseUrl) return []
+    return fetchOpenAiCompatibleModels(creds.baseUrl, creds.apiKey)
+  },
+
+  async *stream(req: AiStreamRequest): AsyncIterable<AiStreamEvent> {
+    if (req.credentials.authMode !== 'baseUrl' || !req.credentials.baseUrl) {
+      yield {
+        type: 'error',
+        message:
+          'This provider requires a base URL. Add a base-URL credential in /admin/ai/providers and pick it for the site default.',
+      }
+      return
+    }
+    yield* runToolLoop(
+      makeChatCompletionsAdapter({
+        baseUrl: req.credentials.baseUrl,
+        apiKey: req.credentials.apiKey,
+        label: 'OpenAI-Compatible',
+      }),
+      req,
+    )
+  },
+}
+
+// ---------------------------------------------------------------------------
+// Live model catalogue — GET /v1/models (standard OpenAI list shape)
+// ---------------------------------------------------------------------------
+
+const ModelsResponseSchema = Type.Object(
+  { data: Type.Array(Type.Object({ id: Type.String() }, { additionalProperties: true })) },
+  { additionalProperties: true },
+)
+
+/**
+ * Fetch the model catalogue from `GET ${baseUrl}/v1/models`. Unlike the OpenAI
+ * driver we do NOT filter by family or derive tiers — the endpoint is arbitrary,
+ * so the id is the label and capabilities are the generic defaults. Any failure
+ * (offline, non-OK, unparseable) returns [] so the picker stays empty and the
+ * credential Test button surfaces the underlying error.
+ */
+async function fetchOpenAiCompatibleModels(
+  baseUrl: string,
+  apiKey: string | null,
+): Promise<AiProviderModel[]> {
+  try {
+    const headers: Record<string, string> = {}
+    if (apiKey) headers.Authorization = `Bearer ${apiKey}`
+    const res = await fetch(`${trimSlash(baseUrl)}/v1/models`, { headers })
+    if (!res.ok) return []
+    const parsed = parseValue(ModelsResponseSchema, await res.json())
+    return parsed.data.map((m) => ({
+      id: m.id,
+      label: m.id,
+      catalogueSource: 'live' as const,
+      capabilities: { ...GENERIC_CAPABILITIES },
+    }))
+  } catch (err) {
+    console.error('[ai/openai-compatible] models request failed:', err)
+    return []
+  }
+}
diff --git a/server/ai/handlers/credentials.ts b/server/ai/handlers/credentials.ts
index d105541d0..faf500eff 100644
--- a/server/ai/handlers/credentials.ts
+++ b/server/ai/handlers/credentials.ts
@@ -35,6 +35,7 @@ const ProviderId = Type.Union([
   Type.Literal('openai'),
   Type.Literal('ollama'),
   Type.Literal('openrouter'),
+  Type.Literal('openai-compatible'),
 ])
 
 const CreateBodySchema = Type.Union([
diff --git a/server/ai/handlers/models.ts b/server/ai/handlers/models.ts
index 11384e100..f94915424 100644
--- a/server/ai/handlers/models.ts
+++ b/server/ai/handlers/models.ts
@@ -19,7 +19,7 @@ import { getModelCatalogue, pricingKey } from '../pricing'
 import type { AiProviderModel } from '../drivers/types'
 import type { AiProviderId } from '../runtime/types'
 
-const VALID_PROVIDERS: AiProviderId[] = ['anthropic', 'openai', 'ollama', 'openrouter']
+const VALID_PROVIDERS: AiProviderId[] = ['anthropic', 'openai', 'ollama', 'openrouter', 'openai-compatible']
 
 export function tryHandleAiModels(
   req: Request,
@@ -75,7 +75,10 @@ async function handleModels(
     resolved = {
       id: '',
       providerId,
-      authMode: providerId === 'ollama' ? ('baseUrl' as const) : ('apiKey' as const),
+      authMode:
+        providerId === 'ollama' || providerId === 'openai-compatible'
+          ?
('baseUrl' as const) + : ('apiKey' as const), apiKey: null, baseUrl: null, } diff --git a/server/ai/runtime/types.ts b/server/ai/runtime/types.ts index 29f90acca..f8aca9ebd 100644 --- a/server/ai/runtime/types.ts +++ b/server/ai/runtime/types.ts @@ -24,7 +24,7 @@ export type { AiContentBlock, AiToolImage, AiToolOutput } from '@core/ai' // Provider identity + auth modes // --------------------------------------------------------------------------- -export type AiProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' +export type AiProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible' /** * Credential auth modes. * From 533c2935da9fcfbaa6a81f7d0b456d49edc2def6 Mon Sep 17 00:00:00 2001 From: Mario Date: Sat, 27 Jun 2026 18:24:58 -0700 Subject: [PATCH 3/8] feat(ai): expose openai-compatible provider in the admin providers UI --- src/admin/ai/api.ts | 7 ++++--- src/admin/pages/ai/tabs/ProvidersTab.tsx | 15 +++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/admin/ai/api.ts b/src/admin/ai/api.ts index 13b77655f..3fec25640 100644 --- a/src/admin/ai/api.ts +++ b/src/admin/ai/api.ts @@ -27,6 +27,7 @@ const ProviderId = Type.Union([ Type.Literal('openai'), Type.Literal('ollama'), Type.Literal('openrouter'), + Type.Literal('openai-compatible'), ]) const AuthMode = Type.Union([ @@ -170,13 +171,13 @@ export async function listCredentials(signal?: AbortSignal): Promise { // --------------------------------------------------------------------------- export async function listModels( - providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter', + providerId: 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible', credentialId?: string, ): Promise { const body = await apiRequest(`/admin/api/ai/providers/${providerId}/models`, { diff --git a/src/admin/pages/ai/tabs/ProvidersTab.tsx b/src/admin/pages/ai/tabs/ProvidersTab.tsx index fa15b3eb8..e980632ce 100644 --- 
a/src/admin/pages/ai/tabs/ProvidersTab.tsx +++ b/src/admin/pages/ai/tabs/ProvidersTab.tsx @@ -27,7 +27,7 @@ import { ApiError } from '@core/http' import styles from '../AiPage.module.css' import { getErrorMessage } from '@core/utils/errorMessage' -type ProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' +type ProviderId = 'anthropic' | 'openai' | 'ollama' | 'openrouter' | 'openai-compatible' type AuthMode = 'apiKey' | 'baseUrl' // Each provider has exactly one credential shape; the UI derives it instead @@ -37,6 +37,7 @@ const PROVIDERS: Array<{ id: ProviderId; label: string; authMode: AuthMode }> = { id: 'openai', label: 'OpenAI', authMode: 'apiKey' }, { id: 'openrouter', label: 'OpenRouter', authMode: 'apiKey' }, { id: 'ollama', label: 'Ollama (local)', authMode: 'baseUrl' }, + { id: 'openai-compatible', label: 'OpenAI-Compatible', authMode: 'baseUrl' }, ] const AUTH_MODE_LABEL: Record = { @@ -49,12 +50,14 @@ const PROVIDER_LABEL: Record = { openai: 'OpenAI', openrouter: 'OpenRouter', ollama: 'Ollama', + 'openai-compatible': 'OpenAI-Compatible', } // Hint text for the API-key field, per provider key prefix. const API_KEY_PLACEHOLDER: Partial> = { anthropic: 'sk-ant-...', openrouter: 'sk-or-...', + 'openai-compatible': 'sk-... (optional)', } async function deleteCredentialAction( @@ -278,12 +281,14 @@ function AddCredentialDialog({ const [providerId, setProviderId] = useState('anthropic') const [displayLabel, setDisplayLabel] = useState('') const [apiKey, setApiKey] = useState('') - const [baseUrl, setBaseUrl] = useState('http://localhost:11434') + const [baseUrl, setBaseUrl] = useState('') const [busy, setBusy] = useState(false) const [error, setError] = useState(null) const providerSpec = PROVIDERS.find((p) => p.id === providerId)! const effectiveAuthMode = providerSpec.authMode + const baseUrlPlaceholder = + providerId === 'ollama' ? 
'http://localhost:11434' : 'https://api.your-provider.com/v1' async function handleSubmit(e: React.FormEvent) { e.preventDefault() @@ -360,12 +365,14 @@ function AddCredentialDialog({ id={baseUrlInputId} value={baseUrl} onChange={(e) => setBaseUrl(e.currentTarget.value)} - placeholder="http://localhost:11434" + placeholder={baseUrlPlaceholder} required />

- + Date: Sat, 27 Jun 2026 18:32:03 -0700 Subject: [PATCH 4/8] docs(ai): document the openai-compatible provider --- docs/features/agent.md | 44 +++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/docs/features/agent.md b/docs/features/agent.md index 9b1c3c93c..1fbaa3a36 100644 --- a/docs/features/agent.md +++ b/docs/features/agent.md @@ -2,7 +2,7 @@ The AI Agent is a model-powered assistant integrated into the visual editor. The user types a request in the Agent Panel; the agent reads the current page snapshot, plans a sequence of edits, and executes them by calling tools. Structure is written as semantic HTML (`insertHtml` / `replaceNodeHtml`); styling is written as CSS — a `