diff --git a/src/api/providers/__tests__/openai-codex-responses.spec.ts b/src/api/providers/__tests__/openai-codex-responses.spec.ts new file mode 100644 index 0000000000..0dd8fd201f --- /dev/null +++ b/src/api/providers/__tests__/openai-codex-responses.spec.ts @@ -0,0 +1,459 @@
+// npx vitest run api/providers/__tests__/openai-codex-responses.spec.ts
+
+import { OpenAiHandler } from "../openai"
+import { ApiHandlerOptions } from "../../../shared/api"
+import { Anthropic } from "@anthropic-ai/sdk"
+import { openAiModelInfoSaneDefaults } from "@roo-code/types"
+
+const mockChatCreate = vitest.fn()
+const mockResponsesCreate = vitest.fn()
+
+// The default export and `AzureOpenAI` are one and the same mock constructor.
+// Every client it builds exposes the two spies above, so a test can tell which
+// endpoint the handler picked.
+vitest.mock("openai", () => {
+	const clientCtor = vitest.fn()
+	clientCtor.mockImplementation(() => ({
+		chat: { completions: { create: mockChatCreate } },
+		responses: { create: mockResponsesCreate },
+	}))
+	return { __esModule: true, default: clientCtor, AzureOpenAI: clientCtor }
+})
+
+type Conversation = Anthropic.Messages.MessageParam[]
+
+const AZURE_DEPLOYMENT_URL = "https://test.openai.azure.com/openai/deployments/gpt5.3"
+
+/** Builds an Azure handler for gpt-5.3-codex; `withBaseUrl` also sets the deployment URL. */
+function codexHandler(withBaseUrl = false): OpenAiHandler {
+	return new OpenAiHandler({
+		openAiApiKey: "test-key",
+		openAiModelId: "gpt-5.3-codex",
+		...(withBaseUrl ? { openAiBaseUrl: AZURE_DEPLOYMENT_URL } : {}),
+		openAiUseAzure: true,
+	})
+}
+
+/** Builds a plain (non-codex) gpt-4 handler pointed at the public OpenAI endpoint. */
+function gpt4Handler(): OpenAiHandler {
+	return new OpenAiHandler({
+		openAiApiKey: "test-key",
+		openAiModelId: "gpt-4",
+		openAiBaseUrl: "https://api.openai.com/v1",
+	})
+}
+
+/** Wraps a fixed list of events in an async iterable, the shape a streaming call resolves to. */
+function eventStream(events: any[]) {
+	return {
+		[Symbol.asyncIterator]: async function* () {
+			for (const event of events) {
+				yield event
+			}
+		},
+	}
+}
+
+/** Terminal `response.done` event carrying the given token usage. */
+function usageDone(input_tokens: number, output_tokens: number) {
+	return { type: "response.done", response: { usage: { input_tokens, output_tokens } } }
+}
+
+/** Runs `createMessage` to completion and returns every chunk it produced. */
+async function drain(handler: OpenAiHandler, systemPrompt: string, messages: Conversation): Promise<any[]> {
+	const received: any[] = []
+	for await (const chunk of handler.createMessage(systemPrompt, messages, { taskId: "test" })) {
+		received.push(chunk)
+	}
+	return received
+}
+
+/** Selects the chunks of one `type`. */
+const ofType = (chunks: any[], type: string) => chunks.filter((c) => c.type === type)
+
+describe("OpenAiHandler - Codex model detection", () => {
+	beforeEach(() => {
+		mockChatCreate.mockClear()
+		mockResponsesCreate.mockClear()
+	})
+
+	describe("_isCodexModel", () => {
+		// title, factory for the handler under test, model id passed to the predicate
+		const codexCases: Array<[string, () => OpenAiHandler, string]> = [
+			["should detect gpt-5.3-codex as a codex model", () => codexHandler(true), "gpt-5.3-codex"],
+			[
+				"should detect gpt-5.1-codex as a codex model",
+				() => new OpenAiHandler({ openAiApiKey: "test-key", openAiModelId: "gpt-5.1-codex" }),
+				"gpt-5.1-codex",
+			],
+			[
+				"should detect codex in a case-insensitive manner",
+				() => new OpenAiHandler({ openAiApiKey: "test-key", openAiModelId: "GPT-5.3-CODEX" }),
+				"GPT-5.3-CODEX",
+			],
+		]
+
+		for (const [title, makeHandler, modelId] of codexCases) {
+			it(title, () => {
+				// The predicate is protected, hence the any cast.
+				expect((makeHandler() as any)._isCodexModel(modelId)).toBe(true)
+			})
+		}
+
+		it("should not detect regular models as codex", () => {
+			const handler = new OpenAiHandler({ openAiApiKey: "test-key", openAiModelId: "gpt-4" })
+			for (const modelId of ["gpt-4", "gpt-4o", "o3-mini"]) {
+				expect((handler as any)._isCodexModel(modelId)).toBe(false)
+			}
+		})
+	})
+
+	describe("createMessage with codex model", () => {
+		it("should use Responses API for codex models instead of Chat Completions", async () => {
+			mockResponsesCreate.mockResolvedValue(
+				eventStream([{ type: "response.output_text.delta", delta: "Hello from codex!" }, usageDone(10, 5)]),
+			)
+
+			const chunks = await drain(codexHandler(true), "You are a helpful assistant", [
+				{ role: "user", content: "Hello" },
+			])
+
+			// Routed to responses.create, never to chat.completions.create
+			expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+			expect(mockChatCreate).not.toHaveBeenCalled()
+
+			// Shape of the outgoing request
+			const requestBody = mockResponsesCreate.mock.calls[0][0]
+			expect(requestBody.model).toBe("gpt-5.3-codex")
+			expect(requestBody.stream).toBe(true)
+			expect(requestBody.instructions).toBe("You are a helpful assistant")
+			expect(requestBody.input).toBeDefined()
+			expect(Array.isArray(requestBody.input)).toBe(true)
+
+			// Chunks surfaced to the caller
+			const textChunks = ofType(chunks, "text")
+			expect(textChunks.length).toBe(1)
+			expect(textChunks[0].text).toBe("Hello from codex!")
+
+			const usageChunks = ofType(chunks, "usage")
+			expect(usageChunks.length).toBe(1)
+			expect(usageChunks[0].inputTokens).toBe(10)
+			expect(usageChunks[0].outputTokens).toBe(5)
+		})
+
+		it("should use Chat Completions for non-codex models", async () => {
+			mockChatCreate.mockResolvedValue(
+				eventStream([
+					{ choices: [{ delta: { content: "Hello" }, index: 0 }], usage: null },
+					{
+						choices: [{ delta: {}, index: 0 }],
+						usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+					},
+				]),
+			)
+
+			await drain(gpt4Handler(), "System", [{ role: "user", content: "Hello" }])
+
+			// Routed to chat.completions.create, never to responses.create
+			expect(mockChatCreate).toHaveBeenCalledTimes(1)
+			expect(mockResponsesCreate).not.toHaveBeenCalled()
+		})
+	})
+
+	describe("createMessage codex conversation formatting", () => {
+		it("should format conversation with tool use correctly for Responses API", async () => {
+			mockResponsesCreate.mockResolvedValue(
+				eventStream([{ type: "response.output_text.delta", delta: "Done." }, usageDone(20, 3)]),
+			)
+
+			const messages: Conversation = [
+				{ role: "user", content: "What is 1+1?" },
+				{
+					role: "assistant",
+					content: [
+						{ type: "text", text: "Let me calculate that." },
+						{ type: "tool_use", id: "call_123", name: "calculator", input: { expression: "1+1" } },
+					],
+				},
+				{
+					role: "user",
+					content: [{ type: "tool_result", tool_use_id: "call_123", content: "2" }],
+				},
+			]
+
+			await drain(codexHandler(true), "You are helpful", messages)
+
+			const [userItem, assistantItem, callItem, resultItem] = mockResponsesCreate.mock.calls[0][0].input
+
+			// First item: user message
+			expect(userItem.role).toBe("user")
+			expect(userItem.content[0].type).toBe("input_text")
+			expect(userItem.content[0].text).toBe("What is 1+1?")
+
+			// Second item: assistant text
+			expect(assistantItem.role).toBe("assistant")
+			expect(assistantItem.content[0].type).toBe("output_text")
+
+			// Third item: function_call
+			expect(callItem.type).toBe("function_call")
+			expect(callItem.name).toBe("calculator")
+
+			// Fourth item: function_call_output
+			expect(resultItem.type).toBe("function_call_output")
+			expect(resultItem.output).toBe("2")
+		})
+	})
+
+	describe("createMessage codex tool call streaming", () => {
+		it("should handle tool call events from the Responses API", async () => {
+			const argumentDelta = (delta: string) => ({
+				type: "response.function_call_arguments.delta",
+				call_id: "call_abc",
+				name: "read_file",
+				delta,
+				index: 0,
+			})
+
+			mockResponsesCreate.mockResolvedValue(
+				eventStream([
+					{
+						type: "response.output_item.added",
+						item: { type: "function_call", call_id: "call_abc", name: "read_file" },
+					},
+					argumentDelta('{"path":'),
+					argumentDelta('"test.ts"}'),
+					{ type: "response.function_call_arguments.done", call_id: "call_abc" },
+					usageDone(5, 10),
+				]),
+			)
+
+			const chunks = await drain(codexHandler(), "System", [{ role: "user", content: "Read test.ts" }])
+
+			const partialCalls = ofType(chunks, "tool_call_partial")
+			expect(partialCalls.length).toBe(2)
+			expect(partialCalls[0].id).toBe("call_abc")
+			expect(partialCalls[0].name).toBe("read_file")
+			expect(partialCalls[0].arguments).toBe('{"path":')
+			expect(partialCalls[1].arguments).toBe('"test.ts"}')
+		})
+
+		it("should handle complete tool calls from output_item.done", async () => {
+			mockResponsesCreate.mockResolvedValue(
+				eventStream([
+					{
+						type: "response.output_item.added",
+						item: { type: "function_call", call_id: "call_xyz", name: "write_file" },
+					},
+					{
+						type: "response.output_item.done",
+						item: {
+							type: "function_call",
+							call_id: "call_xyz",
+							name: "write_file",
+							arguments: '{"path":"out.txt","content":"hello"}',
+						},
+					},
+					usageDone(5, 10),
+				]),
+			)
+
+			const chunks = await drain(codexHandler(), "System", [{ role: "user", content: "Write file" }])
+
+			const toolCalls = ofType(chunks, "tool_call")
+			expect(toolCalls.length).toBe(1)
+			expect(toolCalls[0].id).toBe("call_xyz")
+			expect(toolCalls[0].name).toBe("write_file")
+			expect(toolCalls[0].arguments).toBe('{"path":"out.txt","content":"hello"}')
+		})
+	})
+
+	describe("completePrompt with codex model", () => {
+		it("should use Responses API for codex models in completePrompt", async () => {
+			mockResponsesCreate.mockResolvedValue({
+				output: [
+					{
+						type: "message",
+						content: [{ type: "output_text", text: "Completed prompt response" }],
+					},
+				],
+			})
+
+			const result = await codexHandler(true).completePrompt("Complete this")
+
+			expect(mockResponsesCreate).toHaveBeenCalledTimes(1)
+			expect(mockChatCreate).not.toHaveBeenCalled()
+
+			const requestBody = mockResponsesCreate.mock.calls[0][0]
+			expect(requestBody.model).toBe("gpt-5.3-codex")
+			expect(requestBody.stream).toBe(false)
+			expect(requestBody.input[0].role).toBe("user")
+
+			expect(result).toBe("Completed prompt response")
+		})
+
+		it("should use Chat Completions for non-codex models in completePrompt", async () => {
+			mockChatCreate.mockResolvedValue({
+				choices: [
+					{
+						message: { role: "assistant", content: "Chat completion response" },
+						finish_reason: "stop",
+						index: 0,
+					},
+				],
+				usage: { prompt_tokens: 5, completion_tokens: 3, total_tokens: 8 },
+			})
+
+			const result = await gpt4Handler().completePrompt("Complete this")
+
+			expect(mockChatCreate).toHaveBeenCalledTimes(1)
+			expect(mockResponsesCreate).not.toHaveBeenCalled()
+			expect(result).toBe("Chat completion response")
+		})
+	})
+
+	describe("createMessage codex error handling", () => {
+		it("should handle API errors from Responses API", async () => {
+			mockResponsesCreate.mockRejectedValue(new Error("API rate limit exceeded"))
+
+			const pending = drain(codexHandler(), "System", [{ role: "user", content: "Hello" }])
+
+			await expect(pending).rejects.toThrow()
+		})
+
+		it("should handle error events in the stream", async () => {
+			mockResponsesCreate.mockResolvedValue(
+				eventStream([{ type: "response.error", error: { message: "Something went wrong" } }]),
+			)
+
+			const pending = drain(codexHandler(), "System", [{ role: "user", content: "Hello" }])
+
+			await expect(pending).rejects.toThrow("Responses API error: Something went wrong")
+		})
+	})
+})
diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 33b29abcaf..58d4a03eb4 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -13,6 +13,8 @@ import { import type { ApiHandlerOptions } from "../../shared/api" import { TagMatcher } from "../../utils/tag-matcher" +import { sanitizeOpenAiCallId } from "../../utils/tool-id" +import { isMcpTool } from "../../utils/mcp-name" import { convertToOpenAiMessages } from "../transform/openai-format" import { convertToR1Format } from "../transform/r1-format" @@ -91,6 +93,11 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const isAzureAiInference = this._isAzureAiInference(modelUrl) const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format + if (this._isCodexModel(modelId)) { + yield* this.handleCodexMessage(systemPrompt, messages, metadata) + return + } + if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata) return @@ -294,12 +301,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl async completePrompt(prompt: string): Promise { try { - const isAzureAiInference = 
this._isAzureAiInference(this.options.openAiBaseUrl) const model = this.getModel() + const modelId = model.id const modelInfo = model.info + // Codex models must use the Responses API + if (this._isCodexModel(modelId)) { + return this._completePromptWithResponsesApi(prompt, model) + } + + const isAzureAiInference = this._isAzureAiInference(this.options.openAiBaseUrl) + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { - model: model.id, + model: modelId, messages: [{ role: "user", content: prompt }], } @@ -326,6 +340,58 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } }
+	/**
+	 * Completes a single prompt through the Responses API (used for codex models).
+	 *
+	 * Sends one non-streaming, non-stored request whose only input is `prompt`
+	 * as a user message, then extracts the assistant text from the reply.
+	 *
+	 * @param prompt - Text sent as the sole user input.
+	 * @param model - Result of `getModel()`; `id` becomes the request model and
+	 *   `info.maxTokens` is the fallback output-token cap.
+	 * @returns Every `output_text` part of every `message` output item,
+	 *   concatenated in order. If there is none, a string-valued `output_text`
+	 *   or `text` field on the response; otherwise the empty string.
+	 * @throws The error produced by `handleOpenAIError` when the request fails.
+	 */
+	private async _completePromptWithResponsesApi(
+		prompt: string,
+		model: ReturnType<OpenAiHandler["getModel"]>,
+	): Promise<string> {
+		const requestBody: any = {
+			model: model.id,
+			input: [
+				{
+					role: "user",
+					content: [{ type: "input_text", text: prompt }],
+				},
+			],
+			stream: false,
+			store: false,
+		}
+
+		// Only cap output tokens when the user opted in
+		if (this.options.includeMaxTokens === true) {
+			requestBody.max_output_tokens = this.options.modelMaxTokens || model.info.maxTokens
+		}
+
+		let response: any
+		try {
+			response = await (this.client as any).responses.create(requestBody)
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+
+		// A reply may spread its text over several output_text parts (and several
+		// message items); returning only the first one would truncate the answer.
+		const textParts: string[] = []
+		if (Array.isArray(response?.output)) {
+			for (const outputItem of response.output) {
+				if (outputItem?.type !== "message" || !Array.isArray(outputItem.content)) {
+					continue
+				}
+				for (const content of outputItem.content) {
+					if (content?.type === "output_text" && typeof content.text === "string") {
+						textParts.push(content.text)
+					}
+				}
+			}
+		}
+
+		const joined = textParts.join("")
+		if (joined) {
+			return joined
+		}
+
+		// Fallbacks. Both are accepted only as strings: on a Responses API reply
+		// `text` is the text-format configuration object, so returning it
+		// unchecked would hand callers a non-string.
+		if (typeof response?.output_text === "string" && response.output_text) {
+			return response.output_text
+		}
+		if (typeof response?.text === "string" && response.text) {
+			return response.text
+		}
+
+		return ""
+	}
+
 private async *handleO3FamilyMessage( modelId: string, systemPrompt: string, @@ -496,6 +562,410 @@ export class OpenAiHandler extends BaseProvider
implements SingleCompletionHandl } }
+	/**
+	 * Reports whether a model id names a codex model, i.e. one that is routed to
+	 * the Responses API instead of Chat Completions. Azure-hosted GPT-5.x codex
+	 * models (e.g., gpt-5.3-codex) do not support the Chat Completions API.
+	 *
+	 * @param modelId - Model or deployment id to inspect.
+	 * @returns `true` when the id contains "codex", compared case-insensitively.
+	 */
+	protected _isCodexModel(modelId: string): boolean {
+		return modelId.toLowerCase().includes("codex")
+	}
+
+	/**
+	 * Streams a reply for a codex model through the Responses API.
+	 * Codex models (e.g., gpt-5.3-codex on Azure) only support the Responses API,
+	 * not the Chat Completions API.
+	 *
+	 * The request is sent with `stream: true` and `store: false`; the system
+	 * prompt travels as `instructions` and the conversation as `input`.
+	 *
+	 * @param systemPrompt - Sent as the request's `instructions`.
+	 * @param messages - Conversation, converted by `_formatConversationForResponsesApi`.
+	 * @param metadata - Optional `tools`, `tool_choice` and `parallelToolCalls`
+	 *   (the latter defaults to `true`).
+	 * @returns A stream of `text`, `reasoning`, `tool_call_partial`, `tool_call`
+	 *   and `usage` chunks.
+	 * @throws The error produced by `handleOpenAIError` for a failed request and
+	 *   for any `error`, `response.error` or `response.failed` stream event.
+	 */
+	private async *handleCodexMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const model = this.getModel()
+
+		// Format conversation for the Responses API
+		const formattedInput = this._formatConversationForResponsesApi(messages)
+
+		// Build tools in Responses API format (flat structure, not nested under function)
+		const tools = this._convertToolsForResponsesApi(metadata?.tools)
+
+		const requestBody: any = {
+			model: model.id,
+			input: formattedInput,
+			stream: true,
+			store: false,
+			instructions: systemPrompt,
+			...(tools && tools.length > 0 ? { tools } : {}),
+			...(metadata?.tool_choice ? { tool_choice: metadata.tool_choice } : {}),
+			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+		}
+
+		// Temperature is sent unless the model info explicitly opts out
+		if (model.info.supportsTemperature !== false) {
+			requestBody.temperature = this.options.modelTemperature ?? 0
+		}
+
+		// Only cap output tokens when the user opted in
+		if (this.options.includeMaxTokens === true) {
+			requestBody.max_output_tokens = this.options.modelMaxTokens || model.info.maxTokens
+		}
+
+		// Identity of the most recently announced tool call; argument deltas that
+		// carry no id/name of their own are attributed to it.
+		// NOTE(review): a single pending slot assumes tool calls are streamed one
+		// after another rather than interleaved — confirm for parallel tool calls.
+		let pendingToolCallId: string | undefined
+		let pendingToolCallName: string | undefined
+		// Once any text was emitted, the "done"/final-payload fallbacks are skipped
+		// so the same text is not yielded twice.
+		let sawTextOutput = false
+		let sawRefusal = false
+		// Calls whose arguments were already streamed as partials must not be
+		// emitted again as a complete tool_call.
+		const streamedToolCallIds = new Set<string>()
+
+		try {
+			const stream = (await (this.client as any).responses.create(requestBody)) as AsyncIterable<any>
+
+			for await (const event of stream) {
+				const eventType = event?.type
+
+				// Text deltas
+				if (eventType === "response.text.delta" || eventType === "response.output_text.delta") {
+					if (event?.delta) {
+						sawTextOutput = true
+						yield { type: "text", text: event.delta }
+					}
+					continue
+				}
+
+				// Done-only text, for variants that skip delta events
+				if (eventType === "response.text.done" || eventType === "response.output_text.done") {
+					const doneText =
+						typeof event?.text === "string"
+							? event.text
+							: typeof event?.output_text === "string"
+								? event.output_text
+								: undefined
+					if (!sawTextOutput && doneText) {
+						sawTextOutput = true
+						yield { type: "text", text: doneText }
+					}
+					continue
+				}
+
+				// Content part events
+				if (eventType === "response.content_part.added" || eventType === "response.content_part.done") {
+					const part = event?.part
+					if (
+						!sawTextOutput &&
+						(part?.type === "text" || part?.type === "output_text") &&
+						typeof part?.text === "string" &&
+						part.text
+					) {
+						sawTextOutput = true
+						yield { type: "text", text: part.text }
+					}
+					continue
+				}
+
+				// Reasoning deltas
+				if (
+					eventType === "response.reasoning.delta" ||
+					eventType === "response.reasoning_text.delta" ||
+					eventType === "response.reasoning_summary.delta" ||
+					eventType === "response.reasoning_summary_text.delta"
+				) {
+					if (event?.delta) {
+						yield { type: "reasoning", text: event.delta }
+					}
+					continue
+				}
+
+				// Refusal deltas: label the refusal once, not on every fragment
+				if (eventType === "response.refusal.delta") {
+					if (event?.delta) {
+						sawTextOutput = true
+						const prefix = sawRefusal ? "" : "[Refusal] "
+						sawRefusal = true
+						yield { type: "text", text: `${prefix}${event.delta}` }
+					}
+					continue
+				}
+
+				// Output item events (track tool identity, emit complete tool calls)
+				if (eventType === "response.output_item.added" || eventType === "response.output_item.done") {
+					const item = event?.item
+					if (item) {
+						const isToolItem = item.type === "function_call" || item.type === "tool_call"
+						const callId = item.call_id || item.tool_call_id || item.id
+						const name = item.name || item.function?.name
+						const hasCallId = typeof callId === "string" && callId.length > 0
+
+						// Remember who subsequent argument deltas belong to
+						if (isToolItem && hasCallId) {
+							pendingToolCallId = callId
+							pendingToolCallName = typeof name === "string" ? name : undefined
+						}
+
+						if (eventType === "response.output_item.added") {
+							for (const text of this._textFromResponsesOutputItem(item)) {
+								sawTextOutput = true
+								yield { type: "text", text }
+							}
+						} else if (isToolItem) {
+							const argsRaw = item.arguments || item.function?.arguments || item.input
+							const args =
+								typeof argsRaw === "string"
+									? argsRaw
+									: argsRaw && typeof argsRaw === "object"
+										? JSON.stringify(argsRaw)
+										: ""
+
+							if (
+								hasCallId &&
+								typeof name === "string" &&
+								name.length > 0 &&
+								!streamedToolCallIds.has(callId)
+							) {
+								yield { type: "tool_call", id: callId, name, arguments: args }
+							}
+						} else if (!sawTextOutput) {
+							for (const text of this._textFromResponsesOutputItem(item)) {
+								sawTextOutput = true
+								yield { type: "text", text }
+							}
+						}
+					}
+					continue
+				}
+
+				// Tool/function call argument deltas
+				if (
+					eventType === "response.tool_call_arguments.delta" ||
+					eventType === "response.function_call_arguments.delta"
+				) {
+					const callId = event.call_id || event.tool_call_id || event.id || pendingToolCallId || undefined
+					const name = event.name || event.function_name || pendingToolCallName || undefined
+					const args = event.delta || event.arguments
+
+					if (
+						typeof name === "string" &&
+						name.length > 0 &&
+						typeof callId === "string" &&
+						callId.length > 0
+					) {
+						streamedToolCallIds.add(callId)
+						yield {
+							type: "tool_call_partial",
+							index: event.index ?? 0,
+							id: callId,
+							name,
+							// An empty delta must not surface as `undefined`
+							arguments: args ?? "",
+						}
+					}
+					continue
+				}
+
+				// Tool/function call completion: nothing to emit
+				if (
+					eventType === "response.tool_call_arguments.done" ||
+					eventType === "response.function_call_arguments.done"
+				) {
+					continue
+				}
+
+				// Completion events with usage
+				if (eventType === "response.done" || eventType === "response.completed") {
+					// Fallback text extraction from the final payload
+					if (!sawTextOutput && Array.isArray(event?.response?.output)) {
+						for (const outputItem of event.response.output) {
+							for (const text of this._textFromResponsesOutputItem(outputItem)) {
+								sawTextOutput = true
+								yield { type: "text", text }
+							}
+						}
+					}
+
+					const usage = event?.response?.usage || event?.usage
+					if (usage) {
+						yield {
+							type: "usage",
+							inputTokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
+							outputTokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
+							cacheWriteTokens: usage.cache_creation_input_tokens || undefined,
+							cacheReadTokens: usage.cache_read_input_tokens || undefined,
+						}
+					}
+					continue
+				}
+
+				// Error events always abort the stream, even without a message
+				if (eventType === "response.error" || eventType === "error") {
+					const detail = event.error?.message || event.message || "Unknown error"
+					throw new Error(`Responses API error: ${detail}`)
+				}
+
+				// Failed event: the error may be nested under `response.error`
+				if (eventType === "response.failed") {
+					const detail =
+						event.error?.message ||
+						event.message ||
+						event.response?.error?.message ||
+						"Unknown failure"
+					throw new Error(`Response failed: ${detail}`)
+				}
+
+				// Fallback for older formats
+				if (event?.choices?.[0]?.delta?.content) {
+					yield { type: "text", text: event.choices[0].delta.content }
+				}
+
+				if (event?.usage) {
+					yield {
+						type: "usage",
+						inputTokens: event.usage.input_tokens ?? event.usage.prompt_tokens ?? 0,
+						outputTokens: event.usage.output_tokens ?? event.usage.completion_tokens ?? 0,
+					}
+				}
+			}
+		} catch (error) {
+			throw handleOpenAIError(error, this.providerName)
+		}
+	}
+
+	/**
+	 * Collects the non-empty text carried by one Responses API output item.
+	 *
+	 * @param item - An output item from a stream event or a final payload.
+	 * @returns `[item.text]` for a `text`/`output_text` item, the texts of the
+	 *   `text`/`output_text` parts for a `message` item, otherwise `[]`.
+	 */
+	private _textFromResponsesOutputItem(item: any): string[] {
+		const isTextual = (node: any) => (node?.type === "text" || node?.type === "output_text") && node?.text
+
+		if (isTextual(item)) {
+			return [item.text]
+		}
+		if (item?.type === "message" && Array.isArray(item.content)) {
+			return item.content.filter(isTextual).map((content: any) => content.text)
+		}
+		return []
+	}
+
+	/**
+	 * Formats an Anthropic message array into the Responses API input format.
+	 *
+	 * User turns become `{ role: "user", content }` with `input_text` /
+	 * `input_image` parts, followed by one `function_call_output` item per
+	 * tool result. Assistant turns become `{ role: "assistant", content }`
+	 * with `output_text` parts, followed by one `function_call` item per tool
+	 * use. Messages with any other role, and block types not listed here,
+	 * are dropped.
+	 *
+	 * @param messages - Conversation in Anthropic message format.
+	 * @returns The flat list of Responses API input items, in conversation order.
+	 */
+	private _formatConversationForResponsesApi(messages: Anthropic.Messages.MessageParam[]): any[] {
+		const items: any[] = []
+
+		for (const message of messages) {
+			if (message.role === "user") {
+				const parts: any[] = []
+				const outputs: any[] = []
+				const blocks = Array.isArray(message.content) ? message.content : []
+
+				if (typeof message.content === "string") {
+					parts.push({ type: "input_text", text: message.content })
+				}
+
+				for (const block of blocks) {
+					switch (block.type) {
+						case "text":
+							parts.push({ type: "input_text", text: block.text })
+							break
+						case "image": {
+							// Images are inlined as a base64 data URL
+							const { source } = block as Anthropic.Messages.ImageBlockParam
+							parts.push({
+								type: "input_image",
+								image_url: `data:${source.media_type};base64,${source.data}`,
+							})
+							break
+						}
+						case "tool_result": {
+							// Only text parts of a structured result are kept
+							let output: string
+							if (typeof block.content === "string") {
+								output = block.content
+							} else {
+								output = block.content?.map((c: any) => (c.type === "text" ? c.text : "")).join("") || ""
+							}
+							outputs.push({
+								type: "function_call_output",
+								call_id: sanitizeOpenAiCallId(block.tool_use_id),
+								output,
+							})
+							break
+						}
+					}
+				}
+
+				if (parts.length > 0) {
+					items.push({ role: "user", content: parts })
+				}
+				items.push(...outputs)
+			} else if (message.role === "assistant") {
+				const parts: any[] = []
+				const calls: any[] = []
+				const blocks = Array.isArray(message.content) ? message.content : []
+
+				if (typeof message.content === "string") {
+					parts.push({ type: "output_text", text: message.content })
+				}
+
+				for (const block of blocks) {
+					switch (block.type) {
+						case "text":
+							parts.push({ type: "output_text", text: block.text })
+							break
+						case "tool_use":
+							calls.push({
+								type: "function_call",
+								call_id: sanitizeOpenAiCallId(block.id),
+								name: block.name,
+								arguments: JSON.stringify(block.input),
+							})
+							break
+					}
+				}
+
+				if (parts.length > 0) {
+					items.push({ role: "assistant", content: parts })
+				}
+				items.push(...calls)
+			}
+		}
+
+		return items
+	}
+
+	/**
+	 * Converts tools from the Chat Completions format to the Responses API format.
+	 * The Responses API uses a flat structure: {type, name, description, parameters, strict}
+	 * instead of the nested {type, function: {name, description, parameters}} format.
+	 *
+	 * Entries whose `type` is not "function" are skipped. For names that
+	 * `isMcpTool` recognises, the parameters are passed through untouched and
+	 * `strict` is false; every other tool has its schema run through
+	 * `convertToolSchemaForOpenAI` and is marked `strict: true`.
+	 *
+	 * @param tools - Tool definitions in Chat Completions format, if any.
+	 * @returns The converted tools, or `undefined` when `tools` is missing or empty.
+	 */
+	private _convertToolsForResponsesApi(tools: any[] | undefined): any[] | undefined {
+		if (!tools || tools.length === 0) {
+			return undefined
+		}
+
+		const converted: any[] = []
+		for (const tool of tools) {
+			if (tool.type !== "function") {
+				continue
+			}
+
+			const fn = tool.function
+			const mcp = isMcpTool(fn.name)
+			const parameters = mcp ? fn.parameters : this.convertToolSchemaForOpenAI(fn.parameters)
+
+			converted.push({
+				type: "function",
+				name: fn.name,
+				description: fn.description,
+				parameters,
+				strict: !mcp,
+			})
+		}
+		return converted
+	}
+
 protected _getUrlHost(baseUrl?: string): string { try { return new URL(baseUrl ?? "").host