diff --git a/apps/web/src/lib/ai-gateway/models.ts b/apps/web/src/lib/ai-gateway/models.ts
index 75988779f7..904d98cd8b 100644
--- a/apps/web/src/lib/ai-gateway/models.ts
+++ b/apps/web/src/lib/ai-gateway/models.ts
@@ -31,6 +31,7 @@ import {
   gemma_4_26b_a4b_it_free_model,
 } from '@/lib/ai-gateway/providers/google';
 import { QWEN37_PLUS_MODEL_ID, qwen36_plus_stealth_model } from '@/lib/ai-gateway/providers/qwen';
+import { morphChatModels } from '@/lib/ai-gateway/providers/morph';
 import { stepfun_37_flash_free_model } from '@/lib/ai-gateway/providers/stepfun';
 import { isGrokModel } from '@/lib/ai-gateway/providers/xai';
 import { isClaudeModel } from '@/lib/ai-gateway/providers/anthropic.constants';
@@ -85,6 +86,7 @@ export const kiloExclusiveModels = [
   gemma_4_26b_a4b_it_free_model,
   seed_20_code_free_model,
   ...deepseekDiscountedModels,
+  ...morphChatModels,
   qwen36_plus_stealth_model,
   claude_sonnet_clawsetup_model,
   claude_opus_4_8_stealth_model,
diff --git a/apps/web/src/lib/ai-gateway/providers/model-settings.ts b/apps/web/src/lib/ai-gateway/providers/model-settings.ts
index 52ff151945..409823da81 100644
--- a/apps/web/src/lib/ai-gateway/providers/model-settings.ts
+++ b/apps/web/src/lib/ai-gateway/providers/model-settings.ts
@@ -19,6 +19,7 @@ import { isStepModel } from '@/lib/ai-gateway/providers/stepfun';
 import { ReasoningEffortSchema } from '@kilocode/db/schema-types';
 import { isDeepseekModel } from '@/lib/ai-gateway/providers/deepseek';
 import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax';
+import { isMorphModel } from '@/lib/ai-gateway/providers/morph';
 import type { DirectUserByokInferenceProviderId } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id';
 
 const REASONING_VARIANTS_THINKING_ONLY = {
@@ -136,6 +137,15 @@ export function getAiSdkProvider(
   model: string,
   directProviderId: DirectUserByokInferenceProviderId | null
 ): Exclude | undefined {
+  if (isMorphModel(model)) {
+    // Morph's gateway only exposes OpenAI Chat Completions
+    // (MORPH.supportedChatApis === ['chat_completions']). Pin every Morph model
+    // to the OpenAI-compatible AI SDK provider so name-based heuristics below
+    // (e.g. minimax -> 'anthropic'/Messages, gpt/grok -> 'openai'/Responses)
+    // never select an API kind the gateway would reject with
+    // apiKindNotSupportedResponse.
+    return 'openai-compatible';
+  }
   if (seed_20_code_free_model.public_id === model) {
     // with 'openai' (Responses API) prompt caching doesn't work
     return 'openai-compatible';
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
new file mode 100644
index 0000000000..7ddf5d0811
--- /dev/null
+++ b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -0,0 +1,173 @@
+import { describe, it, expect } from '@jest/globals';
+import { findKiloExclusiveModel } from '@/lib/ai-gateway/models';
+import PROVIDERS from '@/lib/ai-gateway/providers/provider-definitions';
+import { morphChatModels, isMorphModel } from '@/lib/ai-gateway/providers/morph';
+import {
+  getAiSdkProvider,
+  getGatewayOpenCodeSettings,
+} from '@/lib/ai-gateway/providers/model-settings';
+import { calculateCost_mUsd } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
+
+// Mirrors get-provider.ts: the OpenCode AI SDK provider selects the request kind
+// a client sends. Morph only supports chat_completions, so any other kind is
+// rejected by apiKindNotSupportedResponse before reaching the gateway.
+function requestKindFor(aiSdkProvider: string | undefined) {
+  if (aiSdkProvider === 'anthropic') return 'messages';
+  if (aiSdkProvider === 'openai') return 'responses';
+  return 'chat_completions';
+}
+
+// Resolves a kilo-exclusive model to its provider using the exact same lookup
+// get-provider.ts performs for non-Vercel, non-BYOK gateway models.
+function resolveGatewayProvider(gateway: string) {
+  return Object.values(PROVIDERS).find(p => p.id === gateway) ?? PROVIDERS.OPENROUTER;
+}
+
+describe('Morph gateway provider', () => {
+  it('exposes the Morph gateway with an OpenAI-compatible chat endpoint', () => {
+    expect(PROVIDERS.MORPH.id).toBe('morph');
+    expect(PROVIDERS.MORPH.apiUrl).toBe('https://api.morphllm.com/v1');
+    expect(PROVIDERS.MORPH.supportedChatApis).toContain('chat_completions');
+  });
+
+  it('registers exactly the six large open-source models (no proprietary models)', () => {
+    expect(morphChatModels.map(m => m.public_id).sort()).toEqual(
+      [
+        'morph/deepseek-v4-flash',
+        'morph/glm-5.2',
+        'morph/minimax-m2.7',
+        'morph/minimax-m3',
+        'morph/qwen3.5-397b',
+        'morph/qwen3.6-27b',
+      ].sort()
+    );
+    // Apply/compactor/warp-grep/embeddings must not be exposed through Kilo.
+    expect(morphChatModels.some(m => /v3|apply|compact|warp|embed/i.test(m.public_id))).toBe(false);
+  });
+
+  it('resolves a Morph model and routes it to the Morph provider', () => {
+    const model = findKiloExclusiveModel('morph/qwen3.6-27b');
+    expect(model).not.toBeNull();
+    expect(model!.gateway).toBe('morph');
+    expect(model!.internal_id).toBe('morph-qwen36-27b');
+    expect(resolveGatewayProvider(model!.gateway)).toBe(PROVIDERS.MORPH);
+  });
+
+  it('routes every registered Morph model to the Morph provider', () => {
+    for (const m of morphChatModels) {
+      expect(isMorphModel(m.public_id)).toBe(true);
+      expect(findKiloExclusiveModel(m.public_id)).toBe(m);
+      expect(resolveGatewayProvider(m.gateway)).toBe(PROVIDERS.MORPH);
+    }
+  });
+
+  // Per-1M-token rates mirror Morph's canonical pricing
+  // (https://www.morphllm.com/api/models/json). Cache-read is only billed for
+  // qwen3.5 (0.3); every other Morph model (incl. GLM-5.2) has no cache rate.
+  const EXPECTED: Record<
+    string,
+    { in: number; out: number; cache: number | null; vision: boolean }
+  > = {
+    'morph/qwen3.5-397b': { in: 0.5, out: 3.5, cache: 0.3, vision: true },
+    'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null, vision: false },
+    'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null, vision: false },
+    'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null, vision: true },
+    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: null, vision: false },
+    'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null, vision: false },
+  };
+
+  it.each(morphChatModels)('prices $public_id to match Morph canonical pricing', model => {
+    const want = EXPECTED[model.public_id];
+    expect(want).toBeDefined();
+    expect(model.pricing).toHaveLength(1);
+    const p = model.pricing![0].pricing;
+    expect(p.prompt_per_million).toBe(want.in);
+    expect(p.completion_per_million).toBe(want.out);
+    expect(p.input_cache_read_per_million ?? null).toBe(want.cache);
+  });
+
+  // Only qwen3.5-397b and minimax-m3 expose image input on Morph's gateway
+  // (canonical JSON input_modalities includes "image").
+  it.each(morphChatModels)(
+    'flags $public_id vision support to match canonical modalities',
+    model => {
+      const want = EXPECTED[model.public_id];
+      expect(want).toBeDefined();
+      expect(model.flags.includes('vision')).toBe(want.vision);
+      // Every Morph chat model is a reasoning model.
+      expect(model.flags.includes('reasoning')).toBe(true);
+    }
+  );
+
+  // Regression: getAiSdkProvider's name-based heuristics map any '*minimax*' id
+  // to the Anthropic Messages API (and gpt/grok ids to the OpenAI Responses
+  // API). The Morph gateway only speaks chat_completions, so every Morph model
+  // must resolve to an OpenAI-compatible provider whose request kind the gateway
+  // actually supports — otherwise OpenCode clients hit apiKindNotSupportedResponse.
+  it.each(morphChatModels)(
+    'maps $public_id to a chat_completions-compatible OpenCode provider',
+    model => {
+      const aiSdkProvider = getAiSdkProvider(model.public_id, null);
+      expect(aiSdkProvider).toBe('openai-compatible');
+      expect(getGatewayOpenCodeSettings(model.public_id)?.ai_sdk_provider).toBe(
+        'openai-compatible'
+      );
+
+      const kind = requestKindFor(aiSdkProvider);
+      expect(kind).toBe('chat_completions');
+      expect(PROVIDERS.MORPH.supportedChatApis).toContain(kind);
+    }
+  );
+
+  it('does not route the Morph MiniMax models through the Anthropic Messages API', () => {
+    for (const id of ['morph/minimax-m2.7', 'morph/minimax-m3']) {
+      expect(getAiSdkProvider(id, null)).not.toBe('anthropic');
+    }
+  });
+
+  // calculateCost_mUsd returns micro-USD: tokens * (USD per 1M tokens). These
+  // assertions prove the stored pricing actually bills correctly end to end, not
+  // just that the per-million numbers are present.
+  const ONE_M = 1_000_000;
+  const flatPricing = (id: string) => findKiloExclusiveModel(id)!.pricing![0].pricing;
+
+  it('bills cache reads at the discounted rate for qwen3.5 (the only cache model)', () => {
+    // 1M cache-hit tokens should cost the cache_read rate, strictly less than prompt.
+    const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, cache_read 0.3
+    const qwenCacheCost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: qwen }]
+    );
+    expect(qwenCacheCost).toBe(300_000); // 1M * 0.3
+    expect(qwenCacheCost).toBeLessThan(ONE_M * qwen.prompt_per_million);
+  });
+
+  it('falls back to the prompt rate for cache reads on models without a cache_read price', () => {
+    // GLM-5.2 and dsv4flash declare no cache_read rate per canonical JSON; cache
+    // hits must bill at the prompt rate (not free, not a discount).
+    for (const id of ['morph/glm-5.2', 'morph/deepseek-v4-flash']) {
+      const p = flatPricing(id);
+      expect(p.input_cache_read_per_million).toBeNull();
+      const cost = calculateCost_mUsd(
+        { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+        [{ start_context_length: 0, pricing: p }]
+      );
+      expect(cost).toBe(ONE_M * p.prompt_per_million);
+    }
+  });
+
+  it('computes a mixed-usage bill from the stored qwen3.5 pricing', () => {
+    const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, completion 3.5, cache_read 0.3
+    const cost = calculateCost_mUsd(
+      {
+        uncachedInputTokens: ONE_M,
+        cacheWriteTokens: 0,
+        cacheHitTokens: ONE_M,
+        totalOutputTokens: ONE_M,
+      },
+      [{ start_context_length: 0, pricing: qwen }]
+    );
+    // 1M*0.5 (uncached) + 1M*0.3 (cache hit) + 1M*3.5 (output) = 4,300,000 µUSD
+    expect(cost).toBe(4_300_000);
+  });
+});
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
new file mode 100644
index 0000000000..bddae72b09
--- /dev/null
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -0,0 +1,158 @@
+import type {
+  KiloExclusiveModel,
+  Pricing,
+  PricingTiers,
+} from '@/lib/ai-gateway/providers/kilo-exclusive-model';
+
+// Large open-source models served on Morph's own inference fleet and exposed
+// through Morph's OpenAI-compatible gateway (https://api.morphllm.com/v1).
+//
+// `internal_id` is the model id Morph's gateway expects; `public_id` is the
+// Kilo-facing id. Requests route to the MORPH provider via `gateway: 'morph'`
+// (see provider-definitions.ts and get-provider.ts). Kilo holds the key
+// (MORPH_API_KEY); this is the gateway integration, not BYOK.
+//
+// Pricing (per 1M tokens) and context windows mirror Morph's published numbers
+// (landing/src/lib/pricing.ts + MODEL_CONTEXT_WINDOWS). Keep in sync.
+
+function flat(pricing: Pricing): PricingTiers {
+  return [{ start_context_length: 0, pricing }];
+}
+
+export const morph_qwen35_397b_model: KiloExclusiveModel = {
+  public_id: 'morph/qwen3.5-397b',
+  display_name: 'Morph: Qwen3.5 397B',
+  description: 'Qwen3.5 397B (A17B), served on Morph infrastructure.',
+  context_length: 262_144,
+  max_completion_tokens: 131_072,
+  status: 'public',
+  // Qwen3.5 397B accepts image input on Morph's gateway (canonical JSON
+  // input_modalities: ["text","image"]).
+  flags: ['reasoning', 'vision'],
+  gateway: 'morph',
+  internal_id: 'morph-qwen35-397b',
+  pricing: flat({
+    prompt_per_million: 0.5,
+    completion_per_million: 3.5,
+    input_cache_read_per_million: 0.3,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_qwen36_27b_model: KiloExclusiveModel = {
+  public_id: 'morph/qwen3.6-27b',
+  display_name: 'Morph: Qwen3.6 27B',
+  description: 'Qwen3.6 27B, served on Morph infrastructure.',
+  context_length: 131_072,
+  max_completion_tokens: 131_072,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-qwen36-27b',
+  pricing: flat({
+    prompt_per_million: 0.289,
+    completion_per_million: 2.4,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_minimax_m27_model: KiloExclusiveModel = {
+  public_id: 'morph/minimax-m2.7',
+  display_name: 'Morph: MiniMax M2.7',
+  description: 'MiniMax M2.7 (230B A10B), served on Morph infrastructure.',
+  context_length: 196_608,
+  max_completion_tokens: 196_608,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-minimax27-230b',
+  pricing: flat({
+    prompt_per_million: 0.279,
+    completion_per_million: 1.2,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_minimax_m3_model: KiloExclusiveModel = {
+  public_id: 'morph/minimax-m3',
+  display_name: 'Morph: MiniMax M3',
+  description: 'MiniMax M3 (428B A23B), served on Morph infrastructure.',
+  context_length: 256_000,
+  max_completion_tokens: 256_000,
+  status: 'public',
+  // MiniMax M3 accepts image input on Morph's gateway (canonical JSON
+  // input_modalities: ["text","image"]).
+  flags: ['reasoning', 'vision'],
+  gateway: 'morph',
+  internal_id: 'morph-minimax3-428b',
+  pricing: flat({
+    prompt_per_million: 0.6,
+    completion_per_million: 2.4,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_glm52_744b_model: KiloExclusiveModel = {
+  public_id: 'morph/glm-5.2',
+  display_name: 'Morph: GLM-5.2',
+  description: 'GLM-5.2 744B, served on Morph infrastructure.',
+  context_length: 1_048_576,
+  max_completion_tokens: 1_048_576,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-glm52-744b',
+  pricing: flat({
+    prompt_per_million: 1.1,
+    completion_per_million: 4.1,
+    // No cache rate per the canonical /api/models/json — only qwen3.5 caches.
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_dsv4flash_model: KiloExclusiveModel = {
+  public_id: 'morph/deepseek-v4-flash',
+  display_name: 'Morph: DeepSeek V4 Flash',
+  description: 'DeepSeek V4 Flash (1M context), served on Morph infrastructure.',
+  context_length: 1_048_576,
+  max_completion_tokens: 1_048_576,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-dsv4flash',
+  pricing: flat({
+    prompt_per_million: 0.139,
+    completion_per_million: 0.278,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morphChatModels: KiloExclusiveModel[] = [
+  morph_qwen35_397b_model,
+  morph_qwen36_27b_model,
+  morph_minimax_m27_model,
+  morph_minimax_m3_model,
+  morph_glm52_744b_model,
+  morph_dsv4flash_model,
+];
+
+export function isMorphModel(model: string): boolean {
+  return model.startsWith('morph/');
+}
diff --git a/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts b/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
index bc0aa86b61..dd779c94b1 100644
--- a/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
+++ b/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
@@ -60,6 +60,14 @@ export default {
     supportedChatApis: [],
     async transformRequest() {},
   },
+  MORPH: {
+    id: 'morph',
+    apiUrl: 'https://api.morphllm.com/v1',
+    apiKey: getEnvVariable('MORPH_API_KEY'),
+    // Morph's gateway exposes an OpenAI-compatible Chat Completions endpoint.
+    supportedChatApis: ['chat_completions'],
+    async transformRequest() {},
+  },
   VERCEL_AI_GATEWAY: {
     id: 'vercel',
     apiUrl: 'https://ai-gateway.vercel.sh/v1',
diff --git a/apps/web/src/lib/ai-gateway/providers/types.ts b/apps/web/src/lib/ai-gateway/providers/types.ts
index aae97dc111..4e8125d961 100644
--- a/apps/web/src/lib/ai-gateway/providers/types.ts
+++ b/apps/web/src/lib/ai-gateway/providers/types.ts
@@ -10,6 +10,7 @@ export type ProviderId =
   | 'inception'
   | 'martian'
   | 'mistral'
+  | 'morph'
   | 'vercel'
   | 'custom'
   | 'experiment'