Skip to content
2 changes: 2 additions & 0 deletions apps/web/src/lib/ai-gateway/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
gemma_4_26b_a4b_it_free_model,
} from '@/lib/ai-gateway/providers/google';
import { QWEN37_PLUS_MODEL_ID, qwen36_plus_stealth_model } from '@/lib/ai-gateway/providers/qwen';
import { morphChatModels } from '@/lib/ai-gateway/providers/morph';
import { stepfun_37_flash_free_model } from '@/lib/ai-gateway/providers/stepfun';
import { isGrokModel } from '@/lib/ai-gateway/providers/xai';
import { isClaudeModel } from '@/lib/ai-gateway/providers/anthropic.constants';
Expand Down Expand Up @@ -85,6 +86,7 @@ export const kiloExclusiveModels = [
gemma_4_26b_a4b_it_free_model,
seed_20_code_free_model,
...deepseekDiscountedModels,
...morphChatModels,
qwen36_plus_stealth_model,
claude_sonnet_clawsetup_model,
claude_opus_4_8_stealth_model,
Expand Down
10 changes: 10 additions & 0 deletions apps/web/src/lib/ai-gateway/providers/model-settings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { isStepModel } from '@/lib/ai-gateway/providers/stepfun';
import { ReasoningEffortSchema } from '@kilocode/db/schema-types';
import { isDeepseekModel } from '@/lib/ai-gateway/providers/deepseek';
import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax';
import { isMorphModel } from '@/lib/ai-gateway/providers/morph';
import type { DirectUserByokInferenceProviderId } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id';

const REASONING_VARIANTS_THINKING_ONLY = {
Expand Down Expand Up @@ -136,6 +137,15 @@ export function getAiSdkProvider(
model: string,
directProviderId: DirectUserByokInferenceProviderId | null
): Exclude<CustomLlmProvider, 'openrouter' /*the default*/> | undefined {
if (isMorphModel(model)) {
// Morph's gateway only exposes OpenAI Chat Completions
// (MORPH.supportedChatApis === ['chat_completions']). Pin every Morph model
// to the OpenAI-compatible AI SDK provider so name-based heuristics below
// (e.g. minimax -> 'anthropic'/Messages, gpt/grok -> 'openai'/Responses)
// never select an API kind the gateway would reject with
// apiKindNotSupportedResponse.
return 'openai-compatible';
}
if (seed_20_code_free_model.public_id === model) {
// with 'openai' (Responses API) prompt caching doesn't work
return 'openai-compatible';
Expand Down
178 changes: 178 additions & 0 deletions apps/web/src/lib/ai-gateway/providers/morph.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
import { describe, it, expect } from '@jest/globals';
import { findKiloExclusiveModel } from '@/lib/ai-gateway/models';
import PROVIDERS from '@/lib/ai-gateway/providers/provider-definitions';
import { morphChatModels, isMorphModel } from '@/lib/ai-gateway/providers/morph';
import {
getAiSdkProvider,
getGatewayOpenCodeSettings,
} from '@/lib/ai-gateway/providers/model-settings';
import { calculateCost_mUsd } from '@/lib/ai-gateway/providers/kilo-exclusive-model';

/**
 * Test-local mirror of the mapping in get-provider.ts: translates the OpenCode
 * AI SDK provider name into the request kind a client will send.
 *
 * Morph only supports `chat_completions`, so any other kind is rejected by
 * apiKindNotSupportedResponse before reaching the gateway.
 *
 * @param aiSdkProvider AI SDK provider name, or `undefined` when none is set.
 * @returns `'messages'` for `'anthropic'`, `'responses'` for `'openai'`, and
 *   `'chat_completions'` for everything else (including `undefined`).
 */
function requestKindFor(aiSdkProvider: string | undefined) {
  switch (aiSdkProvider) {
    case 'anthropic':
      return 'messages';
    case 'openai':
      return 'responses';
    default:
      return 'chat_completions';
  }
}

/**
 * Looks up the provider definition whose `id` equals the given gateway name,
 * intended to match the lookup get-provider.ts performs for non-Vercel,
 * non-BYOK gateway models.
 *
 * @param gateway The `gateway` value taken from a kilo-exclusive model.
 * @returns The first matching entry of PROVIDERS, or PROVIDERS.OPENROUTER when
 *   no provider has that id.
 */
function resolveGatewayProvider(gateway: string) {
  for (const provider of Object.values(PROVIDERS)) {
    if (provider.id === gateway) {
      return provider;
    }
  }
  // No provider registered under this id: fall back to OpenRouter.
  return PROVIDERS.OPENROUTER;
}

// Suite for the Morph gateway integration. It checks, in order: the provider
// definition, the set of registered models and their routing, per-model pricing
// and capability flags, the AI SDK provider chosen for each model, and the cost
// computed by calculateCost_mUsd from the stored pricing.
describe('Morph gateway provider', () => {
it('exposes the Morph gateway with an OpenAI-compatible chat endpoint', () => {
expect(PROVIDERS.MORPH.id).toBe('morph');
expect(PROVIDERS.MORPH.apiUrl).toBe('https://api.morphllm.com/v1');
expect(PROVIDERS.MORPH.supportedChatApis).toContain('chat_completions');
});

it('registers exactly the six large open-source models (no proprietary models)', () => {
// Both sides are sorted so the assertion is independent of declaration order.
expect(morphChatModels.map(m => m.public_id).sort()).toEqual(
[
'morph/deepseek-v4-flash',
'morph/glm-5.2',
'morph/minimax-m2.7',
'morph/minimax-m3',
'morph/qwen3.5-397b',
'morph/qwen3.6-27b',
].sort()
);
// Apply/compactor/warp-grep/embeddings must not be exposed through Kilo.
expect(morphChatModels.some(m => /v3|apply|compact|warp|embed/i.test(m.public_id))).toBe(false);
});

it('resolves a Morph model and routes it to the Morph provider', () => {
const model = findKiloExclusiveModel('morph/qwen3.6-27b');
expect(model).not.toBeNull();
expect(model!.gateway).toBe('morph');
expect(model!.internal_id).toBe('morph-qwen36-27b');
expect(resolveGatewayProvider(model!.gateway)).toBe(PROVIDERS.MORPH);
});

it('routes every registered Morph model to the Morph provider', () => {
for (const m of morphChatModels) {
expect(isMorphModel(m.public_id)).toBe(true);
// toBe checks identity: the lookup must return the very object registered here.
expect(findKiloExclusiveModel(m.public_id)).toBe(m);
expect(resolveGatewayProvider(m.gateway)).toBe(PROVIDERS.MORPH);
}
});

// Per-1M-token rates mirror Morph's canonical pricing
// (https://www.morphllm.com/api/models/json). Cache-read is only billed for
// qwen3.5 (0.3) and glm-5.2 (0.35, LMCache prefix reuse); the JSON omits the
// glm rate, but Morph's calculateChatGlm52Cost bills it, so it is set here.
//
// Field meaning, as consumed by the tests below: `in` / `out` are compared to
// prompt_per_million / completion_per_million, `cache` to
// input_cache_read_per_million (null when absent), and `vision` to whether the
// model's flags include 'vision'.
const EXPECTED: Record<
string,
{ in: number; out: number; cache: number | null; vision: boolean }
> = {
'morph/qwen3.5-397b': { in: 0.5, out: 3.5, cache: 0.3, vision: true },
'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null, vision: false },
'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null, vision: false },
'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null, vision: true },
'morph/glm-5.2': { in: 1.1, out: 4.1, cache: 0.35, vision: false },
'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null, vision: false },
};

it.each(morphChatModels)('prices $public_id to match Morph canonical pricing', model => {
const want = EXPECTED[model.public_id];
// Fails loudly if a model is added to morphChatModels without an EXPECTED row.
expect(want).toBeDefined();
// Every Morph model is expected to carry exactly one (flat) pricing tier.
expect(model.pricing).toHaveLength(1);
const p = model.pricing![0].pricing;
expect(p.prompt_per_million).toBe(want.in);
expect(p.completion_per_million).toBe(want.out);
// Normalise undefined to null so a missing rate and an explicit null compare equal.
expect(p.input_cache_read_per_million ?? null).toBe(want.cache);
});

// Only qwen3.5-397b and minimax-m3 expose image input on Morph's gateway
// (canonical JSON input_modalities includes "image").
it.each(morphChatModels)(
'flags $public_id vision support to match canonical modalities',
model => {
const want = EXPECTED[model.public_id];
expect(want).toBeDefined();
expect(model.flags.includes('vision')).toBe(want.vision);
// Every Morph chat model is a reasoning model.
expect(model.flags.includes('reasoning')).toBe(true);
}
);

// Regression: getAiSdkProvider's name-based heuristics map any '*minimax*' id
// to the Anthropic Messages API (and gpt/grok ids to the OpenAI Responses
// API). The Morph gateway only speaks chat_completions, so every Morph model
// must resolve to an OpenAI-compatible provider whose request kind the gateway
// actually supports — otherwise OpenCode clients hit apiKindNotSupportedResponse.
it.each(morphChatModels)(
'maps $public_id to a chat_completions-compatible OpenCode provider',
model => {
// null = no direct BYOK provider id is passed.
const aiSdkProvider = getAiSdkProvider(model.public_id, null);
expect(aiSdkProvider).toBe('openai-compatible');
expect(getGatewayOpenCodeSettings(model.public_id)?.ai_sdk_provider).toBe(
'openai-compatible'
);

const kind = requestKindFor(aiSdkProvider);
expect(kind).toBe('chat_completions');
expect(PROVIDERS.MORPH.supportedChatApis).toContain(kind);
}
);

it('does not route the Morph MiniMax models through the Anthropic Messages API', () => {
for (const id of ['morph/minimax-m2.7', 'morph/minimax-m3']) {
expect(getAiSdkProvider(id, null)).not.toBe('anthropic');
}
});

// calculateCost_mUsd returns micro-USD: tokens * (USD per 1M tokens). These
// assertions prove the stored pricing actually bills correctly end to end, not
// just that the per-million numbers are present.
const ONE_M = 1_000_000;
// Pricing of the first tier of a registered model. The non-null assertions
// assume the id is registered and has pricing; an unknown id throws here.
const flatPricing = (id: string) => findKiloExclusiveModel(id)!.pricing![0].pricing;

it('bills cache reads at the discounted rate for qwen3.5 and glm-5.2', () => {
// 1M cache-hit tokens should cost the cache_read rate, strictly less than prompt.
const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, cache_read 0.3
const qwenCacheCost = calculateCost_mUsd(
{ uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
[{ start_context_length: 0, pricing: qwen }]
);
expect(qwenCacheCost).toBe(300_000); // 1M * 0.3
expect(qwenCacheCost).toBeLessThan(ONE_M * qwen.prompt_per_million);

const glm = flatPricing('morph/glm-5.2'); // prompt 1.1, cache_read 0.35
const glmCacheCost = calculateCost_mUsd(
{ uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
[{ start_context_length: 0, pricing: glm }]
);
expect(glmCacheCost).toBe(350_000); // 1M * 0.35
});

it('falls back to the prompt rate for cache reads on models without a cache_read price', () => {
// dsv4flash declares no cache_read rate; cache hits must bill at the prompt rate (not free).
const ds = flatPricing('morph/deepseek-v4-flash'); // prompt 0.139, cache_read null
expect(ds.input_cache_read_per_million).toBeNull();
const cost = calculateCost_mUsd(
{ uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
[{ start_context_length: 0, pricing: ds }]
);
expect(cost).toBe(ONE_M * ds.prompt_per_million); // 139_000
});

it('computes a mixed-usage bill from the stored qwen3.5 pricing', () => {
const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, completion 3.5, cache_read 0.3
const cost = calculateCost_mUsd(
{
uncachedInputTokens: ONE_M,
cacheWriteTokens: 0,
cacheHitTokens: ONE_M,
totalOutputTokens: ONE_M,
},
[{ start_context_length: 0, pricing: qwen }]
);
// 1M*0.5 (uncached) + 1M*0.3 (cache hit) + 1M*3.5 (output) = 4,300,000 µUSD
expect(cost).toBe(4_300_000);
});
});
160 changes: 160 additions & 0 deletions apps/web/src/lib/ai-gateway/providers/morph.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import type {
KiloExclusiveModel,
Pricing,
PricingTiers,
} from '@/lib/ai-gateway/providers/kilo-exclusive-model';

// Large open-source models served on Morph's own inference fleet and exposed
// through Morph's OpenAI-compatible gateway (https://api.morphllm.com/v1).
//
// `internal_id` is the model id Morph's gateway expects; `public_id` is the
// Kilo-facing id. Requests route to the MORPH provider via `gateway: 'morph'`
// (see provider-definitions.ts and get-provider.ts). Kilo holds the key
// (MORPH_API_KEY); this is the gateway integration, not BYOK.
//
// Pricing (per 1M tokens) and context windows mirror Morph's published numbers
// (landing/src/lib/pricing.ts + MODEL_CONTEXT_WINDOWS). Keep in sync.

/**
 * Wraps a single price sheet into a one-element tier list, so the same rates
 * apply from context length 0 upward.
 *
 * @param pricing Rates to apply to the single tier.
 * @returns A tier list containing exactly one tier starting at context length 0.
 */
function flat(pricing: Pricing): PricingTiers {
  const singleTier = { start_context_length: 0, pricing };
  return [singleTier];
}

/**
 * Kilo-exclusive catalog entry for Qwen3.5 397B routed through the Morph gateway.
 *
 * Declares a context_length of 262,144, up to 131,072 completion tokens, and a
 * single flat pricing tier whose cache-read rate (0.3) is lower than the prompt
 * rate (0.5). Rates are per million tokens.
 */
export const morph_qwen35_397b_model: KiloExclusiveModel = {
// Kilo-facing id; the 'morph/' prefix is what isMorphModel() matches on.
public_id: 'morph/qwen3.5-397b',
display_name: 'Morph: Qwen3.5 397B',
description: 'Qwen3.5 397B (A17B), served on Morph infrastructure.',
context_length: 262_144,
max_completion_tokens: 131_072,
status: 'public',
// Qwen3.5 397B accepts image input on Morph's gateway (canonical JSON
// input_modalities: ["text","image"]).
flags: ['reasoning', 'vision'],
gateway: 'morph',
// Model id sent to Morph's gateway (differs from public_id).
internal_id: 'morph-qwen35-397b',
pricing: flat({
prompt_per_million: 0.5,
completion_per_million: 3.5,
input_cache_read_per_million: 0.3,
input_cache_write_per_million: null,
}),
// NOTE(review): empty lists presumably mean "no restriction" — confirm against
// the KiloExclusiveModel definition.
exclusive_to: [],
inference_provider_restriction: [],
};

/**
 * Kilo-exclusive catalog entry for Qwen3.6 27B routed through the Morph gateway.
 *
 * Declares a context_length of 131,072 with max_completion_tokens set to the
 * same value, text-only ('reasoning' flag, no 'vision'), and a single flat
 * pricing tier with no separate cache-read or cache-write rate. Rates are per
 * million tokens.
 */
export const morph_qwen36_27b_model: KiloExclusiveModel = {
// Kilo-facing id; the 'morph/' prefix is what isMorphModel() matches on.
public_id: 'morph/qwen3.6-27b',
display_name: 'Morph: Qwen3.6 27B',
description: 'Qwen3.6 27B, served on Morph infrastructure.',
context_length: 131_072,
max_completion_tokens: 131_072,
status: 'public',
flags: ['reasoning'],
gateway: 'morph',
// Model id sent to Morph's gateway (differs from public_id).
internal_id: 'morph-qwen36-27b',
pricing: flat({
prompt_per_million: 0.289,
completion_per_million: 2.4,
// No dedicated cache rates are declared for this model.
input_cache_read_per_million: null,
input_cache_write_per_million: null,
}),
// NOTE(review): empty lists presumably mean "no restriction" — confirm against
// the KiloExclusiveModel definition.
exclusive_to: [],
inference_provider_restriction: [],
};

export const morph_minimax_m27_model: KiloExclusiveModel = {
public_id: 'morph/minimax-m2.7',

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WARNING: Morph MiniMax models will fail for OpenCode clients

getGatewayOpenCodeSettings() treats any *minimax* model as ai_sdk_provider: 'anthropic', and CustomLlmProviderSchema documents that as the Messages API. Because MORPH.supportedChatApis only exposes chat_completions, requests for both morph/minimax-m2.7 and morph/minimax-m3 are rejected by apiKindNotSupportedResponse before they ever reach Morph. Either override these ids to an OpenAI-compatible provider or add /messages support for the Morph gateway.


Reply with @kilocode-bot fix it to have Kilo Code address this issue.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — fixed in 40795bd.

getAiSdkProvider mapped any *minimax* id to the Anthropic Messages API, which the Morph gateway (chat_completions only) rejects via apiKindNotSupportedResponse. Rather than patch just the two MiniMax ids, I pinned every Morph model to openai-compatible at the top of getAiSdkProvider, encoding the gateway invariant and guarding the whole class (also protects future gpt/grok-named ids from being routed to the Responses API). Added regression coverage asserting all six Morph models resolve to a chat_completions-compatible OpenCode provider.

Also verified live against api.morphllm.com: all six models return 200 with correct output + usage, and the two vision-flagged models (qwen3.5-397b, minimax-m3) correctly process image input.

display_name: 'Morph: MiniMax M2.7',
description: 'MiniMax M2.7 (230B A10B), served on Morph infrastructure.',
context_length: 196_608,
max_completion_tokens: 196_608,
status: 'public',
flags: ['reasoning'],
gateway: 'morph',
internal_id: 'morph-minimax27-230b',
pricing: flat({
prompt_per_million: 0.279,
completion_per_million: 1.2,
input_cache_read_per_million: null,
input_cache_write_per_million: null,
}),
exclusive_to: [],
inference_provider_restriction: [],
};

/**
 * Kilo-exclusive catalog entry for MiniMax M3 routed through the Morph gateway.
 *
 * Declares a context_length of 256,000 with max_completion_tokens set to the
 * same value, image input support ('vision' flag), and a single flat pricing
 * tier with no separate cache-read or cache-write rate. Rates are per million
 * tokens.
 */
export const morph_minimax_m3_model: KiloExclusiveModel = {
// Kilo-facing id; the 'morph/' prefix is what isMorphModel() matches on.
public_id: 'morph/minimax-m3',
display_name: 'Morph: MiniMax M3',
description: 'MiniMax M3 (428B A23B), served on Morph infrastructure.',
context_length: 256_000,
max_completion_tokens: 256_000,
status: 'public',
// MiniMax M3 accepts image input on Morph's gateway (canonical JSON
// input_modalities: ["text","image"]).
flags: ['reasoning', 'vision'],
gateway: 'morph',
// Model id sent to Morph's gateway (differs from public_id).
internal_id: 'morph-minimax3-428b',
pricing: flat({
prompt_per_million: 0.6,
completion_per_million: 2.4,
// No dedicated cache rates are declared for this model.
input_cache_read_per_million: null,
input_cache_write_per_million: null,
}),
// NOTE(review): empty lists presumably mean "no restriction" — confirm against
// the KiloExclusiveModel definition.
exclusive_to: [],
inference_provider_restriction: [],
};

/**
 * Kilo-exclusive catalog entry for GLM-5.2 744B routed through the Morph gateway.
 *
 * Declares a context_length of 1,048,576 with max_completion_tokens set to the
 * same value, text-only ('reasoning' flag, no 'vision'), and a single flat
 * pricing tier that includes a cache-read rate (0.35) below the prompt rate
 * (1.1). Rates are per million tokens.
 */
export const morph_glm52_744b_model: KiloExclusiveModel = {
// Kilo-facing id; the 'morph/' prefix is what isMorphModel() matches on.
public_id: 'morph/glm-5.2',
display_name: 'Morph: GLM-5.2',
description: 'GLM-5.2 744B, served on Morph infrastructure.',
context_length: 1_048_576,
max_completion_tokens: 1_048_576,
status: 'public',
flags: ['reasoning'],
gateway: 'morph',
// Model id sent to Morph's gateway (differs from public_id).
internal_id: 'morph-glm52-744b',
pricing: flat({
prompt_per_million: 1.1,
completion_per_million: 4.1,
// GLM-5.2 runs LMCache prefix reuse, so cached input bills at a cheaper
// read rate (Morph's calculateChatGlm52Cost). The other Morph chat models
// do not bill cache reads, hence null on those.
input_cache_read_per_million: 0.35,
input_cache_write_per_million: null,
}),
// NOTE(review): empty lists presumably mean "no restriction" — confirm against
// the KiloExclusiveModel definition.
exclusive_to: [],
inference_provider_restriction: [],
};

/**
 * Kilo-exclusive catalog entry for DeepSeek V4 Flash routed through the Morph
 * gateway.
 *
 * Declares a context_length of 1,048,576 with max_completion_tokens set to the
 * same value, text-only ('reasoning' flag, no 'vision'), and a single flat
 * pricing tier with no separate cache-read or cache-write rate. Rates are per
 * million tokens.
 */
export const morph_dsv4flash_model: KiloExclusiveModel = {
// Kilo-facing id; the 'morph/' prefix is what isMorphModel() matches on.
public_id: 'morph/deepseek-v4-flash',
display_name: 'Morph: DeepSeek V4 Flash',
description: 'DeepSeek V4 Flash (1M context), served on Morph infrastructure.',
context_length: 1_048_576,
max_completion_tokens: 1_048_576,
status: 'public',
flags: ['reasoning'],
gateway: 'morph',
// Model id sent to Morph's gateway (differs from public_id).
internal_id: 'morph-dsv4flash',
pricing: flat({
prompt_per_million: 0.139,
completion_per_million: 0.278,
// No dedicated cache rates are declared for this model.
input_cache_read_per_million: null,
input_cache_write_per_million: null,
}),
// NOTE(review): empty lists presumably mean "no restriction" — confirm against
// the KiloExclusiveModel definition.
exclusive_to: [],
inference_provider_restriction: [],
};

/**
 * Every Morph chat model defined in this module, in declaration order.
 *
 * This is the list consumers spread into the kilo-exclusive model registry, so
 * a model must be added here to be exposed.
 */
export const morphChatModels: KiloExclusiveModel[] = [
morph_qwen35_397b_model,
morph_qwen36_27b_model,
morph_minimax_m27_model,
morph_minimax_m3_model,
morph_glm52_744b_model,
morph_dsv4flash_model,
];

/**
 * Tells whether a model id belongs to the Morph namespace.
 *
 * The check is a case-sensitive prefix match only; it does not verify that the
 * id corresponds to a registered model.
 *
 * @param model Public model id, e.g. `morph/glm-5.2`.
 * @returns `true` when the id begins with `morph/`, otherwise `false`.
 */
export function isMorphModel(model: string): boolean {
  const morphNamespace = 'morph/';
  const hasMorphPrefix = model.startsWith(morphNamespace);
  return hasMorphPrefix;
}
8 changes: 8 additions & 0 deletions apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ export default {
supportedChatApis: [],
async transformRequest() {},
},
// Provider definition for Morph's gateway. Models select it by setting
// `gateway` to this entry's `id`.
MORPH: {
id: 'morph',
apiUrl: 'https://api.morphllm.com/v1',
// Key is read from the MORPH_API_KEY environment variable.
apiKey: getEnvVariable('MORPH_API_KEY'),
// Morph's gateway exposes an OpenAI-compatible Chat Completions endpoint.
supportedChatApis: ['chat_completions'],
// Intentionally empty: no request transformation is applied for this provider.
async transformRequest() {},
},
VERCEL_AI_GATEWAY: {
id: 'vercel',
apiUrl: 'https://ai-gateway.vercel.sh/v1',
Expand Down
Loading