Kilo-Org · shreybirmiwalmorph · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/apps/web/src/lib/ai-gateway/models.ts b/apps/web/src/lib/ai-gateway/models.ts
@@ -31,6 +31,7 @@ import {
   gemma_4_26b_a4b_it_free_model,
 } from '@/lib/ai-gateway/providers/google';
 import { QWEN37_PLUS_MODEL_ID, qwen36_plus_stealth_model } from '@/lib/ai-gateway/providers/qwen';
+import { morphChatModels } from '@/lib/ai-gateway/providers/morph';
 import { stepfun_37_flash_free_model } from '@/lib/ai-gateway/providers/stepfun';
 import { isGrokModel } from '@/lib/ai-gateway/providers/xai';
 import { isClaudeModel } from '@/lib/ai-gateway/providers/anthropic.constants';
@@ -85,6 +86,7 @@ export const kiloExclusiveModels = [
   gemma_4_26b_a4b_it_free_model,
   seed_20_code_free_model,
   ...deepseekDiscountedModels,
+  ...morphChatModels,
   qwen36_plus_stealth_model,
   claude_sonnet_clawsetup_model,
   claude_opus_4_8_stealth_model,

diff --git a/apps/web/src/lib/ai-gateway/providers/model-settings.ts b/apps/web/src/lib/ai-gateway/providers/model-settings.ts
@@ -19,6 +19,7 @@ import { isStepModel } from '@/lib/ai-gateway/providers/stepfun';
 import { ReasoningEffortSchema } from '@kilocode/db/schema-types';
 import { isDeepseekModel } from '@/lib/ai-gateway/providers/deepseek';
 import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax';
+import { isMorphModel } from '@/lib/ai-gateway/providers/morph';
 import type { DirectUserByokInferenceProviderId } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id';
 
 const REASONING_VARIANTS_THINKING_ONLY = {
@@ -136,6 +137,15 @@ export function getAiSdkProvider(
   model: string,
   directProviderId: DirectUserByokInferenceProviderId | null
 ): Exclude<CustomLlmProvider, 'openrouter' /*the default*/> | undefined {
+  if (isMorphModel(model)) {
+    // Morph's gateway only exposes OpenAI Chat Completions
+    // (MORPH.supportedChatApis === ['chat_completions']). Pin every Morph model
+    // to the OpenAI-compatible AI SDK provider so name-based heuristics below
+    // (e.g. minimax -> 'anthropic'/Messages, gpt/grok -> 'openai'/Responses)
+    // never select an API kind the gateway would reject with
+    // apiKindNotSupportedResponse.
+    return 'openai-compatible';
+  }
   if (seed_20_code_free_model.public_id === model) {
     // with 'openai' (Responses API) prompt caching doesn't work
     return 'openai-compatible';

diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -0,0 +1,178 @@
+import { describe, it, expect } from '@jest/globals';
+import { findKiloExclusiveModel } from '@/lib/ai-gateway/models';
+import PROVIDERS from '@/lib/ai-gateway/providers/provider-definitions';
+import { morphChatModels, isMorphModel } from '@/lib/ai-gateway/providers/morph';
+import {
+  getAiSdkProvider,
+  getGatewayOpenCodeSettings,
+} from '@/lib/ai-gateway/providers/model-settings';
+import { calculateCost_mUsd } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
+
+// Test-local stand-in for the mapping in get-provider.ts: given the AI SDK
+// provider OpenCode was told to use, return the request kind the client will
+// send. 'anthropic' means Messages, 'openai' means Responses, and anything
+// else (including undefined) means Chat Completions. Morph only supports
+// chat_completions, so any other kind is rejected by
+// apiKindNotSupportedResponse before reaching the gateway.
+function requestKindFor(aiSdkProvider: string | undefined) {
+  switch (aiSdkProvider) {
+    case 'anthropic':
+      return 'messages';
+    case 'openai':
+      return 'responses';
+    default:
+      return 'chat_completions';
+  }
+}
+
+// Finds the provider definition whose `id` equals a kilo-exclusive model's
+// `gateway` value, defaulting to OpenRouter when nothing matches. This is
+// meant to be the exact lookup get-provider.ts performs for non-Vercel,
+// non-BYOK gateway models.
+function resolveGatewayProvider(gateway: string) {
+  const matched = Object.values(PROVIDERS).find(candidate => candidate.id === gateway);
+  return matched ?? PROVIDERS.OPENROUTER;
+}
+
+// Suite for the Morph gateway integration. It covers, in order: the provider
+// definition, the registered model list, gateway routing, stored pricing and
+// flags, AI SDK provider pinning, and billing through calculateCost_mUsd.
+describe('Morph gateway provider', () => {
+  it('exposes the Morph gateway with an OpenAI-compatible chat endpoint', () => {
+    expect(PROVIDERS.MORPH.id).toBe('morph');
+    expect(PROVIDERS.MORPH.apiUrl).toBe('https://api.morphllm.com/v1');
+    expect(PROVIDERS.MORPH.supportedChatApis).toContain('chat_completions');
+  });
+
+  it('registers exactly the six large open-source models (no proprietary models)', () => {
+    expect(morphChatModels.map(m => m.public_id).sort()).toEqual(
+      [
+        'morph/deepseek-v4-flash',
+        'morph/glm-5.2',
+        'morph/minimax-m2.7',
+        'morph/minimax-m3',
+        'morph/qwen3.5-397b',
+        'morph/qwen3.6-27b',
+      ].sort()
+    );
+    // Apply/compactor/warp-grep/embeddings must not be exposed through Kilo.
+    expect(morphChatModels.some(m => /v3|apply|compact|warp|embed/i.test(m.public_id))).toBe(false);
+  });
+
+  it('resolves a Morph model and routes it to the Morph provider', () => {
+    const model = findKiloExclusiveModel('morph/qwen3.6-27b');
+    expect(model).not.toBeNull();
+    expect(model!.gateway).toBe('morph');
+    expect(model!.internal_id).toBe('morph-qwen36-27b');
+    expect(resolveGatewayProvider(model!.gateway)).toBe(PROVIDERS.MORPH);
+  });
+
+  it('routes every registered Morph model to the Morph provider', () => {
+    for (const m of morphChatModels) {
+      expect(isMorphModel(m.public_id)).toBe(true);
+      // Identity check: the lookup must return the very object registered here.
+      expect(findKiloExclusiveModel(m.public_id)).toBe(m);
+      expect(resolveGatewayProvider(m.gateway)).toBe(PROVIDERS.MORPH);
+    }
+  });
+
+  // Per-1M-token rates mirror Morph's canonical pricing
+  // (https://www.morphllm.com/api/models/json). A discounted cache-read rate is
+  // only declared for qwen3.5 (0.3) and glm-5.2 (0.35, LMCache prefix reuse);
+  // the JSON omits the glm rate, but Morph's calculateChatGlm52Cost bills it, so
+  // it is set here. `cache: null` means no separate rate is stored; the billing
+  // test further down expects such cache hits to fall back to the prompt rate.
+  // Keyed by public_id so the it.each cases below can look up their expectations.
+  const EXPECTED: Record<
+    string,
+    { in: number; out: number; cache: number | null; vision: boolean }
+  > = {
+    'morph/qwen3.5-397b': { in: 0.5, out: 3.5, cache: 0.3, vision: true },
+    'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null, vision: false },
+    'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null, vision: false },
+    'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null, vision: true },
+    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: 0.35, vision: false },
+    'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null, vision: false },
+  };
+
+  it.each(morphChatModels)('prices $public_id to match Morph canonical pricing', model => {
+    const want = EXPECTED[model.public_id];
+    expect(want).toBeDefined();
+    expect(model.pricing).toHaveLength(1);
+    // The toHaveLength(1) assertion above guarantees index 0 exists.
+    const p = model.pricing![0].pricing;
+    expect(p.prompt_per_million).toBe(want.in);
+    expect(p.completion_per_million).toBe(want.out);
+    // Normalise undefined to null so it compares equal to EXPECTED's `cache: null`.
+    expect(p.input_cache_read_per_million ?? null).toBe(want.cache);
+  });
+
+  // Only qwen3.5-397b and minimax-m3 expose image input on Morph's gateway
+  // (canonical JSON input_modalities includes "image").
+  it.each(morphChatModels)(
+    'flags $public_id vision support to match canonical modalities',
+    model => {
+      const want = EXPECTED[model.public_id];
+      expect(want).toBeDefined();
+      expect(model.flags.includes('vision')).toBe(want.vision);
+      // Every Morph chat model is a reasoning model.
+      expect(model.flags.includes('reasoning')).toBe(true);
+    }
+  );
+
+  // Regression: getAiSdkProvider's name-based heuristics map any '*minimax*' id
+  // to the Anthropic Messages API (and gpt/grok ids to the OpenAI Responses
+  // API). The Morph gateway only speaks chat_completions, so every Morph model
+  // must resolve to an OpenAI-compatible provider whose request kind the gateway
+  // actually supports — otherwise OpenCode clients hit apiKindNotSupportedResponse.
+  it.each(morphChatModels)(
+    'maps $public_id to a chat_completions-compatible OpenCode provider',
+    model => {
+      const aiSdkProvider = getAiSdkProvider(model.public_id, null);
+      expect(aiSdkProvider).toBe('openai-compatible');
+      expect(getGatewayOpenCodeSettings(model.public_id)?.ai_sdk_provider).toBe(
+        'openai-compatible'
+      );
+
+      const kind = requestKindFor(aiSdkProvider);
+      expect(kind).toBe('chat_completions');
+      expect(PROVIDERS.MORPH.supportedChatApis).toContain(kind);
+    }
+  );
+
+  it('does not route the Morph MiniMax models through the Anthropic Messages API', () => {
+    for (const id of ['morph/minimax-m2.7', 'morph/minimax-m3']) {
+      expect(getAiSdkProvider(id, null)).not.toBe('anthropic');
+    }
+  });
+
+  // calculateCost_mUsd returns micro-USD: tokens * (USD per 1M tokens). These
+  // assertions prove the stored pricing actually bills correctly end to end, not
+  // just that the per-million numbers are present.
+  const ONE_M = 1_000_000;
+  // Returns the pricing of the first (and, for Morph models, only) tier of a
+  // registered model; the non-null assertions assume the id is registered.
+  const flatPricing = (id: string) => findKiloExclusiveModel(id)!.pricing![0].pricing;
+
+  it('bills cache reads at the discounted rate for qwen3.5 and glm-5.2', () => {
+    // 1M cache-hit tokens should cost the cache_read rate, strictly less than prompt.
+    const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, cache_read 0.3
+    const qwenCacheCost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: qwen }]
+    );
+    expect(qwenCacheCost).toBe(300_000); // 1M * 0.3
+    expect(qwenCacheCost).toBeLessThan(ONE_M * qwen.prompt_per_million);
+
+    const glm = flatPricing('morph/glm-5.2'); // prompt 1.1, cache_read 0.35
+    const glmCacheCost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: glm }]
+    );
+    expect(glmCacheCost).toBe(350_000); // 1M * 0.35
+  });
+
+  it('falls back to the prompt rate for cache reads on models without a cache_read price', () => {
+    // dsv4flash declares no cache_read rate; cache hits must bill at the prompt rate (not free).
+    const ds = flatPricing('morph/deepseek-v4-flash'); // prompt 0.139, cache_read null
+    expect(ds.input_cache_read_per_million).toBeNull();
+    const cost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: ds }]
+    );
+    expect(cost).toBe(ONE_M * ds.prompt_per_million); // 139_000
+  });
+
+  it('computes a mixed-usage bill from the stored qwen3.5 pricing', () => {
+    const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, completion 3.5, cache_read 0.3
+    const cost = calculateCost_mUsd(
+      {
+        uncachedInputTokens: ONE_M,
+        cacheWriteTokens: 0,
+        cacheHitTokens: ONE_M,
+        totalOutputTokens: ONE_M,
+      },
+      [{ start_context_length: 0, pricing: qwen }]
+    );
+    // 1M*0.5 (uncached) + 1M*0.3 (cache hit) + 1M*3.5 (output) = 4,300,000 µUSD
+    expect(cost).toBe(4_300_000);
+  });
+});
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -0,0 +1,160 @@
+import type {
+  KiloExclusiveModel,
+  Pricing,
+  PricingTiers,
+} from '@/lib/ai-gateway/providers/kilo-exclusive-model';
+
+// Large open-source models served on Morph's own inference fleet and exposed
+// through Morph's OpenAI-compatible gateway (https://api.morphllm.com/v1).
+//
+// `internal_id` is the model id Morph's gateway expects; `public_id` is the
+// Kilo-facing id. Requests route to the MORPH provider via `gateway: 'morph'`
+// (see provider-definitions.ts and get-provider.ts). Kilo holds the key
+// (MORPH_API_KEY); this is the gateway integration, not BYOK.
+//
+// Pricing (per 1M tokens) and context windows mirror Morph's published numbers
+// (landing/src/lib/pricing.ts + MODEL_CONTEXT_WINDOWS). Keep in sync.
+
+// Wraps a single price sheet in a one-entry tier list whose only tier starts
+// at context length 0. Every Morph model below uses it for its `pricing`.
+function flat(pricing: Pricing): PricingTiers {
+  const singleTier: PricingTiers = [{ start_context_length: 0, pricing }];
+  return singleTier;
+}
+
+// Catalog entry for Qwen3.5 397B served through the Morph gateway.
+// Kilo-facing id: 'morph/qwen3.5-397b'; id sent to Morph: 'morph-qwen35-397b'.
+// Flagged for reasoning and vision, with a single flat pricing tier that
+// stores a discounted cache-read rate (0.3 per 1M tokens).
+export const morph_qwen35_397b_model: KiloExclusiveModel = {
+  public_id: 'morph/qwen3.5-397b',
+  display_name: 'Morph: Qwen3.5 397B',
+  description: 'Qwen3.5 397B (A17B), served on Morph infrastructure.',
+  context_length: 262_144,
+  max_completion_tokens: 131_072,
+  status: 'public',
+  // Qwen3.5 397B accepts image input on Morph's gateway (canonical JSON
+  // input_modalities: ["text","image"]).
+  flags: ['reasoning', 'vision'],
+  gateway: 'morph',
+  internal_id: 'morph-qwen35-397b',
+  pricing: flat({
+    prompt_per_million: 0.5,
+    completion_per_million: 3.5,
+    input_cache_read_per_million: 0.3,
+    input_cache_write_per_million: null,
+  }),
+  // NOTE(review): both lists are empty — presumably "no restriction"; confirm
+  // against the KiloExclusiveModel definition.
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+// Catalog entry for Qwen3.6 27B served through the Morph gateway.
+// Kilo-facing id: 'morph/qwen3.6-27b'; id sent to Morph: 'morph-qwen36-27b'.
+// Reasoning only (no vision flag); single flat pricing tier with no separate
+// cache-read rate.
+export const morph_qwen36_27b_model: KiloExclusiveModel = {
+  public_id: 'morph/qwen3.6-27b',
+  display_name: 'Morph: Qwen3.6 27B',
+  description: 'Qwen3.6 27B, served on Morph infrastructure.',
+  context_length: 131_072,
+  // NOTE(review): equal to context_length — confirm against Morph's published
+  // output limit.
+  max_completion_tokens: 131_072,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-qwen36-27b',
+  pricing: flat({
+    prompt_per_million: 0.289,
+    completion_per_million: 2.4,
+    // null: no cache-read rate stored; morph.test.ts expects cache hits on such
+    // models to be billed at the prompt rate.
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+// Catalog entry for MiniMax M2.7 served through the Morph gateway.
+// Kilo-facing id: 'morph/minimax-m2.7'; id sent to Morph: 'morph-minimax27-230b'.
+// The public id contains "minimax", which is why getAiSdkProvider checks
+// isMorphModel before its name-based heuristics (see model-settings.ts).
+export const morph_minimax_m27_model: KiloExclusiveModel = {
+  public_id: 'morph/minimax-m2.7',
+  display_name: 'Morph: MiniMax M2.7',
+  description: 'MiniMax M2.7 (230B A10B), served on Morph infrastructure.',
+  context_length: 196_608,
+  // NOTE(review): equal to context_length — confirm against Morph's published
+  // output limit.
+  max_completion_tokens: 196_608,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-minimax27-230b',
+  pricing: flat({
+    prompt_per_million: 0.279,
+    completion_per_million: 1.2,
+    // null: no cache-read rate stored; morph.test.ts expects cache hits on such
+    // models to be billed at the prompt rate.
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+// Catalog entry for MiniMax M3 served through the Morph gateway.
+// Kilo-facing id: 'morph/minimax-m3'; id sent to Morph: 'morph-minimax3-428b'.
+// Flagged for reasoning and vision; single flat pricing tier with no separate
+// cache-read rate.
+export const morph_minimax_m3_model: KiloExclusiveModel = {
+  public_id: 'morph/minimax-m3',
+  display_name: 'Morph: MiniMax M3',
+  description: 'MiniMax M3 (428B A23B), served on Morph infrastructure.',
+  context_length: 256_000,
+  // NOTE(review): equal to context_length — confirm against Morph's published
+  // output limit.
+  max_completion_tokens: 256_000,
+  status: 'public',
+  // MiniMax M3 accepts image input on Morph's gateway (canonical JSON
+  // input_modalities: ["text","image"]).
+  flags: ['reasoning', 'vision'],
+  gateway: 'morph',
+  internal_id: 'morph-minimax3-428b',
+  pricing: flat({
+    prompt_per_million: 0.6,
+    completion_per_million: 2.4,
+    // null: no cache-read rate stored; morph.test.ts expects cache hits on such
+    // models to be billed at the prompt rate.
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+// Catalog entry for GLM-5.2 served through the Morph gateway.
+// Kilo-facing id: 'morph/glm-5.2'; id sent to Morph: 'morph-glm52-744b'.
+// Reasoning only (no vision flag); single flat pricing tier that stores a
+// discounted cache-read rate (0.35 per 1M tokens).
+export const morph_glm52_744b_model: KiloExclusiveModel = {
+  public_id: 'morph/glm-5.2',
+  display_name: 'Morph: GLM-5.2',
+  description: 'GLM-5.2 744B, served on Morph infrastructure.',
+  context_length: 1_048_576,
+  // NOTE(review): equal to context_length — confirm against Morph's published
+  // output limit.
+  max_completion_tokens: 1_048_576,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-glm52-744b',
+  pricing: flat({
+    prompt_per_million: 1.1,
+    completion_per_million: 4.1,
+    // GLM-5.2 runs LMCache prefix reuse, so cached input bills at a cheaper
+    // read rate (Morph's calculateChatGlm52Cost). Of the other Morph chat
+    // models only qwen3.5-397b also stores a cache-read rate (0.3); the rest
+    // are null, which morph.test.ts expects to be billed at the prompt rate
+    // rather than for free.
+    input_cache_read_per_million: 0.35,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+// Catalog entry for DeepSeek V4 Flash served through the Morph gateway.
+// Kilo-facing id: 'morph/deepseek-v4-flash'; id sent to Morph: 'morph-dsv4flash'.
+// Reasoning only (no vision flag); single flat pricing tier with no separate
+// cache-read rate.
+export const morph_dsv4flash_model: KiloExclusiveModel = {
+  public_id: 'morph/deepseek-v4-flash',
+  display_name: 'Morph: DeepSeek V4 Flash',
+  description: 'DeepSeek V4 Flash (1M context), served on Morph infrastructure.',
+  context_length: 1_048_576,
+  // NOTE(review): equal to context_length — confirm against Morph's published
+  // output limit.
+  max_completion_tokens: 1_048_576,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-dsv4flash',
+  pricing: flat({
+    prompt_per_million: 0.139,
+    completion_per_million: 0.278,
+    // null: no cache-read rate stored. morph.test.ts asserts that cache hits on
+    // this model are billed at the prompt rate.
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+// Every Morph chat model Kilo exposes. models.ts spreads this list into
+// kiloExclusiveModels, and morph.test.ts asserts that it holds exactly these
+// six public ids, so adding or removing an entry requires updating that test.
+export const morphChatModels: KiloExclusiveModel[] = [
+  morph_qwen35_397b_model,
+  morph_qwen36_27b_model,
+  morph_minimax_m27_model,
+  morph_minimax_m3_model,
+  morph_glm52_744b_model,
+  morph_dsv4flash_model,
+];
+
+// Reports whether `model` is the public id of a model registered in
+// `morphChatModels`, i.e. one that Kilo routes through Morph's gateway.
+//
+// The match is exact rather than a `morph/` prefix test. getAiSdkProvider uses
+// this to pin Morph's Chat Completions-only gateway to the 'openai-compatible'
+// AI SDK provider, and that pin should not leak onto ids that merely share the
+// `morph/` namespace but are not served by Morph's gateway (for example ids
+// proxied through another gateway). It also matches the exact
+// `public_id === model` comparison getAiSdkProvider uses for other
+// kilo-exclusive models.
+//
+// `model` is the Kilo-facing model id, e.g. 'morph/glm-5.2'. Returns true only
+// when that id is present in `morphChatModels`.
+export function isMorphModel(model: string): boolean {
+  return morphChatModels.some(candidate => candidate.public_id === model);
+}
diff --git a/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts b/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
@@ -60,6 +60,14 @@ export default {
     supportedChatApis: [],
     async transformRequest() {},
   },
+  MORPH: {
+    id: 'morph',
+    apiUrl: 'https://api.morphllm.com/v1',
+    apiKey: getEnvVariable('MORPH_API_KEY'),
+    // Morph's gateway exposes an OpenAI-compatible Chat Completions endpoint.
+    supportedChatApis: ['chat_completions'],
+    async transformRequest() {},
+  },
   VERCEL_AI_GATEWAY: {
     id: 'vercel',
     apiUrl: 'https://ai-gateway.vercel.sh/v1',