From f942d393900506eaf9fef1a29943e103e9da7654 Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Wed, 24 Jun 2026 19:24:49 -0700
Subject: [PATCH 1/7] feat(provider): add Morph as a gateway provider for large
 open-source models

Adds Morph (https://api.morphllm.com/v1, OpenAI-compatible) as a first-party
gateway provider and registers the large open-source models Morph serves on its
own fleet: Qwen3.5 397B, Qwen3.6 27B, MiniMax M2.7, MiniMax M3, GLM-5.2, and
DeepSeek V4 Flash. Excludes Morph's proprietary models (apply/v3, compactor,
warp-grep, embeddings).

Routing reuses the existing gateway->provider resolution in get-provider.ts.
'morph' is already a recognized inference-provider id; this patch additionally
adds it to the ProviderId union in types.ts. Kilo holds the key via
MORPH_API_KEY (gateway integration, not BYOK). Pricing and context windows
mirror Morph's published numbers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 apps/web/src/lib/ai-gateway/models.ts         |   2 +
 .../web/src/lib/ai-gateway/providers/morph.ts | 153 ++++++++++++++++++
 .../providers/provider-definitions.ts         |   8 +
 .../web/src/lib/ai-gateway/providers/types.ts |   1 +
 4 files changed, 164 insertions(+)
 create mode 100644 apps/web/src/lib/ai-gateway/providers/morph.ts

diff --git a/apps/web/src/lib/ai-gateway/models.ts b/apps/web/src/lib/ai-gateway/models.ts
index 75988779f7..904d98cd8b 100644
--- a/apps/web/src/lib/ai-gateway/models.ts
+++ b/apps/web/src/lib/ai-gateway/models.ts
@@ -31,6 +31,7 @@ import {
   gemma_4_26b_a4b_it_free_model,
 } from '@/lib/ai-gateway/providers/google';
 import { QWEN37_PLUS_MODEL_ID, qwen36_plus_stealth_model } from '@/lib/ai-gateway/providers/qwen';
+import { morphChatModels } from '@/lib/ai-gateway/providers/morph';
 import { stepfun_37_flash_free_model } from '@/lib/ai-gateway/providers/stepfun';
 import { isGrokModel } from '@/lib/ai-gateway/providers/xai';
 import { isClaudeModel } from '@/lib/ai-gateway/providers/anthropic.constants';
@@ -85,6 +86,7 @@ export const kiloExclusiveModels = [
   gemma_4_26b_a4b_it_free_model,
   seed_20_code_free_model,
   ...deepseekDiscountedModels,
+  ...morphChatModels,
   qwen36_plus_stealth_model,
   claude_sonnet_clawsetup_model,
   claude_opus_4_8_stealth_model,
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
new file mode 100644
index 0000000000..85475a0e2e
--- /dev/null
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -0,0 +1,153 @@
+import type {
+  KiloExclusiveModel,
+  Pricing,
+  PricingTiers,
+} from '@/lib/ai-gateway/providers/kilo-exclusive-model';
+
+// Large open-source models served on Morph's own inference fleet and exposed
+// through Morph's OpenAI-compatible gateway (https://api.morphllm.com/v1).
+//
+// `internal_id` is the model id Morph's gateway expects; `public_id` is the
+// Kilo-facing id. Requests route to the MORPH provider via `gateway: 'morph'`
+// (see provider-definitions.ts and get-provider.ts). Kilo holds the key
+// (MORPH_API_KEY); this is the gateway integration, not BYOK.
+//
+// Pricing (per 1M tokens) and context windows mirror Morph's published numbers
+// (landing/src/lib/pricing.ts + MODEL_CONTEXT_WINDOWS). Keep in sync.
+
+function flat(pricing: Pricing): PricingTiers {
+  return [{ start_context_length: 0, pricing }];
+}
+
+export const morph_qwen35_397b_model: KiloExclusiveModel = {
+  public_id: 'morph/qwen3.5-397b',
+  display_name: 'Morph: Qwen3.5 397B',
+  description: 'Qwen3.5 397B (A17B), served on Morph infrastructure.',
+  context_length: 262_144,
+  max_completion_tokens: 131_072,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-qwen35-397b',
+  pricing: flat({
+    prompt_per_million: 0.5,
+    completion_per_million: 3.5,
+    input_cache_read_per_million: 0.3,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_qwen36_27b_model: KiloExclusiveModel = {
+  public_id: 'morph/qwen3.6-27b',
+  display_name: 'Morph: Qwen3.6 27B',
+  description: 'Qwen3.6 27B, served on Morph infrastructure.',
+  context_length: 131_072,
+  max_completion_tokens: 131_072,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-qwen36-27b',
+  pricing: flat({
+    prompt_per_million: 0.289,
+    completion_per_million: 2.4,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_minimax_m27_model: KiloExclusiveModel = {
+  public_id: 'morph/minimax-m2.7',
+  display_name: 'Morph: MiniMax M2.7',
+  description: 'MiniMax M2.7 (230B A10B), served on Morph infrastructure.',
+  context_length: 196_608,
+  max_completion_tokens: 196_608,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-minimax27-230b',
+  pricing: flat({
+    prompt_per_million: 0.279,
+    completion_per_million: 1.2,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_minimax_m3_model: KiloExclusiveModel = {
+  public_id: 'morph/minimax-m3',
+  display_name: 'Morph: MiniMax M3',
+  description: 'MiniMax M3 (428B A23B), served on Morph infrastructure.',
+  context_length: 256_000,
+  max_completion_tokens: 256_000,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-minimax3-428b',
+  pricing: flat({
+    prompt_per_million: 0.6,
+    completion_per_million: 2.4,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_glm52_744b_model: KiloExclusiveModel = {
+  public_id: 'morph/glm-5.2',
+  display_name: 'Morph: GLM-5.2',
+  description: 'GLM-5.2 744B, served on Morph infrastructure.',
+  context_length: 450_000,
+  max_completion_tokens: 450_000,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-glm52-744b',
+  pricing: flat({
+    prompt_per_million: 1.1,
+    completion_per_million: 4.1,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morph_dsv4flash_model: KiloExclusiveModel = {
+  public_id: 'morph/deepseek-v4-flash',
+  display_name: 'Morph: DeepSeek V4 Flash',
+  description: 'DeepSeek V4 Flash (1M context), served on Morph infrastructure.',
+  context_length: 1_048_576,
+  max_completion_tokens: 1_048_576,
+  status: 'public',
+  flags: ['reasoning'],
+  gateway: 'morph',
+  internal_id: 'morph-dsv4flash',
+  pricing: flat({
+    prompt_per_million: 0.139,
+    completion_per_million: 0.278,
+    input_cache_read_per_million: null,
+    input_cache_write_per_million: null,
+  }),
+  exclusive_to: [],
+  inference_provider_restriction: [],
+};
+
+export const morphChatModels: KiloExclusiveModel[] = [
+  morph_qwen35_397b_model,
+  morph_qwen36_27b_model,
+  morph_minimax_m27_model,
+  morph_minimax_m3_model,
+  morph_glm52_744b_model,
+  morph_dsv4flash_model,
+];
+
+export function isMorphModel(model: string): boolean {
+  return model.startsWith('morph/');
+}
diff --git a/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts b/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
index bc0aa86b61..dd779c94b1 100644
--- a/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
+++ b/apps/web/src/lib/ai-gateway/providers/provider-definitions.ts
@@ -60,6 +60,14 @@ export default {
     supportedChatApis: [],
     async transformRequest() {},
   },
+  MORPH: {
+    id: 'morph',
+    apiUrl: 'https://api.morphllm.com/v1',
+    apiKey: getEnvVariable('MORPH_API_KEY'),
+    // Morph's gateway exposes an OpenAI-compatible Chat Completions endpoint.
+    supportedChatApis: ['chat_completions'],
+    async transformRequest() {},
+  },
   VERCEL_AI_GATEWAY: {
     id: 'vercel',
     apiUrl: 'https://ai-gateway.vercel.sh/v1',
diff --git a/apps/web/src/lib/ai-gateway/providers/types.ts b/apps/web/src/lib/ai-gateway/providers/types.ts
index aae97dc111..4e8125d961 100644
--- a/apps/web/src/lib/ai-gateway/providers/types.ts
+++ b/apps/web/src/lib/ai-gateway/providers/types.ts
@@ -10,6 +10,7 @@ export type ProviderId =
   | 'inception'
   | 'martian'
   | 'mistral'
+  | 'morph'
   | 'vercel'
   | 'custom'
   | 'experiment'

From aff4c19c8cb33e221fea7cdc48d0df5ee34327fc Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Wed, 24 Jun 2026 20:23:30 -0700
Subject: [PATCH 2/7] fix(provider): correct GLM-5.2 cache price + add Morph
 gateway tests

Reconcile Morph gateway pricing against the canonical source
(https://www.morphllm.com/api/models/json + landing/src/lib/pricing.ts
cost functions):

- GLM-5.2: set input_cache_read to $0.35/1M. Morph's calculateChatGlm52Cost
  bills cached input at this rate (LMCache prefix reuse); the models JSON
  currently omits the field, so the prior null under-priced cache reads.
- Verified the other five models: qwen3.5 keeps its $0.30 cache rate; the
  rest (qwen3.6, minimax m2.7/m3, deepseek-v4-flash) have no cache-read
  billing in Morph's cost functions, so they correctly stay null. The
  minimax cache rates present in MODEL_PRICING config are never applied by
  the cost functions, so they are intentionally not exposed here.

Adds morph.test.ts covering: Morph provider config, the six registered
open-source models (and exclusion of proprietary apply/compactor/etc.),
gateway->provider resolution to PROVIDERS.MORPH (the same lookup
get-provider.ts uses), and per-model pricing.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../lib/ai-gateway/providers/morph.test.ts    | 72 +++++++++++++++++++
 .../web/src/lib/ai-gateway/providers/morph.ts |  5 +-
 2 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 apps/web/src/lib/ai-gateway/providers/morph.test.ts

diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
new file mode 100644
index 0000000000..7a1dfd80f1
--- /dev/null
+++ b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -0,0 +1,72 @@
+import { describe, it, expect } from '@jest/globals';
+import { findKiloExclusiveModel } from '@/lib/ai-gateway/models';
+import PROVIDERS from '@/lib/ai-gateway/providers/provider-definitions';
+import { morphChatModels, isMorphModel } from '@/lib/ai-gateway/providers/morph';
+
+// Resolves a kilo-exclusive model to its provider using the exact same lookup
+// get-provider.ts performs for non-Vercel, non-BYOK gateway models.
+function resolveGatewayProvider(gateway: string) {
+  return Object.values(PROVIDERS).find(p => p.id === gateway) ?? PROVIDERS.OPENROUTER;
+}
+
+describe('Morph gateway provider', () => {
+  it('exposes the Morph gateway with an OpenAI-compatible chat endpoint', () => {
+    expect(PROVIDERS.MORPH.id).toBe('morph');
+    expect(PROVIDERS.MORPH.apiUrl).toBe('https://api.morphllm.com/v1');
+    expect(PROVIDERS.MORPH.supportedChatApis).toContain('chat_completions');
+  });
+
+  it('registers exactly the six large open-source models (no proprietary models)', () => {
+    expect(morphChatModels.map(m => m.public_id).sort()).toEqual(
+      [
+        'morph/deepseek-v4-flash',
+        'morph/glm-5.2',
+        'morph/minimax-m2.7',
+        'morph/minimax-m3',
+        'morph/qwen3.5-397b',
+        'morph/qwen3.6-27b',
+      ].sort()
+    );
+    // Apply/compactor/warp-grep/embeddings must not be exposed through Kilo.
+    expect(morphChatModels.some(m => /v3|apply|compact|warp|embed/i.test(m.public_id))).toBe(false);
+  });
+
+  it('resolves a Morph model and routes it to the Morph provider', () => {
+    const model = findKiloExclusiveModel('morph/qwen3.6-27b');
+    expect(model).not.toBeNull();
+    expect(model!.gateway).toBe('morph');
+    expect(model!.internal_id).toBe('morph-qwen36-27b');
+    expect(resolveGatewayProvider(model!.gateway)).toBe(PROVIDERS.MORPH);
+  });
+
+  it('routes every registered Morph model to the Morph provider', () => {
+    for (const m of morphChatModels) {
+      expect(isMorphModel(m.public_id)).toBe(true);
+      expect(findKiloExclusiveModel(m.public_id)).toBe(m);
+      expect(resolveGatewayProvider(m.gateway)).toBe(PROVIDERS.MORPH);
+    }
+  });
+
+  // Per-1M-token rates mirror Morph's canonical pricing
+  // (https://www.morphllm.com/api/models/json). Cache-read is only billed for
+  // qwen3.5 (0.3) and glm-5.2 (0.35, LMCache prefix reuse); the JSON omits the
+  // glm rate, but Morph's calculateChatGlm52Cost bills it, so it is set here.
+  const EXPECTED: Record<string, { in: number; out: number; cache: number | null }> = {
+    'morph/qwen3.5-397b': { in: 0.5, out: 3.5, cache: 0.3 },
+    'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null },
+    'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null },
+    'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null },
+    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: 0.35 },
+    'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null },
+  };
+
+  it.each(morphChatModels)('prices $public_id to match Morph canonical pricing', model => {
+    const want = EXPECTED[model.public_id];
+    expect(want).toBeDefined();
+    expect(model.pricing).toHaveLength(1);
+    const p = model.pricing![0].pricing;
+    expect(p.prompt_per_million).toBe(want.in);
+    expect(p.completion_per_million).toBe(want.out);
+    expect(p.input_cache_read_per_million ?? null).toBe(want.cache);
+  });
+});
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
index 85475a0e2e..7b5f792a5b 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -112,7 +112,10 @@ export const morph_glm52_744b_model: KiloExclusiveModel = {
   pricing: flat({
     prompt_per_million: 1.1,
     completion_per_million: 4.1,
-    input_cache_read_per_million: null,
+    // GLM-5.2 runs LMCache prefix reuse, so cached input bills at a cheaper
+    // read rate (Morph's calculateChatGlm52Cost). The other Morph chat models
+    // do not bill cache reads, hence null on those.
+    input_cache_read_per_million: 0.35,
     input_cache_write_per_million: null,
   }),
   exclusive_to: [],

From f632630cc877ef39f36ec081ae07084862cb60b3 Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Wed, 24 Jun 2026 22:29:47 -0700
Subject: [PATCH 3/7] fix(provider): GLM-5.2 context window 450k -> 1M

Match Morph's canonical context (MODEL_CONTEXT_WINDOWS / api/models/json):
GLM-5.2 serves a 1,048,576-token window, not 450k. max_completion_tokens is
raised to the same value, so it stays equal to context_length as before.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 apps/web/src/lib/ai-gateway/providers/morph.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
index 7b5f792a5b..7112a23243 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -103,8 +103,8 @@ export const morph_glm52_744b_model: KiloExclusiveModel = {
   public_id: 'morph/glm-5.2',
   display_name: 'Morph: GLM-5.2',
   description: 'GLM-5.2 744B, served on Morph infrastructure.',
-  context_length: 450_000,
-  max_completion_tokens: 450_000,
+  context_length: 1_048_576,
+  max_completion_tokens: 1_048_576,
   status: 'public',
   flags: ['reasoning'],
   gateway: 'morph',

From aada3de1acef6cd450bea94cc7c940ce2fdeb846 Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Wed, 24 Jun 2026 23:18:38 -0700
Subject: [PATCH 4/7] feat(provider): flag Morph vision models + verify pricing
 vs canonical source

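Reconcile all six Morph gateway models against the canonical source of truth
(https://www.morphllm.com/api/models/json). Per-1M prompt/completion rates,
qwen3.5's cache-read rate, and context windows already match exactly; no
pricing change needed. (GLM-5.2's $0.35 cache-read rate is not in the JSON; it
was taken from Morph's calculateChatGlm52Cost in an earlier patch of this
series and is left unchanged here.)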

The canonical JSON marks qwen3.5-397b and minimax-m3 as image-capable
(input_modalities: ["text","image"]), so add the 'vision' flag to those two and
assert vision support (and reasoning) for every model in the test matrix.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../lib/ai-gateway/providers/morph.test.ts    | 30 ++++++++++++++-----
 .../web/src/lib/ai-gateway/providers/morph.ts |  8 +++--
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
index 7a1dfd80f1..52748bbcd9 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.test.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -51,13 +51,16 @@ describe('Morph gateway provider', () => {
   // (https://www.morphllm.com/api/models/json). Cache-read is only billed for
   // qwen3.5 (0.3) and glm-5.2 (0.35, LMCache prefix reuse); the JSON omits the
   // glm rate, but Morph's calculateChatGlm52Cost bills it, so it is set here.
-  const EXPECTED: Record<string, { in: number; out: number; cache: number | null }> = {
-    'morph/qwen3.5-397b': { in: 0.5, out: 3.5, cache: 0.3 },
-    'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null },
-    'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null },
-    'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null },
-    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: 0.35 },
-    'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null },
+  const EXPECTED: Record<
+    string,
+    { in: number; out: number; cache: number | null; vision: boolean }
+  > = {
+    'morph/qwen3.5-397b': { in: 0.5, out: 3.5, cache: 0.3, vision: true },
+    'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null, vision: false },
+    'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null, vision: false },
+    'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null, vision: true },
+    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: 0.35, vision: false },
+    'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null, vision: false },
   };
 
   it.each(morphChatModels)('prices $public_id to match Morph canonical pricing', model => {
@@ -69,4 +72,17 @@ describe('Morph gateway provider', () => {
     expect(p.completion_per_million).toBe(want.out);
     expect(p.input_cache_read_per_million ?? null).toBe(want.cache);
   });
+
+  // Only qwen3.5-397b and minimax-m3 expose image input on Morph's gateway
+  // (canonical JSON input_modalities includes "image").
+  it.each(morphChatModels)(
+    'flags $public_id vision support to match canonical modalities',
+    model => {
+      const want = EXPECTED[model.public_id];
+      expect(want).toBeDefined();
+      expect(model.flags.includes('vision')).toBe(want.vision);
+      // Every Morph chat model is a reasoning model.
+      expect(model.flags.includes('reasoning')).toBe(true);
+    }
+  );
 });
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
index 7112a23243..7d72d64433 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -26,7 +26,9 @@ export const morph_qwen35_397b_model: KiloExclusiveModel = {
   context_length: 262_144,
   max_completion_tokens: 131_072,
   status: 'public',
-  flags: ['reasoning'],
+  // Qwen3.5 397B accepts image input on Morph's gateway (canonical JSON
+  // input_modalities: ["text","image"]).
+  flags: ['reasoning', 'vision'],
   gateway: 'morph',
   internal_id: 'morph-qwen35-397b',
   pricing: flat({
@@ -86,7 +88,9 @@ export const morph_minimax_m3_model: KiloExclusiveModel = {
   context_length: 256_000,
   max_completion_tokens: 256_000,
   status: 'public',
-  flags: ['reasoning'],
+  // MiniMax M3 accepts image input on Morph's gateway (canonical JSON
+  // input_modalities: ["text","image"]).
+  flags: ['reasoning', 'vision'],
   gateway: 'morph',
   internal_id: 'morph-minimax3-428b',
   pricing: flat({

From 40795bdf499360aa1cc90c84e9b2b1861a325545 Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Wed, 24 Jun 2026 23:32:20 -0700
Subject: [PATCH 5/7] fix(provider): keep Morph models on chat_completions for
 OpenCode clients

getAiSdkProvider maps any '*minimax*' id to the Anthropic Messages API (and
gpt/grok ids to the OpenAI Responses API). Morph's gateway only exposes
chat_completions (MORPH.supportedChatApis), so OpenCode requests for
morph/minimax-m2.7 and morph/minimax-m3 were rejected by
apiKindNotSupportedResponse before reaching Morph.

Pin every Morph model to the 'openai-compatible' AI SDK provider at the top of
getAiSdkProvider, encoding the gateway's real invariant and guarding the whole
bug class (not just minimax). Add regression coverage asserting all six Morph
models resolve to a chat_completions-compatible OpenCode provider.

Reported-by: kilo-code-bot on PR #4245.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../ai-gateway/providers/model-settings.ts    | 10 +++++
 .../lib/ai-gateway/providers/morph.test.ts    | 39 +++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/apps/web/src/lib/ai-gateway/providers/model-settings.ts b/apps/web/src/lib/ai-gateway/providers/model-settings.ts
index 52ff151945..409823da81 100644
--- a/apps/web/src/lib/ai-gateway/providers/model-settings.ts
+++ b/apps/web/src/lib/ai-gateway/providers/model-settings.ts
@@ -19,6 +19,7 @@ import { isStepModel } from '@/lib/ai-gateway/providers/stepfun';
 import { ReasoningEffortSchema } from '@kilocode/db/schema-types';
 import { isDeepseekModel } from '@/lib/ai-gateway/providers/deepseek';
 import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax';
+import { isMorphModel } from '@/lib/ai-gateway/providers/morph';
 import type { DirectUserByokInferenceProviderId } from '@/lib/ai-gateway/providers/openrouter/inference-provider-id';
 
 const REASONING_VARIANTS_THINKING_ONLY = {
@@ -136,6 +137,15 @@ export function getAiSdkProvider(
   model: string,
   directProviderId: DirectUserByokInferenceProviderId | null
 ): Exclude<CustomLlmProvider, 'openrouter' /*the default*/> | undefined {
+  if (isMorphModel(model)) {
+    // Morph's gateway only exposes OpenAI Chat Completions
+    // (MORPH.supportedChatApis === ['chat_completions']). Pin every Morph model
+    // to the OpenAI-compatible AI SDK provider so name-based heuristics below
+    // (e.g. minimax -> 'anthropic'/Messages, gpt/grok -> 'openai'/Responses)
+    // never select an API kind the gateway would reject with
+    // apiKindNotSupportedResponse.
+    return 'openai-compatible';
+  }
   if (seed_20_code_free_model.public_id === model) {
     // with 'openai' (Responses API) prompt caching doesn't work
     return 'openai-compatible';
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
index 52748bbcd9..b29ed8070b 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.test.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -2,6 +2,19 @@ import { describe, it, expect } from '@jest/globals';
 import { findKiloExclusiveModel } from '@/lib/ai-gateway/models';
 import PROVIDERS from '@/lib/ai-gateway/providers/provider-definitions';
 import { morphChatModels, isMorphModel } from '@/lib/ai-gateway/providers/morph';
+import {
+  getAiSdkProvider,
+  getGatewayOpenCodeSettings,
+} from '@/lib/ai-gateway/providers/model-settings';
+
+// Mirrors get-provider.ts: the OpenCode AI SDK provider selects the request kind
+// a client sends. Morph only supports chat_completions, so any other kind is
+// rejected by apiKindNotSupportedResponse before reaching the gateway.
+function requestKindFor(aiSdkProvider: string | undefined) {
+  if (aiSdkProvider === 'anthropic') return 'messages';
+  if (aiSdkProvider === 'openai') return 'responses';
+  return 'chat_completions';
+}
 
 // Resolves a kilo-exclusive model to its provider using the exact same lookup
 // get-provider.ts performs for non-Vercel, non-BYOK gateway models.
@@ -85,4 +98,30 @@ describe('Morph gateway provider', () => {
       expect(model.flags.includes('reasoning')).toBe(true);
     }
   );
+
+  // Regression: getAiSdkProvider's name-based heuristics map any '*minimax*' id
+  // to the Anthropic Messages API (and gpt/grok ids to the OpenAI Responses
+  // API). The Morph gateway only speaks chat_completions, so every Morph model
+  // must resolve to an OpenAI-compatible provider whose request kind the gateway
+  // actually supports — otherwise OpenCode clients hit apiKindNotSupportedResponse.
+  it.each(morphChatModels)(
+    'maps $public_id to a chat_completions-compatible OpenCode provider',
+    model => {
+      const aiSdkProvider = getAiSdkProvider(model.public_id, null);
+      expect(aiSdkProvider).toBe('openai-compatible');
+      expect(getGatewayOpenCodeSettings(model.public_id)?.ai_sdk_provider).toBe(
+        'openai-compatible'
+      );
+
+      const kind = requestKindFor(aiSdkProvider);
+      expect(kind).toBe('chat_completions');
+      expect(PROVIDERS.MORPH.supportedChatApis).toContain(kind);
+    }
+  );
+
+  it('does not route the Morph MiniMax models through the Anthropic Messages API', () => {
+    for (const id of ['morph/minimax-m2.7', 'morph/minimax-m3']) {
+      expect(getAiSdkProvider(id, null)).not.toBe('anthropic');
+    }
+  });
 });

From 6f9ce6c6e0f3e67975e0f3977678d0b70317c073 Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Wed, 24 Jun 2026 23:48:34 -0700
Subject: [PATCH 6/7] test(provider): assert Morph pricing bills correctly via
 calculateCost_mUsd

Add cost-calculation coverage proving the stored per-million rates produce the
right micro-USD bills, not just that the numbers are present:
- qwen3.5/glm-5.2 cache reads bill at the discounted cache_read rate (< prompt)
- models without a cache_read price (e.g. deepseek-v4-flash) fall back to the
  prompt rate for cache hits (not free)
- a mixed uncached + cache-hit + output bill matches the expected total

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../lib/ai-gateway/providers/morph.test.ts    | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
index b29ed8070b..d193d5ad21 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.test.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -6,6 +6,7 @@ import {
   getAiSdkProvider,
   getGatewayOpenCodeSettings,
 } from '@/lib/ai-gateway/providers/model-settings';
+import { calculateCost_mUsd } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
 
 // Mirrors get-provider.ts: the OpenCode AI SDK provider selects the request kind
 // a client sends. Morph only supports chat_completions, so any other kind is
@@ -124,4 +125,54 @@ describe('Morph gateway provider', () => {
       expect(getAiSdkProvider(id, null)).not.toBe('anthropic');
     }
   });
+
+  // calculateCost_mUsd returns micro-USD: tokens * (USD per 1M tokens). These
+  // assertions prove the stored pricing actually bills correctly end to end, not
+  // just that the per-million numbers are present.
+  const ONE_M = 1_000_000;
+  const flatPricing = (id: string) => findKiloExclusiveModel(id)!.pricing![0].pricing;
+
+  it('bills cache reads at the discounted rate for qwen3.5 and glm-5.2', () => {
+    // 1M cache-hit tokens should cost the cache_read rate, strictly less than prompt.
+    const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, cache_read 0.3
+    const qwenCacheCost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: qwen }]
+    );
+    expect(qwenCacheCost).toBe(300_000); // 1M * 0.3
+    expect(qwenCacheCost).toBeLessThan(ONE_M * qwen.prompt_per_million);
+
+    const glm = flatPricing('morph/glm-5.2'); // prompt 1.1, cache_read 0.35
+    const glmCacheCost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: glm }]
+    );
+    expect(glmCacheCost).toBe(350_000); // 1M * 0.35
+  });
+
+  it('falls back to the prompt rate for cache reads on models without a cache_read price', () => {
+    // dsv4flash declares no cache_read rate; cache hits must bill at the prompt rate (not free).
+    const ds = flatPricing('morph/deepseek-v4-flash'); // prompt 0.139, cache_read null
+    expect(ds.input_cache_read_per_million).toBeNull();
+    const cost = calculateCost_mUsd(
+      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+      [{ start_context_length: 0, pricing: ds }]
+    );
+    expect(cost).toBe(ONE_M * ds.prompt_per_million); // 139_000
+  });
+
+  it('computes a mixed-usage bill from the stored qwen3.5 pricing', () => {
+    const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, completion 3.5, cache_read 0.3
+    const cost = calculateCost_mUsd(
+      {
+        uncachedInputTokens: ONE_M,
+        cacheWriteTokens: 0,
+        cacheHitTokens: ONE_M,
+        totalOutputTokens: ONE_M,
+      },
+      [{ start_context_length: 0, pricing: qwen }]
+    );
+    // 1M*0.5 (uncached) + 1M*0.3 (cache hit) + 1M*3.5 (output) = 4,300,000 µUSD
+    expect(cost).toBe(4_300_000);
+  });
 });

From 4ee98f1619bf3634656af432216f3ef046b9c4df Mon Sep 17 00:00:00 2001
From: Shrey Birmiwal <shrey@morphllm.com>
Date: Thu, 25 Jun 2026 11:18:07 -0700
Subject: [PATCH 7/7] fix(provider): GLM-5.2 has no cache rate (match canonical
 /api/models/json)

The canonical models JSON advertises a cache rate only for qwen3.5 (0.3);
GLM-5.2 has none. Set input_cache_read_per_million to null and update the
test so GLM-5.2 bills cache hits at the prompt rate, like the other
non-cache models.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .../lib/ai-gateway/providers/morph.test.ts    | 33 ++++++++-----------
 .../web/src/lib/ai-gateway/providers/morph.ts |  6 ++--
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/apps/web/src/lib/ai-gateway/providers/morph.test.ts b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
index d193d5ad21..7ddf5d0811 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.test.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.test.ts
@@ -63,8 +63,7 @@ describe('Morph gateway provider', () => {
 
   // Per-1M-token rates mirror Morph's canonical pricing
   // (https://www.morphllm.com/api/models/json). Cache-read is only billed for
-  // qwen3.5 (0.3) and glm-5.2 (0.35, LMCache prefix reuse); the JSON omits the
-  // glm rate, but Morph's calculateChatGlm52Cost bills it, so it is set here.
+  // qwen3.5 (0.3); every other Morph model (incl. GLM-5.2) has no cache rate.
   const EXPECTED: Record<
     string,
     { in: number; out: number; cache: number | null; vision: boolean }
@@ -73,7 +72,7 @@ describe('Morph gateway provider', () => {
     'morph/qwen3.6-27b': { in: 0.289, out: 2.4, cache: null, vision: false },
     'morph/minimax-m2.7': { in: 0.279, out: 1.2, cache: null, vision: false },
     'morph/minimax-m3': { in: 0.6, out: 2.4, cache: null, vision: true },
-    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: 0.35, vision: false },
+    'morph/glm-5.2': { in: 1.1, out: 4.1, cache: null, vision: false },
     'morph/deepseek-v4-flash': { in: 0.139, out: 0.278, cache: null, vision: false },
   };
 
@@ -132,7 +131,7 @@ describe('Morph gateway provider', () => {
   const ONE_M = 1_000_000;
   const flatPricing = (id: string) => findKiloExclusiveModel(id)!.pricing![0].pricing;
 
-  it('bills cache reads at the discounted rate for qwen3.5 and glm-5.2', () => {
+  it('bills cache reads at the discounted rate for qwen3.5 (the only cache model)', () => {
     // 1M cache-hit tokens should cost the cache_read rate, strictly less than prompt.
     const qwen = flatPricing('morph/qwen3.5-397b'); // prompt 0.5, cache_read 0.3
     const qwenCacheCost = calculateCost_mUsd(
@@ -141,24 +140,20 @@ describe('Morph gateway provider', () => {
     );
     expect(qwenCacheCost).toBe(300_000); // 1M * 0.3
     expect(qwenCacheCost).toBeLessThan(ONE_M * qwen.prompt_per_million);
-
-    const glm = flatPricing('morph/glm-5.2'); // prompt 1.1, cache_read 0.35
-    const glmCacheCost = calculateCost_mUsd(
-      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
-      [{ start_context_length: 0, pricing: glm }]
-    );
-    expect(glmCacheCost).toBe(350_000); // 1M * 0.35
   });
 
   it('falls back to the prompt rate for cache reads on models without a cache_read price', () => {
-    // dsv4flash declares no cache_read rate; cache hits must bill at the prompt rate (not free).
-    const ds = flatPricing('morph/deepseek-v4-flash'); // prompt 0.139, cache_read null
-    expect(ds.input_cache_read_per_million).toBeNull();
-    const cost = calculateCost_mUsd(
-      { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
-      [{ start_context_length: 0, pricing: ds }]
-    );
-    expect(cost).toBe(ONE_M * ds.prompt_per_million); // 139_000
+    // GLM-5.2 and dsv4flash declare no cache_read rate per canonical JSON; cache
+    // hits must bill at the prompt rate (not free, not a discount).
+    for (const id of ['morph/glm-5.2', 'morph/deepseek-v4-flash']) {
+      const p = flatPricing(id);
+      expect(p.input_cache_read_per_million).toBeNull();
+      const cost = calculateCost_mUsd(
+        { uncachedInputTokens: 0, cacheWriteTokens: 0, cacheHitTokens: ONE_M, totalOutputTokens: 0 },
+        [{ start_context_length: 0, pricing: p }]
+      );
+      expect(cost).toBe(ONE_M * p.prompt_per_million);
+    }
   });
 
   it('computes a mixed-usage bill from the stored qwen3.5 pricing', () => {
diff --git a/apps/web/src/lib/ai-gateway/providers/morph.ts b/apps/web/src/lib/ai-gateway/providers/morph.ts
index 7d72d64433..bddae72b09 100644
--- a/apps/web/src/lib/ai-gateway/providers/morph.ts
+++ b/apps/web/src/lib/ai-gateway/providers/morph.ts
@@ -116,10 +116,8 @@ export const morph_glm52_744b_model: KiloExclusiveModel = {
   pricing: flat({
     prompt_per_million: 1.1,
     completion_per_million: 4.1,
-    // GLM-5.2 runs LMCache prefix reuse, so cached input bills at a cheaper
-    // read rate (Morph's calculateChatGlm52Cost). The other Morph chat models
-    // do not bill cache reads, hence null on those.
-    input_cache_read_per_million: 0.35,
+    // No cache-read rate per the canonical /api/models/json; only qwen3.5 has one.
+    input_cache_read_per_million: null,
     input_cache_write_per_million: null,
   }),
   exclusive_to: [],