aebrer · m-aebrer · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -34,7 +34,7 @@
 	"engines": {
 		"node": ">=20.0.0"
 	},
-	"version": "2.19.2",
+	"version": "2.19.3",
 	"dependencies": {
 		"@mariozechner/jiti": "^2.6.5",
 		"@dreb/coding-agent": "*",

diff --git a/packages/agent/package.json b/packages/agent/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@dreb/agent-core",
-	"version": "2.19.2",
+	"version": "2.19.3",
 	"description": "General-purpose agent with transport abstraction, state management, and attachment support",
 	"type": "module",
 	"main": "./dist/index.js",

diff --git a/packages/agent/test/e2e.test.ts b/packages/agent/test/e2e.test.ts
@@ -233,8 +233,8 @@ describe("Agent E2E Tests", () => {
 		});
 	});
 
-	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-3)", () => {
-		const model = getModel("xai", "grok-3");
+	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-4.3)", () => {
+		const model = getModel("xai", "grok-4.3");
 
 		it("should handle basic text prompt", async () => {
 			await basicPrompt(model);

diff --git a/packages/ai/package.json b/packages/ai/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@dreb/ai",
-	"version": "2.19.2",
+	"version": "2.19.3",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"type": "module",
 	"main": "./dist/index.js",

diff --git a/packages/ai/test/context-overflow.test.ts b/packages/ai/test/context-overflow.test.ts
@@ -314,8 +314,8 @@ describe("Context overflow error handling", () => {
 	// =============================================================================
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
-		it("grok-3-fast - should detect overflow via isContextOverflow", async () => {
-			const model = getModel("xai", "grok-3-fast");
+		it("grok-4.3 - should detect overflow via isContextOverflow", async () => {
+			const model = getModel("xai", "grok-4.3");
 			const result = await testContextOverflow(model, process.env.XAI_API_KEY!);
 			logResult(result);
 

diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts
@@ -249,7 +249,7 @@ describe("AI Providers Empty Message Tests", () => {
 	});
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
-		const llm = getModel("xai", "grok-3");
+		const llm = getModel("xai", "grok-4.3");
 
 		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);

diff --git a/packages/ai/test/openai-responses-copilot-provider.test.ts b/packages/ai/test/openai-responses-copilot-provider.test.ts
@@ -1,6 +1,17 @@
 import { afterEach, describe, expect, it, vi } from "vitest";
 import { getModel } from "../src/models.js";
 import { streamOpenAIResponses } from "../src/providers/openai-responses.js";
+import { streamSimple } from "../src/stream.js";
+import type { Model } from "../src/types.js";
+
+/** Stub global `fetch` so it resolves to an SSE response whose only event is `[DONE]`. */
+function mockDoneStream() {
+	const doneResponse = new Response("data: [DONE]\n\n", {
+		status: 200,
+		headers: { "content-type": "text/event-stream" },
+	});
+	vi.spyOn(globalThis, "fetch").mockResolvedValue(doneResponse);
+}
 
 describe("openai-responses github-copilot defaults", () => {
 	afterEach(() => {
@@ -11,12 +22,7 @@ describe("openai-responses github-copilot defaults", () => {
 		const model = getModel("github-copilot", "gpt-5-mini");
 		let capturedPayload: unknown;
 
-		vi.spyOn(globalThis, "fetch").mockResolvedValue(
-			new Response("data: [DONE]\n\n", {
-				status: 200,
-				headers: { "content-type": "text/event-stream" },
-			}),
-		);
+		mockDoneStream();
 
 		const stream = streamOpenAIResponses(
 			model,
@@ -41,4 +47,48 @@ describe("openai-responses github-copilot defaults", () => {
 			reasoning: expect.anything(),
 		});
 	});
+
+	it("streamSimple applies reasoning defaults without synthetic one-token cap", async () => {
+		const model: Model<"openai-responses"> = {
+			id: "gpt-5.5",
+			name: "gpt-5.5",
+			api: "openai-responses",
+			provider: "openai",
+			baseUrl: "https://api.openai.com/v1",
+			reasoning: true,
+			input: ["text"],
+			cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 200000,
+			maxTokens: 100000,
+		};
+		let capturedPayload: { max_output_tokens?: unknown } | undefined;
+
+		mockDoneStream();
+
+		const stream = streamSimple(
+			model,
+			{
+				systemPrompt: "sys",
+				messages: [{ role: "user", content: "hi", timestamp: Date.now() }],
+			},
+			{
+				apiKey: "test-key",
+				reasoning: "xhigh",
+				onPayload: (payload) => {
+					capturedPayload = payload as { max_output_tokens?: unknown };
+				},
+			},
+		);
+
+		for await (const event of stream) {
+			if (event.type === "done" || event.type === "error") break;
+		}
+
+		expect(capturedPayload).toMatchObject({
+			reasoning: { effort: "xhigh", summary: "auto" },
+			max_output_tokens: 32000,
+			include: ["reasoning.encrypted_content"],
+		});
+		expect(capturedPayload?.max_output_tokens).not.toBe(1);
+	});
 });
diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts
@@ -136,7 +136,7 @@ describe("Token Statistics on Abort", () => {
 	});
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
-		const llm = getModel("xai", "grok-3-fast");
+		const llm = getModel("xai", "grok-4.3");
 
 		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
 			await testTokensOnAbort(llm);

diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts
@@ -145,7 +145,7 @@ describe("Tool Call Without Result Tests", () => {
 	});
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
-		const model = getModel("xai", "grok-3-fast");
+		const model = getModel("xai", "grok-4.3");
 
 		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testToolCallWithoutResult(model);

diff --git a/packages/ai/test/total-tokens.test.ts b/packages/ai/test/total-tokens.test.ts
@@ -219,22 +219,18 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
-		it(
-			"grok-3-fast - should return totalTokens equal to sum of components",
-			{ retry: 3, timeout: 60000 },
-			async () => {
-				const llm = getModel("xai", "grok-3-fast");
+		it("grok-4.3 - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => {
+			const llm = getModel("xai", "grok-4.3");
 
-				console.log(`\nxAI / ${llm.id}:`);
-				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
+			console.log(`\nxAI / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
 
-				logUsage("First request", first);
-				logUsage("Second request", second);
+			logUsage("First request", first);
+			logUsage("Second request", second);
 
-				assertTotalTokensEqualsComponents(first);
-				assertTotalTokensEqualsComponents(second);
-			},
-		);
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		});
 	});
 
 	// =========================================================================

diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts
@@ -540,7 +540,7 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	});
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Unicode Handling", () => {
-		const llm = getModel("xai", "grok-3");
+		const llm = getModel("xai", "grok-4.3");
 
 		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);

diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md
@@ -346,7 +346,7 @@ The `subagent` tool delegates tasks to independent child agent processes. Each s
 
 **Agent definitions** live in `~/.dreb/agents/` (global) and `.dreb/agents/` (project). Each is a markdown file with YAML frontmatter specifying `name`, `model` (with provider fallback list), and optional `systemPrompt`. Built-in agents include `Explore` (read-only codebase exploration), `Sandbox` (restricted to `/tmp`), `feature-dev` (strong-tier coding), and several review agents.
 
-**Model availability probes:** When an agent definition specifies a fallback list (comma-separated models), each model is verified with a lightweight 1-token API call before the subagent is spawned. Models that fail the probe (rate limit, quota exhaustion, auth failure, timeout) are skipped with a loud log line, and the next fallback is tried. If all configured models fail, the parent session's model is used as a last resort. Per-invocation model overrides and single-model configs skip probing entirely.
+**Model availability probes:** When an agent definition specifies a fallback list (comma-separated models), each model is verified before the subagent is spawned, using a lightweight API call through the same `streamSimple` path the agent loop uses. The probe uses normal coding-agent thinking defaults and does not pass a synthetic `maxTokens` override, which keeps the request shape representative for reasoning models as well as non-reasoning models. Models that fail the probe (rate limit, quota exhaustion, auth failure, timeout) are skipped with a loud log line, and the next fallback is tried. If all configured models fail, the parent session's model is used as a last resort. Per-invocation model overrides and single-model configs skip probing entirely.
 
 **Session metadata:** Each child process records its agent type in the session JSONL header (`agentType` field), providing an audit trail of which agent definition executed the work.
 

diff --git a/packages/coding-agent/package.json b/packages/coding-agent/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "@dreb/coding-agent",
-	"version": "2.19.2",
+	"version": "2.19.3",
 	"description": "Coding agent CLI with read, bash, edit, write tools and session management",
 	"type": "module",
 	"drebConfig": {

diff --git a/packages/coding-agent/src/core/sdk.ts b/packages/coding-agent/src/core/sdk.ts
@@ -14,6 +14,7 @@ import type { ResourceLoader } from "./resource-loader.js";
 import { DefaultResourceLoader } from "./resource-loader.js";
 import { getDefaultSessionDir, SessionManager } from "./session-manager.js";
 import { SettingsManager } from "./settings-manager.js";
+import { resolveEffectiveThinkingLevel } from "./thinking.js";
 import { time } from "./timings.js";
 import {
 	allTools,
@@ -252,9 +253,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 	}
 
 	// Clamp to model capabilities
-	if (!model || !model.reasoning) {
-		thinkingLevel = "off";
-	}
+	thinkingLevel = resolveEffectiveThinkingLevel(model, thinkingLevel);
 
 	// Tools that are always active when available (created by factory, not in allTools singleton).
 	// suggest_next is only auto-activated when tools aren't explicitly specified — subagent

diff --git a/packages/coding-agent/src/core/thinking.ts b/packages/coding-agent/src/core/thinking.ts
@@ -0,0 +1,21 @@
+import type { ThinkingLevel as AgentThinkingLevel } from "@dreb/agent-core";
+import type { ThinkingLevel as AiThinkingLevel, Model } from "@dreb/ai";
+import { DEFAULT_THINKING_LEVEL } from "./defaults.js";
+
+/**
+ * Resolve the thinking level to use for a model: fall back to `defaultThinkingLevel` when
+ * none is given, then clamp to "off" if the model is missing or its `reasoning` flag is unset.
+ */
+export function resolveEffectiveThinkingLevel(
+	model: Model<any> | undefined,
+	thinkingLevel: AgentThinkingLevel | undefined,
+	defaultThinkingLevel: AgentThinkingLevel = DEFAULT_THINKING_LEVEL,
+): AgentThinkingLevel {
+	const effectiveThinkingLevel = thinkingLevel ?? defaultThinkingLevel;
+	return model?.reasoning ? effectiveThinkingLevel : "off";
+}
+
+/** Map a thinking level to the `reasoning` option for streamSimple; "off" becomes undefined. */
+export function thinkingLevelToReasoning(thinkingLevel: AgentThinkingLevel): AiThinkingLevel | undefined {
+	return thinkingLevel === "off" ? undefined : (thinkingLevel as AiThinkingLevel);
+}