Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"engines": {
"node": ">=20.0.0"
},
"version": "2.19.2",
"version": "2.19.3",
"dependencies": {
"@mariozechner/jiti": "^2.6.5",
"@dreb/coding-agent": "*",
Expand Down
2 changes: 1 addition & 1 deletion packages/agent/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@dreb/agent-core",
"version": "2.19.2",
"version": "2.19.3",
"description": "General-purpose agent with transport abstraction, state management, and attachment support",
"type": "module",
"main": "./dist/index.js",
Expand Down
4 changes: 2 additions & 2 deletions packages/agent/test/e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,8 @@ describe("Agent E2E Tests", () => {
});
});

describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-3)", () => {
const model = getModel("xai", "grok-3");
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-4.3)", () => {
const model = getModel("xai", "grok-4.3");

it("should handle basic text prompt", async () => {
await basicPrompt(model);
Expand Down
2 changes: 1 addition & 1 deletion packages/ai/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@dreb/ai",
"version": "2.19.2",
"version": "2.19.3",
"description": "Unified LLM API with automatic model discovery and provider configuration",
"type": "module",
"main": "./dist/index.js",
Expand Down
4 changes: 2 additions & 2 deletions packages/ai/test/context-overflow.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,8 @@ describe("Context overflow error handling", () => {
// =============================================================================

describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
it("grok-3-fast - should detect overflow via isContextOverflow", async () => {
const model = getModel("xai", "grok-3-fast");
it("grok-4.3 - should detect overflow via isContextOverflow", async () => {
const model = getModel("xai", "grok-4.3");
const result = await testContextOverflow(model, process.env.XAI_API_KEY!);
logResult(result);

Expand Down
2 changes: 1 addition & 1 deletion packages/ai/test/empty.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ describe("AI Providers Empty Message Tests", () => {
});

describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
const llm = getModel("xai", "grok-3");
const llm = getModel("xai", "grok-4.3");

it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
Expand Down
62 changes: 56 additions & 6 deletions packages/ai/test/openai-responses-copilot-provider.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { getModel } from "../src/models.js";
import { streamOpenAIResponses } from "../src/providers/openai-responses.js";
import { streamSimple } from "../src/stream.js";
import type { Model } from "../src/types.js";

/**
 * Stub the global `fetch` so that every call resolves to a minimal, successful
 * server-sent-events response whose only frame is `data: [DONE]`.
 *
 * A new `Response` is constructed for each call. A `Response` body can be
 * consumed only once, so handing the same instance to several `fetch` calls
 * would leave every call after the first with an already-read body.
 *
 * The spy is not restored here.
 */
function mockDoneStream() {
	vi.spyOn(globalThis, "fetch").mockImplementation(
		async () =>
			new Response("data: [DONE]\n\n", {
				status: 200,
				headers: { "content-type": "text/event-stream" },
			}),
	);
}

describe("openai-responses github-copilot defaults", () => {
afterEach(() => {
Expand All @@ -11,12 +22,7 @@ describe("openai-responses github-copilot defaults", () => {
const model = getModel("github-copilot", "gpt-5-mini");
let capturedPayload: unknown;

vi.spyOn(globalThis, "fetch").mockResolvedValue(
new Response("data: [DONE]\n\n", {
status: 200,
headers: { "content-type": "text/event-stream" },
}),
);
mockDoneStream();

const stream = streamOpenAIResponses(
model,
Expand All @@ -41,4 +47,48 @@ describe("openai-responses github-copilot defaults", () => {
reasoning: expect.anything(),
});
});

// Sends one prompt through streamSimple with reasoning "xhigh" against a
// hand-built reasoning-capable openai-responses model, captures the outgoing
// request payload via onPayload, and checks its reasoning-related fields.
it("streamSimple applies reasoning defaults without synthetic one-token cap", async () => {
	type CapturedPayload = { max_output_tokens?: unknown };

	// Inline model definition, so the test does not depend on a registry entry.
	const reasoningModel: Model<"openai-responses"> = {
		id: "gpt-5.5",
		name: "gpt-5.5",
		api: "openai-responses",
		provider: "openai",
		baseUrl: "https://api.openai.com/v1",
		reasoning: true,
		input: ["text"],
		cost: { input: 1, output: 1, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 200000,
		maxTokens: 100000,
	};
	let seenPayload: CapturedPayload | undefined;

	// fetch is stubbed, so no real network request is made.
	mockDoneStream();

	const events = streamSimple(
		reasoningModel,
		{
			systemPrompt: "sys",
			messages: [{ role: "user", content: "hi", timestamp: Date.now() }],
		},
		{
			apiKey: "test-key",
			reasoning: "xhigh",
			onPayload: (payload) => {
				seenPayload = payload as CapturedPayload;
			},
		},
	);

	// Drain the stream until it reports a terminal event.
	for await (const { type } of events) {
		if (type === "done" || type === "error") break;
	}

	expect(seenPayload).toMatchObject({
		reasoning: { effort: "xhigh", summary: "auto" },
		max_output_tokens: 32000,
		include: ["reasoning.encrypted_content"],
	});
	// Explicit guard against a one-token output cap.
	expect(seenPayload?.max_output_tokens).not.toBe(1);
});
});
2 changes: 1 addition & 1 deletion packages/ai/test/tokens.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ describe("Token Statistics on Abort", () => {
});

describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
const llm = getModel("xai", "grok-3-fast");
const llm = getModel("xai", "grok-4.3");

it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
Expand Down
2 changes: 1 addition & 1 deletion packages/ai/test/tool-call-without-result.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ describe("Tool Call Without Result Tests", () => {
});

describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
const model = getModel("xai", "grok-3-fast");
const model = getModel("xai", "grok-4.3");

it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
Expand Down
22 changes: 9 additions & 13 deletions packages/ai/test/total-tokens.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,22 +219,18 @@ describe("totalTokens field", () => {
// =========================================================================

describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
it(
"grok-3-fast - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("xai", "grok-3-fast");
it("grok-4.3 - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => {
const llm = getModel("xai", "grok-4.3");

console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });

logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);

assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
});
});

// =========================================================================
Expand Down
2 changes: 1 addition & 1 deletion packages/ai/test/unicode-surrogate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
});

describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Unicode Handling", () => {
const llm = getModel("xai", "grok-3");
const llm = getModel("xai", "grok-4.3");

it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
Expand Down
2 changes: 1 addition & 1 deletion packages/coding-agent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ The `subagent` tool delegates tasks to independent child agent processes. Each s

**Agent definitions** live in `~/.dreb/agents/` (global) and `.dreb/agents/` (project). Each is a markdown file with YAML frontmatter specifying `name`, `model` (with provider fallback list), and optional `systemPrompt`. Built-in agents include `Explore` (read-only codebase exploration), `Sandbox` (restricted to `/tmp`), `feature-dev` (strong-tier coding), and several review agents.

**Model availability probes:** When an agent definition specifies a fallback list (comma-separated models), each model is verified with a lightweight 1-token API call before the subagent is spawned. Models that fail the probe (rate limit, quota exhaustion, auth failure, timeout) are skipped with a loud log line, and the next fallback is tried. If all configured models fail, the parent session's model is used as a last resort. Per-invocation model overrides and single-model configs skip probing entirely.
**Model availability probes:** When an agent definition specifies a fallback list (comma-separated models), each model is verified before the subagent is spawned, using a lightweight API call made through the same `streamSimple` path the agent loop uses. The probe uses the normal coding-agent thinking defaults and does not pass a synthetic `maxTokens` override, which keeps the request shape representative for both reasoning and non-reasoning models. Models that fail the probe (rate limit, quota exhaustion, auth failure, timeout) are skipped with a loud log line, and the next fallback is tried. If all configured models fail, the parent session's model is used as a last resort. Per-invocation model overrides and single-model configs skip probing entirely.

**Session metadata:** Each child process records its agent type in the session JSONL header (`agentType` field), providing an audit trail of which agent definition executed the work.

Expand Down
2 changes: 1 addition & 1 deletion packages/coding-agent/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@dreb/coding-agent",
"version": "2.19.2",
"version": "2.19.3",
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
"type": "module",
"drebConfig": {
Expand Down
5 changes: 2 additions & 3 deletions packages/coding-agent/src/core/sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import type { ResourceLoader } from "./resource-loader.js";
import { DefaultResourceLoader } from "./resource-loader.js";
import { getDefaultSessionDir, SessionManager } from "./session-manager.js";
import { SettingsManager } from "./settings-manager.js";
import { resolveEffectiveThinkingLevel } from "./thinking.js";
import { time } from "./timings.js";
import {
allTools,
Expand Down Expand Up @@ -252,9 +253,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
}

// Clamp to model capabilities
if (!model || !model.reasoning) {
thinkingLevel = "off";
}
thinkingLevel = resolveEffectiveThinkingLevel(model, thinkingLevel);

// Tools that are always active when available (created by factory, not in allTools singleton).
// suggest_next is only auto-activated when tools aren't explicitly specified — subagent
Expand Down
21 changes: 21 additions & 0 deletions packages/coding-agent/src/core/thinking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import type { ThinkingLevel as AgentThinkingLevel } from "@dreb/agent-core";
import type { ThinkingLevel as AiThinkingLevel, Model } from "@dreb/ai";
import { DEFAULT_THINKING_LEVEL } from "./defaults.js";

/**
 * Pick the thinking level to use for a model.
 *
 * @param model - Model the level is resolved for; may be undefined.
 * @param thinkingLevel - Requested level, or undefined to use the default.
 * @param defaultThinkingLevel - Fallback used when no level was requested.
 * @returns `"off"` when there is no model or its `reasoning` flag is falsy;
 * otherwise the requested level, or the default when none was requested.
 */
export function resolveEffectiveThinkingLevel(
	model: Model<any> | undefined,
	thinkingLevel: AgentThinkingLevel | undefined,
	defaultThinkingLevel: AgentThinkingLevel = DEFAULT_THINKING_LEVEL,
): AgentThinkingLevel {
	// Capability clamp: without a reasoning-capable model the level is always "off".
	if (!model?.reasoning) {
		return "off";
	}
	return thinkingLevel ?? defaultThinkingLevel;
}

/**
 * Map an effective thinking level onto the `reasoning` option for streamSimple.
 *
 * @param thinkingLevel - Effective thinking level.
 * @returns `undefined` for `"off"`; any other level is returned unchanged.
 */
export function thinkingLevelToReasoning(thinkingLevel: AgentThinkingLevel): AiThinkingLevel | undefined {
	if (thinkingLevel === "off") {
		return undefined;
	}
	return thinkingLevel as AiThinkingLevel;
}
Loading
Loading