MoonshotAI · bj456736 · Jun 30, 2026 · Jul 1, 2026 · Jul 1, 2026 · chatgpt-codex-connector
diff --git a/packages/agent-core/src/tools/builtin/state/todo-list.md b/packages/agent-core/src/tools/builtin/state/todo-list.md
@@ -19,9 +19,10 @@ Use this tool to maintain a structured TODO list as you work through a multi-ste
 - If no available tool can move any task forward, tell the user where you are stuck instead of repeatedly re-ordering the same todos.
 
 **How to use:**
-- Call with `todos: [...]` to replace the full list. Statuses: pending / in_progress / done.
+- Call with `todos: [...]` to replace the full list. Statuses: `pending` / `in_progress` / `done`.
 - Call with no `todos` argument to retrieve the current list without changing it.
 - Call with `todos: []` to clear the list.
+- **Important:** mark a finished task with the status `done` exactly, not `completed` or `finished`.
 - Keep titles short and actionable (e.g. "Read session-control.ts", "Add planMode flag to TurnManager").
 - Update statuses as you make progress.
 - When work is underway, keep exactly one task `in_progress`.

diff --git a/packages/agent-core/src/tools/builtin/state/todo-list.ts b/packages/agent-core/src/tools/builtin/state/todo-list.ts
@@ -28,7 +28,7 @@ export const TODO_STORE_KEY = 'todo';
 const TODO_LIST_WRITE_REMINDER =
   'Ensure that you continue to use the todo list to track progress. Mark tasks done immediately after finishing them, and keep exactly one task in_progress when work is underway.';
 
-export type TodoStatus = 'pending' | 'in_progress' | 'done';
+export type TodoStatus = 'pending' | 'in_progress' | 'done' | 'completed';
 
 export interface TodoItem {
   readonly title: string;
@@ -45,7 +45,9 @@ declare module '../../store' {
 
 const TodoItemSchema = z.object({
   title: z.string().min(1).describe('Short, actionable title for the todo.'),
-  status: z.enum(['pending', 'in_progress', 'done']).describe('Current status of the todo.'),
+  status: z
+    .preprocess((val) => (val === 'completed' ? 'done' : val), z.enum(['pending', 'in_progress', 'done']))
+    .describe('Current status of the todo. Must be exactly one of: pending, in_progress, done. Do NOT use completed or finished.'),
 });
 
 export interface TodoListInput {
@@ -81,6 +83,7 @@ function statusMarker(status: TodoStatus): string {
     case 'in_progress':
       return '[in_progress]';
     case 'done':
+    case 'completed':
       return '[done]';
     default: {
       const _exhaustive: never = status;
@@ -133,7 +136,10 @@ export class TodoListTool implements BuiltinTool<TodoListInput> {
   private setTodos(todos: readonly TodoItem[]): void {
     this.store.set(
       TODO_STORE_KEY,
-      todos.map((todo) => ({ title: todo.title, status: todo.status })),
+      todos.map((todo) => ({
+        title: todo.title,
+        status: todo.status === 'completed' ? 'done' : todo.status,
+      })),
     );
   }
 }
diff --git a/packages/agent-core/src/tools/builtin/web/fetch-url.md b/packages/agent-core/src/tools/builtin/web/fetch-url.md
@@ -1,3 +1,3 @@
-Fetch content from a URL. For an HTML page the main article text is extracted; for a plain-text or markdown response the full body is returned verbatim. The result states which of the two you received, so you can judge how complete it is. Use this when you need to read a specific web page.
+Fetch content from a URL. For an HTML page the main article text is extracted; for a plain-text or markdown response the full body is returned verbatim; for an image response the image itself is returned so you can view it directly. The result states which of the three you received, so you can judge how complete it is. Use this when you need to read a specific web page or view an image from a URL.
 
-Only fully-formed public `http`/`https` URLs are supported; other schemes and private or loopback addresses are not fetched. Very large pages may be truncated or refused.
+Only fully-formed public `http`/`https` URLs are supported; other schemes and private or loopback addresses are not fetched. Very large pages or images may be truncated or refused.
diff --git a/packages/agent-core/src/tools/builtin/web/fetch-url.ts b/packages/agent-core/src/tools/builtin/web/fetch-url.ts
@@ -7,6 +7,7 @@
  */
 
 import { z } from 'zod';
+import type { ContentPart } from '@moonshot-ai/kosong';
 
 import type { BuiltinTool } from '../../../agent/tool';
 import { ToolAccesses } from '../../../loop/tool-access';
@@ -26,13 +27,22 @@ import DESCRIPTION from './fetch-url.md?raw';
  * - `extracted` — the body was an HTML page; only the main article text
  *   was extracted and returned.
  */
-export type UrlFetchKind = 'passthrough' | 'extracted';
+export type UrlFetchKind = 'passthrough' | 'extracted' | 'image';
+
+export interface UrlFetchImageData {
+  /** Base64-encoded image bytes. */
+  base64: string;
+  /** MIME type of the image (e.g. image/png). */
+  mimeType: string;
+}
 
 export interface UrlFetchResult {
-  /** The text handed to the LLM. */
+  /** The text handed to the LLM, or an empty string when imageData is present. */
   content: string;
-  /** Whether `content` is a verbatim passthrough or extracted main text. */
+  /** Whether the result is a verbatim passthrough, extracted main text, or an image. */
   kind: UrlFetchKind;
+  /** When `kind` is 'image', the image data to be rendered as an image_url content part. */
+  imageData?: UrlFetchImageData;
 }
 
 export interface UrlFetcher {
@@ -89,7 +99,20 @@ export class FetchURLTool implements BuiltinTool<FetchURLInput> {
     }: ExecutableToolContext,
   ): Promise<ExecutableToolResult> {
     try {
-      const { content, kind } = await this.fetcher.fetch(args.url, { toolCallId });
+      const { content, kind, imageData } = await this.fetcher.fetch(args.url, { toolCallId });
+
+      // If the provider returned an image, render it as an image_url content part
+      // so the model can see it directly.
+      if (imageData) {
+        const output: ContentPart[] = [
+          { type: 'text', text: `Fetched image from ${args.url}` },
+          {
+            type: 'image_url',
+            imageUrl: { url: `data:${imageData.mimeType};base64,${imageData.base64}` },
+          },
+        ];
+        return { output, isError: false };
+      }
 
       if (!content) {
         return {

diff --git a/packages/agent-core/src/tools/providers/local-fetch-url.ts b/packages/agent-core/src/tools/providers/local-fetch-url.ts
@@ -161,6 +161,35 @@
       );
     }
 
+    const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
+
+    // Image responses: buffer the full body as binary and return it base64-encoded
+    // so the tool can render it directly as an image_url content part.
+    if (contentType.startsWith('image/')) {
+      const contentLengthRaw = response.headers.get('content-length');
+      if (contentLengthRaw !== null) {
+        const cl = Number(contentLengthRaw);
+        if (Number.isFinite(cl) && cl > this.maxBytes) {
+          throw new Error(
+            `Response body too large: ${String(cl)} bytes exceeds maxBytes (${String(this.maxBytes)}).`,
+          );
+        }
+      }
+      const buffer = await response.arrayBuffer();
+      const bytes = Buffer.byteLength(buffer);
+      if (bytes > this.maxBytes) {
+        throw new Error(
+          `Response body too large: ${String(bytes)} bytes exceeds maxBytes (${String(this.maxBytes)}).`,
+        );
+      }
+      const base64 = Buffer.from(buffer).toString('base64');
+      return {
+        content: '',
+        kind: 'image',
+        imageData: { base64, mimeType: contentType.split(';')[0].trim() },
+      };
+    }
+
     // Reject oversized responses before buffering the full body.
     const contentLengthRaw = response.headers.get('content-length');
     if (contentLengthRaw !== null) {
@@ -182,7 +211,6 @@
       );
     }
 
-    const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
     if (contentType.startsWith('text/plain') || contentType.startsWith('text/markdown')) {
       return { content: body, kind: 'passthrough' };
     }

diff --git a/packages/agent-core/src/tools/providers/moonshot-fetch-url.ts b/packages/agent-core/src/tools/providers/moonshot-fetch-url.ts
@@ -50,9 +50,7 @@
 
   async fetch(url: string, options?: { toolCallId?: string }): Promise<UrlFetchResult> {
     try {
-      const content = await this.fetchViaMoonshot(url, options?.toolCallId);
-      // The service returns text it has already extracted from the page.
-      return { content, kind: 'extracted' };
+      return await this.fetchViaMoonshot(url, options?.toolCallId);
     } catch {
       // Forward an explicit options object even when the caller passed
       // none, so downstream consumers always see a defined second arg.
@@ -63,7 +61,7 @@
   private async fetchViaMoonshot(
     url: string,
     toolCallId: string | undefined,
-  ): Promise<string> {
+  ): Promise<UrlFetchResult> {
     const bodyJson = JSON.stringify({ url });
 
     const response = await this.post(bodyJson, toolCallId);
@@ -82,7 +80,19 @@
       );
     }
 
-    return response.text();
+    const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
+    if (contentType.startsWith('image/')) {
+      const buffer = await response.arrayBuffer();
+      const base64 = Buffer.from(buffer).toString('base64');
+      return {
+        content: '',
+        kind: 'image',
+        imageData: { base64, mimeType: contentType.split(';')[0].trim() },
+      };
+    }
+
+    // The service returns text it has already extracted from the page.
+    return { content: await response.text(), kind: 'extracted' };
   }
 
   private async post(bodyJson: string, toolCallId: string | undefined): Promise<Response> {

diff --git a/packages/agent-core/test/tools/fetch-url.test.ts b/packages/agent-core/test/tools/fetch-url.test.ts
@@ -249,8 +249,33 @@ describe('FetchURLTool', () => {
     expect(toolContentString(result)).toMatch(/due to network error/i);
   });
 
+  it('returns image data as image_url content parts when fetcher returns an image', async () => {
+    const fetcher: UrlFetcher = {
+      fetch: vi.fn().mockResolvedValue({
+        content: '',
+        kind: 'image' as const,
+        imageData: { base64: 'abc123', mimeType: 'image/png' },
+      }),
+    };
+    const tool = new FetchURLTool(fetcher);
+    const result = await executeTool(tool, {
+      turnId: 't1',
+      toolCallId: 'c-img',
+      args: { url: 'https://example.com/chart.png' },
+      signal,
+    });
+
+    expect(result.isError).toBe(false);
+    expect(Array.isArray(result.output)).toBe(true);
+    const parts = result.output as Array<{ type: string; text?: string; imageUrl?: { url: string } }>;
+    expect(parts).toHaveLength(2);
+    expect(parts[0].type).toBe('text');
+    expect(parts[0].text).toContain('Fetched image');
+    expect(parts[1].type).toBe('image_url');
+    expect(parts[1].imageUrl?.url).toBe('data:image/png;base64,abc123');
+  });
+
   it('passes through markdown content verbatim instead of running text extraction', async () => {
-    // py: when the server returns text/markdown, extraction is skipped and
     // the body is returned as-is with a different status message. The
     // fetcher signals the bypass via UrlFetchResult.kind = 'passthrough'.
     const markdown = '# Title\n\nThis is a markdown document.\n';

diff --git a/packages/agent-core/test/tools/providers/local-fetch-url.test.ts b/packages/agent-core/test/tools/providers/local-fetch-url.test.ts
@@ -40,6 +40,24 @@ describe('LocalFetchURLProvider content kind', () => {
     expect(result).toEqual({ content: '# Title\n\nbody', kind: 'passthrough' });
   });
 
+  it('returns image data as base64 for image/* content types', async () => {
+    const fetchImpl = vi.fn<typeof fetch>().mockResolvedValue(
+      new Response(Buffer.from('fake-png-bytes'), {
+        status: 200,
+        headers: { 'content-type': 'image/png' },
+      }),
+    );
+    const provider = new LocalFetchURLProvider({ fetchImpl });
+
+    const result = await provider.fetch('https://example.com/chart.png');
+
+    expect(result.kind).toBe('image');
+    expect(result.content).toBe('');
+    expect(result.imageData).toBeDefined();
+    expect(result.imageData?.mimeType).toBe('image/png');
+    expect(result.imageData?.base64).toBe(Buffer.from('fake-png-bytes').toString('base64'));
+  });
+
   it('reports HTML bodies as extracted main content', async () => {
     const html =
       '<html><head><title>Doc</title></head><body><article>' +

diff --git a/packages/agent-core/test/tools/todo-list.test.ts b/packages/agent-core/test/tools/todo-list.test.ts
@@ -142,6 +142,23 @@ describe('TodoListTool', () => {
     ]);
   });
 
+  it('accepts "completed" as a status and maps it to "done"', async () => {
+    const { tool, getTodos } = makeTool();
+
+    const result = await executeTool(tool, {
+      turnId: 't1',
+      toolCallId: 'call_1',
+      args: {
+        todos: [{ title: 'done task', status: 'completed' }],
+      },
+      signal,
+    });
+
+    expect(result).toMatchObject({ isError: false });
+    expect(result.output).toContain('[done] done task');
+    expect(getTodos()).toEqual([{ title: 'done task', status: 'done' }]);
+  });
+
   it('renders a done todo with a marker matching the status enum value', async () => {
     const { tool } = makeTool([{ title: 'shipped', status: 'done' }]);