Skip to content
5 changes: 5 additions & 0 deletions .changeset/enlarge-uploaded-images.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Add click-to-enlarge for images uploaded in the web chat. Click an image in a message to open it.
5 changes: 5 additions & 0 deletions .changeset/fix-web-video-playback.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Fix uploaded videos failing to play in the web chat.
28 changes: 20 additions & 8 deletions apps/kimi-code/src/tui/utils/image-placeholder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,24 @@
* the text (we can't hallucinate files for it).
* - Order is preserved for text/image/video segments. Image placeholders
* expand to image content parts so the prompt reaches the provider
* without relying on a model tool call. Video placeholders still expand
* to file-path tags so `ReadMediaFile` can own video upload behavior.
* without relying on a model tool call. Video placeholders are copied
* into the shared cache (`getCacheDir()`) and expand to file-path tags,
* so `ReadMediaFile` — and the provider's `VideoUploader` — own video
* upload behavior instead of base64-inlining here.
* - Adjacent text segments are flattened — empty / whitespace-only
* segments drop out so we never emit `{type:'text', text:' '}`
* noise between two media parts.
*/

import { randomUUID } from 'node:crypto';
import { copyFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';

import type { PromptPart } from '@moonshot-ai/kimi-code-sdk';
import { buildImageCompressionCaption } from '@moonshot-ai/kimi-code-sdk';

import { getCacheDir } from '#/utils/paths';

import type {
ImageAttachment,
ImageAttachmentStore,
Expand Down Expand Up @@ -63,8 +71,8 @@ export function extractMediaAttachments(
const before = text.slice(cursor, match.index);
pushText(parts, before);
if (attachment.kind === 'video') {
const mediaText = tagTextForVideo(attachment);
pushText(parts, mediaText);
const cachePath = materializeVideoToCache(attachment);
pushText(parts, formatMediaTag('video', cachePath));
videoAttachmentIds.push(id);
} else {
// Paste-time compression is announced next to the image so the model
Expand Down Expand Up @@ -115,6 +123,14 @@ function imagePartForAttachment(att: ImageAttachment): PromptPart {
};
}

/**
 * Copy a video attachment's source file into the shared cache directory and
 * return the path of the copy.
 *
 * The cache directory is created on demand. The copy is named
 * `<uuid>-<label>` so repeated uploads of the same label never collide.
 * Path separators in the generated name are replaced with `_`: the label is
 * used as part of a single filename, so a label such as `dir/clip.mp4` or
 * `../clip.mp4` must not make `join()` resolve into a missing subdirectory
 * or outside the cache directory.
 *
 * @param att - Video attachment whose `sourcePath` is copied.
 * @returns Path of the cached copy.
 * @throws Whatever `mkdirSync` / `copyFileSync` throw (e.g. missing source).
 */
function materializeVideoToCache(att: VideoAttachment): string {
  const cacheDir = getCacheDir();
  mkdirSync(cacheDir, { recursive: true });
  // The UUID prefix contains no separators, so sanitizing the whole name only
  // ever rewrites characters that came from the label.
  const fileName = `${randomUUID()}-${att.label}`.replace(/[\\/]/g, '_');
  const target = join(cacheDir, fileName);
  copyFileSync(att.sourcePath, target);
  return target;
}

function captionForCompressedImage(att: ImageAttachment): string {
const original = att.original;
if (original === undefined) return '';
Expand All @@ -135,10 +151,6 @@ function captionForCompressedImage(att: ImageAttachment): string {
});
}

/** Build the `<video>` path tag that points at the attachment's source path. */
function tagTextForVideo(att: VideoAttachment): string {
  const { sourcePath } = att;
  return formatMediaTag('video', sourcePath);
}

/**
 * Render a media path tag of the form `<tag path="…"></tag>`.
 * The path is passed through `escapeAttribute` before being embedded.
 */
function formatMediaTag(tag: 'image' | 'video', path: string): string {
  const escapedPath = escapeAttribute(path);
  const opening = '<' + tag + ' path="' + escapedPath + '">';
  const closing = '</' + tag + '>';
  return opening + closing;
}
Expand Down
121 changes: 96 additions & 25 deletions apps/kimi-code/test/tui/input/image-placeholder.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';

import { describe, it, expect } from 'vitest';

import { KIMI_CODE_HOME_ENV } from '#/constant/app';
import { ImageAttachmentStore } from '#/tui/utils/image-attachment-store';
import { extractMediaAttachments } from '#/tui/utils/image-placeholder';
import { getCacheDir } from '#/utils/paths';

function storeWith(
bytes: Uint8Array,
Expand All @@ -13,6 +19,36 @@ function storeWith(
return { store, placeholder: att.placeholder };
}

/**
 * Redirect the home env var (`KIMI_CODE_HOME_ENV`) to a fresh temp directory
 * so `getCacheDir()` resolves inside it for the duration of a test.
 * The returned `cleanup` restores the previous env value (or unsets it when
 * there was none) and removes the temp directory.
 */
function setupTempCache(): { cleanup: () => void } {
  const tempHome = mkdtempSync(join(tmpdir(), 'kimi-home-'));
  const previousHome = process.env[KIMI_CODE_HOME_ENV];
  process.env[KIMI_CODE_HOME_ENV] = tempHome;

  function cleanup(): void {
    if (previousHome !== undefined) {
      process.env[KIMI_CODE_HOME_ENV] = previousHome;
    } else {
      delete process.env[KIMI_CODE_HOME_ENV];
    }
    rmSync(tempHome, { recursive: true, force: true });
  }

  return { cleanup };
}

/** Create a fresh `kimi-src-*` directory under the OS temp dir and return its path. */
function makeTempDir(): string {
  const prefix = join(tmpdir(), 'kimi-src-');
  const created = mkdtempSync(prefix);
  return created;
}

type TextPart = { type: 'text'; text: string };

/**
 * Concatenate the text of every `type: 'text'` part and return the `path`
 * attribute of the first `<video path="…"></video>` tag found in it.
 * The attribute value is returned exactly as written in the tag.
 * Throws when the combined text contains no such tag.
 */
function videoPathFromParts(parts: unknown[]): string {
  const pieces: string[] = [];
  for (const part of parts) {
    const candidate = part as TextPart;
    if (candidate.type === 'text') pieces.push(candidate.text);
  }
  const combined = pieces.join('');
  const found = /<video path="([^"]+)"><\/video>/.exec(combined);
  if (found === null) {
    throw new Error(`no video tag found in: ${combined}`);
  }
  return found[1]!;
}

describe('extractMediaAttachments', () => {
it('returns no parts and hasMedia=false for plain text', () => {
const store = new ImageAttachmentStore();
Expand Down Expand Up @@ -52,18 +88,30 @@ describe('extractMediaAttachments', () => {
});

it('keeps matched-placeholder order with mixed image and video attachments', () => {
const store = new ImageAttachmentStore();
const img = store.addImage(new Uint8Array([1]), 'image/png', 10, 10);
const vid = store.addVideo('video/quicktime', '/tmp/clip.mov');
const text = `first ${img.placeholder} then ${vid.placeholder} end`;
const r = extractMediaAttachments(text, store);
expect(r.imageAttachmentIds).toEqual([1]);
expect(r.videoAttachmentIds).toEqual([2]);
expect(r.parts).toEqual([
{ type: 'text', text: 'first ' },
{ type: 'image_url', imageUrl: { url: 'data:image/png;base64,AQ==' } },
{ type: 'text', text: ' then <video path="/tmp/clip.mov"></video> end' },
]);
const { cleanup } = setupTempCache();
const srcDir = makeTempDir();
try {
const srcVideo = join(srcDir, 'clip.mov');
writeFileSync(srcVideo, 'video-bytes');
const store = new ImageAttachmentStore();
const img = store.addImage(new Uint8Array([1]), 'image/png', 10, 10);
const vid = store.addVideo('video/quicktime', srcVideo);
const text = `first ${img.placeholder} then ${vid.placeholder} end`;
const r = extractMediaAttachments(text, store);
expect(r.imageAttachmentIds).toEqual([1]);
expect(r.videoAttachmentIds).toEqual([2]);
expect(r.parts[0]).toEqual({ type: 'text', text: 'first ' });
expect(r.parts[1]).toEqual({
type: 'image_url',
imageUrl: { url: 'data:image/png;base64,AQ==' },
});
const cachePath = videoPathFromParts(r.parts);
expect(cachePath.startsWith(getCacheDir())).toBe(true);
expect(readFileSync(cachePath, 'utf8')).toBe('video-bytes');
} finally {
cleanup();
rmSync(srcDir, { recursive: true, force: true });
}
});

it('leaves unresolved (typed by hand) placeholders as literal text', () => {
Expand All @@ -85,21 +133,44 @@ describe('extractMediaAttachments', () => {
});

it('escapes media paths in generated tags', () => {
const store = new ImageAttachmentStore();
const att = store.addVideo('video/mp4', '/tmp/a&"<>.mp4', 'sample.mp4');
const r = extractMediaAttachments(att.placeholder, store);
expect(r.parts).toEqual([
{ type: 'text', text: '<video path="/tmp/a&amp;&quot;&lt;&gt;.mp4"></video>' },
]);
const { cleanup } = setupTempCache();
const srcDir = makeTempDir();
try {
const srcVideo = join(srcDir, 'source.mp4');
writeFileSync(srcVideo, 'x');
const store = new ImageAttachmentStore();
// The filename drives the cache label; `&` must be escaped in the attribute.
const att = store.addVideo('video/mp4', srcVideo, 'a&b.mp4');
const r = extractMediaAttachments(att.placeholder, store);
expect(r.parts).toHaveLength(1);
const text = (r.parts[0] as TextPart).text;
expect(text).toMatch(/<video path="[^"]+a&amp;b\.mp4"><\/video>/);
} finally {
cleanup();
rmSync(srcDir, { recursive: true, force: true });
}
});

it('expands video placeholders backed by local files to readMediaFile video tags', () => {
const store = new ImageAttachmentStore();
const att = store.addVideo('video/mp4', '/tmp/sample.mp4');
const r = extractMediaAttachments(att.placeholder, store);
expect(r.hasMedia).toBe(true);
expect(r.videoAttachmentIds).toEqual([1]);
expect(r.parts).toEqual([{ type: 'text', text: '<video path="/tmp/sample.mp4"></video>' }]);
it('copies video placeholders into the cache and emits cache-path tags', () => {
const { cleanup } = setupTempCache();
const srcDir = makeTempDir();
try {
const srcVideo = join(srcDir, 'sample.mp4');
writeFileSync(srcVideo, 'video-data');
const store = new ImageAttachmentStore();
const att = store.addVideo('video/mp4', srcVideo);
const r = extractMediaAttachments(att.placeholder, store);
expect(r.hasMedia).toBe(true);
expect(r.videoAttachmentIds).toEqual([1]);
const cachePath = videoPathFromParts(r.parts);
// The tag points at the cache, not the original source path.
expect(cachePath.startsWith(getCacheDir())).toBe(true);
expect(cachePath).not.toBe(srcVideo);
expect(readFileSync(cachePath, 'utf8')).toBe('video-data');
} finally {
cleanup();
rmSync(srcDir, { recursive: true, force: true });
}
});

it('inserts a compression caption before an image that was compressed at paste time', () => {
Expand Down
7 changes: 7 additions & 0 deletions apps/kimi-web/src/api/daemon/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1230,6 +1230,13 @@ export class DaemonKimiWebApi implements KimiWebApi {
return buildRestUrl(this.config.serverHttpUrl, `/files/${encodeURIComponent(fileId)}`);
}

/**
 * Download a file's bytes through the authenticated HTTP client.
 * Prefer this over getFileUrl when the bytes feed a <video>/<img> src: the
 * browser loads those natively without the Authorization header, so the bare
 * URL 401s.
 */
async getFileBlob(fileId: string): Promise<Blob> {
  const filePath = `/files/${encodeURIComponent(fileId)}`;
  return this.http.getBlob(filePath);
}

// -------------------------------------------------------------------------
// WebSocket events
// -------------------------------------------------------------------------
Expand Down
14 changes: 12 additions & 2 deletions apps/kimi-web/src/api/daemon/eventReducer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,22 @@ function sameMessageContent(a: AppMessage, b: AppMessage): boolean {
shape of a user message. The daemon's echo carries images as a resolved
URL/base64 while our optimistic copy carries `{kind:'file',fileId}`, so the
raw content never matches; comparing (text, image-count) does. */
// Matches the self-contained media path tag the server substitutes for an
// uploaded image/video/audio in a prompt (e.g. `<video path="/cache/f.mp4"></video>`).
// The pattern is anchored at both ends, so only a text part that consists of
// nothing but the tag (after trimming) matches; ordinary prose does not.
const MEDIA_PATH_TAG_SHAPE_RE = /^<(image|video|audio)\s+path="[^"]+"><\/\1>$/;

/**
 * Reduce a user message to a comparable shape: its concatenated plain text
 * and the number of media items it carries.
 *
 * - `image`, `video` and `file` parts each count as one media item.
 * - A text part that is exactly a media path tag also counts as one media
 *   item and is left out of the text.
 * - Every other text part is appended to the text unchanged.
 *
 * Each part is classified exactly once, so text is never appended twice and
 * a media part is never counted twice.
 */
function userMessageShape(m: AppMessage): { text: string; media: number } {
  let text = '';
  let media = 0;
  for (const c of m.content) {
    if (c.type === 'text') {
      // A video/image upload reaches us (after the server resolves it) as a
      // `<video path=…></video>` text tag, not a media part — count it as media
      // and drop it from the text so the echo reconciles with our optimistic copy.
      if (MEDIA_PATH_TAG_SHAPE_RE.test(c.text.trim())) media += 1;
      else text += c.text;
    } else if (c.type === 'image' || c.type === 'video' || c.type === 'file') {
      media += 1;
    }
  }
  return { text, media };
}
Expand Down
76 changes: 76 additions & 0 deletions apps/kimi-web/src/api/daemon/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,82 @@ export class DaemonHttpClient {
return this.request<T>('GET', path, undefined, query);
}

/**
 * Authenticated GET that returns the raw response body rather than an
 * envelope. Intended for file downloads that must carry the Bearer token —
 * e.g. <video>/<img> src, which the browser fetches natively and cannot
 * authorize on its own.
 *
 * Resolves with the body as a Blob on a 2xx response. On any other status the
 * daemon's JSON envelope is parsed when present (falling back to the HTTP
 * status and status text otherwise) and a DaemonApiError is thrown. A fetch
 * that rejects is rethrown as a DaemonNetworkError.
 */
async getBlob(path: string): Promise<Blob> {
  const method = 'GET' as const;
  const url = buildRestUrl(this.origin, path);
  const requestId = createRequestId();
  const headers: Record<string, string> = { 'X-Request-Id': requestId };
  this.addClientHeaders(headers);
  const startedAt = Date.now();
  const elapsedMs = (): number => Date.now() - startedAt;
  traceRestRequest({ method, path, url, requestId });

  let response: Response;
  try {
    response = await fetch(url, { method, headers, signal: timeoutSignal() });
  } catch (err) {
    traceRestFailure({
      method,
      path,
      requestId,
      phase: 'fetch',
      durationMs: elapsedMs(),
      error: err,
    });
    throw new DaemonNetworkError({
      message: `Network error calling GET ${path}`,
      cause: err,
      method,
      path,
      url,
      requestId,
      phase: 'fetch',
      timeoutMs: REQUEST_TIMEOUT_MS,
      timestamp: Date.now(),
      durationMs: elapsedMs(),
    });
  }

  if (!response.ok) {
    // Error path: the daemon sends a JSON envelope (401/404/413…).
    let envelope: WireEnvelope<unknown> | undefined;
    try {
      envelope = (await response.clone().json()) as WireEnvelope<unknown>;
    } catch {
      // not JSON — fall back to the HTTP status below
    }
    // The auth check runs before the response is traced, as in the success-agnostic flow.
    this.checkAuthRequired(response, envelope?.code ?? 0);
    const code = envelope?.code ?? response.status;
    const msg = envelope?.msg ?? response.statusText;
    traceRestResponse({
      method,
      path,
      requestId,
      status: response.status,
      durationMs: elapsedMs(),
      code,
      msg,
      envelopeRequestId: envelope?.request_id,
    });
    throw new DaemonApiError({
      code,
      msg,
      requestId: envelope?.request_id ?? requestId,
      details: envelope?.details,
      timestamp: Date.now(),
      durationMs: elapsedMs(),
    });
  }

  traceRestResponse({
    method,
    path,
    requestId,
    status: response.status,
    durationMs: elapsedMs(),
    code: 0,
    msg: '',
  });
  return response.blob();
}

/**
 * Issue a POST through the shared `request` helper.
 * `opts.allowCodes`, when given, is forwarded to `request` unchanged.
 */
async post<T>(path: string, body?: unknown, opts?: { allowCodes?: number[] }): Promise<T> {
  const allowCodes = opts?.allowCodes;
  return this.request<T>('POST', path, body, undefined, allowCodes);
}
Expand Down
2 changes: 2 additions & 0 deletions apps/kimi-web/src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,8 @@ export interface KimiWebApi {
// File upload / download
uploadFile(input: { file: Blob; name?: string }): Promise<{ id: string; name: string; mediaType: string; size: number }>;
getFileUrl(fileId: string): string;
/** Fetch a file's bytes with auth — feed the resulting Blob to a blob URL for <video>/<img> src. */
getFileBlob(fileId: string): Promise<Blob>;

// Config — REAL endpoints
getConfig(): Promise<AppConfig>;
Expand Down
Loading
Loading