From a250495d77e3a45698357f9b63b42c4e70c4ea32 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 22 Jun 2026 20:39:13 +0000 Subject: [PATCH 1/2] feat(client): honor SEP-2549 cacheHints (ttlMs/scope) on cacheable list*/readResource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The four list verbs and readResource now serve a still-fresh ResponseCacheStore entry without a round trip when the server-stamped ttlMs has not elapsed. Additive on the substrate (#2336): _listAllPages now stamps {expiresAt, scope} on the aggregate write; a _serveFromCache front gates each verb on freshness; readResource is newly cached (URI-keyed; only stored when ttl > 0, since the URI keyspace is unbounded and there is no derived index). Per-call CacheableRequestOptions.cacheMode ('use' | 'refresh' | 'bypass') maps to mcp.d's CacheMode. ClientOptions.cachePartition is the per-principal slot for 'private'-scoped entries (the spec's MUST-NOT-share-across-authz-contexts); 'public' entries always live at partition '' so a shared store serves them to every co-tenant. ClientResponseCache reads probe own-partition then '' (mcp.d's two-probe order — own-first because scope is only known after a fetch); the toolDefinition/outputValidator derived indices use the same probe so SEP-2243 mirroring works under partitioning. readResource applies the same partition derivation as the list verbs and treats absent cacheScope as 'private', so a shared store cannot serve one principal's resource body to another. ClientOptions.defaultCacheTtlMs (default 0) supplies the TTL when the result lacks one (e.g. a legacy-era response); an explicit server-sent ttlMs:0 is honoured as immediately stale. List aggregates are always stored regardless of TTL (mcp.d's retainForSchema posture) so callTool's mirroring/output-validation index keeps working at any TTL while the freshness gate never serves a stale entry. A list_changed eviction beats TTL (the existing partition-agnostic evict). Clock seam (now) injectable on ClientResponseCache for tests. New exports: CacheMode, CacheableRequestOptions. --- .changeset/client-honor-cache-hints.md | 7 + .changeset/client-response-cache-substrate.md | 2 +- .../protocol-pre-aborted-signal-wrap.md | 5 + docs/migration-SKILL.md | 2 + docs/migration.md | 19 + packages/client/src/client/client.ts | 285 +++++- packages/client/src/client/responseCache.ts | 461 +++++++-- packages/client/src/index.ts | 14 +- packages/client/test/client/listen.test.ts | 26 + .../test/client/mcpParamMirroring.test.ts | 117 ++- .../client/test/client/responseCache.test.ts | 909 +++++++++++++++++- packages/core/src/shared/protocol.ts | 11 +- 12 files changed, 1699 insertions(+), 159 deletions(-) create mode 100644 .changeset/client-honor-cache-hints.md create mode 100644 .changeset/protocol-pre-aborted-signal-wrap.md diff --git a/.changeset/client-honor-cache-hints.md b/.changeset/client-honor-cache-hints.md new file mode 100644 index 0000000000..2a7659db63 --- /dev/null +++ b/.changeset/client-honor-cache-hints.md @@ -0,0 +1,7 @@ +--- +'@modelcontextprotocol/client': minor +--- + +`Client` now **honours** the server-stamped SEP-2549 `ttlMs`/`cacheScope` cache hints on the cacheable verbs (`listTools()`, `listPrompts()`, `listResources()`, `listResourceTemplates()`, `readResource()`): a still-fresh held entry is served without a round trip. New `CacheableRequestOptions.cacheMode` (`'use'` — the default; `'refresh'` — always fetch and re-store; `'bypass'` — fetch without consulting or writing the cache) gives per-call control. The behaviour is opt-in by hint: a server that sends `ttlMs: 0` (the conservative default this SDK's server stamps) sees byte-identical behaviour — every call fetches. + +Entries are automatically scoped by connected-server identity (derived from `serverInfo` after connect, encoded collision-free via `JSON.stringify`); `ClientOptions.cachePartition` is the opaque per-principal slot for `'private'`-scoped entries — set it to your principal identifier (e.g. the auth subject) when one `responseCacheStore` backs several principals. With the default `''` every entry lives at the connected server's shared partition (the safe single-tenant posture). `ClientOptions.defaultCacheTtlMs` (default `0`) supplies the TTL when a result lacks one (e.g. a legacy-era response); the server-supplied `ttlMs` is clamped at 24 h (`MAX_CACHE_TTL_MS`). The list verbs always store the aggregate (so `callTool`'s mirroring/output-validation index keeps working at any TTL); `readResource` stores only when the resolved TTL is positive. `notifications/resources/updated` evicts the cached `resources/read` body for that URI. `ResponseCacheStore` gained `delete(key)`; `InMemoryResponseCacheStore` is now bounded (`{ maxEntries }`, default 512, oldest-first eviction). New exports: `CacheMode`, `CacheableRequestOptions`, `InMemoryResponseCacheStoreOptions`, `MAX_CACHE_TTL_MS`. diff --git a/.changeset/client-response-cache-substrate.md b/.changeset/client-response-cache-substrate.md index 16a0621cac..9d957d5f0c 100644 --- a/.changeset/client-response-cache-substrate.md +++ b/.changeset/client-response-cache-substrate.md @@ -2,6 +2,6 @@ '@modelcontextprotocol/client': major --- -`Client.listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` now **auto-aggregate every page** when called without a `cursor` and return the complete result with `nextCursor: undefined` (matching the C#, Java, and mcp.d SDKs). Pass an explicit `{ cursor }` string to fetch a single page; the per-page path is unchanged. Existing manual pagination loops keep working — the first iteration returns everything and the loop exits — but can be deleted. The aggregated result is written to the new pluggable `ResponseCacheStore` (default: a fresh per-instance `InMemoryResponseCacheStore`); a `ClientResponseCache` collaborator owns the eviction-generation guard and the derived `tools/list` index that `callTool`'s output validation and SEP-2243 `Mcp-Param-*` mirroring read. New exports: `ResponseCacheStore`, `CacheKey`, `CacheEntry`, `CacheScope`, `MaybePromise`, `InMemoryResponseCacheStore`; new `ClientOptions.responseCacheStore` / `ClientOptions.listMaxPages` (caps the auto-aggregate walk at 64 pages by default; throws `SdkError` with `SdkErrorCode.ListPaginationExceeded` on overrun so a partial aggregate is never cached). The store interface is async-ready (`MaybePromise<…>`); the in-memory default stays synchronous. **A store instance must not be shared across `Client` instances at all in v2.0.x** — entries are keyed by method only (server-identity confusion + `clear()`/`evict()` cross-talk); per-principal partitioning that enables safe sharing arrives with the full caching engine. +`Client.listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` now **auto-aggregate every page** when called without a `cursor` and return the complete result with `nextCursor: undefined` (matching the C#, Java, and mcp.d SDKs). Pass an explicit `{ cursor }` string to fetch a single page; the per-page path is unchanged. Existing manual pagination loops keep working — the first iteration returns everything and the loop exits — but can be deleted. The aggregated result is written to the new pluggable `ResponseCacheStore` (default: a fresh per-instance `InMemoryResponseCacheStore`); a `ClientResponseCache` collaborator owns the eviction-generation guard and the derived `tools/list` index that `callTool`'s output validation and SEP-2243 `Mcp-Param-*` mirroring read. New exports: `ResponseCacheStore`, `CacheKey`, `CacheEntry`, `CacheScope`, `MaybePromise`, `InMemoryResponseCacheStore`; new `ClientOptions.responseCacheStore` / `ClientOptions.listMaxPages` (caps the auto-aggregate walk at 64 pages by default; throws `SdkError` with `SdkErrorCode.ListPaginationExceeded` on overrun so a partial aggregate is never cached). The store interface is async-ready (`MaybePromise<…>`); the in-memory default stays synchronous. Entries are automatically scoped by the connected server's identity and (when set) the consumer-supplied `cachePartition`, so a shared store does not collide across servers or principals; evictions are likewise scoped to the connected server's partitions. **Behavior change (every era):** output-schema validator compilation is now lazy — validators are compiled on the first `callTool()` against the cached `tools/list` entry, not eagerly inside `listTools()` — and non-throwing: an uncompilable `outputSchema` is `console.warn`-ed and validation is skipped for that tool only (previously `listTools()` threw). A pluggable `jsonSchemaValidator` provider therefore observes compilation at `callTool` time, not `listTools` time. The legacy-era `listTools()` path is unchanged at the wire level but is observably different at the validator-lifecycle level. diff --git a/.changeset/protocol-pre-aborted-signal-wrap.md b/.changeset/protocol-pre-aborted-signal-wrap.md new file mode 100644 index 0000000000..a0a95758c8 --- /dev/null +++ b/.changeset/protocol-pre-aborted-signal-wrap.md @@ -0,0 +1,5 @@ +--- +'@modelcontextprotocol/core': patch +--- + +`Protocol.request()` now rejects with `SdkError(RequestTimeout, reason)` when called with an already-aborted signal, matching in-flight aborts. Previously the raw `signal.reason` was thrown. diff --git a/docs/migration-SKILL.md b/docs/migration-SKILL.md index 14eb419f57..ae20884449 100644 --- a/docs/migration-SKILL.md +++ b/docs/migration-SKILL.md @@ -572,6 +572,8 @@ side: auto-fulfilment is on by default (`ClientOptions.inputRequired`, `maxRound `Client.listTools()`, `listPrompts()`, `listResources()`, `listResourceTemplates()` called without a `cursor` now auto-aggregate every page and return the complete result (`nextCursor: undefined`); an explicit `{ cursor }` string still returns one page. Manual `do { … } while (cursor !== undefined)` loops keep working (the first call returns everything and the loop exits after one iteration) — replace them with the bare no-arg call. New `ClientOptions.listMaxPages` (default 64) caps the aggregate walk only; overrun throws `SdkError` (`SdkErrorCode.ListPaginationExceeded`). +`Client.listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` / `readResource()` now honour the server-stamped SEP-2549 `ttlMs`/`cacheScope`: a still-fresh cached entry is returned without a round trip. Opt-in by server hint — a server that sends `ttlMs: 0` (the SDK's default stamp) sees no behaviour change. Per-call override: pass `{ cacheMode: 'refresh' }` (always fetch and re-store) or `{ cacheMode: 'bypass' }` (fetch without touching the cache). Server `ttlMs` is clamped at 24 h (`MAX_CACHE_TTL_MS`). Entries are automatically scoped by connected-server identity; new `ClientOptions.cachePartition` (per-principal slot for `'private'`-scoped entries on a shared `responseCacheStore`; default `''`) and `ClientOptions.defaultCacheTtlMs` (TTL when the result lacks one, e.g. legacy-era responses; default `0`). `ResponseCacheStore` gained `delete(key)` (driven by `notifications/resources/updated`); `InMemoryResponseCacheStore` is now bounded (`{ maxEntries }`, default 512). + Output-schema validator compilation is now lazy (first `callTool()` against the cached `tools/list` entry) and non-throwing (an uncompilable `outputSchema` is `console.warn`-ed and validation is skipped for that tool only); `listTools()` no longer throws on an uncompilable `outputSchema`. Applies on every era — the legacy-era `listTools()` path is unchanged at the wire level only. No code changes required; wire-behavior note: on a 2026-07-28 Streamable HTTP connection, aborting an in-flight client request (caller `signal` / timeout) closes that request's SSE response stream as the spec cancellation signal — `notifications/cancelled` is no longer POSTed diff --git a/docs/migration.md b/docs/migration.md index 6208654bea..008c69e86d 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -575,6 +575,25 @@ const { tools } = await client.listTools(); The auto-aggregate walk is capped at `ClientOptions.listMaxPages` pages (default 64; `0` disables) and throws an `SdkError` with `SdkErrorCode.ListPaginationExceeded` if the server's pagination does not converge, so a partial aggregate is never returned. The cap applies only to the no-`cursor` aggregate path; explicit per-page calls are never capped. The aggregated result is also written to the client's response cache (the source for `callTool`'s output-schema validation and SEP-2243 header mirroring). +### Client honours server cache hints (SEP-2549) + +On a 2026-07-28 connection the cacheable verbs — `listTools()`, `listPrompts()`, `listResources()`, `listResourceTemplates()`, and `readResource()` — now serve a still-fresh held entry without a round trip when the server-stamped `ttlMs` has not elapsed. The behaviour is opt-in **by server hint**: a server that sends `ttlMs: 0` (the conservative default the SDK's `McpServer` stamps unless configured otherwise) sees byte-identical behaviour — every call fetches. A `list_changed` notification still evicts immediately regardless of TTL. + +Per-call control via the new `CacheableRequestOptions.cacheMode` (`'use'` is the default): + +```typescript +await client.listTools(); // serve from cache if fresh +await client.listTools(undefined, { cacheMode: 'refresh' }); // always fetch, then re-store +await client.listTools(undefined, { cacheMode: 'bypass' }); // fetch; do not read or write the cache +``` + +New `ClientOptions`: + +- `cachePartition?: string` — the opaque per-principal identifier for `'private'`-scoped entries (the spec's "MUST NOT share across authorization contexts"). Entries are automatically scoped by connected-server identity (derived from `serverInfo`), so one `responseCacheStore` may back several clients without consumer-side encoding; set `cachePartition` to your principal identifier (e.g. the auth subject) when sharing a store across principals. With the default `''` every entry — public or private — lives at the connected server's shared partition (the safe single-tenant posture). Note `serverInfo` is self-reported, so a server that deliberately impersonates another's `name`/`version` shares its `'public'` slot; the per-principal isolation holds regardless. +- `defaultCacheTtlMs?: number` — applied when a cacheable result lacks `ttlMs` (e.g. a legacy-era response). Default `0` — never serve from cache; the list aggregate is still **stored** so `callTool`'s mirroring/output-validation index keeps working regardless. The server-supplied `ttlMs` is clamped at 24 h (`MAX_CACHE_TTL_MS`). + +The `ResponseCacheStore` interface gained `delete(key)` (the per-URI invalidation `notifications/resources/updated` drives) — custom stores written against the alpha substrate need to add it. The default `InMemoryResponseCacheStore` is now bounded (default 512 entries, oldest-first eviction; configurable via `{ maxEntries }`). + **Output-schema validator lifecycle (every era):** validator compilation is now lazy — validators are compiled on the first `callTool()` against the cached `tools/list` entry, not eagerly inside `listTools()` — and non-throwing: an uncompilable `outputSchema` is `console.warn`-ed and validation is skipped for that tool only. In v1, `listTools()` threw on an uncompilable `outputSchema`; now it succeeds, and a pluggable `jsonSchemaValidator` provider observes compilation at `callTool` time, not `listTools` time. The legacy-era `listTools()` path is unchanged at the wire level but is observably different at the validator-lifecycle level. diff --git a/packages/client/src/client/client.ts b/packages/client/src/client/client.ts index 07071e8e34..61c16e221b 100644 --- a/packages/client/src/client/client.ts +++ b/packages/client/src/client/client.ts @@ -82,8 +82,8 @@ import { SubscriptionFilterSchema } from '@modelcontextprotocol/core'; -import type { ResponseCacheStore } from './responseCache.js'; -import { ClientResponseCache, InMemoryResponseCacheStore } from './responseCache.js'; +import type { CacheMode, CacheScope, ResponseCacheStore } from './responseCache.js'; +import { ClientResponseCache, InMemoryResponseCacheStore, MAX_CACHE_TTL_MS } from './responseCache.js'; import type { ResolvedVersionNegotiation, VersionNegotiationOptions } from './versionNegotiation.js'; import { detectProbeEnvironment, detectProbeTransportKind, negotiateEra, resolveVersionNegotiation } from './versionNegotiation.js'; @@ -278,18 +278,65 @@ export type ClientOptions = ProtocolOptions & { /** * The response-cache store backing the client's derived views (the cached * `tools/list` result that {@linkcode Client.callTool | callTool}'s output - * validation and SEP-2243 header mirroring will read once the stacked - * SEP-2243 PR lands; this commit ships only the seam). Defaults to a fresh + * validation and SEP-2243 header mirroring read) and the SEP-2549 + * cache-hint serving on the cacheable verbs. Defaults to a fresh * {@linkcode InMemoryResponseCacheStore} per client. * - * **Do not share one store across clients at all in v2.0.x** — entries - * are keyed by method + params only, so two clients connected to - * different servers (even under the same credential) collide on - * `tools/list`, and one client's `list_changed` evicts every co-tenant's - * entry. Supply your own only as a single-client backing store. - * Per-principal partitioning that enables safe sharing is #39. + * Entries are automatically scoped by connected-server identity (derived + * from `serverInfo` after connect) AND, for `'private'`-scoped results, + * by `cachePartition` — encoded collision-free via `JSON.stringify`, so a + * server cannot craft a `serverInfo` that bleeds into another server's + * namespace or another principal's slot. One store may therefore back + * several clients (e.g. a host pool against the same server, or one + * persistent KV across servers); `list_changed` evictions are scoped to + * the connected server's partitions, so co-tenants are unaffected. Set + * `cachePartition` to your principal identifier (e.g. the auth subject) + * when sharing across principals. Note `serverInfo` is self-reported — a + * server that deliberately impersonates + * another's `name`/`version` shares its `'public'` slot; the + * per-principal isolation via `cachePartition` holds regardless. */ responseCacheStore?: ResponseCacheStore; + + /** + * Opaque per-principal identifier for response-cache writes whose + * server-reported `cacheScope` is `'private'` (the spec's "MUST NOT share + * across authorization contexts"). Within the connected server's + * namespace, `'public'`-scoped entries live at the shared + * `[serverIdentity, '']` partition and `'private'`-scoped entries at + * `[serverIdentity, cachePartition]`. Set this to a stable identity of + * the authorization context (e.g. the auth subject) when one + * `responseCacheStore` backs several principals; with the + * default `''` every entry — public or private — lives at the server's + * shared partition, which is the safe single-tenant posture. + */ + cachePartition?: string; + + /** + * TTL (ms) applied when a cacheable result arrives without a `ttlMs` + * field. Default `0` — a result without an explicit hint is never served + * from cache (every call refetches), but it is still **stored** so the + * `tools/list`-derived index that {@linkcode Client.callTool | callTool}'s + * SEP-2243 mirroring and output-schema validation read keeps working + * regardless. The spec defines absent-or-≤0 as "immediately stale". + */ + defaultCacheTtlMs?: number; +}; + +/** + * {@linkcode RequestOptions} extended with the per-call cache disposition for + * the cacheable verbs (`listTools()` / `listPrompts()` / `listResources()` / + * `listResourceTemplates()` / `readResource()`). See {@linkcode CacheMode}. + */ +export type CacheableRequestOptions = RequestOptions & { + /** + * `'use'` (default) serves a still-fresh cached entry without a round + * trip; `'refresh'` always fetches and re-stores; `'bypass'` fetches + * without consulting or writing the cache. Applies to the no-`cursor` + * auto-aggregate path on the list verbs and to `readResource`; ignored + * elsewhere. + */ + cacheMode?: CacheMode; }; /** @@ -418,6 +465,7 @@ export class Client extends Protocol { * `Mcp-Param-*` mirroring through `toolDefinition` on top. */ private readonly _cache: ClientResponseCache; + private readonly _defaultCacheTtlMs: number; private readonly _listMaxPages: number; private _listChangedDebounceTimers: Map> = new Map(); /** @@ -516,8 +564,10 @@ export class Client extends Protocol { this._cache = new ClientResponseCache( options?.responseCacheStore ?? new InMemoryResponseCacheStore(), options?.responseCacheStore !== undefined, - error => this._reportStoreError(error) + error => this._reportStoreError(error), + options?.cachePartition ?? '' ); + this._defaultCacheTtlMs = options?.defaultCacheTtlMs ?? 0; this._listMaxPages = options?.listMaxPages ?? DEFAULT_LIST_MAX_PAGES; // Store list changed config for setup after connection (when we know server capabilities) @@ -618,23 +668,29 @@ export class Client extends Protocol { * @internal */ private _setupListChangedHandlers(config: ListChangedHandlers): void { + // Every autoRefresh fetcher forces `cacheMode: 'refresh'`: the + // notification handler's `_cache.evict()` runs first, but a custom + // store whose `delete()` no-ops (or rejects) leaves the stale entry + // in place — a default-mode `list*()` would then cache-serve the very + // entry the notification declared stale. `'refresh'` bypasses the + // cache read so the fetcher always reaches the wire and overwrites. if (config.tools && this._serverCapabilities?.tools?.listChanged) { this._setupListChangedHandler('tools', 'notifications/tools/list_changed', config.tools, async () => { - const result = await this.listTools(); + const result = await this.listTools(undefined, { cacheMode: 'refresh' }); return result.tools; }); } if (config.prompts && this._serverCapabilities?.prompts?.listChanged) { this._setupListChangedHandler('prompts', 'notifications/prompts/list_changed', config.prompts, async () => { - const result = await this.listPrompts(); + const result = await this.listPrompts(undefined, { cacheMode: 'refresh' }); return result.prompts; }); } if (config.resources && this._serverCapabilities?.resources?.listChanged) { this._setupListChangedHandler('resources', 'notifications/resources/list_changed', config.resources, async () => { - const result = await this.listResources(); + const result = await this.listResources(undefined, { cacheMode: 'refresh' }); return result.resources; }); } @@ -913,6 +969,7 @@ export class Client extends Protocol { this._serverCapabilities = result.capabilities; this._serverVersion = result.serverInfo; + this._cache.setServerIdentity(this._deriveServerIdentity(transport)); // HTTP transports must set the protocol version in each header after initialization. if (transport.setProtocolVersion) { transport.setProtocolVersion(result.protocolVersion); @@ -996,6 +1053,7 @@ export class Client extends Protocol { this._serverCapabilities = result.discover.capabilities; this._serverVersion = result.discover.serverInfo; + this._cache.setServerIdentity(this._deriveServerIdentity(transport)); this._instructions = result.discover.instructions; // Modern selection: the same connection state the legacy handshake completion sets. this._negotiatedProtocolVersion = result.version; @@ -1105,6 +1163,20 @@ export class Client extends Protocol { return this._serverVersion; } + /** + * The connected server's identity for response-cache partitioning. The + * `serverInfo` `name@version` pair when available (the spec requires it on + * both `initialize` and `server/discover`); falls back to the transport's + * `sessionId` otherwise. The value itself is server-controlled — the + * collision-safety of the storage partition comes from + * {@linkcode ClientResponseCache}'s JSON-array encoding around it, not + * from any character it does or does not contain. + */ + private _deriveServerIdentity(transport: Transport): string { + const v = this._serverVersion; + return v === undefined ? (transport.sessionId ?? '') : `${v.name}@${v.version}`; + } + /** * After initialization has completed, this will be populated with the protocol version negotiated * during the initialize handshake. When manually reconstructing a transport for reconnection, pass this @@ -1345,7 +1417,7 @@ export class Client extends Protocol { * ); * ``` */ - async listPrompts(params?: ListPromptsRequest['params'], options?: RequestOptions): Promise { + async listPrompts(params?: ListPromptsRequest['params'], options?: CacheableRequestOptions): Promise { if (!this._serverCapabilities?.prompts && !this._enforceStrictCapabilities) { // Respect capability negotiation: server does not support prompts console.debug('Client.listPrompts() called but server does not advertise prompts capability - returning empty list'); @@ -1354,6 +1426,8 @@ export class Client extends Protocol { if (params?.cursor !== undefined) { return this.request({ method: 'prompts/list', params }, options); } + const hit = await this._serveFromCache('prompts/list', undefined, options); + if (hit !== undefined) return hit; return this._listAllPages('prompts/list', params, options, (acc, page) => acc.prompts.push(...page.prompts)); } @@ -1383,7 +1457,7 @@ export class Client extends Protocol { * ); * ``` */ - async listResources(params?: ListResourcesRequest['params'], options?: RequestOptions): Promise { + async listResources(params?: ListResourcesRequest['params'], options?: CacheableRequestOptions): Promise { if (!this._serverCapabilities?.resources && !this._enforceStrictCapabilities) { // Respect capability negotiation: server does not support resources console.debug('Client.listResources() called but server does not advertise resources capability - returning empty list'); @@ -1392,6 +1466,8 @@ export class Client extends Protocol { if (params?.cursor !== undefined) { return this.request({ method: 'resources/list', params }, options); } + const hit = await this._serveFromCache('resources/list', undefined, options); + if (hit !== undefined) return hit; return this._listAllPages('resources/list', params, options, (acc, page) => acc.resources.push(...page.resources) ); @@ -1411,7 +1487,7 @@ export class Client extends Protocol { */ async listResourceTemplates( params?: ListResourceTemplatesRequest['params'], - options?: RequestOptions + options?: CacheableRequestOptions ): Promise { if (!this._serverCapabilities?.resources && !this._enforceStrictCapabilities) { // Respect capability negotiation: server does not support resources @@ -1423,6 +1499,8 @@ export class Client extends Protocol { if (params?.cursor !== undefined) { return this.request({ method: 'resources/templates/list', params }, options); } + const hit = await this._serveFromCache('resources/templates/list', undefined, options); + if (hit !== undefined) return hit; return this._listAllPages('resources/templates/list', params, options, (acc, page) => acc.resourceTemplates.push(...page.resourceTemplates) ); @@ -1453,10 +1531,14 @@ export class Client extends Protocol { private async _listAllPages( method: RequestMethod, baseParams: { readonly [key: string]: unknown } | undefined, - options: RequestOptions | undefined, + options: CacheableRequestOptions | undefined, append: (acc: R, page: R) => void, finalize?: (acc: R) => void ): Promise { + // `'bypass'` is the no-touch path: the cache is neither read nor + // written, so the substrate is byte-untouched (the `tools/list` + // derived index keeps whatever it held). + const bypass = options?.cacheMode === 'bypass'; // Capture the eviction generation BEFORE page 1: a `list_changed` that // lands mid-walk bumps the counter, and the terminal `write` skips // when it observes the bump (the result still returns to the caller — @@ -1482,10 +1564,76 @@ export class Client extends Protocol { } acc.nextCursor = undefined; finalize?.(acc); - await this._cache.write(method, acc, generation); + if (bypass) return acc; + // The aggregate is ALWAYS written: even when the resolved TTL is ≤0 + // the entry is stored already-stale (mcp.d's `retainForSchema` + // posture) so the `tools/list`-derived index keeps working regardless, + // while the freshness gate in `_serveFromCache` never serves it. + // Page-1 carries the result-level `ttlMs`/`cacheScope` (`acc` IS the + // mutated page-1 object). + await this._cache.write(method, acc, generation, this._freshness(acc)); return acc; } + /** + * Compute the {@linkcode ClientResponseCache.write} freshness payload from + * a cacheable result body. The single seam through which the client reads + * `ttlMs`/`cacheScope` (mcp.d's `cachedFetch` engine). The fields pass + * through the loose result schema, so they are read off the runtime body; + * a missing `ttlMs` falls back to + * {@linkcode ClientOptions | ClientOptions.defaultCacheTtlMs}; an explicit server-sent + * `ttlMs` (including `0` — the spec's "immediately stale") is honoured + * as-is. The default of `0` means `expiresAt === now()` ⇒ never served, + * only stored. A missing `cacheScope` is treated as `'private'` — the + * spec's `'public'` grant ("any client … MAY serve to any user") is too + * strong to infer by default, and matches this SDK's server-side stamp + * default. + */ + private _freshness(result: unknown, params?: string): { expiresAt: number; scope: CacheScope; params?: string } { + const body = result as { ttlMs?: unknown; cacheScope?: unknown }; + const ttlMs = typeof body.ttlMs === 'number' ? body.ttlMs : this._defaultCacheTtlMs; + const scope: CacheScope = body.cacheScope === 'public' ? 'public' : 'private'; + // Clamp at 24h (`MAX_CACHE_TTL_MS`) so a server cannot pin an entry + // indefinitely; floor at 0 so a negative `ttlMs` is treated as + // immediately stale (the spec's absent-or-≤0 rule). + return { expiresAt: this._cache.now() + Math.min(Math.max(0, ttlMs), MAX_CACHE_TTL_MS), scope, params }; + } + + /** + * The cache-serving front of every cacheable verb (mcp.d's `cachedFetch` + * read half): under `cacheMode: 'use'` (the default), a held entry whose + * `expiresAt` is in the future is returned as a `structuredClone` and the + * round trip is skipped. `'refresh'` and `'bypass'` always fetch (the + * caller decides whether to write). A custom store whose `get()` rejects + * is routed to `onerror` and treated as a miss — cache bookkeeping never + * blocks a request from reaching the wire. + */ + private async _serveFromCache( + method: string, + params: string | undefined, + options: CacheableRequestOptions | undefined + ): Promise { + if (options?.cacheMode === 'bypass' || options?.cacheMode === 'refresh') return undefined; + const entry = await this._cache.read(method, params).catch(error => void this._reportStoreError(error)); + if (entry?.expiresAt !== undefined && entry.expiresAt > this._cache.now()) { + // A pre-aborted caller signal must reject the same way it would on + // the wire path (`Protocol.request()` wraps an already-aborted + // signal as `SdkError(RequestTimeout, reason)`); without this guard + // a cache hit would resolve successfully and silently swallow the + // abort. + if (options?.signal?.aborted) { + const reason = options.signal.reason; + throw reason instanceof SdkError ? reason : new SdkError(SdkErrorCode.RequestTimeout, String(reason)); + } + // Clone on the way out so a caller mutating the returned aggregate + // (e.g. `result.tools.sort(...)`) cannot reach the cache or the + // stamp-memoized indices derived from it — the same invariant + // `_cache.write` upholds on the way in. + return structuredClone(entry.value) as R; + } + return undefined; + } + /** Route a custom-store failure to `onerror` without aborting the surrounding dispatch. */ private _reportStoreError(e: unknown): void { this.onerror?.(e instanceof Error ? e : new Error(String(e))); @@ -1529,9 +1677,48 @@ export class Client extends Protocol { return tool === undefined ? undefined : scanXMcpHeaderDeclarations(tool.inputSchema); } - /** Reads the contents of a resource by URI. */ - async readResource(params: ReadResourceRequest['params'], options?: RequestOptions): Promise { - return this.request({ method: 'resources/read', params }, options); + /** + * Reads the contents of a resource by URI. + * + * Honours the result's `ttlMs`/`cacheScope` (SEP-2549): a still-fresh + * cached body for the same `uri` is returned without a round trip + * (`cacheMode: 'use'`, the default). The cache key is `{method, uri}` + * partitioned by the resolved scope — `'private'` (the default when the + * server omits the field) is stored under this client's + * {@linkcode ClientOptions | ClientOptions.cachePartition}, so a shared + * store cannot serve one principal's resource body to another. Unlike the + * list verbs, a result whose resolved TTL is ≤0 is **not** stored + * (`resources/read` has no derived index and the URI keyspace is + * unbounded). + */ + async readResource(params: ReadResourceRequest['params'], options?: CacheableRequestOptions): Promise { + const hit = await this._serveFromCache('resources/read', params.uri, options); + if (hit !== undefined) return hit; + // Capture the per-URI eviction generation BEFORE the request: a + // `notifications/resources/updated` for this URI arriving while the + // read is in flight bumps it (via `evictKey`), and the terminal + // `write` skips so the now-stale body is not re-cached. Same guard as + // the list verbs' `_listAllPages`, keyed by `{method, uri}`. + const generation = this._cache.captureGeneration('resources/read', params.uri); + const result = await this.request({ method: 'resources/read', params }, options); + if (options?.cacheMode !== 'bypass') { + const freshness = this._freshness(result, params.uri); + if (freshness.expiresAt > this._cache.now()) { + await this._cache.write('resources/read', result, generation, freshness); + } else if (options?.cacheMode === 'refresh') { + // ttl≤0 → drop any held positive-TTL entry so the next + // default-mode read fetches fresh — a `'refresh'` that + // returns ttl≤0 must not leave the previously-warm entry + // serving stale until its old expiry (mcp.d's `cachedFetch` + // write half evicts on the no-store branch). Only on the + // `'refresh'` path: a default-mode miss already proved + // nothing fresh is held (`_serveFromCache` returned + // `undefined`), so `evictKey`'s store deletes would be wasted + // round trips against an async store on a ttl≤0 working set. + await this._cache.evictKey('resources/read', params.uri); + } + } + return result; } /** Subscribes to change notifications for a resource. The server must support resource subscriptions. */ @@ -1587,8 +1774,14 @@ export class Client extends Protocol { } // Honor RequestOptions.signal exactly as request() does: an - // already-aborted signal rejects synchronously before any setup. - options?.signal?.throwIfAborted(); + // already-aborted signal rejects synchronously before any setup, and + // the rejection is the same `SdkError(RequestTimeout, reason)` wrap + // request() / `_serveFromCache` apply (unless `reason` is already an + // SdkError — preserved verbatim). + if (options?.signal?.aborted) { + const reason = options.signal.reason; + throw reason instanceof SdkError ? reason : new SdkError(SdkErrorCode.RequestTimeout, String(reason)); + } const requestAbort = new AbortController(); // The listen request's JSON-RPC id (= the spec's subscription id @@ -1773,16 +1966,27 @@ export class Client extends Protocol { // `Object.prototype` member name (`constructor`, `toString`, …) does // not reach the iteration as a non-iterable function. const evicted = Object.hasOwn(LIST_CHANGED_EVICTIONS, raw.method) ? LIST_CHANGED_EVICTIONS[raw.method] : undefined; - if (evicted !== undefined) { + if (raw.method === 'notifications/resources/updated') { + // Per-URI eviction (mcp.d's `invalidateLogical`): the now-cached + // `resources/read` body for this URI is stale on receipt; drop it + // from BOTH partitions so the next `readResource` for the same URI + // refetches even within TTL (the documented subscribe → updated → + // re-read flow). Fire-and-forget like the `list_changed` path — + // dispatch must not block on an async store. + const uri = (raw.params as { uri?: unknown } | undefined)?.uri; + if (typeof uri === 'string') void this._cache.evictKey('resources/read', uri); + } else if (evicted !== undefined) { for (const method of evicted) { // `evict()` bumps the generation FIRST and unconditionally // (the `_cacheListResult` race guard relies on the bump, not - // on the store's evict completing), then awaits the store. A - // custom store's `evict()` may throw or reject; route to - // `onerror` and proceed so dispatch (and the user's - // `listChanged` handler) runs regardless. Fire-and-forget — - // dispatch must not block on an async store. - void this._cache.evict(method).catch(error => this._reportStoreError(error)); + // on the store's deletes completing), then drops only THIS + // server's two partition singletons — co-tenants on a shared + // store keep their entries. Store failures are reported via + // `onerror` inside `evict()` and the call resolves, so + // dispatch (and the user's `listChanged` handler) runs + // regardless. Fire-and-forget — dispatch must not block on + // an async store. + void this._cache.evict(method); } } if (raw.method === 'notifications/subscriptions/acknowledged') { @@ -1966,12 +2170,15 @@ export class Client extends Protocol { if (!mirroringActive || !isHeaderMismatch || options?.toolDefinition !== undefined) { throw error; } - const refreshOptions = { signal: options?.signal, timeout: options?.timeout }; - // A custom store's `evict()` may throw or reject (the documented - // store contract); route that to `onerror` and proceed — the - // generation bump already happened, so the refetch overwrites the - // stale entry regardless. - await this._cache.evict('tools/list').catch(error_ => this._reportStoreError(error_)); + // `cacheMode: 'refresh'` so the recovery refetch always reaches + // the wire — `evict()` reports a custom store's `delete()` + // failure via `onerror` and resolves, but a no-op/failing + // `delete()` leaves the stale entry in place; without `'refresh'` + // the `listTools()` below would cache-serve the very entry whose + // staleness triggered this recovery. The generation bump still + // happens regardless, so the refetch's write overwrites. + const refreshOptions = { signal: options?.signal, timeout: options?.timeout, cacheMode: 'refresh' as const }; + await this._cache.evict('tools/list'); // The recovery refetch may itself fail (e.g. `listMaxPages`, a // transient error that hits only the `tools/list` walk). Surface // it via `onerror` so the real cause is observable, then proceed @@ -2063,7 +2270,7 @@ export class Client extends Protocol { * ); * ``` */ - async listTools(params?: ListToolsRequest['params'], options?: RequestOptions): Promise { + async listTools(params?: ListToolsRequest['params'], options?: CacheableRequestOptions): Promise { if (!this._serverCapabilities?.tools && !this._enforceStrictCapabilities) { // Respect capability negotiation: server does not support tools console.debug('Client.listTools() called but server does not advertise tools capability - returning empty list'); @@ -2079,6 +2286,8 @@ export class Client extends Protocol { this._excludeInvalidXMcpHeaderTools(page); return page; } + const hit = await this._serveFromCache('tools/list', undefined, options); + if (hit !== undefined) return hit; // Auto-aggregate: SEP-2243 invalid-`x-mcp-header` exclusion runs on // the complete aggregate via the `finalize` hook before the cache // write, so the cached entry never holds an unmirrorable tool. diff --git a/packages/client/src/client/responseCache.ts b/packages/client/src/client/responseCache.ts index 8fbe90ca49..cc99221e7e 100644 --- a/packages/client/src/client/responseCache.ts +++ b/packages/client/src/client/responseCache.ts @@ -1,35 +1,49 @@ import type { ListToolsResult, Tool } from '@modelcontextprotocol/core'; /** - * Minimal response-cache substrate (the kernel of #39's design). + * Client-side response cache for SEP-2549 (`CacheableResult`) freshness hints. * * The store is a dumb keyed-value carrier: every freshness, scope and * invalidation decision lives in the {@linkcode ClientResponseCache} (the - * `Client`'s single cache-coordination collaborator). This file - * deliberately - * ships only what the SEP-2243 mirroring path and the existing - * `tools/list`-derived validators need today — the full `cacheHints` engine - * (TTL short-circuiting, public/private partitioning, `CacheMode`) lands with - * the rest of #39 on top of the same interface. + * `Client`'s single cache-coordination collaborator). The `stamp` field is + * mcp.d's re-derivation key — a derived view (e.g. the `name → Tool` index) + * re-computes only when the backing entry's stamp changes. * * Reference design: mcp.d `client/cache.d` / `client/client.d` (`CacheStore`, - * `cachedTool`). The `stamp` field is mcp.d's re-derivation key — a derived - * view (e.g. the `name → Tool` index) re-computes only when the backing - * entry's stamp changes. + * `cachedTool`, `cachedFetch`, `invalidateLogical`). */ /** A value or a promise of one. The store interface is async-ready; the in-memory default returns plain values. */ export type MaybePromise = T | Promise; -/** The freshness scope of a cached entry (#39's `cacheHints.scope`). */ +/** The freshness scope of a cached entry (SEP-2549 `cacheHints.scope`). */ export type CacheScope = 'public' | 'private'; +/** + * Per-call cache disposition for the cacheable verbs (`listTools()` / + * `listPrompts()` / `listResources()` / `listResourceTemplates()` / + * `readResource()`): + * + * - `'use'` (the default) — serve a still-fresh cached entry without a round + * trip; on miss/stale, fetch and write. + * - `'refresh'` — always fetch (ignore any held entry) and write the fresh + * result. + * - `'bypass'` — fetch without consulting OR writing the cache (the result is + * not stored). The `tools/list`-derived index (mirroring / output + * validation) is therefore unaffected by a `'bypass'` call. + */ +export type CacheMode = 'use' | 'refresh' | 'bypass'; + /** * A logical cache address. `params` is the canonical result-affecting params * key (`''` for the four list ops, the `uri` for `resources/read`); omitted is - * equivalent to `''`. `partition` is the per-principal identity slot reserved - * for #39's shared-store partitioning — always `''` today (the - * `Client` never populates it); omitted is equivalent to `''`. + * equivalent to `''`. `partition` namespaces the entry by connected-server + * identity AND per-principal scope: the `Client` writes a JSON-encoded + * `[serverIdentity, principal]` pair (so a server-controlled `serverInfo` + * string cannot bleed into the principal slot regardless of what characters + * it contains). A `'public'`-scoped entry lives at `[serverIdentity, '']`; a + * `'private'`-scoped entry at `[serverIdentity, cachePartition]`. Omitted is + * equivalent to `''`. */ export interface CacheKey { readonly method: string; @@ -41,11 +55,10 @@ export interface CacheKey { * One cached response body. `value` is the verbatim decoded result; `stamp` is * the store-generated monotonically increasing write counter — opaque to * callers. Derived views (e.g. a `name → Tool` index) memoize against it and - * re-derive only when it changes. `expiresAt` and `scope` are the - * client-computed freshness metadata (#39 — `expiresAt = now + ttlMs`, - * `scope` from `cacheHints`); the substrate does not populate them yet, but - * the slot exists so a custom store written today persists them once #39 - * lands without a signature change. + * re-derive only when it changes. `expiresAt` (absolute ms epoch, `now + + * ttlMs`) and `scope` are the client-computed freshness metadata; the store + * MUST persist them and hand them back on `get` so the read path can decide + * freshness and gate the shared-partition fallback on `scope === 'public'`. */ export interface CacheEntry { readonly value: unknown; @@ -63,17 +76,16 @@ export interface CacheEntry { * in-memory default stays synchronous (plain values are valid under * `MaybePromise`). The `Client` `await`s every call site. * - * **A store instance MUST NOT be shared across `Client` instances at - * all in v2.0.x.** Entries are keyed by method + params only (the - * `Client` never populates `partition` today), so two clients - * connected to different servers — even under the same credential — collide on - * `tools/list` (server-identity confusion); a `list_changed` from one server - * evicts every co-tenant's entry; and one client reconnecting drops the - * derived indices that read the shared store. The `Client` - * constructor always allocates a fresh {@linkcode InMemoryResponseCacheStore} - * when `responseCacheStore` is not supplied; pass your own only as a - * single-client backing store. Per-principal partitioning that enables safe - * sharing arrives with the full #39 `cacheHints` engine. + * Entries are keyed by `{method, params, partition}` where `partition` is the + * `Client`-derived `[serverIdentity, principal]` JSON pair, so one store + * instance is safe to share across `Client` instances connected to different + * servers and/or principals: writes from distinct connections never collide, + * the shared-partition read fallback is gated on the stored + * `scope === 'public'`, and `list_changed` / `HEADER_MISMATCH` evictions are + * scoped to the connected server's two partitions — co-tenants on a shared + * store are unaffected. The `Client` constructor still allocates a fresh + * {@linkcode InMemoryResponseCacheStore} per instance by default; supply your + * own to share or persist. */ export interface ResponseCacheStore { get(key: CacheKey): MaybePromise; @@ -81,52 +93,163 @@ export interface ResponseCacheStore { * Writes `entry` under `key` and returns the store-generated stamp the * resulting {@linkcode CacheEntry} carries. The store owns the stamp * counter; callers do not supply one. The caller owns `expiresAt` and - * `scope` (the client-computed freshness metadata; not yet populated by - * the substrate — #39 wires them); the store MUST persist them and hand - * them back on `get`. + * `scope` (the client-computed freshness metadata); the store MUST persist + * them and hand them back on `get`. */ set(key: CacheKey, entry: { value: unknown; expiresAt?: number; scope?: CacheScope }): MaybePromise; - /** Drop every entry for `method` (the `list_changed` invalidation). */ + /** + * Drop the single entry under `key` (no-op if absent). Called for both + * `notifications/resources/updated` (per-URI) and the `list_changed` + * notifications (the list singletons live at `{method, params: '', partition}`). + */ + delete(key: CacheKey): MaybePromise; + /** + * Drop every entry for `method` across every partition. The `Client` does + * NOT call this (its `list_changed` path issues two partition-scoped + * `delete()` calls so co-tenants on a shared store keep their entries); + * kept on the interface for callers that want a method-wide bulk-clear. + */ evict(method: string): MaybePromise; /** Drop every entry (connection reset). */ clear(): MaybePromise; } +/** Options for {@linkcode InMemoryResponseCacheStore}. */ +export interface InMemoryResponseCacheStoreOptions { + /** + * Maximum number of held `resources/read` entries (the only + * unbounded-keyspace method). When inserting a new `resources/read` key + * would exceed this, the oldest such entry (by insertion order) is + * evicted first. The list-singleton methods (`tools/list`, + * `prompts/list`, `resources/list`, `resources/templates/list`, + * `server/discover`) are **exempt** — they hold at most one entry per + * partition and back the `tools/list`-derived index, so an unbounded URI + * working set never displaces them. The default of `512` bounds growth on + * a long-lived client against template-expanded URIs. `0` disables the + * bound. + */ + maxEntries?: number; +} + +/** + * Methods whose entries are exempt from the + * {@linkcode InMemoryResponseCacheStoreOptions.maxEntries} cap. Each holds at + * most one entry per partition (a small bounded set) and the + * `tools/list`-derived index depends on its entry surviving regardless of the + * `resources/read` working-set size. Only `resources/read` keys count toward + * the cap and are eligible for FIFO eviction. + */ +const CAP_EXEMPT_METHODS: ReadonlySet = new Set([ + 'tools/list', + 'prompts/list', + 'resources/list', + 'resources/templates/list', + 'server/discover' +]); + /** - * In-memory default. Unbounded — the four list ops write at most one entry - * each, so a bound is not yet useful; the LRU cap arrives with `resources/read` - * caching in #39. + * In-memory default. Bounded by an insertion-ordered size cap (default `512`; + * see {@linkcode InMemoryResponseCacheStoreOptions.maxEntries}) on the + * `resources/read` keyspace so an unbounded stream of distinct URIs cannot + * grow it without limit; the list-singleton methods are exempt and never + * evicted by the cap. `Map` preserves insertion order, so the oldest live + * capped key is the first matching iteration entry. */ export class InMemoryResponseCacheStore implements ResponseCacheStore { private readonly _entries = new Map(); + private readonly _maxEntries: number; private _stamp = 0; + /** Count of held entries that are subject to the cap (i.e. not in {@linkcode CAP_EXEMPT_METHODS}). */ + private _cappedSize = 0; + + constructor(options?: InMemoryResponseCacheStoreOptions) { + this._maxEntries = options?.maxEntries ?? 512; + } + + /** Number of held entries (for diagnostics / bounding tests). */ + get size(): number { + return this._entries.size; + } get(key: CacheKey): CacheEntry | undefined { return this._entries.get(keyOf(key)); } set(key: CacheKey, entry: { value: unknown; expiresAt?: number; scope?: CacheScope }): number { + const k = keyOf(key); + const exempt = CAP_EXEMPT_METHODS.has(key.method); + const isNew = !this._entries.has(k); + // Evict the oldest CAPPED entry first when adding a NEW capped key + // would exceed the cap (re-set of an existing key never evicts; an + // exempt-method write never evicts). `Map` iteration order is + // insertion order, so the first non-exempt key is the oldest one. + if (!exempt && isNew && this._maxEntries > 0 && this._cappedSize >= this._maxEntries) { + for (const oldKey of this._entries.keys()) { + if (!CAP_EXEMPT_METHODS.has(oldKey.slice(0, oldKey.indexOf('\0')))) { + this._entries.delete(oldKey); + this._cappedSize--; + break; + } + } + } const stamp = ++this._stamp; - this._entries.set(keyOf(key), { ...entry, stamp }); + this._entries.set(k, { ...entry, stamp }); + if (isNew && !exempt) this._cappedSize++; return stamp; } + delete(key: CacheKey): void { + if (this._entries.delete(keyOf(key)) && !CAP_EXEMPT_METHODS.has(key.method)) this._cappedSize--; + } + evict(method: string): void { const prefix = `${method}\0`; + const exempt = CAP_EXEMPT_METHODS.has(method); for (const k of this._entries.keys()) { - if (k.startsWith(prefix)) this._entries.delete(k); + if (k.startsWith(prefix)) { + this._entries.delete(k); + if (!exempt) this._cappedSize--; + } } } clear(): void { this._entries.clear(); + this._cappedSize = 0; } } +/** + * Serialize a {@linkcode CacheKey} for the in-memory map. `method` is always + * an SDK-set MCP method string (never contains a NUL), so the `\0` prefix + * delimiter is safe and lets {@linkcode InMemoryResponseCacheStore.evict} do a + * cheap prefix scan. `partition` (already a JSON-encoded + * `[serverIdentity, principal]` pair) and `params` (a resource URI on the + * `resources/read` path — caller-controlled) are JSON-array-encoded together, + * which is collision-free regardless of any NUL or delimiter characters they + * carry. + */ function keyOf(key: CacheKey): string { - return `${key.method}\0${key.partition ?? ''}\0${key.params ?? ''}`; + return `${key.method}\0${JSON.stringify([key.partition ?? '', key.params ?? ''])}`; +} + +/** + * Serialize a `{method, params}` pair for the eviction-generation map. The + * list singletons key on `method` alone (their {@linkcode ClientResponseCache.evict} + * is whole-method); `resources/read` keys on `` `${method}\0${uri}` `` so + * {@linkcode ClientResponseCache.evictKey} bumps a per-URI counter. + */ +function genKey(method: string, params?: string): string { + return params === undefined ? method : `${method}\0${params}`; } +/** + * Upper bound on the server-supplied `ttlMs` honoured by + * {@linkcode ClientResponseCache} (24h). A server cannot pin an entry + * indefinitely. + */ +export const MAX_CACHE_TTL_MS = 86_400_000; + /** * The `Client`'s cache-coordination collaborator. * @@ -142,11 +265,21 @@ function keyOf(key: CacheKey): string { */ export class ClientResponseCache { /** - * Per-method eviction-generation counter. {@linkcode evict} bumps it before - * touching the store; {@linkcode captureGeneration} reads it before a list - * walk's page 1; {@linkcode write} skips when it moved — so a - * `list_changed` arriving mid-walk is not overwritten by the walk's stale - * aggregate. + * Per-logical-key eviction-generation counter. {@linkcode evict} (whole + * method) and {@linkcode evictKey} (single `{method, params}`) bump it + * before touching the store; {@linkcode captureGeneration} reads it before + * the request; {@linkcode write} skips when it moved — so a `list_changed` + * arriving mid-walk, or a `resources/updated` arriving while a + * `readResource()` for the same URI is in flight, is not overwritten by + * the in-flight request's stale write. The map key is `method` for the + * list singletons and `` `${method}\0${params}` `` for per-URI keys. + * + * Growth is bounded by keys the CLIENT has issued a `captureGeneration` + * for: {@linkcode captureGeneration} records the key (so an interleaved + * {@linkcode evictKey} sees there is an in-flight write to suppress); + * {@linkcode evictKey} only bumps a key that is already recorded — a + * server streaming `notifications/resources/updated` for URIs the client + * has never read therefore cannot grow this map. */ private readonly _evictionGeneration = new Map(); /** @@ -163,6 +296,16 @@ export class ClientResponseCache { * concrete type. */ private _toolOutputValidatorIndex?: { stamp: number; byName: Map }; + /** + * The connected server's identity (`serverInfo.name@version`, or a + * transport-supplied surrogate). Set by the `Client` immediately after a + * successful connect; `''` is the pre-connect sentinel. Every storage + * partition is derived from this (see `_partitionFor`), so two + * clients sharing one store but connected to different servers never + * collide on `tools/list` and a server cannot read another server's + * `'public'` entries. + */ + private _serverIdentity = ''; constructor( private readonly _store: ResponseCacheStore, @@ -178,24 +321,163 @@ export class ClientResponseCache { * fetched — the failure is reported here and the write resolves. The * `Client` wires this to `onerror`. */ - private readonly _reportError: (error: unknown) => void = () => {} + private readonly _reportError: (error: unknown) => void = () => {}, + /** + * The opaque per-principal identifier for this client (the + * `private`-scope storage slot within the connected server's + * namespace). `''` (the default) makes the `private` slot identical to + * the server's shared `public` slot — the safe single-tenant posture. + * See `_partitionFor`. + */ + private readonly _cachePartition: string = '', + /** + * Clock seam (testing). The freshness check (`entry.expiresAt > now()`) + * and the `expiresAt = now() + ttlMs` stamp both read it via + * {@linkcode now}. Default `Date.now`. + */ + private readonly _now: () => number = Date.now ) {} + /** The clock used for every freshness computation and check. */ + now(): number { + return this._now(); + } + + /** + * Record the connected server's identity. Called by `Client` immediately + * after a successful connect (`serverInfo.name@version`, or the + * transport's `sessionId` when `serverInfo` is unavailable). Every + * partition derived after this call is scoped to this identity; entries + * written under the pre-connect `''` sentinel are no longer reachable. + */ + setServerIdentity(identity: string): void { + this._serverIdentity = identity; + } + + /** + * Derive the storage partition for `scope`. The encoding is + * `JSON.stringify([serverIdentity, principal])` — JSON escaping makes it + * collision-free by construction: a malicious server cannot craft a + * `serverInfo.name`/`version` whose concatenated form bleeds into another + * server's namespace or another principal's slot, regardless of `@` / `|` + * / `"` / NUL in the server-controlled strings. `'public'` → + * `[serverIdentity, '']` (shared within this server); `'private'` → + * `[serverIdentity, cachePartition]`. When `cachePartition` is `''` the + * two coincide. + */ + private _partitionFor(scope: CacheScope): string { + return JSON.stringify([this._serverIdentity, scope === 'public' ? '' : this._cachePartition]); + } + + /** + * Two-probe lookup: this client's own (private) partition first, then the + * connected server's shared (public) partition. The shared probe is gated + * on `entry.scope === 'public'` — a co-tenant client that omits + * `cachePartition` writes its `'private'`-scoped entries at the public + * partition, and serving those to a correctly-partitioned client would + * leak private bodies (mcp.d's `cachedEntry` two-probe order; the scope + * gate is defence-in-depth on top of the partition split). When + * `cachePartition` is `''` the two partitions are identical and only one + * probe is issued. + */ + private async _probe(method: string, params?: string): Promise { + const key = { method, params: params ?? '' }; + const ownPartition = this._partitionFor('private'); + const own = await this._store.get({ ...key, partition: ownPartition }); + if (own !== undefined) return own; + const sharedPartition = this._partitionFor('public'); + if (sharedPartition === ownPartition) return undefined; + const shared = await this._store.get({ ...key, partition: sharedPartition }); + return shared?.scope === 'public' ? shared : undefined; + } + /** * Bump the per-method generation (so an in-flight {@linkcode write} for the - * same method becomes a no-op) and evict the store entry. The generation - * bump is unconditional and FIRST — the {@linkcode write} race guard relies - * on the bump, not on the store's evict completing. A custom store's - * `evict()` may throw or reject; the caller routes that to `onerror`. + * same method becomes a no-op) and drop the connected server's two list + * singletons (own + shared partition; `params: ''`). The generation bump + * is unconditional and FIRST — the {@linkcode write} race guard relies on + * the bump, not on the store's deletes completing. + * + * Eviction is scoped to this client's `[serverIdentity, principal]` + * partitions (mirroring {@linkcode evictKey}) — the method-wide + * `store.evict()` is NOT called, so on a shared store one server's + * `list_changed` cannot wipe a co-tenant's entry. A custom store's + * `delete()` may throw or reject; each partition is guarded + * independently so a failure on one does not skip the other, the failure + * is reported via the constructor's sink, and the call resolves so + * dispatch proceeds. */ async evict(method: string): Promise { this._evictionGeneration.set(method, (this._evictionGeneration.get(method) ?? 0) + 1); - await this._store.evict(method); + const ownPartition = this._partitionFor('private'); + const sharedPartition = this._partitionFor('public'); + try { + await this._store.delete({ method, params: '', partition: ownPartition }); + } catch (error) { + this._reportError(error); + } + if (sharedPartition !== ownPartition) { + try { + await this._store.delete({ method, params: '', partition: sharedPartition }); + } catch (error) { + this._reportError(error); + } + } + } + + /** + * Drop the single logical entry `{method, params}` from BOTH the private + * and public partitions for this client's connected server (mcp.d's + * `invalidateLogical`). Used for `notifications/resources/updated`'s + * per-URI eviction. The per-key generation is bumped FIRST (so an + * in-flight {@linkcode write} for the same `{method, params}` becomes a + * no-op and cannot re-cache the now-stale body) but only when the key was + * already recorded by {@linkcode captureGeneration} — bounding the map to + * keys the client has actually read. A custom store's `delete()` may + * throw or reject; each partition's delete is guarded independently so a + * failure on one does not skip the other, and the call resolves so + * dispatch proceeds. + */ + async evictKey(method: string, params: string): Promise { + const gk = genKey(method, params); + // Only bump a key the client has actually captured: if no entry is + // present there is no in-flight write to suppress, and an + // unconditional bump would let a server streaming distinct-URI + // `resources/updated` notifications grow this map without bound. The + // store deletes still run regardless (a previously-written entry may + // be held even when the generation entry has since been cleared by + // `resetForReconnect`). + const current = this._evictionGeneration.get(gk); + if (current !== undefined) this._evictionGeneration.set(gk, current + 1); + const ownPartition = this._partitionFor('private'); + const sharedPartition = this._partitionFor('public'); + try { + await this._store.delete({ method, params, partition: ownPartition }); + } catch (error) { + this._reportError(error); + } + if (sharedPartition !== ownPartition) { + try { + await this._store.delete({ method, params, partition: sharedPartition }); + } catch (error) { + this._reportError(error); + } + } } - /** Snapshot the eviction generation for `method` before a list walk's page 1. */ - captureGeneration(method: string): number { - return this._evictionGeneration.get(method) ?? 0; + /** + * Snapshot the eviction generation for `{method, params}` before issuing + * the request (a list walk's page 1, or a `resources/read` for `params`). + * Records the key so an interleaved {@linkcode evictKey} for the same + * `{method, params}` knows there is an in-flight write to suppress and + * bumps; without the record, `evictKey`'s recorded-only bump would skip + * and the stale body would be cached. + */ + captureGeneration(method: string, params?: string): number { + const gk = genKey(method, params); + const current = this._evictionGeneration.get(gk) ?? 0; + this._evictionGeneration.set(gk, current); + return current; } /** @@ -212,19 +494,67 @@ export class ClientResponseCache { * `reportError` sink and the write resolves — cache bookkeeping never * costs the caller a result it already fetched (consistent with the * eviction path). + * + * `freshness` carries the client-computed `expiresAt` (absolute ms epoch, + * `now + ttlMs`) and the server-reported `cacheScope`. The storage + * `partition` is derived from the scope via `_partitionFor`: + * `'public'` → `[serverIdentity, '']` (shared within this server); + * `'private'` → `[serverIdentity, cachePartition]` (so a shared store + * never serves a private entry to another identity). Absent `freshness` + * preserves the substrate write (no `expiresAt`, private partition) — the + * `tools/list` retain-for-schema posture: never served by + * {@linkcode read}'s freshness gate, always readable by + * {@linkcode toolDefinition}. + * + * After storing under the derived partition, the same `{method, params}` + * is deleted from the OPPOSITE partition (mirroring {@linkcode evictKey}'s + * two-partition posture). A server that flips a result's `cacheScope` for + * the same key would otherwise leave the previous entry in the other slot + * — and since `_probe` checks own-partition first, a stale private entry + * would shadow the fresh public one (or a stale public entry would keep + * serving co-tenants). Both store calls are independently guarded so a + * custom store's failure on one does not skip the other. */ - async write(method: string, value: unknown, capturedGen: number): Promise { - if ((this._evictionGeneration.get(method) ?? 0) !== capturedGen) return; + async write( + method: string, + value: unknown, + capturedGen: number, + freshness?: { expiresAt: number; scope: CacheScope; params?: string } + ): Promise { + if ((this._evictionGeneration.get(genKey(method, freshness?.params)) ?? 0) !== capturedGen) return; + const params = freshness?.params ?? ''; + const ownPartition = this._partitionFor('private'); + const sharedPartition = this._partitionFor('public'); + const partition = (freshness?.scope ?? 'private') === 'public' ? sharedPartition : ownPartition; try { - await this._store.set({ method }, { value: structuredClone(value) }); + await this._store.set( + { method, params, partition }, + { value: structuredClone(value), expiresAt: freshness?.expiresAt, scope: freshness?.scope } + ); } catch (error) { this._reportError(error); } + if (sharedPartition !== ownPartition) { + try { + await this._store.delete({ + method, + params, + partition: partition === ownPartition ? sharedPartition : ownPartition + }); + } catch (error) { + this._reportError(error); + } + } } - /** Read the cached entry for `{method}` (the four list verbs). */ - async read(method: string): Promise { - return this._store.get({ method }); + /** + * Read the cached entry for `{method, params}` via the two-probe lookup + * (own-partition then this server's shared partition, gated on + * `scope === 'public'`). The caller owns the freshness check + * (`entry.expiresAt > now()`); a missing `expiresAt` is never fresh. + */ + async read(method: string, params?: string): Promise { + return this._probe(method, params); } /** @@ -233,15 +563,16 @@ export class ClientResponseCache { * the only reason to supply one. The generation map and every derived * index are dropped regardless: they are connection-scoped even when the * backing store survives, so the next read re-derives from whatever the - * store still holds. The default impl is synchronous, so the - * `MaybePromise` return is a plain void here and the caller need not - * await. + * store still holds. The server identity returns to the pre-connect + * sentinel. The default impl is synchronous, so the `MaybePromise` + * return is a plain void here and the caller need not await. */ resetForReconnect(): void { if (!this._isUserSupplied) void this._store.clear(); this._evictionGeneration.clear(); this._toolIndex = undefined; this._toolOutputValidatorIndex = undefined; + this._serverIdentity = ''; } /** @@ -255,7 +586,7 @@ export class ClientResponseCache { * and, via {@linkcode outputValidator}, its output-schema validation. */ async toolDefinition(name: string): Promise { - const entry = await this._store.get({ method: 'tools/list' }); + const entry = await this._probe('tools/list'); if (entry === undefined) { this._toolIndex = undefined; return undefined; @@ -286,7 +617,7 @@ export class ClientResponseCache { * index simply omits it. */ async outputValidator(name: string, compile: (tool: Tool) => V | undefined): Promise { - const entry = await this._store.get({ method: 'tools/list' }); + const entry = await this._probe('tools/list'); if (entry === undefined) { this._toolOutputValidatorIndex = undefined; return undefined; diff --git a/packages/client/src/index.ts b/packages/client/src/index.ts index 694084598d..05dfa807a6 100644 --- a/packages/client/src/index.ts +++ b/packages/client/src/index.ts @@ -52,15 +52,23 @@ export { PrivateKeyJwtProvider, StaticPrivateKeyJwtProvider } from './client/authExtensions.js'; -export type { CallToolRequestOptions, ClientOptions, McpSubscription } from './client/client.js'; +export type { CacheableRequestOptions, CallToolRequestOptions, ClientOptions, McpSubscription } from './client/client.js'; export { Client } from './client/client.js'; export { getSupportedElicitationModes } from './client/client.js'; export type { DiscoverAndRequestJwtAuthGrantOptions, JwtAuthGrantResult, RequestJwtAuthGrantOptions } from './client/crossAppAccess.js'; export { discoverAndRequestJwtAuthGrant, exchangeJwtAuthGrant, requestJwtAuthorizationGrant } from './client/crossAppAccess.js'; export type { LoggingOptions, Middleware, RequestLogger } from './client/middleware.js'; export { applyMiddlewares, createMiddleware, withLogging, withOAuth } from './client/middleware.js'; -export type { CacheEntry, CacheKey, CacheScope, MaybePromise, ResponseCacheStore } from './client/responseCache.js'; -export { InMemoryResponseCacheStore } from './client/responseCache.js'; +export type { + CacheEntry, + CacheKey, + CacheMode, + CacheScope, + InMemoryResponseCacheStoreOptions, + MaybePromise, + ResponseCacheStore +} from './client/responseCache.js'; +export { InMemoryResponseCacheStore, MAX_CACHE_TTL_MS } from './client/responseCache.js'; export type { SSEClientTransportOptions } from './client/sse.js'; export { SSEClientTransport, SseError } from './client/sse.js'; export type { VersionNegotiationMode, VersionNegotiationOptions, VersionNegotiationProbeOptions } from './client/versionNegotiation.js'; diff --git a/packages/client/test/client/listen.test.ts b/packages/client/test/client/listen.test.ts index 2476325c23..855dbc14b8 100644 --- a/packages/client/test/client/listen.test.ts +++ b/packages/client/test/client/listen.test.ts @@ -379,6 +379,32 @@ describe('Client.listen()', () => { await client.close(); }); + it('options.signal already aborted: listen() rejects with SdkError(RequestTimeout) before any setup (parity with request())', async () => { + const { clientTx, written } = await scriptedModern(); + const client = new Client({ name: 'c', version: '1' }, { versionNegotiation: { mode: 'auto' } }); + await client.connect(clientTx); + written.length = 0; + const ac = new AbortController(); + ac.abort('user cancelled'); + const error = await client.listen({ toolsListChanged: true }, { signal: ac.signal }).catch(e => e as SdkError); + // Same wrap as `Protocol.request()` / `_serveFromCache`: a non-SdkError + // reason is wrapped as RequestTimeout; the reason text is preserved. + expect(error).toBeInstanceOf(SdkError); + expect((error as SdkError).code).toBe(SdkErrorCode.RequestTimeout); + expect((error as SdkError).message).toContain('user cancelled'); + // No subscriptions/listen reached the wire; no listen state registered. + await flush(); + expect(written.find(m => (m as { method?: string }).method === 'subscriptions/listen')).toBeUndefined(); + expect((client as unknown as { _listenState: Map })._listenState.size).toBe(0); + // An SdkError reason is preserved verbatim (not double-wrapped). + const ac2 = new AbortController(); + const own = new SdkError(SdkErrorCode.NotConnected, 'upstream'); + ac2.abort(own); + const error2 = await client.listen({ toolsListChanged: true }, { signal: ac2.signal }).catch(e => e as SdkError); + expect(error2).toBe(own); + await client.close(); + }); + it('options.signal aborted while opening: listen() rejects fast with the signal reason', async () => { const [clientTx, serverTx] = InMemoryTransport.createLinkedPair(); const written: JSONRPCMessage[] = []; diff --git a/packages/client/test/client/mcpParamMirroring.test.ts b/packages/client/test/client/mcpParamMirroring.test.ts index 424e0d5289..1d8752d469 100644 --- a/packages/client/test/client/mcpParamMirroring.test.ts +++ b/packages/client/test/client/mcpParamMirroring.test.ts @@ -16,6 +16,8 @@ import { InMemoryResponseCacheStore, type ResponseCacheStore } from '../../src/c import { StreamableHTTPClientTransport } from '../../src/client/streamableHttp.js'; const MODERN = '2026-07-28'; +/** Partition the `Client` derives for the scripted server (`serverInfo.name@version`, default `cachePartition`). */ +const PART = JSON.stringify(['scripted@1.0.0', '']); const REGION_TOOL: Tool = { name: 'route', @@ -125,7 +127,9 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => { const { tools } = await client.listTools(); expect(tools.map(t => t.name)).toEqual(['route']); expect(warn).toHaveBeenCalledWith(expect.stringContaining("excluding tool 'broken'")); - expect((store.get({ method: 'tools/list' })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual(['route']); + expect((store.get({ method: 'tools/list', partition: PART })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual([ + 'route' + ]); // The explicit-cursor per-page path is filtered too (the spec's MUST // has no carve-out for paginated reads). const page = await client.listTools({ cursor: '0' }); @@ -161,18 +165,78 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => { const { clientTx, callHeaders, listCount } = await scriptedModernServer([[REGION_TOOL]], /* rejectFirstCall */ true); const client = modernClient(store); await client.connect(clientTx); - // Seed a STALE entry (no declarations) so callTool reads it and the - // first send carries no param headers — server rejects + // Seed a STALE entry at the connected-server partition (a STALE + // declaration on `region`) so callTool reads IT and the first send + // carries the stale `Mcp-Param-Stale-Region` header — server rejects // HEADER_MISMATCH, client evicts, refetches via listTools() - // (the live REGION_TOOL), and retries with the headers. - store.set({ method: 'tools/list' }, { value: { tools: [{ name: 'route', inputSchema: { type: 'object', properties: {} } }] } }); + // (the live REGION_TOOL), and retries with the correct header. + store.set( + { method: 'tools/list', partition: PART }, + { + value: { + tools: [ + { + name: 'route', + inputSchema: { type: 'object', properties: { region: { type: 'string', 'x-mcp-header': 'Stale-Region' } } } + } + ] + } + } + ); const result = await client.callTool({ name: 'route', arguments: { region: 'ap' } }); expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' }); expect(listCount()).toBe(1); - expect(callHeaders).toEqual([undefined, { 'Mcp-Param-Region': 'ap' }]); + // First send mirrored the SEEDED stale declaration (proves the + // stale-cache read path, not cold-cache); retry mirrored the live one. + expect(callHeaders).toEqual([{ 'Mcp-Param-Stale-Region': 'ap' }, { 'Mcp-Param-Region': 'ap' }]); // The recovery refetch wrote a fresh cache entry (REGION_TOOL, with the declaration). - expect((store.get({ method: 'tools/list' })?.value as { tools: Tool[] }).tools[0]?.inputSchema.properties).toHaveProperty('region'); + expect( + (store.get({ method: 'tools/list', partition: PART })?.value as { tools: Tool[] }).tools[0]?.inputSchema.properties + ).toHaveProperty('region'); + }); + + it("HEADER_MISMATCH recovery refetch reaches the wire even when the store's delete() no-ops (cacheMode:'refresh' bypasses the stale entry)", async () => { + // A custom store whose `delete()` is a no-op (or rejects) leaves the + // stale `tools/list` entry in place after `evict()`. The recovery + // refetch must NOT be cache-served that stale entry — it carries + // `cacheMode: 'refresh'` so it always reaches the wire and overwrites. + const store = new InMemoryResponseCacheStore(); + (store as ResponseCacheStore).delete = () => undefined; + const { clientTx, callHeaders, listCount } = await scriptedModernServer([[REGION_TOOL]], /* rejectFirstCall */ true); + const client = modernClient(store); + await client.connect(clientTx); + // Seed a STALE-and-fresh entry (the declaration mirrors as + // `Stale-Region`; expiresAt in the future so a default-mode + // `listTools()` WOULD serve it if not for `'refresh'`). + store.set( + { method: 'tools/list', partition: PART }, + { + value: { + tools: [ + { + name: 'route', + inputSchema: { type: 'object', properties: { region: { type: 'string', 'x-mcp-header': 'Stale-Region' } } } + } + ] + }, + expiresAt: Date.now() + 60_000, + scope: 'public' + } + ); + + const result = await client.callTool({ name: 'route', arguments: { region: 'ap' } }); + expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' }); + // The refetch hit the wire (delete() no-op did NOT short-circuit it + // into a cache serve of the stale seed). + expect(listCount()).toBe(1); + // Retry mirrored the LIVE declaration, not the stale seed. + expect(callHeaders).toEqual([{ 'Mcp-Param-Stale-Region': 'ap' }, { 'Mcp-Param-Region': 'ap' }]); + // The refetch's write overwrote the stale entry (the no-op delete + // never dropped it; the `'refresh'` write replaced it). + expect( + (store.get({ method: 'tools/list', partition: PART })?.value as { tools: Tool[] }).tools[0]?.inputSchema.properties + ).toHaveProperty(['region', 'x-mcp-header'], 'Region'); }); it('callTool() with a cold cache issues NO tools/list and sends without Mcp-Param-* headers (cache reads only)', async () => { @@ -233,15 +297,32 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => { const { clientTx, callHeaders, listCount } = await scriptedModernServer([[PAGE1], [REGION_TOOL]], /* rejectFirstCall */ true); const client = modernClient(store); await client.connect(clientTx); - // Seed a STALE entry so the first send goes without headers; the - // recovery refetch (via listTools()) then walks BOTH pages. - store.set({ method: 'tools/list' }, { value: { tools: [PAGE1] } }); + // Seed a STALE entry at the connected-server partition (one stale + // page; `route` carries a STALE declaration) so callTool reads it, + // mirrors the stale header on the first send, and the recovery + // refetch (via listTools()) then walks BOTH live pages. + store.set( + { method: 'tools/list', partition: PART }, + { + value: { + tools: [ + PAGE1, + { + name: 'route', + inputSchema: { type: 'object', properties: { region: { type: 'string', 'x-mcp-header': 'Stale-Region' } } } + } + ] + } + } + ); const result = await client.callTool({ name: 'route', arguments: { region: 'us-west1' } }); expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' }); // The recovery refetch walked both pages. expect(listCount()).toBe(2); - expect(callHeaders).toEqual([undefined, { 'Mcp-Param-Region': 'us-west1' }]); + // First send mirrored the SEEDED stale declaration (proves the + // stale-cache read path); retry mirrored the live page-2 declaration. + expect(callHeaders).toEqual([{ 'Mcp-Param-Stale-Region': 'us-west1' }, { 'Mcp-Param-Region': 'us-west1' }]); // A follow-up call still mirrors from the cached entry (no extra list). await client.callTool({ name: 'route', arguments: { region: 'eu' } }); expect(callHeaders[2]).toEqual({ 'Mcp-Param-Region': 'eu' }); @@ -253,12 +334,16 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => { const { clientTx, serverTx, callHeaders, listCount } = await scriptedModernServer([[REGION_TOOL]]); const client = modernClient(store); await client.connect(clientTx); - // Seed a STALE entry (no declarations); list_changed evicts it; the - // next callTool reads cold and sends without headers — callTool - // never refetches on its own. - store.set({ method: 'tools/list' }, { value: { tools: [{ name: 'route', inputSchema: { type: 'object', properties: {} } }] } }); + // Seed a STALE entry at the connected-server partition; list_changed + // evicts it (partition-scoped delete); the next callTool reads cold + // and sends without headers — callTool never refetches on its own. + store.set( + { method: 'tools/list', partition: PART }, + { value: { tools: [{ name: 'route', inputSchema: { type: 'object', properties: {} } }] } } + ); + expect(store.get({ method: 'tools/list', partition: PART })).toBeDefined(); await serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage); - expect(store.get({ method: 'tools/list' })).toBeUndefined(); + expect(store.get({ method: 'tools/list', partition: PART })).toBeUndefined(); const result = await client.callTool({ name: 'route', arguments: { region: 'us' } }); expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' }); diff --git a/packages/client/test/client/responseCache.test.ts b/packages/client/test/client/responseCache.test.ts index 127be04bf8..c10cb521a3 100644 --- a/packages/client/test/client/responseCache.test.ts +++ b/packages/client/test/client/responseCache.test.ts @@ -12,7 +12,7 @@ import { InMemoryTransport, SdkError, SdkErrorCode } from '@modelcontextprotocol import { describe, expect, it } from 'vitest'; import { Client } from '../../src/client/client.js'; -import type { ResponseCacheStore } from '../../src/client/responseCache.js'; +import type { CacheEntry, ResponseCacheStore } from '../../src/client/responseCache.js'; import { ClientResponseCache, InMemoryResponseCacheStore } from '../../src/client/responseCache.js'; const MODERN = '2026-07-28'; @@ -20,6 +20,16 @@ const MODERN = '2026-07-28'; const TOOL_A: Tool = { name: 'a', inputSchema: { type: 'object', properties: {} } }; const TOOL_B: Tool = { name: 'b', inputSchema: { type: 'object', properties: {} } }; +/** + * Partition the `Client` derives for the scripted server (`serverInfo: + * {name:'scripted', version:'1.0.0'}`) and `principal` (default `''` ⇒ the + * server's shared/public slot). The encoding is the same JSON-array form + * `ClientResponseCache._partitionFor` produces. + */ +const part = (principal = '', serverIdentity = 'scripted@1.0.0'): string => JSON.stringify([serverIdentity, principal]); +/** The pre-connect / direct-`ClientResponseCache` sentinel partition (`['', '']`). */ +const PRE = JSON.stringify(['', '']); + describe('InMemoryResponseCacheStore', () => { it('get/set/evict/clear round-trip; evict is method-scoped; set returns the store-generated stamp', () => { const store = new InMemoryResponseCacheStore(); @@ -30,7 +40,7 @@ describe('InMemoryResponseCacheStore', () => { expect(s2).toBeGreaterThan(s1); expect(s3).toBeGreaterThan(s2); expect(store.get({ method: 'tools/list' })).toEqual({ value: 1, stamp: s1 }); - // Store persists caller-supplied freshness metadata (#39 wires population; the slot exists today). + // Store persists caller-supplied freshness metadata. expect(store.get({ method: 'resources/read', params: 'file:///a' })).toEqual({ value: 3, stamp: s3, @@ -46,19 +56,96 @@ describe('InMemoryResponseCacheStore', () => { expect(store.get({ method: 'prompts/list' })).toBeUndefined(); }); - it('partition is part of the key serialization (always empty today; #39 wires population)', () => { + it('partition is part of the key serialization; evict(method) is partition-agnostic', () => { const store = new InMemoryResponseCacheStore(); store.set({ method: 'tools/list', partition: 'p1' }, { value: 'a' }); store.set({ method: 'tools/list', partition: 'p2' }, { value: 'b' }); expect(store.get({ method: 'tools/list', partition: 'p1' })?.value).toBe('a'); expect(store.get({ method: 'tools/list', partition: 'p2' })?.value).toBe('b'); - // The Client never populates partition today, so the default-partition slot is distinct. + // The default-partition slot is distinct. expect(store.get({ method: 'tools/list' })).toBeUndefined(); // evict(method) is partition-agnostic. store.evict('tools/list'); expect(store.get({ method: 'tools/list', partition: 'p1' })).toBeUndefined(); expect(store.get({ method: 'tools/list', partition: 'p2' })).toBeUndefined(); }); + + it('keyOf is collision-free for NUL / quote / delimiter characters in partition and params', () => { + const store = new InMemoryResponseCacheStore(); + // A NUL in `partition` cannot smuggle into `params` (and vice versa) — + // the `[partition, params]` JSON-array encoding escapes every control + // and quote character. + store.set({ method: 'resources/read', partition: 'a\0b', params: 'c' }, { value: 1 }); + store.set({ method: 'resources/read', partition: 'a', params: 'b\0c' }, { value: 2 }); + expect(store.get({ method: 'resources/read', partition: 'a\0b', params: 'c' })?.value).toBe(1); + expect(store.get({ method: 'resources/read', partition: 'a', params: 'b\0c' })?.value).toBe(2); + // Same for the partition's own JSON-shaped content: the outer + // JSON.stringify escapes the inner quotes. + store.set({ method: 'tools/list', partition: '["x",""]' }, { value: 'real' }); + store.set({ method: 'tools/list', partition: '["x","' }, { value: 'spoof' }); + expect(store.get({ method: 'tools/list', partition: '["x",""]' })?.value).toBe('real'); + }); + + it('maxEntries cap: oldest-first eviction; re-set of an existing key never evicts; 0 disables the bound', () => { + const small = new InMemoryResponseCacheStore({ maxEntries: 2 }); + small.set({ method: 'resources/read', params: 'a' }, { value: 'a' }); + small.set({ method: 'resources/read', params: 'b' }, { value: 'b' }); + // Re-set of an existing key updates in place without consuming + // capacity (Map preserves the original insertion position). + small.set({ method: 'resources/read', params: 'a' }, { value: 'a2' }); + expect(small.size).toBe(2); + expect(small.get({ method: 'resources/read', params: 'a' })?.value).toBe('a2'); + // A NEW key at capacity evicts the oldest insertion ('a'). + small.set({ method: 'resources/read', params: 'c' }, { value: 'c' }); + expect(small.get({ method: 'resources/read', params: 'a' })).toBeUndefined(); + expect(small.get({ method: 'resources/read', params: 'b' })?.value).toBe('b'); + expect(small.get({ method: 'resources/read', params: 'c' })?.value).toBe('c'); + expect(small.size).toBe(2); + + // 0 disables the bound. + const unbounded = new InMemoryResponseCacheStore({ maxEntries: 0 }); + for (let i = 0; i < 1000; i++) unbounded.set({ method: 'resources/read', params: String(i) }, { value: i }); + expect(unbounded.size).toBe(1000); + }); + + it('maxEntries cap exempts list-singleton methods: a resources/read flood never evicts tools/list', () => { + const store = new InMemoryResponseCacheStore({ maxEntries: 3 }); + // List singletons are exempt: never counted, never evicted by the cap. + store.set({ method: 'tools/list' }, { value: 'T' }); + store.set({ method: 'prompts/list' }, { value: 'P' }); + store.set({ method: 'resources/list' }, { value: 'R' }); + store.set({ method: 'resources/templates/list' }, { value: 'RT' }); + store.set({ method: 'server/discover' }, { value: 'D' }); + // Five exempt entries already exceed maxEntries=3; a resources/read + // write does NOT evict any of them — the cap counts only non-exempt + // keys. + for (let i = 0; i < 5; i++) store.set({ method: 'resources/read', params: String(i) }, { value: i }); + expect(store.get({ method: 'tools/list' })?.value).toBe('T'); + expect(store.get({ method: 'prompts/list' })?.value).toBe('P'); + expect(store.get({ method: 'resources/list' })?.value).toBe('R'); + expect(store.get({ method: 'resources/templates/list' })?.value).toBe('RT'); + expect(store.get({ method: 'server/discover' })?.value).toBe('D'); + // Only 3 resources/read entries survive (oldest two evicted). + expect(store.get({ method: 'resources/read', params: '0' })).toBeUndefined(); + expect(store.get({ method: 'resources/read', params: '1' })).toBeUndefined(); + expect(store.get({ method: 'resources/read', params: '2' })?.value).toBe(2); + expect(store.get({ method: 'resources/read', params: '4' })?.value).toBe(4); + expect(store.size).toBe(8); + // An exempt-method write at capacity never evicts a resources/read entry. + store.set({ method: 'tools/list', partition: 'p2' }, { value: 'T2' }); + expect(store.get({ method: 'resources/read', params: '2' })?.value).toBe(2); + }); + + it('delete(key) drops the single entry; no-op when absent', () => { + const store = new InMemoryResponseCacheStore(); + store.set({ method: 'resources/read', params: 'a', partition: 'p' }, { value: 1 }); + store.set({ method: 'resources/read', params: 'b', partition: 'p' }, { value: 2 }); + store.delete({ method: 'resources/read', params: 'a', partition: 'p' }); + expect(store.get({ method: 'resources/read', params: 'a', partition: 'p' })).toBeUndefined(); + expect(store.get({ method: 'resources/read', params: 'b', partition: 'p' })?.value).toBe(2); + // Absent key: no-op. + store.delete({ method: 'resources/read', params: 'a', partition: 'p' }); + }); }); describe('ClientResponseCache', () => { @@ -69,11 +156,11 @@ describe('ClientResponseCache', () => { await cache.evict('tools/list'); await cache.write('tools/list', { tools: [TOOL_A] }, gen); // Generation moved between capture and write → the stale aggregate is dropped. - expect(store.get({ method: 'tools/list' })).toBeUndefined(); + expect(store.get({ method: 'tools/list', partition: PRE })).toBeUndefined(); // A fresh capture after the evict writes through. const gen2 = cache.captureGeneration('tools/list'); await cache.write('tools/list', { tools: [TOOL_A] }, gen2); - expect(store.get({ method: 'tools/list' })).toBeDefined(); + expect(store.get({ method: 'tools/list', partition: PRE })).toBeDefined(); }); it('resetForReconnect: clears the default store, leaves a user-supplied store, ALWAYS drops generation + indices', async () => { @@ -85,7 +172,7 @@ describe('ClientResponseCache', () => { await userCache.evict('prompts/list'); expect(userCache.captureGeneration('prompts/list')).toBe(1); userCache.resetForReconnect(); - expect(userStore.get({ method: 'tools/list' })).toBeDefined(); + expect(userStore.get({ method: 'tools/list', partition: PRE })).toBeDefined(); expect(userCache.captureGeneration('prompts/list')).toBe(0); // Index dropped → re-derived from the (still-populated) store on next read. expect((userCache as unknown as { _toolIndex?: unknown })._toolIndex).toBeUndefined(); @@ -96,7 +183,7 @@ describe('ClientResponseCache', () => { const defCache = new ClientResponseCache(defStore, false); await defCache.write('tools/list', { tools: [TOOL_A] }, defCache.captureGeneration('tools/list')); defCache.resetForReconnect(); - expect(defStore.get({ method: 'tools/list' })).toBeUndefined(); + expect(defStore.get({ method: 'tools/list', partition: PRE })).toBeUndefined(); expect(await defCache.toolDefinition('a')).toBeUndefined(); }); @@ -109,11 +196,85 @@ describe('ClientResponseCache', () => { value.tools.length = 0; // The cached entry is a structuredClone, so the store and the // stamp-memoized index are unaffected. - expect((store.get({ method: 'tools/list' })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual(['a', 'b']); + expect((store.get({ method: 'tools/list', partition: PRE })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual([ + 'a', + 'b' + ]); expect((await cache.toolDefinition('a'))?.name).toBe('a'); expect((await cache.toolDefinition('b'))?.name).toBe('b'); }); + it('evictKey bumps the per-key generation so an in-flight write for the same {method, params} is suppressed', async () => { + const store = new InMemoryResponseCacheStore(); + const cache = new ClientResponseCache(store, false); + // Capture BEFORE the request; evictKey lands mid-flight; the stale + // write is dropped (mirrors the list_changed-during-walk guard, + // keyed per URI). + const gen = cache.captureGeneration('resources/read', 'res://a'); + await cache.evictKey('resources/read', 'res://a'); + await cache.write('resources/read', { contents: [] }, gen, { expiresAt: 1e9, scope: 'private', params: 'res://a' }); + expect(store.get({ method: 'resources/read', params: 'res://a', partition: PRE })).toBeUndefined(); + // A sibling URI's generation is independent: evictKey('a') does not + // suppress a write for 'b'. + const genB = cache.captureGeneration('resources/read', 'res://b'); + await cache.evictKey('resources/read', 'res://a'); + await cache.write('resources/read', { contents: [] }, genB, { expiresAt: 1e9, scope: 'private', params: 'res://b' }); + expect(store.get({ method: 'resources/read', params: 'res://b', partition: PRE })).toBeDefined(); + // A fresh capture after the evictKey writes through. + const gen2 = cache.captureGeneration('resources/read', 'res://a'); + await cache.write('resources/read', { contents: [] }, gen2, { expiresAt: 1e9, scope: 'private', params: 'res://a' }); + expect(store.get({ method: 'resources/read', params: 'res://a', partition: PRE })).toBeDefined(); + }); + + it('evictKey: own-partition store.delete rejecting does not skip the shared-partition delete', async () => { + const deleted: string[] = []; + const store: ResponseCacheStore = { + get: () => undefined, + set: () => 0, + evict: () => {}, + clear: () => {}, + delete: key => { + if (key.partition === JSON.stringify(['srv', 'alice'])) return Promise.reject(new Error('own boom')); + deleted.push(key.partition ?? ''); + return undefined; + } + }; + const reported: unknown[] = []; + const cache = new ClientResponseCache(store, true, e => reported.push(e), 'alice'); + cache.setServerIdentity('srv'); + await cache.evictKey('resources/read', 'res://x'); + // Own-partition rejected → reported; shared-partition delete still ran. + expect((reported[0] as Error).message).toBe('own boom'); + expect(deleted).toEqual([JSON.stringify(['srv', ''])]); + }); + + it("write/read/evict address the list singletons consistently as params: '' on a non-normalizing custom store", async () => { + // A custom store that keys on the raw CacheKey without normalizing + // omitted/undefined `params` to '' (e.g. JSON.stringify, which drops + // undefined members). Every SDK→store call must therefore send the + // SAME params shape so write/read/evict address one backend key. + const entries = new Map(); + let stamp = 0; + const store: ResponseCacheStore = { + get: k => entries.get(JSON.stringify(k)), + set: (k, e) => (entries.set(JSON.stringify(k), { ...e, stamp: ++stamp }), stamp), + delete: k => void entries.delete(JSON.stringify(k)), + evict: () => {}, + clear: () => entries.clear() + }; + const cache = new ClientResponseCache(store, true); + await cache.write('tools/list', { tools: [TOOL_A] }, cache.captureGeneration('tools/list'), { + expiresAt: 1e9, + scope: 'private' + }); + // The read path finds the entry the write path stored. + expect((await cache.read('tools/list'))?.value).toEqual({ tools: [TOOL_A] }); + // The list_changed eviction path deletes the SAME backend key — gone. + await cache.evict('tools/list'); + expect(await cache.read('tools/list')).toBeUndefined(); + expect(entries.size).toBe(0); + }); + it('a custom store whose set() rejects is routed to reportError and write still resolves', async () => { const store: ResponseCacheStore = new InMemoryResponseCacheStore(); store.set = () => Promise.reject(new Error('redis down')); @@ -131,7 +292,7 @@ describe('ClientResponseCache', () => { const cache = new ClientResponseCache(store, true); expect(await cache.toolDefinition('a')).toBeUndefined(); - store.set({ method: 'tools/list' }, { value: { tools: [TOOL_A, TOOL_B] } }); + store.set({ method: 'tools/list', partition: PRE }, { value: { tools: [TOOL_A, TOOL_B] } }); const hit = await cache.toolDefinition('a'); expect(hit?.name).toBe('a'); // Same backing entry → identical reference (memoized index, not re-derived). @@ -139,7 +300,7 @@ describe('ClientResponseCache', () => { // A fresh write bumps the store stamp → the index re-derives (the new // entry's tool instance is what comes back, not the memoized one). - store.set({ method: 'tools/list' }, { value: { tools: [{ ...TOOL_A }, { ...TOOL_B }] } }); + store.set({ method: 'tools/list', partition: PRE }, { value: { tools: [{ ...TOOL_A }, { ...TOOL_B }] } }); const hit2 = await cache.toolDefinition('a'); expect(hit2?.name).toBe('a'); expect(hit2).not.toBe(hit); @@ -151,15 +312,24 @@ interface Scripted { serverTx: InMemoryTransport; listCount: () => number; listParams: () => ({ cursor?: string; _meta?: unknown } | undefined)[]; + wireCount: (method: string) => number; } -async function scriptedModernServer(pages: Tool[][]): Promise { +interface ScriptOptions { + listHint?: { ttlMs?: number; cacheScope?: 'public' | 'private' }; + readHint?: { ttlMs?: number; cacheScope?: 'public' | 'private' }; + serverInfo?: { name: string; version: string }; +} + +async function scriptedModernServer(pages: Tool[][], opts: ScriptOptions = {}): Promise { const [clientTx, serverTx] = InMemoryTransport.createLinkedPair(); let lists = 0; + const wireCounts = new Map(); const params: ({ cursor?: string; _meta?: unknown } | undefined)[] = []; serverTx.onmessage = m => { const r = m as JSONRPCRequest; if (r.id === undefined) return; + wireCounts.set(r.method, (wireCounts.get(r.method) ?? 0) + 1); if (r.method === 'server/discover') { void serverTx.send({ jsonrpc: '2.0', @@ -168,7 +338,7 @@ async function scriptedModernServer(pages: Tool[][]): Promise { resultType: 'complete', supportedVersions: [MODERN], capabilities: { tools: { listChanged: true }, prompts: {}, resources: {} }, - serverInfo: { name: 'scripted', version: '1.0.0' } + serverInfo: opts.serverInfo ?? { name: 'scripted', version: '1.0.0' } } }); } else if (r.method === 'tools/list') { @@ -182,8 +352,8 @@ async function scriptedModernServer(pages: Tool[][]): Promise { id: r.id, result: { resultType: 'complete', - ttlMs: 0, - cacheScope: 'private', + ttlMs: opts.listHint?.ttlMs ?? 0, + cacheScope: opts.listHint?.cacheScope ?? 'private', tools: pages[idx] ?? [], ...(next !== undefined && { nextCursor: next }) } @@ -193,18 +363,41 @@ async function scriptedModernServer(pages: Tool[][]): Promise { void serverTx.send({ jsonrpc: '2.0', id: r.id, - result: { resultType: 'complete', ttlMs: 0, cacheScope: 'private', [key]: [] } + result: { + resultType: 'complete', + ttlMs: opts.listHint?.ttlMs ?? 0, + cacheScope: opts.listHint?.cacheScope ?? 'private', + [key]: [] + } + }); + } else if (r.method === 'resources/read') { + const uri = (r.params as { uri: string }).uri; + void serverTx.send({ + jsonrpc: '2.0', + id: r.id, + result: { + resultType: 'complete', + ...(opts.readHint?.ttlMs !== undefined && { ttlMs: opts.readHint.ttlMs }), + ...(opts.readHint?.cacheScope !== undefined && { cacheScope: opts.readHint.cacheScope }), + contents: [{ uri, mimeType: 'text/plain', text: `body:${uri}` }] + } }); } }; await serverTx.start(); - return { clientTx, serverTx, listCount: () => lists, listParams: () => params }; + return { + clientTx, + serverTx, + listCount: () => lists, + listParams: () => params, + wireCount: m => wireCounts.get(m) ?? 0 + }; } -function modernClient(store?: InMemoryResponseCacheStore): Client { +function modernClient(store?: InMemoryResponseCacheStore, extra?: { cachePartition?: string; defaultCacheTtlMs?: number }): Client { return new Client( { name: 'cache-client', version: '1.0.0' }, - { versionNegotiation: { mode: { pin: MODERN } }, ...(store && { responseCacheStore: store }) } + { versionNegotiation: { mode: { pin: MODERN } }, ...(store && { responseCacheStore: store }), ...extra } ); } @@ -223,7 +416,7 @@ describe('Client response-cache substrate', () => { const page = await client.listTools({ cursor: '1' }); expect(page.tools.map(t => t.name)).toEqual(['b']); expect(page.nextCursor).toBeUndefined(); - expect(store.get({ method: 'tools/list' })).toBeUndefined(); + expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined(); expect(listCount()).toBe(1); // No cursor → aggregates every page and writes one entry. @@ -232,7 +425,7 @@ describe('Client response-cache substrate', () => { expect(nextCursor).toBeUndefined(); expect(listCount()).toBe(3); - const entry = store.get({ method: 'tools/list' }); + const entry = store.get({ method: 'tools/list', partition: part() }); expect((entry?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual(['a', 'b']); }); @@ -287,7 +480,7 @@ describe('Client response-cache substrate', () => { expect((error as SdkError).message).toMatch(/exceeded listMaxPages \(2\); server pagination did not terminate/); expect((error as SdkError).data).toEqual({ method: 'tools/list', listMaxPages: 2 }); // Aggregate-then-write: the throw happens before the cache write, so nothing is cached. - expect(store.get({ method: 'tools/list' })).toBeUndefined(); + expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined(); // The per-page path is never capped. const page = await client.listTools({ cursor: '2' }); expect(page.tools.map(t => t.name)).toEqual(['a']); @@ -302,9 +495,9 @@ describe('Client response-cache substrate', () => { await client.listPrompts(); await client.listResources(); await client.listResourceTemplates(); - expect(store.get({ method: 'prompts/list' })).toBeDefined(); - expect(store.get({ method: 'resources/list' })).toBeDefined(); - expect(store.get({ method: 'resources/templates/list' })).toBeDefined(); + expect(store.get({ method: 'prompts/list', partition: part() })).toBeDefined(); + expect(store.get({ method: 'resources/list', partition: part() })).toBeDefined(); + expect(store.get({ method: 'resources/templates/list', partition: part() })).toBeDefined(); }); it('toolDefinition through the Client wiring: miss before any list, hit after', async () => { @@ -325,13 +518,13 @@ describe('Client response-cache substrate', () => { const client = modernClient(store); await client.connect(clientTx); await client.listTools(); - expect(store.get({ method: 'tools/list' })).toBeDefined(); + expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined(); expect(await toolDef(client, 'a')).toBeDefined(); const before = listCount(); await serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage); // Evicted, not refetched. - expect(store.get({ method: 'tools/list' })).toBeUndefined(); + expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined(); expect(await toolDef(client, 'a')).toBeUndefined(); expect(listCount()).toBe(before); }); @@ -343,12 +536,12 @@ describe('Client response-cache substrate', () => { await client.connect(clientTx); await client.listResources(); await client.listResourceTemplates(); - expect(store.get({ method: 'resources/list' })).toBeDefined(); - expect(store.get({ method: 'resources/templates/list' })).toBeDefined(); + expect(store.get({ method: 'resources/list', partition: part() })).toBeDefined(); + expect(store.get({ method: 'resources/templates/list', partition: part() })).toBeDefined(); await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/list_changed' } as JSONRPCMessage); - expect(store.get({ method: 'resources/list' })).toBeUndefined(); - expect(store.get({ method: 'resources/templates/list' })).toBeUndefined(); + expect(store.get({ method: 'resources/list', partition: part() })).toBeUndefined(); + expect(store.get({ method: 'resources/templates/list', partition: part() })).toBeUndefined(); }); it('_resetConnectionState leaves a user-supplied store untouched and drops the derived index', async () => { @@ -357,12 +550,12 @@ describe('Client response-cache substrate', () => { const client = modernClient(store); await client.connect(clientTx); await client.listTools(); - expect(store.get({ method: 'tools/list' })).toBeDefined(); + expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined(); await client.close(); // A user-supplied store is NOT cleared on close/reconnect (defeats the // only reason to supply one); the per-instance default IS cleared. - expect(store.get({ method: 'tools/list' })).toBeDefined(); + expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined(); // The derived index is connection-scoped regardless: it is dropped, and // the next read re-derives from the (still-populated) store. expect((cacheOf(client) as unknown as { _toolIndex?: unknown })._toolIndex).toBeUndefined(); @@ -407,9 +600,9 @@ describe('Client response-cache substrate', () => { expect(errors.map(e => e.message)).toContain('redis down'); }); - it('a custom store whose evict() throws is routed to onerror and dispatch still runs', async () => { + it('a custom store whose delete() throws on the list_changed eviction path is routed to onerror and dispatch still runs', async () => { const store = new InMemoryResponseCacheStore(); - store.evict = () => { + store.delete = () => { throw new Error('boom'); }; const { clientTx, serverTx } = await scriptedModernServer([[TOOL_A]]); @@ -427,3 +620,649 @@ describe('Client response-cache substrate', () => { expect(dispatched).toBe(true); }); }); + +/** Freeze the cache's clock at `t` for deterministic freshness assertions. */ +const setNow = (client: Client, t: number): void => { + (cacheOf(client) as unknown as { _now: () => number })._now = () => t; +}; + +describe('Client honours cacheHints (SEP-2549)', () => { + it('listTools(): within TTL → no wire request; after TTL → refetch', async () => { + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A, TOOL_B]], { listHint: { ttlMs: 30_000 } }); + const client = modernClient(); + await client.connect(clientTx); + setNow(client, 1_000_000); + + const first = await client.listTools(); + expect(first.tools.map(t => t.name)).toEqual(['a', 'b']); + expect(listCount()).toBe(1); + + // Within TTL → cache hit, no wire request. + setNow(client, 1_020_000); + const second = await client.listTools(); + expect(second.tools.map(t => t.name)).toEqual(['a', 'b']); + expect(listCount()).toBe(1); + // Clone-on-serve: hit is a fresh copy, not the stored object. + expect(second).not.toBe(first); + + // After TTL → stale, refetch. + setNow(client, 1_040_000); + await client.listTools(); + expect(listCount()).toBe(2); + }); + + it("cacheMode: 'refresh' always fetches and re-stores; 'bypass' fetches without read or write", async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000 } }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.listTools(); + expect(listCount()).toBe(1); + const stamp1 = store.get({ method: 'tools/list', partition: part() })?.stamp; + + // 'refresh' ignores the still-fresh entry, fetches, and re-stores (new stamp). + await client.listTools(undefined, { cacheMode: 'refresh' }); + expect(listCount()).toBe(2); + const stamp2 = store.get({ method: 'tools/list', partition: part() })?.stamp; + expect(stamp2).toBeGreaterThan(stamp1!); + + // 'bypass' fetches but neither reads nor writes the cache. + await client.listTools(undefined, { cacheMode: 'bypass' }); + expect(listCount()).toBe(3); + expect(store.get({ method: 'tools/list', partition: part() })?.stamp).toBe(stamp2); + + // Default 'use' still serves the entry 'refresh' wrote. + await client.listTools(); + expect(listCount()).toBe(3); + }); + + it('listChanged eviction beats TTL: a still-fresh entry is dropped on the relevant notification', async () => { + const { clientTx, serverTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000 } }); + const client = modernClient(); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.listTools(); + await client.listTools(); + expect(listCount()).toBe(1); + + // Relevant notification ⇒ entry immediately stale (spec): the next call refetches even within TTL. + await serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage); + await client.listTools(); + expect(listCount()).toBe(2); + }); + + it('defaultCacheTtlMs: 0 (the default) means always-fetch but mirroring still works', async () => { + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 0 } }); + const client = modernClient(); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.listTools(); + // ttlMs:0 ⇒ expiresAt === now ⇒ never served from cache. + await client.listTools(); + expect(listCount()).toBe(2); + // …but the entry IS stored (retain-for-schema), so the derived index works. + expect((await toolDef(client, 'a'))?.name).toBe('a'); + }); + + it('an explicit server ttlMs:0 is honoured as immediately stale (server hint wins over defaultCacheTtlMs)', async () => { + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 0 } }); + // defaultCacheTtlMs only applies when the result lacks ttlMs (e.g. a + // legacy-era response); a 2026 server's explicit 0 is the spec's + // "immediately stale" and is honoured as-is. + const client = modernClient(undefined, { defaultCacheTtlMs: 60_000 }); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.listTools(); + await client.listTools(); + expect(listCount()).toBe(2); + }); + + it("same serverIdentity, different cachePartition: 'public' entries shared; 'private' entries isolated", async () => { + const store = new InMemoryResponseCacheStore(); + // Public scope: alice writes, bob (different cachePartition, SAME server) reads from the server's shared partition. + { + const a = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'public' } }); + const alice = modernClient(store, { cachePartition: 'alice' }); + await alice.connect(a.clientTx); + setNow(alice, 1_000_000); + await alice.listTools(); + expect(a.listCount()).toBe(1); + // Stored under the server's shared partition (`[serverIdentity, '']`). + expect(store.get({ method: 'tools/list', partition: part() })?.scope).toBe('public'); + expect(store.get({ method: 'tools/list', partition: part('alice') })).toBeUndefined(); + + const b = await scriptedModernServer([[TOOL_B]], { listHint: { ttlMs: 60_000, cacheScope: 'public' } }); + const bob = modernClient(store, { cachePartition: 'bob' }); + await bob.connect(b.clientTx); + setNow(bob, 1_000_000); + const { tools } = await bob.listTools(); + // Public-share across two clients of the SAME server on one store: bob is served alice's entry without a wire request. + expect(tools.map(t => t.name)).toEqual(['a']); + expect(b.listCount()).toBe(0); + } + store.clear(); + // Private scope: alice writes under her own partition; bob misses and fetches his own. + { + const a = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } }); + const alice = modernClient(store, { cachePartition: 'alice' }); + await alice.connect(a.clientTx); + setNow(alice, 1_000_000); + await alice.listTools(); + expect(store.get({ method: 'tools/list', partition: part('alice') })?.scope).toBe('private'); + expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined(); + + const b = await scriptedModernServer([[TOOL_B]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } }); + const bob = modernClient(store, { cachePartition: 'bob' }); + await bob.connect(b.clientTx); + setNow(bob, 1_000_000); + const { tools } = await bob.listTools(); + // Own-partition miss + shared-partition miss ⇒ bob fetches; alice's private entry never crosses. + expect(tools.map(t => t.name)).toEqual(['b']); + expect(b.listCount()).toBe(1); + // toolDefinition (mirroring source) reads from each client's own partition. + expect((await toolDef(alice, 'a'))?.name).toBe('a'); + expect((await toolDef(bob, 'b'))?.name).toBe('b'); + expect(await toolDef(bob, 'a')).toBeUndefined(); + } + }); + + it("different serverIdentity on a shared store: no cross-talk even for 'public' entries", async () => { + const store = new InMemoryResponseCacheStore(); + // Server X stamps public; client x writes under [x@1.0.0, '']. + const sx = await scriptedModernServer([[TOOL_A]], { + listHint: { ttlMs: 60_000, cacheScope: 'public' }, + serverInfo: { name: 'x', version: '1.0.0' } + }); + const x = modernClient(store); + await x.connect(sx.clientTx); + setNow(x, 1_000_000); + await x.listTools(); + expect(store.get({ method: 'tools/list', partition: part('', 'x@1.0.0') })?.scope).toBe('public'); + + // Server Y on the SAME store: y misses x's entry (different serverIdentity) and fetches its own. + const sy = await scriptedModernServer([[TOOL_B]], { + listHint: { ttlMs: 60_000, cacheScope: 'public' }, + serverInfo: { name: 'y', version: '1.0.0' } + }); + const y = modernClient(store); + await y.connect(sy.clientTx); + setNow(y, 1_000_000); + const { tools } = await y.listTools(); + expect(tools.map(t => t.name)).toEqual(['b']); + expect(sy.listCount()).toBe(1); + // Both entries co-exist under their own server namespaces. + expect(store.get({ method: 'tools/list', partition: part('', 'y@1.0.0') })?.scope).toBe('public'); + expect((await toolDef(x, 'a'))?.name).toBe('a'); + expect(await toolDef(y, 'a')).toBeUndefined(); + }); + + it("list_changed eviction is partition-scoped on a shared store: one server's notification leaves co-tenants' entries intact", async () => { + const store = new InMemoryResponseCacheStore(); + // Two clients on DIFFERENT servers share one store. Each has a fresh + // public tools/list entry under its own server-identity partition. + const sx = await scriptedModernServer([[TOOL_A]], { + listHint: { ttlMs: 60_000, cacheScope: 'public' }, + serverInfo: { name: 'x', version: '1.0.0' } + }); + const x = modernClient(store); + await x.connect(sx.clientTx); + setNow(x, 1_000_000); + await x.listTools(); + + const sy = await scriptedModernServer([[TOOL_B]], { + listHint: { ttlMs: 60_000, cacheScope: 'public' }, + serverInfo: { name: 'y', version: '1.0.0' } + }); + const y = modernClient(store); + await y.connect(sy.clientTx); + setNow(y, 1_000_000); + await y.listTools(); + expect(store.get({ method: 'tools/list', partition: part('', 'x@1.0.0') })).toBeDefined(); + expect(store.get({ method: 'tools/list', partition: part('', 'y@1.0.0') })).toBeDefined(); + + // Server X sends list_changed → only x's entry is dropped; y's + // co-tenant entry survives (evict() targets the connected server's + // two partition singletons, never the method-wide store.evict()). + await sx.serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage); + expect(store.get({ method: 'tools/list', partition: part('', 'x@1.0.0') })).toBeUndefined(); + expect(store.get({ method: 'tools/list', partition: part('', 'y@1.0.0') })).toBeDefined(); + // y still cache-serves its own entry without a wire request. + await y.listTools(); + expect(sy.listCount()).toBe(1); + expect((await toolDef(y, 'b'))?.name).toBe('b'); + }); + + it("a malicious serverInfo cannot bleed into another server's principal slot (JSON encoding is collision-free)", async () => { + const store = new InMemoryResponseCacheStore(); + // The legitimate server B with principal 'victim'. Under naive + // `${name}@${version}|${cachePartition}` concat its private partition + // would be `realServer@1.0|victim`. + const sb = await scriptedModernServer([[TOOL_B]], { + listHint: { ttlMs: 60_000, cacheScope: 'private' }, + serverInfo: { name: 'realServer', version: '1.0' } + }); + const victim = modernClient(store, { cachePartition: 'victim' }); + await victim.connect(sb.clientTx); + setNow(victim, 1_000_000); + await victim.listTools(); + expect(sb.listCount()).toBe(1); + + // A malicious server A whose `name` embeds `@`/`|` to target B's + // naive-concat private slot. With JSON encoding the partition is + // `["realServer@1.0|victim@",""]` ≠ `["realServer@1.0","victim"]` — + // no collision possible regardless of what characters the + // server-controlled string carries. + const sa = await scriptedModernServer([[TOOL_A]], { + listHint: { ttlMs: 60_000, cacheScope: 'public' }, + serverInfo: { name: 'realServer@1.0|victim', version: '' } + }); + const attacker = modernClient(store); + await attacker.connect(sa.clientTx); + setNow(attacker, 1_000_000); + await attacker.listTools(); + + // B's private entry is unreachable from A (and vice versa): victim + // still cache-serves its own entry, attacker never observed it. + const again = await victim.listTools(); + expect(again.tools.map(t => t.name)).toEqual(['b']); + expect(sb.listCount()).toBe(1); + expect(await toolDef(attacker, 'b')).toBeUndefined(); + expect((await toolDef(victim, 'b'))?.name).toBe('b'); + }); + + it("a server flipping cacheScope private→public on a 'refresh' deletes the shadowing private-partition entry", async () => { + const store = new InMemoryResponseCacheStore(); + const pages: Tool[][] = [[TOOL_A]]; + const opts: ScriptOptions = { listHint: { ttlMs: 60_000, cacheScope: 'private' } }; + const a = await scriptedModernServer(pages, opts); + const alice = modernClient(store, { cachePartition: 'alice' }); + await alice.connect(a.clientTx); + setNow(alice, 1_000_000); + // Warm: private-scoped → stored under [serverIdentity, 'alice']. + await alice.listTools(); + expect(store.get({ method: 'tools/list', partition: part('alice') })?.scope).toBe('private'); + // Server flips the same key's scope to 'public' AND changes the body. + opts.listHint = { ttlMs: 60_000, cacheScope: 'public' }; + pages[0] = [TOOL_B]; + await alice.listTools(undefined, { cacheMode: 'refresh' }); + // Fresh body stored at the shared partition; the now-stale private + // entry is DELETED so it cannot shadow the public one on the + // own-first probe. + expect(store.get({ method: 'tools/list', partition: part() })?.scope).toBe('public'); + expect(store.get({ method: 'tools/list', partition: part('alice') })).toBeUndefined(); + // Next default-mode read serves the FRESH public body from cache (no wire). + const { tools } = await alice.listTools(); + expect(tools.map(t => t.name)).toEqual(['b']); + expect(a.listCount()).toBe(2); + }); + + it("the shared-partition fallback drops entries whose stored scope is not 'public' (misconfigured-co-tenant guard)", async () => { + const store = new InMemoryResponseCacheStore(); + // A misconfigured co-tenant (omits cachePartition, default '') writes a + // PRIVATE-scoped entry — which lands at the server's shared partition. + const a = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } }); + const misconfigured = modernClient(store); + await misconfigured.connect(a.clientTx); + setNow(misconfigured, 1_000_000); + await misconfigured.listTools(); + expect(store.get({ method: 'tools/list', partition: part() })?.scope).toBe('private'); + + // A correctly-partitioned client probes own partition (miss), then the + // shared one — which holds the misconfigured client's PRIVATE entry. + // The `entry.scope === 'public'` gate drops it; bob fetches over the + // wire instead of leaking the private body. + const b = await scriptedModernServer([[TOOL_B]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } }); + const bob = modernClient(store, { cachePartition: 'bob' }); + await bob.connect(b.clientTx); + setNow(bob, 1_000_000); + const { tools } = await bob.listTools(); + expect(tools.map(t => t.name)).toEqual(['b']); + expect(b.listCount()).toBe(1); + }); + + it('readResource(): keyed by uri, partitioned by scope, absent cacheScope is private; ttl≤0 is not stored', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, wireCount } = await scriptedModernServer([[TOOL_A]], { + readHint: { ttlMs: 60_000, cacheScope: 'private' } + }); + const client = modernClient(store, { cachePartition: 'alice' }); + await client.connect(clientTx); + setNow(client, 1_000_000); + + const r1 = await client.readResource({ uri: 'res://one' }); + expect(r1.contents[0]).toMatchObject({ text: 'body:res://one' }); + expect(wireCount('resources/read')).toBe(1); + // Within TTL → cache hit on the same uri. + const r2 = await client.readResource({ uri: 'res://one' }); + expect(r2.contents[0]).toMatchObject({ text: 'body:res://one' }); + expect(wireCount('resources/read')).toBe(1); + // Different uri → distinct key, fetch. + await client.readResource({ uri: 'res://two' }); + expect(wireCount('resources/read')).toBe(2); + // 'refresh' on the first uri → fetch. + await client.readResource({ uri: 'res://one' }, { cacheMode: 'refresh' }); + expect(wireCount('resources/read')).toBe(3); + // Stored under alice's partition only (private). + expect(store.get({ method: 'resources/read', params: 'res://one', partition: part('alice') })).toBeDefined(); + expect(store.get({ method: 'resources/read', params: 'res://one', partition: part() })).toBeUndefined(); + + // bob on a shared store cannot read alice's private resource body. + const b = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 60_000, cacheScope: 'private' } }); + const bob = modernClient(store, { cachePartition: 'bob' }); + await bob.connect(b.clientTx); + setNow(bob, 1_000_000); + await bob.readResource({ uri: 'res://one' }); + expect(b.wireCount('resources/read')).toBe(1); + }); + + // The wire codec rejects a 2026-07-28 cacheable result without `cacheScope` + // (it is a required field), so the absent-scope path is unreachable through + // `request()`. The `_freshness` private-default is defence-in-depth only; + // the partition test above asserts the explicit-`'private'` storage slot. + + it('readResource(): ttl≤0 is not stored (unbounded URI keyspace) but the result still returns', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, wireCount } = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 0, cacheScope: 'public' } }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + const r = await client.readResource({ uri: 'res://x' }); + expect(r.contents[0]).toMatchObject({ text: 'body:res://x' }); + expect(store.get({ method: 'resources/read', params: 'res://x', partition: part() })).toBeUndefined(); + await client.readResource({ uri: 'res://x' }); + expect(wireCount('resources/read')).toBe(2); + }); + + it('readResource(): 600 distinct ttl=0 URIs issue zero store.delete() calls (evictKey skipped on a cold default-mode miss)', async () => { + // Regression: every ttl≤0 default-mode read used to call + // `evictKey('resources/read', uri)` unconditionally, which issued 1–2 + // `store.delete()` calls against a cold key — wasted round trips on + // an async store across a ttl≤0 working set. The evict is now skipped + // when `_serveFromCache` already proved nothing fresh is held. + const store = new InMemoryResponseCacheStore(); + let deletes = 0; + const realDelete = store.delete.bind(store); + (store as ResponseCacheStore).delete = key => { + deletes++; + return realDelete(key); + }; + const { clientTx } = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 0, cacheScope: 'public' } }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + + for (let i = 0; i < 600; i++) await client.readResource({ uri: `res://cold/${i}` }); + expect(store.size).toBe(0); + // No store.delete() issued for any of the 600 cold-miss ttl≤0 reads. + expect(deletes).toBe(0); + // `captureGeneration` recorded one entry per read URI (the in-flight + // guard's presence record); none was bumped — `evictKey` was never + // reached. The map is bounded by keys the CLIENT chose to read. + const gen = (cacheOf(client) as unknown as { _evictionGeneration: Map })._evictionGeneration; + expect(gen.size).toBe(600); + expect([...gen.values()].every(v => v === 0)).toBe(true); + }); + + it('600 distinct-URI notifications/resources/updated with no prior readResource do not grow the eviction-generation map; a read URI is still guarded', async () => { + // Regression: `evictKey` used to bump (and therefore record) the + // per-URI generation unconditionally, so a server streaming + // `resources/updated` for distinct URIs grew `_evictionGeneration` + // without bound — server-controlled heap growth. `evictKey` now only + // bumps a key the client has captured. + const store = new InMemoryResponseCacheStore(); + const { clientTx, serverTx } = await scriptedModernServer([[TOOL_A]], { + readHint: { ttlMs: 60_000, cacheScope: 'private' } + }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + const gen = (cacheOf(client) as unknown as { _evictionGeneration: Map })._evictionGeneration; + + for (let i = 0; i < 600; i++) { + await serverTx.send({ + jsonrpc: '2.0', + method: 'notifications/resources/updated', + params: { uri: `res://never-read/${i}` } + } as JSONRPCMessage); + } + expect(gen.size).toBe(0); + + // A URI the client HAS read is recorded by captureGeneration; an + // `updated` for it bumps (the in-flight guard still works). + await client.readResource({ uri: 'res://hot' }); + expect(cacheOf(client).captureGeneration('resources/read', 'res://hot')).toBe(0); + await serverTx.send({ + jsonrpc: '2.0', + method: 'notifications/resources/updated', + params: { uri: 'res://hot' } + } as JSONRPCMessage); + expect(cacheOf(client).captureGeneration('resources/read', 'res://hot')).toBe(1); + expect(gen.size).toBe(1); + }); + + it("readResource(): a 'refresh' that returns ttl≤0 evicts the previously-warm entry; the next default-mode read fetches fresh", async () => { + const store = new InMemoryResponseCacheStore(); + const opts: ScriptOptions = { readHint: { ttlMs: 60_000, cacheScope: 'private' } }; + const { clientTx, wireCount } = await scriptedModernServer([[TOOL_A]], opts); + const client = modernClient(store, { cachePartition: 'alice' }); + await client.connect(clientTx); + setNow(client, 1_000_000); + + // Warm: ttl=60s. + await client.readResource({ uri: 'res://x' }); + expect(wireCount('resources/read')).toBe(1); + await client.readResource({ uri: 'res://x' }); + expect(wireCount('resources/read')).toBe(1); + expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeDefined(); + + // Server flips to ttl=0; a 'refresh' fetch returns ttl≤0 → the held + // positive-TTL entry is evicted, not left stale-but-fresh. + opts.readHint = { ttlMs: 0, cacheScope: 'private' }; + await client.readResource({ uri: 'res://x' }, { cacheMode: 'refresh' }); + expect(wireCount('resources/read')).toBe(2); + expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeUndefined(); + + // The next default-mode read fetches fresh (the entry was evicted). + await client.readResource({ uri: 'res://x' }); + expect(wireCount('resources/read')).toBe(3); + }); + + it('a pre-aborted signal on a warm-cache hit rejects with SdkError(RequestTimeout) — the abort is not swallowed by the cache serve', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'public' } }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.listTools(); + expect(listCount()).toBe(1); + // Warm — a plain second call would be cache-served. With a pre-aborted + // signal it must reject the same way the wire path would. + const ac = new AbortController(); + ac.abort('user cancelled'); + const error = await client.listTools(undefined, { signal: ac.signal }).catch(e => e as SdkError); + expect(error).toBeInstanceOf(SdkError); + expect((error as SdkError).code).toBe(SdkErrorCode.RequestTimeout); + expect((error as SdkError).message).toContain('user cancelled'); + // The aborted call did not reach the wire. + expect(listCount()).toBe(1); + }); + + it('notifications/resources/updated evicts the cached resources/read entry for that URI from both partitions', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, serverTx, wireCount } = await scriptedModernServer([[TOOL_A]], { + readHint: { ttlMs: 60_000, cacheScope: 'private' } + }); + const client = modernClient(store, { cachePartition: 'alice' }); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.readResource({ uri: 'res://one' }); + await client.readResource({ uri: 'res://two' }); + expect(wireCount('resources/read')).toBe(2); + // Within TTL → cache hit. + await client.readResource({ uri: 'res://one' }); + expect(wireCount('resources/read')).toBe(2); + + // Subscribe → updated → re-read flow: the per-URI eviction drops the + // cached body from BOTH partitions; the next read for THAT uri + // refetches even within TTL; the sibling uri is untouched. + await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/updated', params: { uri: 'res://one' } } as JSONRPCMessage); + expect(store.get({ method: 'resources/read', params: 'res://one', partition: part('alice') })).toBeUndefined(); + await client.readResource({ uri: 'res://one' }); + expect(wireCount('resources/read')).toBe(3); + await client.readResource({ uri: 'res://two' }); + expect(wireCount('resources/read')).toBe(3); + + // A `resources/updated` without a string `uri` is a no-op (matches the + // mcp.d guard). + await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/updated', params: {} } as JSONRPCMessage); + await client.readResource({ uri: 'res://one' }); + expect(wireCount('resources/read')).toBe(3); + }); + + it('ttlMs is clamped at 24h (MAX_CACHE_TTL_MS) so a server cannot pin an entry indefinitely', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { + listHint: { ttlMs: Number.MAX_SAFE_INTEGER, cacheScope: 'public' } + }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + await client.listTools(); + const entry = store.get({ method: 'tools/list', partition: part() }); + // expiresAt = now + min(ttlMs, 24h) + expect(entry?.expiresAt).toBe(1_000_000 + 86_400_000); + // Just under 24h → still served from cache. + setNow(client, 1_000_000 + 86_400_000 - 1); + await client.listTools(); + expect(listCount()).toBe(1); + // Past 24h → refetch. + setNow(client, 1_000_000 + 86_400_000 + 1); + await client.listTools(); + expect(listCount()).toBe(2); + }); + + it('the default in-memory store is bounded: 600 distinct readResource URIs cap at 512 with oldest-first eviction', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx } = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 60_000, cacheScope: 'public' } }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + + for (let i = 0; i < 600; i++) await client.readResource({ uri: `res://${i}` }); + expect(store.size).toBe(512); + // The first 88 URIs (oldest insertions) were evicted; the tail survived. + expect(store.get({ method: 'resources/read', params: 'res://0', partition: part() })).toBeUndefined(); + expect(store.get({ method: 'resources/read', params: 'res://87', partition: part() })).toBeUndefined(); + expect(store.get({ method: 'resources/read', params: 'res://88', partition: part() })).toBeDefined(); + expect(store.get({ method: 'resources/read', params: 'res://599', partition: part() })).toBeDefined(); + }); + + it('the maxEntries cap never evicts the tools/list singleton: 600 readResource URIs leave the derived index intact', async () => { + const store = new InMemoryResponseCacheStore(); + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { + listHint: { ttlMs: 60_000, cacheScope: 'public' }, + readHint: { ttlMs: 60_000, cacheScope: 'public' } + }); + const client = modernClient(store); + await client.connect(clientTx); + setNow(client, 1_000_000); + + await client.listTools(); + expect((await toolDef(client, 'a'))?.name).toBe('a'); + for (let i = 0; i < 600; i++) await client.readResource({ uri: `res://${i}` }); + // 512 capped resources/read entries + the exempt tools/list singleton. + expect(store.size).toBe(513); + // The list singleton survived the FIFO churn → derived index still hits; + // a fresh listTools() within TTL is still cache-served. + expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined(); + expect((await toolDef(client, 'a'))?.name).toBe('a'); + await client.listTools(); + expect(listCount()).toBe(1); + }); + + it('an in-flight readResource() does not re-cache a stale body when resources/updated for that URI lands mid-request', async () => { + const store = new InMemoryResponseCacheStore(); + const [clientTx, serverTx] = InMemoryTransport.createLinkedPair(); + let reads = 0; + let pendingId: string | number | undefined; + serverTx.onmessage = m => { + const r = m as JSONRPCRequest; + if (r.id === undefined) return; + if (r.method === 'server/discover') { + void serverTx.send({ + jsonrpc: '2.0', + id: r.id, + result: { + resultType: 'complete', + supportedVersions: [MODERN], + capabilities: { resources: {} }, + serverInfo: { name: 'scripted', version: '1.0.0' } + } + }); + } else if (r.method === 'resources/read') { + reads++; + pendingId = r.id; // defer — the test drives the response + } + }; + await serverTx.start(); + const client = modernClient(store, { cachePartition: 'alice' }); + await client.connect(clientTx); + setNow(client, 1_000_000); + + const respond = (text: string): void => + void serverTx.send({ + jsonrpc: '2.0', + id: pendingId!, + result: { + resultType: 'complete', + ttlMs: 60_000, + cacheScope: 'private', + contents: [{ uri: 'res://x', mimeType: 'text/plain', text }] + } + }); + + // Kick off the read; let the request reach the server. + const inflight = client.readResource({ uri: 'res://x' }); + await new Promise(resolve => setTimeout(resolve, 0)); + expect(reads).toBe(1); + // resources/updated for THIS uri lands while the read is in flight → + // bumps the per-URI generation; the eventual write is suppressed. + await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/updated', params: { uri: 'res://x' } } as JSONRPCMessage); + respond('stale'); + const r1 = await inflight; + expect(r1.contents[0]).toMatchObject({ text: 'stale' }); + expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeUndefined(); + + // The next read for the same URI refetches (no stale cache hit) and + // its write goes through (fresh capture). + const next = client.readResource({ uri: 'res://x' }); + await new Promise(resolve => setTimeout(resolve, 0)); + expect(reads).toBe(2); + respond('fresh'); + expect((await next).contents[0]).toMatchObject({ text: 'fresh' }); + expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeDefined(); + }); + + it('a custom store whose get() rejects degrades to a miss; the request still reaches the wire', async () => { + const store = new InMemoryResponseCacheStore(); + (store as ResponseCacheStore).get = () => Promise.reject(new Error('redis down')); + const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000 } }); + const client = modernClient(store); + const errors: Error[] = []; + client.onerror = e => errors.push(e); + await client.connect(clientTx); + setNow(client, 1_000_000); + + const { tools } = await client.listTools(); + expect(tools.map(t => t.name)).toEqual(['a']); + expect(listCount()).toBe(1); + expect(errors.map(e => e.message)).toContain('redis down'); + }); +}); diff --git a/packages/core/src/shared/protocol.ts b/packages/core/src/shared/protocol.ts index 46064bc938..c49ed22994 100644 --- a/packages/core/src/shared/protocol.ts +++ b/packages/core/src/shared/protocol.ts @@ -1298,7 +1298,16 @@ export abstract class Protocol { } } - options?.signal?.throwIfAborted(); + // An already-aborted caller signal must surface the same way an + // in-flight abort does (`SdkError(RequestTimeout, reason)` via + // `cancel()` below). Bare `throwIfAborted()` would propagate the + // raw `signal.reason` instead, so callers that introduce an async + // hop before `request()` (e.g. a cache freshness check) would see + // a different rejection type depending on where the abort lands. + if (options?.signal?.aborted) { + const reason = options.signal.reason; + throw reason instanceof SdkError ? reason : new SdkError(SdkErrorCode.RequestTimeout, String(reason)); + } // Spec basic/patterns/cancellation §Transport-Specific (2026-07-28): // on Streamable HTTP, closing the per-request SSE stream IS the From bf3bc858429f77b47263ff92feb287851c85f9f1 Mon Sep 17 00:00:00 2001 From: Felix Weinberger Date: Mon, 22 Jun 2026 20:41:55 +0000 Subject: [PATCH 2/2] docs(examples): caching story asserts client-side honouring; README adds cacheMode + custom-store sections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The client now calls listTools() and readResource() twice each and asserts the second of each pair is cache-served — the server's resource handler counts how many times it ran and exposes that via a read-count tool, so the example verifies (server-side) that the cache hit never reached the wire. Demonstrates cacheMode:'refresh' and the post-refresh return to cache-serving. README drops the follow-up note (honouring is shipped), adds a §cacheMode section, and adds a §Custom store section showing the four-method ResponseCacheStore interface shape with the cachePartition guidance for shared stores. --- examples/caching/README.md | 48 +++++++++++++++++++++++++++++++++++--- examples/caching/client.ts | 43 +++++++++++++++++++++++++++++++--- examples/caching/server.ts | 39 ++++++++++++++++++++++++++++++- 3 files changed, 123 insertions(+), 7 deletions(-) diff --git a/examples/caching/README.md b/examples/caching/README.md index 665ee3cc7e..840602a815 100644 --- a/examples/caching/README.md +++ b/examples/caching/README.md @@ -1,10 +1,52 @@ # caching `CacheableResult` freshness hints (protocol revision 2026-07-28). The server declares hints at two layers — a per-registration `cacheHint` on the resource and server-level `ServerOptions.cacheHints` — and the SDK resolves most-specific-author-first (handler-return fields would -take precedence over both) and stamps `ttlMs`/`cacheScope` on the wire toward modern clients only. The client reads the stamped values back. - -> Full client-side cache **honouring** (re-using a still-fresh result instead of re-requesting) is a follow-up; this example reads what the server emits today. +take precedence over both) and stamps `ttlMs`/`cacheScope` on the wire toward modern clients only. The client honours the stamped values: a still-fresh held entry is served without a round trip. ```bash pnpm tsx examples/caching/client.ts ``` + +The client calls `listTools()` and `readResource()` twice each; the second of each pair is served from the response cache. The server exposes a `request-count` tool (how many `tools/list` requests reached it) and a `read-count` tool (how many times the resource handler ran), so the example asserts each counter is unchanged after the cache-served call and increments after `cacheMode: 'refresh'`. + +## `cacheMode` + +Per-call control on the cacheable verbs (`listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` / `readResource()`): + +```ts +await client.readResource({ uri: 'config://app' }); // 'use' (default): serve from cache if fresh +await client.readResource({ uri: 'config://app' }, { cacheMode: 'refresh' }); // always fetch, then re-store +await client.readResource({ uri: 'config://app' }, { cacheMode: 'bypass' }); // fetch; do not read or write the cache +``` + +A `list_changed` notification still evicts immediately regardless of TTL. + +## Custom store + +The default per-client `InMemoryResponseCacheStore` (bounded at 512 entries by default) is enough for most hosts. To back the cache with something persistent (Redis, KV, IndexedDB), implement the five-method `ResponseCacheStore` interface — the store is a dumb keyed-value carrier; freshness and partitioning are the client's job: + +```ts +import type { CacheEntry, CacheKey, CacheScope, ResponseCacheStore } from '@modelcontextprotocol/client'; + +class MyStore implements ResponseCacheStore { + async get(key: CacheKey): Promise { + /* read {value, stamp, expiresAt, scope} from your backend */ + } + async set(key: CacheKey, entry: { value: unknown; expiresAt?: number; scope?: CacheScope }): Promise { + /* write entry under key; return a monotonically-increasing stamp */ + } + async delete(key: CacheKey): Promise { + /* drop the single entry under key (no-op if absent) */ + } + async evict(method: string): Promise { + /* drop every entry whose key.method === method (across every partition) */ + } + async clear(): Promise { + /* drop everything */ + } +} + +const client = new Client({ name: 'host', version: '1.0.0' }, { responseCacheStore: new MyStore(), cachePartition: principalId }); +``` + +The SDK scopes every entry by the connected server's identity automatically — you do not encode server identity into `cachePartition` or the store key yourself. When one store backs several principals against the same server, set `ClientOptions.cachePartition` to a stable identity of the authorization context (e.g. the auth subject) so `'private'`-scoped entries are isolated per principal; `'public'`-scoped entries are shared within the connected server's namespace automatically. Note `serverInfo` is self-reported, so a server that deliberately impersonates another's `name`/`version` shares its `'public'` slot; the per-principal isolation holds regardless. diff --git a/examples/caching/client.ts b/examples/caching/client.ts index f7137edf89..6354dbe48d 100644 --- a/examples/caching/client.ts +++ b/examples/caching/client.ts @@ -1,8 +1,7 @@ /** * Reads the cache hints emitted on cacheable results (2026-07-28 connections - * only) and asserts the configured values reached the wire. Full client-side - * cache *honouring* (re-using a fresh result instead of re-requesting) is a - * follow-up — see the SDK's tracking issue for client cache support. + * only) and asserts the client honours them: a still-fresh cached entry is + * served without a round trip. */ import { check, connectFromArgs, runClient } from '../harness.js'; @@ -11,22 +10,60 @@ interface Cacheable { cacheScope?: 'public' | 'private'; } +async function callCount(client: Awaited>, name: 'read-count' | 'request-count'): Promise { + const r = await client.callTool({ name }); + return Number((r.content[0] as { text: string }).text); +} + runClient('caching', async () => { // connectFromArgs picks transport (default: spawn ./server.ts over stdio; --http ) and era (--legacy) from argv. Your code would construct a Client and connect over your chosen transport directly. const client = await connectFromArgs(import.meta.dirname); check.equal(client.getNegotiatedProtocolVersion(), '2026-07-28'); + // The server stamps `tools/list` with `ttlMs: 30_000, cacheScope: 'public'`. const tools = (await client.listTools()) as Cacheable & Awaited>; check.equal(tools.ttlMs, 30_000); check.equal(tools.cacheScope, 'public'); + // `request-count` proves the wire was reached exactly once. + check.equal(await callCount(client, 'request-count'), 1); + + // The second call is served from the response cache: the server-side + // `tools/list` counter is unchanged, and the result is a fresh copy of the + // held entry (so mutating it cannot reach the cache). + const toolsAgain = await client.listTools(); + check.deepEqual( + toolsAgain.tools.map(t => t.name), + tools.tools.map(t => t.name) + ); + check.equal(await callCount(client, 'request-count'), 1); + + // `cacheMode: 'refresh'` always fetches and re-stores: the counter moves. + await client.listTools(undefined, { cacheMode: 'refresh' }); + check.equal(await callCount(client, 'request-count'), 2); const resources = (await client.listResources()) as Cacheable & Awaited>; check.equal(resources.ttlMs, 5000); check.equal(resources.cacheScope, 'public'); + // `readResource`: the resource handler counts how many times it ran, and + // the `read-count` tool exposes that counter. const read = (await client.readResource({ uri: 'config://app' })) as Cacheable & Awaited>; check.equal(read.ttlMs, 60_000); check.equal(read.cacheScope, 'private'); + check.equal(await callCount(client, 'read-count'), 1); + + // Within TTL, default `cacheMode: 'use'` → served from cache; the server + // handler does not run. + await client.readResource({ uri: 'config://app' }); + check.equal(await callCount(client, 'read-count'), 1); + + // `cacheMode: 'refresh'` always fetches and re-stores. + await client.readResource({ uri: 'config://app' }, { cacheMode: 'refresh' }); + check.equal(await callCount(client, 'read-count'), 2); + + // After the refresh the entry is fresh again — back to cache-served. + await client.readResource({ uri: 'config://app' }); + check.equal(await callCount(client, 'read-count'), 2); await client.close(); }); diff --git a/examples/caching/server.ts b/examples/caching/server.ts index 4b33f6fec7..b680355736 100644 --- a/examples/caching/server.ts +++ b/examples/caching/server.ts @@ -17,6 +17,13 @@ import { McpServer } from '@modelcontextprotocol/server'; import { runServerFromArgs } from '../harness.js'; +// Module-level (process-wide) counters so the values survive the harness's +// stateless HTTP leg (fresh `buildServer()` per request) as well as stdio's +// single per-connection instance. The client asserts against these to prove a +// cache-served call never reached the server. +let readCount = 0; +let listCount = 0; + function buildServer(): McpServer { const server = new McpServer( { name: 'caching-example', version: '1.0.0' }, @@ -40,12 +47,42 @@ function buildServer(): McpServer { description: 'Static application config (rarely changes)', cacheHint: { ttlMs: 60_000, cacheScope: 'private' } }, - async uri => ({ contents: [{ uri: uri.href, mimeType: 'application/json', text: '{"feature":true}' }] }) + async uri => { + readCount++; + return { contents: [{ uri: uri.href, mimeType: 'application/json', text: '{"feature":true}' }] }; + } ); // A tool, so tools/list has something to cache. server.registerTool('noop', { description: 'no-op' }, async () => ({ content: [{ type: 'text', text: 'ok' }] })); + // Exposes the server-side `resources/read` invocation count so the client + // can assert that a cache-served call did not reach the wire. + server.registerTool('read-count', { description: 'Number of resources/read calls that reached this server' }, async () => ({ + content: [{ type: 'text', text: String(readCount) }] + })); + + // Exposes the server-side `tools/list` invocation count. + server.registerTool('request-count', { description: 'Number of tools/list requests that reached this server' }, async () => ({ + content: [{ type: 'text', text: String(listCount) }] + })); + + // Wrap the auto-generated `tools/list` handler so the example can prove a + // cache-served `listTools()` never reached the wire. `McpServer` registers + // the handler lazily on the first `registerTool()`; we re-seat it here so + // every dispatch increments `listCount` before delegating to the original. + // (Reaches the underlying request-handler map directly — there is no public + // wrapper hook; acceptable for an instrumentation example.) + const handlers = (server.server as unknown as { _requestHandlers: Map Promise> }) + ._requestHandlers; + const original = handlers.get('tools/list'); + if (original) { + handlers.set('tools/list', (...a) => { + listCount++; + return original(...a); + }); + } + return server; }