From a250495d77e3a45698357f9b63b42c4e70c4ea32 Mon Sep 17 00:00:00 2001
From: Felix Weinberger <fweinberger@anthropic.com>
Date: Mon, 22 Jun 2026 20:39:13 +0000
Subject: [PATCH 1/2] feat(client): honor SEP-2549 cacheHints (ttlMs/scope) on
 cacheable list*/readResource
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The four list verbs and readResource now serve a still-fresh ResponseCacheStore
entry without a round trip when the server-stamped ttlMs has not elapsed.
Additive on the substrate (#2336): _listAllPages now stamps {expiresAt, scope}
on the aggregate write; a _serveFromCache front gates each verb on freshness;
readResource is newly cached (URI-keyed; only stored when ttl > 0, since the
URI keyspace is unbounded and there is no derived index).

Per-call CacheableRequestOptions.cacheMode ('use' | 'refresh' | 'bypass') maps
to mcp.d's CacheMode. ClientOptions.cachePartition is the per-principal slot
for 'private'-scoped entries (the spec's MUST-NOT-share-across-authz-contexts);
'public' entries always live at partition '' so a shared store serves them to
every co-tenant. ClientResponseCache reads probe own-partition then '' (mcp.d's
two-probe order — own-first because scope is only known after a fetch); the
toolDefinition/outputValidator derived indices use the same probe so SEP-2243
mirroring works under partitioning. readResource applies the same partition
derivation as the list verbs and treats absent cacheScope as 'private', so a
shared store cannot serve one principal's resource body to another.

ClientOptions.defaultCacheTtlMs (default 0) supplies the TTL when the result
lacks one (e.g. a legacy-era response); an explicit server-sent ttlMs:0 is
honoured as immediately stale. List aggregates are always stored regardless of
TTL (mcp.d's retainForSchema posture) so callTool's mirroring/output-validation
index keeps working at any TTL while the freshness gate never serves a stale
entry. A list_changed eviction beats TTL (the existing partition-agnostic
evict). Clock seam (now) injectable on ClientResponseCache for tests.

New exports: CacheMode, CacheableRequestOptions.
---
 .changeset/client-honor-cache-hints.md        |   7 +
 .changeset/client-response-cache-substrate.md |   2 +-
 .../protocol-pre-aborted-signal-wrap.md       |   5 +
 docs/migration-SKILL.md                       |   2 +
 docs/migration.md                             |  19 +
 packages/client/src/client/client.ts          | 285 +++++-
 packages/client/src/client/responseCache.ts   | 461 +++++++--
 packages/client/src/index.ts                  |  14 +-
 packages/client/test/client/listen.test.ts    |  26 +
 .../test/client/mcpParamMirroring.test.ts     | 117 ++-
 .../client/test/client/responseCache.test.ts  | 909 +++++++++++++++++-
 packages/core/src/shared/protocol.ts          |  11 +-
 12 files changed, 1699 insertions(+), 159 deletions(-)
 create mode 100644 .changeset/client-honor-cache-hints.md
 create mode 100644 .changeset/protocol-pre-aborted-signal-wrap.md

diff --git a/.changeset/client-honor-cache-hints.md b/.changeset/client-honor-cache-hints.md
new file mode 100644
index 0000000000..2a7659db63
--- /dev/null
+++ b/.changeset/client-honor-cache-hints.md
@@ -0,0 +1,7 @@
+---
+'@modelcontextprotocol/client': minor
+---
+
+`Client` now **honours** the server-stamped SEP-2549 `ttlMs`/`cacheScope` cache hints on the cacheable verbs (`listTools()`, `listPrompts()`, `listResources()`, `listResourceTemplates()`, `readResource()`): a still-fresh held entry is served without a round trip. New `CacheableRequestOptions.cacheMode` (`'use'` — the default; `'refresh'` — always fetch and re-store; `'bypass'` — fetch without consulting or writing the cache) gives per-call control. The behaviour is opt-in by hint: a server that sends `ttlMs: 0` (the conservative default this SDK's server stamps) sees byte-identical behaviour — every call fetches.
+
+Entries are automatically scoped by connected-server identity (derived from `serverInfo` after connect, encoded collision-free via `JSON.stringify`); `ClientOptions.cachePartition` is the opaque per-principal slot for `'private'`-scoped entries — set it to your principal identifier (e.g. the auth subject) when one `responseCacheStore` backs several principals. With the default `''` every entry lives at the connected server's shared partition (the safe single-tenant posture). `ClientOptions.defaultCacheTtlMs` (default `0`) supplies the TTL when a result lacks one (e.g. a legacy-era response); the server-supplied `ttlMs` is clamped at 24 h (`MAX_CACHE_TTL_MS`). The list verbs always store the aggregate (so `callTool`'s mirroring/output-validation index keeps working at any TTL); `readResource` stores only when the resolved TTL is positive. `notifications/resources/updated` evicts the cached `resources/read` body for that URI. `ResponseCacheStore` gained `delete(key)`; `InMemoryResponseCacheStore` is now bounded (`{ maxEntries }`, default 512, oldest-first eviction). New exports: `CacheMode`, `CacheableRequestOptions`, `InMemoryResponseCacheStoreOptions`, `MAX_CACHE_TTL_MS`.
diff --git a/.changeset/client-response-cache-substrate.md b/.changeset/client-response-cache-substrate.md
index 16a0621cac..9d957d5f0c 100644
--- a/.changeset/client-response-cache-substrate.md
+++ b/.changeset/client-response-cache-substrate.md
@@ -2,6 +2,6 @@
 '@modelcontextprotocol/client': major
 ---
 
-`Client.listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` now **auto-aggregate every page** when called without a `cursor` and return the complete result with `nextCursor: undefined` (matching the C#, Java, and mcp.d SDKs). Pass an explicit `{ cursor }` string to fetch a single page; the per-page path is unchanged. Existing manual pagination loops keep working — the first iteration returns everything and the loop exits — but can be deleted. The aggregated result is written to the new pluggable `ResponseCacheStore` (default: a fresh per-instance `InMemoryResponseCacheStore`); a `ClientResponseCache` collaborator owns the eviction-generation guard and the derived `tools/list` index that `callTool`'s output validation and SEP-2243 `Mcp-Param-*` mirroring read. New exports: `ResponseCacheStore`, `CacheKey`, `CacheEntry`, `CacheScope`, `MaybePromise`, `InMemoryResponseCacheStore`; new `ClientOptions.responseCacheStore` / `ClientOptions.listMaxPages` (caps the auto-aggregate walk at 64 pages by default; throws `SdkError` with `SdkErrorCode.ListPaginationExceeded` on overrun so a partial aggregate is never cached). The store interface is async-ready (`MaybePromise<…>`); the in-memory default stays synchronous. **A store instance must not be shared across `Client` instances at all in v2.0.x** — entries are keyed by method only (server-identity confusion + `clear()`/`evict()` cross-talk); per-principal partitioning that enables safe sharing arrives with the full caching engine.
+`Client.listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` now **auto-aggregate every page** when called without a `cursor` and return the complete result with `nextCursor: undefined` (matching the C#, Java, and mcp.d SDKs). Pass an explicit `{ cursor }` string to fetch a single page; the per-page path is unchanged. Existing manual pagination loops keep working — the first iteration returns everything and the loop exits — but can be deleted. The aggregated result is written to the new pluggable `ResponseCacheStore` (default: a fresh per-instance `InMemoryResponseCacheStore`); a `ClientResponseCache` collaborator owns the eviction-generation guard and the derived `tools/list` index that `callTool`'s output validation and SEP-2243 `Mcp-Param-*` mirroring read. New exports: `ResponseCacheStore`, `CacheKey`, `CacheEntry`, `CacheScope`, `MaybePromise`, `InMemoryResponseCacheStore`; new `ClientOptions.responseCacheStore` / `ClientOptions.listMaxPages` (caps the auto-aggregate walk at 64 pages by default; throws `SdkError` with `SdkErrorCode.ListPaginationExceeded` on overrun so a partial aggregate is never cached). The store interface is async-ready (`MaybePromise<…>`); the in-memory default stays synchronous. Entries are automatically scoped by the connected server's identity and (when set) the consumer-supplied `cachePartition`, so a shared store does not collide across servers or principals; evictions are likewise scoped to the connected server's partitions.
 
 **Behavior change (every era):** output-schema validator compilation is now lazy — validators are compiled on the first `callTool()` against the cached `tools/list` entry, not eagerly inside `listTools()` — and non-throwing: an uncompilable `outputSchema` is `console.warn`-ed and validation is skipped for that tool only (previously `listTools()` threw). A pluggable `jsonSchemaValidator` provider therefore observes compilation at `callTool` time, not `listTools` time. The legacy-era `listTools()` path is unchanged at the wire level but is observably different at the validator-lifecycle level.
diff --git a/.changeset/protocol-pre-aborted-signal-wrap.md b/.changeset/protocol-pre-aborted-signal-wrap.md
new file mode 100644
index 0000000000..a0a95758c8
--- /dev/null
+++ b/.changeset/protocol-pre-aborted-signal-wrap.md
@@ -0,0 +1,5 @@
+---
+'@modelcontextprotocol/core': patch
+---
+
+`Protocol.request()` now rejects with `SdkError(RequestTimeout, reason)` when called with an already-aborted signal, matching in-flight aborts. Previously the raw `signal.reason` was thrown.
diff --git a/docs/migration-SKILL.md b/docs/migration-SKILL.md
index 14eb419f57..ae20884449 100644
--- a/docs/migration-SKILL.md
+++ b/docs/migration-SKILL.md
@@ -572,6 +572,8 @@ side: auto-fulfilment is on by default (`ClientOptions.inputRequired`, `maxRound
 
 `Client.listTools()`, `listPrompts()`, `listResources()`, `listResourceTemplates()` called without a `cursor` now auto-aggregate every page and return the complete result (`nextCursor: undefined`); an explicit `{ cursor }` string still returns one page. Manual `do { … } while (cursor !== undefined)` loops keep working (the first call returns everything and the loop exits after one iteration) — replace them with the bare no-arg call. New `ClientOptions.listMaxPages` (default 64) caps the aggregate walk only; overrun throws `SdkError` (`SdkErrorCode.ListPaginationExceeded`).
 
+`Client.listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` / `readResource()` now honour the server-stamped SEP-2549 `ttlMs`/`cacheScope`: a still-fresh cached entry is returned without a round trip. Opt-in by server hint — a server that sends `ttlMs: 0` (the SDK's default stamp) sees no behaviour change. Per-call override: pass `{ cacheMode: 'refresh' }` (always fetch and re-store) or `{ cacheMode: 'bypass' }` (fetch without touching the cache). Server `ttlMs` is clamped at 24 h (`MAX_CACHE_TTL_MS`). Entries are automatically scoped by connected-server identity; new `ClientOptions.cachePartition` (per-principal slot for `'private'`-scoped entries on a shared `responseCacheStore`; default `''`) and `ClientOptions.defaultCacheTtlMs` (TTL when the result lacks one, e.g. legacy-era responses; default `0`). `ResponseCacheStore` gained `delete(key)` (driven by `notifications/resources/updated`); `InMemoryResponseCacheStore` is now bounded (`{ maxEntries }`, default 512).
+
 Output-schema validator compilation is now lazy (first `callTool()` against the cached `tools/list` entry) and non-throwing (an uncompilable `outputSchema` is `console.warn`-ed and validation is skipped for that tool only); `listTools()` no longer throws on an uncompilable `outputSchema`. Applies on every era — the legacy-era `listTools()` path is unchanged at the wire level only.
 
 No code changes required; wire-behavior note: on a 2026-07-28 Streamable HTTP connection, aborting an in-flight client request (caller `signal` / timeout) closes that request's SSE response stream as the spec cancellation signal — `notifications/cancelled` is no longer POSTed
diff --git a/docs/migration.md b/docs/migration.md
index 6208654bea..008c69e86d 100644
--- a/docs/migration.md
+++ b/docs/migration.md
@@ -575,6 +575,25 @@ const { tools } = await client.listTools();
 
 The auto-aggregate walk is capped at `ClientOptions.listMaxPages` pages (default 64; `0` disables) and throws an `SdkError` with `SdkErrorCode.ListPaginationExceeded` if the server's pagination does not converge, so a partial aggregate is never returned. The cap applies only to the no-`cursor` aggregate path; explicit per-page calls are never capped. The aggregated result is also written to the client's response cache (the source for `callTool`'s output-schema validation and SEP-2243 header mirroring).
 
+### Client honours server cache hints (SEP-2549)
+
+On a 2026-07-28 connection the cacheable verbs — `listTools()`, `listPrompts()`, `listResources()`, `listResourceTemplates()`, and `readResource()` — now serve a still-fresh held entry without a round trip when the server-stamped `ttlMs` has not elapsed. The behaviour is opt-in **by server hint**: a server that sends `ttlMs: 0` (the conservative default the SDK's `McpServer` stamps unless configured otherwise) sees byte-identical behaviour — every call fetches. A `list_changed` notification still evicts immediately regardless of TTL.
+
+Per-call control via the new `CacheableRequestOptions.cacheMode` (`'use'` is the default):
+
+```typescript
+await client.listTools(); // serve from cache if fresh
+await client.listTools(undefined, { cacheMode: 'refresh' }); // always fetch, then re-store
+await client.listTools(undefined, { cacheMode: 'bypass' }); // fetch; do not read or write the cache
+```
+
+New `ClientOptions`:
+
+- `cachePartition?: string` — the opaque per-principal identifier for `'private'`-scoped entries (the spec's "MUST NOT share across authorization contexts"). Entries are automatically scoped by connected-server identity (derived from `serverInfo`), so one `responseCacheStore` may back several clients without consumer-side encoding; set `cachePartition` to your principal identifier (e.g. the auth subject) when sharing a store across principals. With the default `''` every entry — public or private — lives at the connected server's shared partition (the safe single-tenant posture). Note `serverInfo` is self-reported, so a server that deliberately impersonates another's `name`/`version` shares its `'public'` slot; the per-principal isolation holds regardless.
+- `defaultCacheTtlMs?: number` — applied when a cacheable result lacks `ttlMs` (e.g. a legacy-era response). Default `0` — never serve from cache; the list aggregate is still **stored** so `callTool`'s mirroring/output-validation index keeps working regardless. The server-supplied `ttlMs` is clamped at 24 h (`MAX_CACHE_TTL_MS`).
+
+The `ResponseCacheStore` interface gained `delete(key)` (the per-URI invalidation `notifications/resources/updated` drives) — custom stores written against the alpha substrate need to add it. The default `InMemoryResponseCacheStore` is now bounded (default 512 entries, oldest-first eviction; configurable via `{ maxEntries }`).
+
 **Output-schema validator lifecycle (every era):** validator compilation is now lazy — validators are compiled on the first `callTool()` against the cached `tools/list` entry, not eagerly inside `listTools()` — and non-throwing: an uncompilable `outputSchema` is `console.warn`-ed
 and validation is skipped for that tool only. In v1, `listTools()` threw on an uncompilable `outputSchema`; now it succeeds, and a pluggable `jsonSchemaValidator` provider observes compilation at `callTool` time, not `listTools` time. The legacy-era `listTools()` path is
 unchanged at the wire level but is observably different at the validator-lifecycle level.
diff --git a/packages/client/src/client/client.ts b/packages/client/src/client/client.ts
index 07071e8e34..61c16e221b 100644
--- a/packages/client/src/client/client.ts
+++ b/packages/client/src/client/client.ts
@@ -82,8 +82,8 @@ import {
     SubscriptionFilterSchema
 } from '@modelcontextprotocol/core';
 
-import type { ResponseCacheStore } from './responseCache.js';
-import { ClientResponseCache, InMemoryResponseCacheStore } from './responseCache.js';
+import type { CacheMode, CacheScope, ResponseCacheStore } from './responseCache.js';
+import { ClientResponseCache, InMemoryResponseCacheStore, MAX_CACHE_TTL_MS } from './responseCache.js';
 import type { ResolvedVersionNegotiation, VersionNegotiationOptions } from './versionNegotiation.js';
 import { detectProbeEnvironment, detectProbeTransportKind, negotiateEra, resolveVersionNegotiation } from './versionNegotiation.js';
 
@@ -278,18 +278,65 @@ export type ClientOptions = ProtocolOptions & {
     /**
      * The response-cache store backing the client's derived views (the cached
      * `tools/list` result that {@linkcode Client.callTool | callTool}'s output
-     * validation and SEP-2243 header mirroring will read once the stacked
-     * SEP-2243 PR lands; this commit ships only the seam). Defaults to a fresh
+     * validation and SEP-2243 header mirroring read) and the SEP-2549
+     * cache-hint serving on the cacheable verbs. Defaults to a fresh
      * {@linkcode InMemoryResponseCacheStore} per client.
      *
-     * **Do not share one store across clients at all in v2.0.x** — entries
-     * are keyed by method + params only, so two clients connected to
-     * different servers (even under the same credential) collide on
-     * `tools/list`, and one client's `list_changed` evicts every co-tenant's
-     * entry. Supply your own only as a single-client backing store.
-     * Per-principal partitioning that enables safe sharing is #39.
+     * Entries are automatically scoped by connected-server identity (derived
+     * from `serverInfo` after connect) AND, for `'private'`-scoped results,
+     * by `cachePartition` — encoded collision-free via `JSON.stringify`, so a
+     * server cannot craft a `serverInfo` that bleeds into another server's
+     * namespace or another principal's slot. One store may therefore back
+     * several clients (e.g. a host pool against the same server, or one
+     * persistent KV across servers); `list_changed` evictions are scoped to
+     * the connected server's partitions, so co-tenants are unaffected. Set
+     * `cachePartition` to your principal identifier (e.g. the auth subject)
+     * when sharing across principals. Note `serverInfo` is self-reported — a
+     * server that deliberately impersonates
+     * another's `name`/`version` shares its `'public'` slot; the
+     * per-principal isolation via `cachePartition` holds regardless.
      */
     responseCacheStore?: ResponseCacheStore;
+
+    /**
+     * Opaque per-principal identifier for response-cache writes whose
+     * server-reported `cacheScope` is `'private'` (the spec's "MUST NOT share
+     * across authorization contexts"). Within the connected server's
+     * namespace, `'public'`-scoped entries live at the shared
+     * `[serverIdentity, '']` partition and `'private'`-scoped entries at
+     * `[serverIdentity, cachePartition]`. Set this to a stable identity of
+     * the authorization context (e.g. the auth subject) when one
+     * `responseCacheStore` backs several principals; with the
+     * default `''` every entry — public or private — lives at the server's
+     * shared partition, which is the safe single-tenant posture.
+     */
+    cachePartition?: string;
+
+    /**
+     * TTL (ms) applied when a cacheable result arrives without a `ttlMs`
+     * field. Default `0` — a result without an explicit hint is never served
+     * from cache (every call refetches), but it is still **stored** so the
+     * `tools/list`-derived index that {@linkcode Client.callTool | callTool}'s
+     * SEP-2243 mirroring and output-schema validation read keeps working
+     * regardless. The spec defines absent-or-≤0 as "immediately stale".
+     */
+    defaultCacheTtlMs?: number;
+};
+
+/**
+ * {@linkcode RequestOptions} extended with the per-call cache disposition for
+ * the cacheable verbs (`listTools()` / `listPrompts()` / `listResources()` /
+ * `listResourceTemplates()` / `readResource()`). See {@linkcode CacheMode}.
+ */
+export type CacheableRequestOptions = RequestOptions & {
+    /**
+     * `'use'` (default) serves a still-fresh cached entry without a round
+     * trip; `'refresh'` always fetches and re-stores; `'bypass'` fetches
+     * without consulting or writing the cache. Applies to the no-`cursor`
+     * auto-aggregate path on the list verbs and to `readResource`; ignored
+     * elsewhere.
+     */
+    cacheMode?: CacheMode;
 };
 
 /**
@@ -418,6 +465,7 @@ export class Client extends Protocol<ClientContext> {
      * `Mcp-Param-*` mirroring through `toolDefinition` on top.
      */
     private readonly _cache: ClientResponseCache;
+    private readonly _defaultCacheTtlMs: number;
     private readonly _listMaxPages: number;
     private _listChangedDebounceTimers: Map<string, ReturnType<typeof setTimeout>> = new Map();
     /**
@@ -516,8 +564,10 @@ export class Client extends Protocol<ClientContext> {
         this._cache = new ClientResponseCache(
             options?.responseCacheStore ?? new InMemoryResponseCacheStore(),
             options?.responseCacheStore !== undefined,
-            error => this._reportStoreError(error)
+            error => this._reportStoreError(error),
+            options?.cachePartition ?? ''
         );
+        this._defaultCacheTtlMs = options?.defaultCacheTtlMs ?? 0;
         this._listMaxPages = options?.listMaxPages ?? DEFAULT_LIST_MAX_PAGES;
 
         // Store list changed config for setup after connection (when we know server capabilities)
@@ -618,23 +668,29 @@ export class Client extends Protocol<ClientContext> {
      * @internal
      */
     private _setupListChangedHandlers(config: ListChangedHandlers): void {
+        // Every autoRefresh fetcher forces `cacheMode: 'refresh'`: the
+        // notification handler's `_cache.evict()` runs first, but a custom
+        // store whose `delete()` no-ops (or rejects) leaves the stale entry
+        // in place — a default-mode `list*()` would then cache-serve the very
+        // entry the notification declared stale. `'refresh'` bypasses the
+        // cache read so the fetcher always reaches the wire and overwrites.
         if (config.tools && this._serverCapabilities?.tools?.listChanged) {
             this._setupListChangedHandler('tools', 'notifications/tools/list_changed', config.tools, async () => {
-                const result = await this.listTools();
+                const result = await this.listTools(undefined, { cacheMode: 'refresh' });
                 return result.tools;
             });
         }
 
         if (config.prompts && this._serverCapabilities?.prompts?.listChanged) {
             this._setupListChangedHandler('prompts', 'notifications/prompts/list_changed', config.prompts, async () => {
-                const result = await this.listPrompts();
+                const result = await this.listPrompts(undefined, { cacheMode: 'refresh' });
                 return result.prompts;
             });
         }
 
         if (config.resources && this._serverCapabilities?.resources?.listChanged) {
             this._setupListChangedHandler('resources', 'notifications/resources/list_changed', config.resources, async () => {
-                const result = await this.listResources();
+                const result = await this.listResources(undefined, { cacheMode: 'refresh' });
                 return result.resources;
             });
         }
@@ -913,6 +969,7 @@ export class Client extends Protocol<ClientContext> {
 
             this._serverCapabilities = result.capabilities;
             this._serverVersion = result.serverInfo;
+            this._cache.setServerIdentity(this._deriveServerIdentity(transport));
             // HTTP transports must set the protocol version in each header after initialization.
             if (transport.setProtocolVersion) {
                 transport.setProtocolVersion(result.protocolVersion);
@@ -996,6 +1053,7 @@ export class Client extends Protocol<ClientContext> {
 
         this._serverCapabilities = result.discover.capabilities;
         this._serverVersion = result.discover.serverInfo;
+        this._cache.setServerIdentity(this._deriveServerIdentity(transport));
         this._instructions = result.discover.instructions;
         // Modern selection: the same connection state the legacy handshake completion sets.
         this._negotiatedProtocolVersion = result.version;
@@ -1105,6 +1163,20 @@ export class Client extends Protocol<ClientContext> {
         return this._serverVersion;
     }
 
+    /**
+     * The connected server's identity for response-cache partitioning. The
+     * `serverInfo` `name@version` pair when available (the spec requires it on
+     * both `initialize` and `server/discover`); falls back to the transport's
+     * `sessionId` otherwise. The value itself is server-controlled — the
+     * collision-safety of the storage partition comes from
+     * {@linkcode ClientResponseCache}'s JSON-array encoding around it, not
+     * from any character it does or does not contain.
+     */
+    private _deriveServerIdentity(transport: Transport): string {
+        const v = this._serverVersion;
+        return v === undefined ? (transport.sessionId ?? '') : `${v.name}@${v.version}`;
+    }
+
     /**
      * After initialization has completed, this will be populated with the protocol version negotiated
      * during the initialize handshake. When manually reconstructing a transport for reconnection, pass this
@@ -1345,7 +1417,7 @@ export class Client extends Protocol<ClientContext> {
      * );
      * ```
      */
-    async listPrompts(params?: ListPromptsRequest['params'], options?: RequestOptions): Promise<ListPromptsResult> {
+    async listPrompts(params?: ListPromptsRequest['params'], options?: CacheableRequestOptions): Promise<ListPromptsResult> {
         if (!this._serverCapabilities?.prompts && !this._enforceStrictCapabilities) {
             // Respect capability negotiation: server does not support prompts
             console.debug('Client.listPrompts() called but server does not advertise prompts capability - returning empty list');
@@ -1354,6 +1426,8 @@ export class Client extends Protocol<ClientContext> {
         if (params?.cursor !== undefined) {
             return this.request({ method: 'prompts/list', params }, options);
         }
+        const hit = await this._serveFromCache<ListPromptsResult>('prompts/list', undefined, options);
+        if (hit !== undefined) return hit;
         return this._listAllPages<ListPromptsResult>('prompts/list', params, options, (acc, page) => acc.prompts.push(...page.prompts));
     }
 
@@ -1383,7 +1457,7 @@ export class Client extends Protocol<ClientContext> {
      * );
      * ```
      */
-    async listResources(params?: ListResourcesRequest['params'], options?: RequestOptions): Promise<ListResourcesResult> {
+    async listResources(params?: ListResourcesRequest['params'], options?: CacheableRequestOptions): Promise<ListResourcesResult> {
         if (!this._serverCapabilities?.resources && !this._enforceStrictCapabilities) {
             // Respect capability negotiation: server does not support resources
             console.debug('Client.listResources() called but server does not advertise resources capability - returning empty list');
@@ -1392,6 +1466,8 @@ export class Client extends Protocol<ClientContext> {
         if (params?.cursor !== undefined) {
             return this.request({ method: 'resources/list', params }, options);
         }
+        const hit = await this._serveFromCache<ListResourcesResult>('resources/list', undefined, options);
+        if (hit !== undefined) return hit;
         return this._listAllPages<ListResourcesResult>('resources/list', params, options, (acc, page) =>
             acc.resources.push(...page.resources)
         );
@@ -1411,7 +1487,7 @@ export class Client extends Protocol<ClientContext> {
      */
     async listResourceTemplates(
         params?: ListResourceTemplatesRequest['params'],
-        options?: RequestOptions
+        options?: CacheableRequestOptions
     ): Promise<ListResourceTemplatesResult> {
         if (!this._serverCapabilities?.resources && !this._enforceStrictCapabilities) {
             // Respect capability negotiation: server does not support resources
@@ -1423,6 +1499,8 @@ export class Client extends Protocol<ClientContext> {
         if (params?.cursor !== undefined) {
             return this.request({ method: 'resources/templates/list', params }, options);
         }
+        const hit = await this._serveFromCache<ListResourceTemplatesResult>('resources/templates/list', undefined, options);
+        if (hit !== undefined) return hit;
         return this._listAllPages<ListResourceTemplatesResult>('resources/templates/list', params, options, (acc, page) =>
             acc.resourceTemplates.push(...page.resourceTemplates)
         );
@@ -1453,10 +1531,14 @@ export class Client extends Protocol<ClientContext> {
     private async _listAllPages<R extends { nextCursor?: string }>(
         method: RequestMethod,
         baseParams: { readonly [key: string]: unknown } | undefined,
-        options: RequestOptions | undefined,
+        options: CacheableRequestOptions | undefined,
         append: (acc: R, page: R) => void,
         finalize?: (acc: R) => void
     ): Promise<R> {
+        // `'bypass'` is the no-touch path: the cache is neither read nor
+        // written, so the substrate is byte-untouched (the `tools/list`
+        // derived index keeps whatever it held).
+        const bypass = options?.cacheMode === 'bypass';
         // Capture the eviction generation BEFORE page 1: a `list_changed` that
         // lands mid-walk bumps the counter, and the terminal `write` skips
         // when it observes the bump (the result still returns to the caller —
@@ -1482,10 +1564,76 @@ export class Client extends Protocol<ClientContext> {
         }
         acc.nextCursor = undefined;
         finalize?.(acc);
-        await this._cache.write(method, acc, generation);
+        if (bypass) return acc;
+        // The aggregate is ALWAYS written: even when the resolved TTL is ≤0
+        // the entry is stored already-stale (mcp.d's `retainForSchema`
+        // posture) so the `tools/list`-derived index keeps working regardless,
+        // while the freshness gate in `_serveFromCache` never serves it.
+        // Page-1 carries the result-level `ttlMs`/`cacheScope` (`acc` IS the
+        // mutated page-1 object).
+        await this._cache.write(method, acc, generation, this._freshness(acc));
         return acc;
     }
 
+    /**
+     * Compute the {@linkcode ClientResponseCache.write} freshness payload from
+     * a cacheable result body. The single seam through which the client reads
+     * `ttlMs`/`cacheScope` (mcp.d's `cachedFetch` engine). The fields pass
+     * through the loose result schema, so they are read off the runtime body;
+     * a missing `ttlMs` falls back to
+     * {@linkcode ClientOptions | ClientOptions.defaultCacheTtlMs}; an explicit server-sent
+     * `ttlMs` (including `0` — the spec's "immediately stale") is honoured
+     * as-is. The default of `0` means `expiresAt === now()` ⇒ never served,
+     * only stored. A missing `cacheScope` is treated as `'private'` — the
+     * spec's `'public'` grant ("any client … MAY serve to any user") is too
+     * strong to infer by default, and matches this SDK's server-side stamp
+     * default.
+     */
+    private _freshness(result: unknown, params?: string): { expiresAt: number; scope: CacheScope; params?: string } {
+        const body = result as { ttlMs?: unknown; cacheScope?: unknown };
+        const ttlMs = typeof body.ttlMs === 'number' ? body.ttlMs : this._defaultCacheTtlMs;
+        const scope: CacheScope = body.cacheScope === 'public' ? 'public' : 'private';
+        // Clamp at 24h (`MAX_CACHE_TTL_MS`) so a server cannot pin an entry
+        // indefinitely; floor at 0 so a negative `ttlMs` is treated as
+        // immediately stale (the spec's absent-or-≤0 rule).
+        return { expiresAt: this._cache.now() + Math.min(Math.max(0, ttlMs), MAX_CACHE_TTL_MS), scope, params };
+    }
+
+    /**
+     * The cache-serving front of every cacheable verb (mcp.d's `cachedFetch`
+     * read half): under `cacheMode: 'use'` (the default), a held entry whose
+     * `expiresAt` is in the future is returned as a `structuredClone` and the
+     * round trip is skipped. `'refresh'` and `'bypass'` always fetch (the
+     * caller decides whether to write). A custom store whose `get()` rejects
+     * is routed to `onerror` and treated as a miss — cache bookkeeping never
+     * blocks a request from reaching the wire.
+     */
+    private async _serveFromCache<R>(
+        method: string,
+        params: string | undefined,
+        options: CacheableRequestOptions | undefined
+    ): Promise<R | undefined> {
+        if (options?.cacheMode === 'bypass' || options?.cacheMode === 'refresh') return undefined;
+        const entry = await this._cache.read(method, params).catch(error => void this._reportStoreError(error));
+        if (entry?.expiresAt !== undefined && entry.expiresAt > this._cache.now()) {
+            // A pre-aborted caller signal must reject the same way it would on
+            // the wire path (`Protocol.request()` wraps an already-aborted
+            // signal as `SdkError(RequestTimeout, reason)`); without this guard
+            // a cache hit would resolve successfully and silently swallow the
+            // abort.
+            if (options?.signal?.aborted) {
+                const reason = options.signal.reason;
+                throw reason instanceof SdkError ? reason : new SdkError(SdkErrorCode.RequestTimeout, String(reason));
+            }
+            // Clone on the way out so a caller mutating the returned aggregate
+            // (e.g. `result.tools.sort(...)`) cannot reach the cache or the
+            // stamp-memoized indices derived from it — the same invariant
+            // `_cache.write` upholds on the way in.
+            return structuredClone(entry.value) as R;
+        }
+        return undefined;
+    }
+
     /** Route a custom-store failure to `onerror` without aborting the surrounding dispatch. */
     private _reportStoreError(e: unknown): void {
         this.onerror?.(e instanceof Error ? e : new Error(String(e)));
@@ -1529,9 +1677,48 @@ export class Client extends Protocol<ClientContext> {
         return tool === undefined ? undefined : scanXMcpHeaderDeclarations(tool.inputSchema);
     }
 
-    /** Reads the contents of a resource by URI. */
-    async readResource(params: ReadResourceRequest['params'], options?: RequestOptions): Promise<ReadResourceResult> {
-        return this.request({ method: 'resources/read', params }, options);
+    /**
+     * Reads the contents of a resource by URI.
+     *
+     * Honours the result's `ttlMs`/`cacheScope` (SEP-2549): a still-fresh
+     * cached body for the same `uri` is returned without a round trip
+     * (`cacheMode: 'use'`, the default). The cache key is `{method, uri}`
+     * partitioned by the resolved scope — `'private'` (the default when the
+     * server omits the field) is stored under this client's
+     * {@linkcode ClientOptions | ClientOptions.cachePartition}, so a shared
+     * store cannot serve one principal's resource body to another. Unlike the
+     * list verbs, a result whose resolved TTL is ≤0 is **not** stored
+     * (`resources/read` has no derived index and the URI keyspace is
+     * unbounded).
+     */
+    async readResource(params: ReadResourceRequest['params'], options?: CacheableRequestOptions): Promise<ReadResourceResult> {
+        const hit = await this._serveFromCache<ReadResourceResult>('resources/read', params.uri, options);
+        if (hit !== undefined) return hit;
+        // Capture the per-URI eviction generation BEFORE the request: a
+        // `notifications/resources/updated` for this URI arriving while the
+        // read is in flight bumps it (via `evictKey`), and the terminal
+        // `write` skips so the now-stale body is not re-cached. Same guard as
+        // the list verbs' `_listAllPages`, keyed by `{method, uri}`.
+        const generation = this._cache.captureGeneration('resources/read', params.uri);
+        const result = await this.request({ method: 'resources/read', params }, options);
+        if (options?.cacheMode !== 'bypass') {
+            const freshness = this._freshness(result, params.uri);
+            if (freshness.expiresAt > this._cache.now()) {
+                await this._cache.write('resources/read', result, generation, freshness);
+            } else if (options?.cacheMode === 'refresh') {
+                // ttl≤0 → drop any held positive-TTL entry so the next
+                // default-mode read fetches fresh — a `'refresh'` that
+                // returns ttl≤0 must not leave the previously-warm entry
+                // serving stale until its old expiry (mcp.d's `cachedFetch`
+                // write half evicts on the no-store branch). Only on the
+                // `'refresh'` path: a default-mode miss already proved
+                // nothing fresh is held (`_serveFromCache` returned
+                // `undefined`), so `evictKey`'s store deletes would be wasted
+                // round trips against an async store on a ttl≤0 working set.
+                await this._cache.evictKey('resources/read', params.uri);
+            }
+        }
+        return result;
     }
 
     /** Subscribes to change notifications for a resource. The server must support resource subscriptions. */
@@ -1587,8 +1774,14 @@ export class Client extends Protocol<ClientContext> {
         }
 
         // Honor RequestOptions.signal exactly as request() does: an
-        // already-aborted signal rejects synchronously before any setup.
-        options?.signal?.throwIfAborted();
+        // already-aborted signal rejects synchronously before any setup, and
+        // the rejection is the same `SdkError(RequestTimeout, reason)` wrap
+        // request() / `_serveFromCache` apply (unless `reason` is already an
+        // SdkError — preserved verbatim).
+        if (options?.signal?.aborted) {
+            const reason = options.signal.reason;
+            throw reason instanceof SdkError ? reason : new SdkError(SdkErrorCode.RequestTimeout, String(reason));
+        }
 
         const requestAbort = new AbortController();
         // The listen request's JSON-RPC id (= the spec's subscription id
@@ -1773,16 +1966,27 @@ export class Client extends Protocol<ClientContext> {
         // `Object.prototype` member name (`constructor`, `toString`, …) does
         // not reach the iteration as a non-iterable function.
         const evicted = Object.hasOwn(LIST_CHANGED_EVICTIONS, raw.method) ? LIST_CHANGED_EVICTIONS[raw.method] : undefined;
-        if (evicted !== undefined) {
+        if (raw.method === 'notifications/resources/updated') {
+            // Per-URI eviction (mcp.d's `invalidateLogical`): the now-cached
+            // `resources/read` body for this URI is stale on receipt; drop it
+            // from BOTH partitions so the next `readResource` for the same URI
+            // refetches even within TTL (the documented subscribe → updated →
+            // re-read flow). Fire-and-forget like the `list_changed` path —
+            // dispatch must not block on an async store.
+            const uri = (raw.params as { uri?: unknown } | undefined)?.uri;
+            if (typeof uri === 'string') void this._cache.evictKey('resources/read', uri);
+        } else if (evicted !== undefined) {
             for (const method of evicted) {
                 // `evict()` bumps the generation FIRST and unconditionally
                 // (the `_cacheListResult` race guard relies on the bump, not
-                // on the store's evict completing), then awaits the store. A
-                // custom store's `evict()` may throw or reject; route to
-                // `onerror` and proceed so dispatch (and the user's
-                // `listChanged` handler) runs regardless. Fire-and-forget —
-                // dispatch must not block on an async store.
-                void this._cache.evict(method).catch(error => this._reportStoreError(error));
+                // on the store's deletes completing), then drops only THIS
+                // server's two partition singletons — co-tenants on a shared
+                // store keep their entries. Store failures are reported via
+                // `onerror` inside `evict()` and the call resolves, so
+                // dispatch (and the user's `listChanged` handler) runs
+                // regardless. Fire-and-forget — dispatch must not block on
+                // an async store.
+                void this._cache.evict(method);
             }
         }
         if (raw.method === 'notifications/subscriptions/acknowledged') {
@@ -1966,12 +2170,15 @@ export class Client extends Protocol<ClientContext> {
             if (!mirroringActive || !isHeaderMismatch || options?.toolDefinition !== undefined) {
                 throw error;
             }
-            const refreshOptions = { signal: options?.signal, timeout: options?.timeout };
-            // A custom store's `evict()` may throw or reject (the documented
-            // store contract); route that to `onerror` and proceed — the
-            // generation bump already happened, so the refetch overwrites the
-            // stale entry regardless.
-            await this._cache.evict('tools/list').catch(error_ => this._reportStoreError(error_));
+            // `cacheMode: 'refresh'` so the recovery refetch always reaches
+            // the wire — `evict()` reports a custom store's `delete()`
+            // failure via `onerror` and resolves, but a no-op/failing
+            // `delete()` leaves the stale entry in place; without `'refresh'`
+            // the `listTools()` below would cache-serve the very entry whose
+            // staleness triggered this recovery. The generation bump still
+            // happens regardless, so the refetch's write overwrites.
+            const refreshOptions = { signal: options?.signal, timeout: options?.timeout, cacheMode: 'refresh' as const };
+            await this._cache.evict('tools/list');
             // The recovery refetch may itself fail (e.g. `listMaxPages`, a
             // transient error that hits only the `tools/list` walk). Surface
             // it via `onerror` so the real cause is observable, then proceed
@@ -2063,7 +2270,7 @@ export class Client extends Protocol<ClientContext> {
      * );
      * ```
      */
-    async listTools(params?: ListToolsRequest['params'], options?: RequestOptions): Promise<ListToolsResult> {
+    async listTools(params?: ListToolsRequest['params'], options?: CacheableRequestOptions): Promise<ListToolsResult> {
         if (!this._serverCapabilities?.tools && !this._enforceStrictCapabilities) {
             // Respect capability negotiation: server does not support tools
             console.debug('Client.listTools() called but server does not advertise tools capability - returning empty list');
@@ -2079,6 +2286,8 @@ export class Client extends Protocol<ClientContext> {
             this._excludeInvalidXMcpHeaderTools(page);
             return page;
         }
+        const hit = await this._serveFromCache<ListToolsResult>('tools/list', undefined, options);
+        if (hit !== undefined) return hit;
         // Auto-aggregate: SEP-2243 invalid-`x-mcp-header` exclusion runs on
         // the complete aggregate via the `finalize` hook before the cache
         // write, so the cached entry never holds an unmirrorable tool.
diff --git a/packages/client/src/client/responseCache.ts b/packages/client/src/client/responseCache.ts
index 8fbe90ca49..cc99221e7e 100644
--- a/packages/client/src/client/responseCache.ts
+++ b/packages/client/src/client/responseCache.ts
@@ -1,35 +1,49 @@
 import type { ListToolsResult, Tool } from '@modelcontextprotocol/core';
 
 /**
- * Minimal response-cache substrate (the kernel of #39's design).
+ * Client-side response cache for SEP-2549 (`CacheableResult`) freshness hints.
  *
  * The store is a dumb keyed-value carrier: every freshness, scope and
  * invalidation decision lives in the {@linkcode ClientResponseCache} (the
- * `Client`'s single cache-coordination collaborator). This file
- * deliberately
- * ships only what the SEP-2243 mirroring path and the existing
- * `tools/list`-derived validators need today — the full `cacheHints` engine
- * (TTL short-circuiting, public/private partitioning, `CacheMode`) lands with
- * the rest of #39 on top of the same interface.
+ * `Client`'s single cache-coordination collaborator). The `stamp` field is
+ * mcp.d's re-derivation key — a derived view (e.g. the `name → Tool` index)
+ * re-computes only when the backing entry's stamp changes.
  *
  * Reference design: mcp.d `client/cache.d` / `client/client.d` (`CacheStore`,
- * `cachedTool`). The `stamp` field is mcp.d's re-derivation key — a derived
- * view (e.g. the `name → Tool` index) re-computes only when the backing
- * entry's stamp changes.
+ * `cachedTool`, `cachedFetch`, `invalidateLogical`).
  */
 
 /** A value or a promise of one. The store interface is async-ready; the in-memory default returns plain values. */
 export type MaybePromise<T> = T | Promise<T>;
 
-/** The freshness scope of a cached entry (#39's `cacheHints.scope`). */
+/** The freshness scope of a cached entry (SEP-2549 `cacheHints.scope`). */
 export type CacheScope = 'public' | 'private';
 
+/**
+ * Per-call cache disposition for the cacheable verbs (`listTools()` /
+ * `listPrompts()` / `listResources()` / `listResourceTemplates()` /
+ * `readResource()`):
+ *
+ * - `'use'` (the default) — serve a still-fresh cached entry without a round
+ *   trip; on miss/stale, fetch and write.
+ * - `'refresh'` — always fetch (ignore any held entry) and write the fresh
+ *   result.
+ * - `'bypass'` — fetch without consulting OR writing the cache (the result is
+ *   not stored). The `tools/list`-derived index (mirroring / output
+ *   validation) is therefore unaffected by a `'bypass'` call.
+ */
+export type CacheMode = 'use' | 'refresh' | 'bypass';
+
 /**
  * A logical cache address. `params` is the canonical result-affecting params
  * key (`''` for the four list ops, the `uri` for `resources/read`); omitted is
- * equivalent to `''`. `partition` is the per-principal identity slot reserved
- * for #39's shared-store partitioning — always `''` today (the
- * `Client` never populates it); omitted is equivalent to `''`.
+ * equivalent to `''`. `partition` namespaces the entry by connected-server
+ * identity AND per-principal scope: the `Client` writes a JSON-encoded
+ * `[serverIdentity, principal]` pair (so a server-controlled `serverInfo`
+ * string cannot bleed into the principal slot regardless of what characters
+ * it contains). A `'public'`-scoped entry lives at `[serverIdentity, '']`; a
+ * `'private'`-scoped entry at `[serverIdentity, cachePartition]`. Omitted is
+ * equivalent to `''`.
  */
 export interface CacheKey {
     readonly method: string;
@@ -41,11 +55,10 @@ export interface CacheKey {
  * One cached response body. `value` is the verbatim decoded result; `stamp` is
  * the store-generated monotonically increasing write counter — opaque to
  * callers. Derived views (e.g. a `name → Tool` index) memoize against it and
- * re-derive only when it changes. `expiresAt` and `scope` are the
- * client-computed freshness metadata (#39 — `expiresAt = now + ttlMs`,
- * `scope` from `cacheHints`); the substrate does not populate them yet, but
- * the slot exists so a custom store written today persists them once #39
- * lands without a signature change.
+ * re-derive only when it changes. `expiresAt` (absolute ms epoch, `now +
+ * ttlMs`) and `scope` are the client-computed freshness metadata; the store
+ * MUST persist them and hand them back on `get` so the read path can decide
+ * freshness and gate the shared-partition fallback on `scope === 'public'`.
  */
 export interface CacheEntry {
     readonly value: unknown;
@@ -63,17 +76,16 @@ export interface CacheEntry {
  * in-memory default stays synchronous (plain values are valid under
  * `MaybePromise`). The `Client` `await`s every call site.
  *
- * **A store instance MUST NOT be shared across `Client` instances at
- * all in v2.0.x.** Entries are keyed by method + params only (the
- * `Client` never populates `partition` today), so two clients
- * connected to different servers — even under the same credential — collide on
- * `tools/list` (server-identity confusion); a `list_changed` from one server
- * evicts every co-tenant's entry; and one client reconnecting drops the
- * derived indices that read the shared store. The `Client`
- * constructor always allocates a fresh {@linkcode InMemoryResponseCacheStore}
- * when `responseCacheStore` is not supplied; pass your own only as a
- * single-client backing store. Per-principal partitioning that enables safe
- * sharing arrives with the full #39 `cacheHints` engine.
+ * Entries are keyed by `{method, params, partition}` where `partition` is the
+ * `Client`-derived `[serverIdentity, principal]` JSON pair, so one store
+ * instance is safe to share across `Client` instances connected to different
+ * servers and/or principals: writes from distinct connections never collide,
+ * the shared-partition read fallback is gated on the stored
+ * `scope === 'public'`, and `list_changed` / `HEADER_MISMATCH` evictions are
+ * scoped to the connected server's two partitions — co-tenants on a shared
+ * store are unaffected. The `Client` constructor still allocates a fresh
+ * {@linkcode InMemoryResponseCacheStore} per instance by default; supply your
+ * own to share or persist.
  */
 export interface ResponseCacheStore {
     get(key: CacheKey): MaybePromise<CacheEntry | undefined>;
@@ -81,52 +93,163 @@ export interface ResponseCacheStore {
      * Writes `entry` under `key` and returns the store-generated stamp the
      * resulting {@linkcode CacheEntry} carries. The store owns the stamp
      * counter; callers do not supply one. The caller owns `expiresAt` and
-     * `scope` (the client-computed freshness metadata; not yet populated by
-     * the substrate — #39 wires them); the store MUST persist them and hand
-     * them back on `get`.
+     * `scope` (the client-computed freshness metadata); the store MUST persist
+     * them and hand them back on `get`.
      */
     set(key: CacheKey, entry: { value: unknown; expiresAt?: number; scope?: CacheScope }): MaybePromise<number>;
-    /** Drop every entry for `method` (the `list_changed` invalidation). */
+    /**
+     * Drop the single entry under `key` (no-op if absent). Called for both
+     * `notifications/resources/updated` (per-URI) and the `list_changed`
+     * notifications (the list singletons live at `{method, params: '', partition}`).
+     */
+    delete(key: CacheKey): MaybePromise<void>;
+    /**
+     * Drop every entry for `method` across every partition. The `Client` does
+     * NOT call this (its `list_changed` path issues two partition-scoped
+     * `delete()` calls so co-tenants on a shared store keep their entries);
+     * kept on the interface for callers that want a method-wide bulk-clear.
+     */
     evict(method: string): MaybePromise<void>;
     /** Drop every entry (connection reset). */
     clear(): MaybePromise<void>;
 }
 
+/** Options for {@linkcode InMemoryResponseCacheStore}. */
+export interface InMemoryResponseCacheStoreOptions {
+    /**
+     * Maximum number of held `resources/read` entries (the only
+     * unbounded-keyspace method). When inserting a new `resources/read` key
+     * would exceed this, the oldest such entry (by insertion order) is
+     * evicted first. The list-singleton methods (`tools/list`,
+     * `prompts/list`, `resources/list`, `resources/templates/list`,
+     * `server/discover`) are **exempt** — they hold at most one entry per
+     * partition and back the `tools/list`-derived index, so an unbounded URI
+     * working set never displaces them. The default of `512` bounds growth on
+     * a long-lived client against template-expanded URIs. `0` disables the
+     * bound.
+     */
+    maxEntries?: number;
+}
+
+/**
+ * Methods whose entries are exempt from the
+ * {@linkcode InMemoryResponseCacheStoreOptions.maxEntries} cap. Each holds at
+ * most one entry per partition (a small bounded set) and the
+ * `tools/list`-derived index depends on its entry surviving regardless of the
+ * `resources/read` working-set size. Only `resources/read` keys count toward
+ * the cap and are eligible for FIFO eviction.
+ */
+const CAP_EXEMPT_METHODS: ReadonlySet<string> = new Set([
+    'tools/list',
+    'prompts/list',
+    'resources/list',
+    'resources/templates/list',
+    'server/discover'
+]);
+
 /**
- * In-memory default. Unbounded — the four list ops write at most one entry
- * each, so a bound is not yet useful; the LRU cap arrives with `resources/read`
- * caching in #39.
+ * In-memory default. Bounded by an insertion-ordered size cap (default `512`;
+ * see {@linkcode InMemoryResponseCacheStoreOptions.maxEntries}) on the
+ * `resources/read` keyspace so an unbounded stream of distinct URIs cannot
+ * grow it without limit; the list-singleton methods are exempt and never
+ * evicted by the cap. `Map` preserves insertion order, so the oldest live
+ * capped key is the first matching iteration entry.
  */
 export class InMemoryResponseCacheStore implements ResponseCacheStore {
     private readonly _entries = new Map<string, CacheEntry>();
+    private readonly _maxEntries: number;
     private _stamp = 0;
+    /** Count of held entries that are subject to the cap (i.e. not in {@linkcode CAP_EXEMPT_METHODS}). */
+    private _cappedSize = 0;
+
+    constructor(options?: InMemoryResponseCacheStoreOptions) {
+        this._maxEntries = options?.maxEntries ?? 512;
+    }
+
+    /** Number of held entries (for diagnostics / bounding tests). */
+    get size(): number {
+        return this._entries.size;
+    }
 
     get(key: CacheKey): CacheEntry | undefined {
         return this._entries.get(keyOf(key));
     }
 
     set(key: CacheKey, entry: { value: unknown; expiresAt?: number; scope?: CacheScope }): number {
+        const k = keyOf(key);
+        const exempt = CAP_EXEMPT_METHODS.has(key.method);
+        const isNew = !this._entries.has(k);
+        // Evict the oldest CAPPED entry first when adding a NEW capped key
+        // would exceed the cap (re-set of an existing key never evicts; an
+        // exempt-method write never evicts). `Map` iteration order is
+        // insertion order, so the first non-exempt key is the oldest one.
+        if (!exempt && isNew && this._maxEntries > 0 && this._cappedSize >= this._maxEntries) {
+            for (const oldKey of this._entries.keys()) {
+                if (!CAP_EXEMPT_METHODS.has(oldKey.slice(0, oldKey.indexOf('\0')))) {
+                    this._entries.delete(oldKey);
+                    this._cappedSize--;
+                    break;
+                }
+            }
+        }
         const stamp = ++this._stamp;
-        this._entries.set(keyOf(key), { ...entry, stamp });
+        this._entries.set(k, { ...entry, stamp });
+        if (isNew && !exempt) this._cappedSize++;
         return stamp;
     }
 
+    delete(key: CacheKey): void {
+        if (this._entries.delete(keyOf(key)) && !CAP_EXEMPT_METHODS.has(key.method)) this._cappedSize--;
+    }
+
     evict(method: string): void {
         const prefix = `${method}\0`;
+        const exempt = CAP_EXEMPT_METHODS.has(method);
         for (const k of this._entries.keys()) {
-            if (k.startsWith(prefix)) this._entries.delete(k);
+            if (k.startsWith(prefix)) {
+                this._entries.delete(k);
+                if (!exempt) this._cappedSize--;
+            }
         }
     }
 
     clear(): void {
         this._entries.clear();
+        this._cappedSize = 0;
     }
 }
 
+/**
+ * Serialize a {@linkcode CacheKey} for the in-memory map. `method` is always
+ * an SDK-set MCP method string (never contains a NUL), so the `\0` prefix
+ * delimiter is safe and lets {@linkcode InMemoryResponseCacheStore.evict} do a
+ * cheap prefix scan. `partition` (already a JSON-encoded
+ * `[serverIdentity, principal]` pair) and `params` (a resource URI on the
+ * `resources/read` path — caller-controlled) are JSON-array-encoded together,
+ * which is collision-free regardless of any NUL or delimiter characters they
+ * carry.
+ */
 function keyOf(key: CacheKey): string {
-    return `${key.method}\0${key.partition ?? ''}\0${key.params ?? ''}`;
+    return `${key.method}\0${JSON.stringify([key.partition ?? '', key.params ?? ''])}`;
+}
+
+/**
+ * Serialize a `{method, params}` pair for the eviction-generation map. The
+ * list singletons key on `method` alone (their {@linkcode ClientResponseCache.evict}
+ * is whole-method); `resources/read` keys on `` `${method}\0${uri}` `` so
+ * {@linkcode ClientResponseCache.evictKey} bumps a per-URI counter.
+ */
+function genKey(method: string, params?: string): string {
+    return params === undefined ? method : `${method}\0${params}`;
 }
 
+/**
+ * Upper bound on the server-supplied `ttlMs` honoured by
+ * {@linkcode ClientResponseCache} (24h). A server cannot pin an entry
+ * indefinitely.
+ */
+export const MAX_CACHE_TTL_MS = 86_400_000;
+
 /**
  * The `Client`'s cache-coordination collaborator.
  *
@@ -142,11 +265,21 @@ function keyOf(key: CacheKey): string {
  */
 export class ClientResponseCache {
     /**
-     * Per-method eviction-generation counter. {@linkcode evict} bumps it before
-     * touching the store; {@linkcode captureGeneration} reads it before a list
-     * walk's page 1; {@linkcode write} skips when it moved — so a
-     * `list_changed` arriving mid-walk is not overwritten by the walk's stale
-     * aggregate.
+     * Per-logical-key eviction-generation counter. {@linkcode evict} (whole
+     * method) and {@linkcode evictKey} (single `{method, params}`) bump it
+     * before touching the store; {@linkcode captureGeneration} reads it before
+     * the request; {@linkcode write} skips when it moved — so a `list_changed`
+     * arriving mid-walk, or a `resources/updated` arriving while a
+     * `readResource()` for the same URI is in flight, is not overwritten by
+     * the in-flight request's stale write. The map key is `method` for the
+     * list singletons and `` `${method}\0${params}` `` for per-URI keys.
+     *
+     * Growth is bounded by keys the CLIENT has issued a `captureGeneration`
+     * for: {@linkcode captureGeneration} records the key (so an interleaved
+     * {@linkcode evictKey} sees there is an in-flight write to suppress);
+     * {@linkcode evictKey} only bumps a key that is already recorded — a
+     * server streaming `notifications/resources/updated` for URIs the client
+     * has never read therefore cannot grow this map.
      */
     private readonly _evictionGeneration = new Map<string, number>();
     /**
@@ -163,6 +296,16 @@ export class ClientResponseCache {
      * concrete type.
      */
     private _toolOutputValidatorIndex?: { stamp: number; byName: Map<string, unknown> };
+    /**
+     * The connected server's identity (`serverInfo.name@version`, or a
+     * transport-supplied surrogate). Set by the `Client` immediately after a
+     * successful connect; `''` is the pre-connect sentinel. Every storage
+     * partition is derived from this (see `_partitionFor`), so two
+     * clients sharing one store but connected to different servers never
+     * collide on `tools/list` and a server cannot read another server's
+     * `'public'` entries.
+     */
+    private _serverIdentity = '';
 
     constructor(
         private readonly _store: ResponseCacheStore,
@@ -178,24 +321,163 @@ export class ClientResponseCache {
          * fetched — the failure is reported here and the write resolves. The
          * `Client` wires this to `onerror`.
          */
-        private readonly _reportError: (error: unknown) => void = () => {}
+        private readonly _reportError: (error: unknown) => void = () => {},
+        /**
+         * The opaque per-principal identifier for this client (the
+         * `private`-scope storage slot within the connected server's
+         * namespace). `''` (the default) makes the `private` slot identical to
+         * the server's shared `public` slot — the safe single-tenant posture.
+         * See `_partitionFor`.
+         */
+        private readonly _cachePartition: string = '',
+        /**
+         * Clock seam (testing). The freshness check (`entry.expiresAt > now()`)
+         * and the `expiresAt = now() + ttlMs` stamp both read it via
+         * {@linkcode now}. Default `Date.now`.
+         */
+        private readonly _now: () => number = Date.now
     ) {}
 
+    /** The clock used for every freshness computation and check. */
+    now(): number {
+        return this._now();
+    }
+
+    /**
+     * Record the connected server's identity. Called by `Client` immediately
+     * after a successful connect (`serverInfo.name@version`, or the
+     * transport's `sessionId` when `serverInfo` is unavailable). Every
+     * partition derived after this call is scoped to this identity; entries
+     * written under the pre-connect `''` sentinel are no longer reachable.
+     */
+    setServerIdentity(identity: string): void {
+        this._serverIdentity = identity;
+    }
+
+    /**
+     * Derive the storage partition for `scope`. The encoding is
+     * `JSON.stringify([serverIdentity, principal])` — JSON escaping makes it
+     * collision-free by construction: a malicious server cannot craft a
+     * `serverInfo.name`/`version` whose concatenated form bleeds into another
+     * server's namespace or another principal's slot, regardless of `@` / `|`
+     * / `"` / NUL in the server-controlled strings. `'public'` →
+     * `[serverIdentity, '']` (shared within this server); `'private'` →
+     * `[serverIdentity, cachePartition]`. When `cachePartition` is `''` the
+     * two coincide.
+     */
+    private _partitionFor(scope: CacheScope): string {
+        return JSON.stringify([this._serverIdentity, scope === 'public' ? '' : this._cachePartition]);
+    }
+
+    /**
+     * Two-probe lookup: this client's own (private) partition first, then the
+     * connected server's shared (public) partition. The shared probe is gated
+     * on `entry.scope === 'public'` — a co-tenant client that omits
+     * `cachePartition` writes its `'private'`-scoped entries at the public
+     * partition, and serving those to a correctly-partitioned client would
+     * leak private bodies (mcp.d's `cachedEntry` two-probe order; the scope
+     * gate is defence-in-depth on top of the partition split). When
+     * `cachePartition` is `''` the two partitions are identical and only one
+     * probe is issued.
+     */
+    private async _probe(method: string, params?: string): Promise<CacheEntry | undefined> {
+        const key = { method, params: params ?? '' };
+        const ownPartition = this._partitionFor('private');
+        const own = await this._store.get({ ...key, partition: ownPartition });
+        if (own !== undefined) return own;
+        const sharedPartition = this._partitionFor('public');
+        if (sharedPartition === ownPartition) return undefined;
+        const shared = await this._store.get({ ...key, partition: sharedPartition });
+        return shared?.scope === 'public' ? shared : undefined;
+    }
+
     /**
      * Bump the per-method generation (so an in-flight {@linkcode write} for the
-     * same method becomes a no-op) and evict the store entry. The generation
-     * bump is unconditional and FIRST — the {@linkcode write} race guard relies
-     * on the bump, not on the store's evict completing. A custom store's
-     * `evict()` may throw or reject; the caller routes that to `onerror`.
+     * same method becomes a no-op) and drop the connected server's two list
+     * singletons (own + shared partition; `params: ''`). The generation bump
+     * is unconditional and FIRST — the {@linkcode write} race guard relies on
+     * the bump, not on the store's deletes completing.
+     *
+     * Eviction is scoped to this client's `[serverIdentity, principal]`
+     * partitions (mirroring {@linkcode evictKey}) — the method-wide
+     * `store.evict()` is NOT called, so on a shared store one server's
+     * `list_changed` cannot wipe a co-tenant's entry. A custom store's
+     * `delete()` may throw or reject; each partition is guarded
+     * independently so a failure on one does not skip the other, the failure
+     * is reported via the constructor's sink, and the call resolves so
+     * dispatch proceeds.
      */
     async evict(method: string): Promise<void> {
         this._evictionGeneration.set(method, (this._evictionGeneration.get(method) ?? 0) + 1);
-        await this._store.evict(method);
+        const ownPartition = this._partitionFor('private');
+        const sharedPartition = this._partitionFor('public');
+        try {
+            await this._store.delete({ method, params: '', partition: ownPartition });
+        } catch (error) {
+            this._reportError(error);
+        }
+        if (sharedPartition !== ownPartition) {
+            try {
+                await this._store.delete({ method, params: '', partition: sharedPartition });
+            } catch (error) {
+                this._reportError(error);
+            }
+        }
+    }
+
+    /**
+     * Drop the single logical entry `{method, params}` from BOTH the private
+     * and public partitions for this client's connected server (mcp.d's
+     * `invalidateLogical`). Used for `notifications/resources/updated`'s
+     * per-URI eviction. The per-key generation is bumped FIRST (so an
+     * in-flight {@linkcode write} for the same `{method, params}` becomes a
+     * no-op and cannot re-cache the now-stale body) but only when the key was
+     * already recorded by {@linkcode captureGeneration} — bounding the map to
+     * keys the client has actually read. A custom store's `delete()` may
+     * throw or reject; each partition's delete is guarded independently so a
+     * failure on one does not skip the other, and the call resolves so
+     * dispatch proceeds.
+     */
+    async evictKey(method: string, params: string): Promise<void> {
+        const gk = genKey(method, params);
+        // Only bump a key the client has actually captured: if no entry is
+        // present there is no in-flight write to suppress, and an
+        // unconditional bump would let a server streaming distinct-URI
+        // `resources/updated` notifications grow this map without bound. The
+        // store deletes still run regardless (a previously-written entry may
+        // be held even when the generation entry has since been cleared by
+        // `resetForReconnect`).
+        const current = this._evictionGeneration.get(gk);
+        if (current !== undefined) this._evictionGeneration.set(gk, current + 1);
+        const ownPartition = this._partitionFor('private');
+        const sharedPartition = this._partitionFor('public');
+        try {
+            await this._store.delete({ method, params, partition: ownPartition });
+        } catch (error) {
+            this._reportError(error);
+        }
+        if (sharedPartition !== ownPartition) {
+            try {
+                await this._store.delete({ method, params, partition: sharedPartition });
+            } catch (error) {
+                this._reportError(error);
+            }
+        }
     }
 
-    /** Snapshot the eviction generation for `method` before a list walk's page 1. */
-    captureGeneration(method: string): number {
-        return this._evictionGeneration.get(method) ?? 0;
+    /**
+     * Snapshot the eviction generation for `{method, params}` before issuing
+     * the request (a list walk's page 1, or a `resources/read` for `params`).
+     * Records the key so an interleaved {@linkcode evictKey} for the same
+     * `{method, params}` knows there is an in-flight write to suppress and
+     * bumps; without the record, `evictKey`'s recorded-only bump would skip
+     * and the stale body would be cached.
+     */
+    captureGeneration(method: string, params?: string): number {
+        const gk = genKey(method, params);
+        const current = this._evictionGeneration.get(gk) ?? 0;
+        this._evictionGeneration.set(gk, current);
+        return current;
     }
 
     /**
@@ -212,19 +494,67 @@ export class ClientResponseCache {
      * `reportError` sink and the write resolves — cache bookkeeping never
      * costs the caller a result it already fetched (consistent with the
      * eviction path).
+     *
+     * `freshness` carries the client-computed `expiresAt` (absolute ms epoch,
+     * `now + ttlMs`) and the server-reported `cacheScope`. The storage
+     * `partition` is derived from the scope via `_partitionFor`:
+     * `'public'` → `[serverIdentity, '']` (shared within this server);
+     * `'private'` → `[serverIdentity, cachePartition]` (so a shared store
+     * never serves a private entry to another identity). Absent `freshness`
+     * preserves the substrate write (no `expiresAt`, private partition) — the
+     * `tools/list` retain-for-schema posture: never served by
+     * {@linkcode read}'s freshness gate, always readable by
+     * {@linkcode toolDefinition}.
+     *
+     * After storing under the derived partition, the same `{method, params}`
+     * is deleted from the OPPOSITE partition (mirroring {@linkcode evictKey}'s
+     * two-partition posture). A server that flips a result's `cacheScope` for
+     * the same key would otherwise leave the previous entry in the other slot
+     * — and since `_probe` checks own-partition first, a stale private entry
+     * would shadow the fresh public one (or a stale public entry would keep
+     * serving co-tenants). Both store calls are independently guarded so a
+     * custom store's failure on one does not skip the other.
      */
-    async write(method: string, value: unknown, capturedGen: number): Promise<void> {
-        if ((this._evictionGeneration.get(method) ?? 0) !== capturedGen) return;
+    async write(
+        method: string,
+        value: unknown,
+        capturedGen: number,
+        freshness?: { expiresAt: number; scope: CacheScope; params?: string }
+    ): Promise<void> {
+        if ((this._evictionGeneration.get(genKey(method, freshness?.params)) ?? 0) !== capturedGen) return;
+        const params = freshness?.params ?? '';
+        const ownPartition = this._partitionFor('private');
+        const sharedPartition = this._partitionFor('public');
+        const partition = (freshness?.scope ?? 'private') === 'public' ? sharedPartition : ownPartition;
         try {
-            await this._store.set({ method }, { value: structuredClone(value) });
+            await this._store.set(
+                { method, params, partition },
+                { value: structuredClone(value), expiresAt: freshness?.expiresAt, scope: freshness?.scope }
+            );
         } catch (error) {
             this._reportError(error);
         }
+        if (sharedPartition !== ownPartition) {
+            try {
+                await this._store.delete({
+                    method,
+                    params,
+                    partition: partition === ownPartition ? sharedPartition : ownPartition
+                });
+            } catch (error) {
+                this._reportError(error);
+            }
+        }
     }
 
-    /** Read the cached entry for `{method}` (the four list verbs). */
-    async read(method: string): Promise<CacheEntry | undefined> {
-        return this._store.get({ method });
+    /**
+     * Read the cached entry for `{method, params}` via the two-probe lookup
+     * (own-partition then this server's shared partition, gated on
+     * `scope === 'public'`). The caller owns the freshness check
+     * (`entry.expiresAt > now()`); a missing `expiresAt` is never fresh.
+     */
+    async read(method: string, params?: string): Promise<CacheEntry | undefined> {
+        return this._probe(method, params);
     }
 
     /**
@@ -233,15 +563,16 @@ export class ClientResponseCache {
      * the only reason to supply one. The generation map and every derived
      * index are dropped regardless: they are connection-scoped even when the
      * backing store survives, so the next read re-derives from whatever the
-     * store still holds. The default impl is synchronous, so the
-     * `MaybePromise<void>` return is a plain void here and the caller need not
-     * await.
+     * store still holds. The server identity returns to the pre-connect
+     * sentinel. The default impl is synchronous, so the `MaybePromise<void>`
+     * return is a plain void here and the caller need not await.
      */
     resetForReconnect(): void {
         if (!this._isUserSupplied) void this._store.clear();
         this._evictionGeneration.clear();
         this._toolIndex = undefined;
         this._toolOutputValidatorIndex = undefined;
+        this._serverIdentity = '';
     }
 
     /**
@@ -255,7 +586,7 @@ export class ClientResponseCache {
      * and, via {@linkcode outputValidator}, its output-schema validation.
      */
     async toolDefinition(name: string): Promise<Tool | undefined> {
-        const entry = await this._store.get({ method: 'tools/list' });
+        const entry = await this._probe('tools/list');
         if (entry === undefined) {
             this._toolIndex = undefined;
             return undefined;
@@ -286,7 +617,7 @@ export class ClientResponseCache {
      * index simply omits it.
      */
     async outputValidator<V>(name: string, compile: (tool: Tool) => V | undefined): Promise<V | undefined> {
-        const entry = await this._store.get({ method: 'tools/list' });
+        const entry = await this._probe('tools/list');
         if (entry === undefined) {
             this._toolOutputValidatorIndex = undefined;
             return undefined;
diff --git a/packages/client/src/index.ts b/packages/client/src/index.ts
index 694084598d..05dfa807a6 100644
--- a/packages/client/src/index.ts
+++ b/packages/client/src/index.ts
@@ -52,15 +52,23 @@ export {
     PrivateKeyJwtProvider,
     StaticPrivateKeyJwtProvider
 } from './client/authExtensions.js';
-export type { CallToolRequestOptions, ClientOptions, McpSubscription } from './client/client.js';
+export type { CacheableRequestOptions, CallToolRequestOptions, ClientOptions, McpSubscription } from './client/client.js';
 export { Client } from './client/client.js';
 export { getSupportedElicitationModes } from './client/client.js';
 export type { DiscoverAndRequestJwtAuthGrantOptions, JwtAuthGrantResult, RequestJwtAuthGrantOptions } from './client/crossAppAccess.js';
 export { discoverAndRequestJwtAuthGrant, exchangeJwtAuthGrant, requestJwtAuthorizationGrant } from './client/crossAppAccess.js';
 export type { LoggingOptions, Middleware, RequestLogger } from './client/middleware.js';
 export { applyMiddlewares, createMiddleware, withLogging, withOAuth } from './client/middleware.js';
-export type { CacheEntry, CacheKey, CacheScope, MaybePromise, ResponseCacheStore } from './client/responseCache.js';
-export { InMemoryResponseCacheStore } from './client/responseCache.js';
+export type {
+    CacheEntry,
+    CacheKey,
+    CacheMode,
+    CacheScope,
+    InMemoryResponseCacheStoreOptions,
+    MaybePromise,
+    ResponseCacheStore
+} from './client/responseCache.js';
+export { InMemoryResponseCacheStore, MAX_CACHE_TTL_MS } from './client/responseCache.js';
 export type { SSEClientTransportOptions } from './client/sse.js';
 export { SSEClientTransport, SseError } from './client/sse.js';
 export type { VersionNegotiationMode, VersionNegotiationOptions, VersionNegotiationProbeOptions } from './client/versionNegotiation.js';
diff --git a/packages/client/test/client/listen.test.ts b/packages/client/test/client/listen.test.ts
index 2476325c23..855dbc14b8 100644
--- a/packages/client/test/client/listen.test.ts
+++ b/packages/client/test/client/listen.test.ts
@@ -379,6 +379,32 @@ describe('Client.listen()', () => {
         await client.close();
     });
 
+    it('options.signal already aborted: listen() rejects with SdkError(RequestTimeout) before any setup (parity with request())', async () => {
+        const { clientTx, written } = await scriptedModern();
+        const client = new Client({ name: 'c', version: '1' }, { versionNegotiation: { mode: 'auto' } });
+        await client.connect(clientTx);
+        written.length = 0;
+        const ac = new AbortController();
+        ac.abort('user cancelled');
+        const error = await client.listen({ toolsListChanged: true }, { signal: ac.signal }).catch(e => e as SdkError);
+        // Same wrap as `Protocol.request()` / `_serveFromCache`: a non-SdkError
+        // reason is wrapped as RequestTimeout; the reason text is preserved.
+        expect(error).toBeInstanceOf(SdkError);
+        expect((error as SdkError).code).toBe(SdkErrorCode.RequestTimeout);
+        expect((error as SdkError).message).toContain('user cancelled');
+        // No subscriptions/listen reached the wire; no listen state registered.
+        await flush();
+        expect(written.find(m => (m as { method?: string }).method === 'subscriptions/listen')).toBeUndefined();
+        expect((client as unknown as { _listenState: Map<unknown, unknown> })._listenState.size).toBe(0);
+        // An SdkError reason is preserved verbatim (not double-wrapped).
+        const ac2 = new AbortController();
+        const own = new SdkError(SdkErrorCode.NotConnected, 'upstream');
+        ac2.abort(own);
+        const error2 = await client.listen({ toolsListChanged: true }, { signal: ac2.signal }).catch(e => e as SdkError);
+        expect(error2).toBe(own);
+        await client.close();
+    });
+
     it('options.signal aborted while opening: listen() rejects fast with the signal reason', async () => {
         const [clientTx, serverTx] = InMemoryTransport.createLinkedPair();
         const written: JSONRPCMessage[] = [];
diff --git a/packages/client/test/client/mcpParamMirroring.test.ts b/packages/client/test/client/mcpParamMirroring.test.ts
index 424e0d5289..1d8752d469 100644
--- a/packages/client/test/client/mcpParamMirroring.test.ts
+++ b/packages/client/test/client/mcpParamMirroring.test.ts
@@ -16,6 +16,8 @@ import { InMemoryResponseCacheStore, type ResponseCacheStore } from '../../src/c
 import { StreamableHTTPClientTransport } from '../../src/client/streamableHttp.js';
 
 const MODERN = '2026-07-28';
+/** Partition the `Client` derives for the scripted server (`serverInfo.name@version`, default `cachePartition`). */
+const PART = JSON.stringify(['scripted@1.0.0', '']);
 
 const REGION_TOOL: Tool = {
     name: 'route',
@@ -125,7 +127,9 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => {
         const { tools } = await client.listTools();
         expect(tools.map(t => t.name)).toEqual(['route']);
         expect(warn).toHaveBeenCalledWith(expect.stringContaining("excluding tool 'broken'"));
-        expect((store.get({ method: 'tools/list' })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual(['route']);
+        expect((store.get({ method: 'tools/list', partition: PART })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual([
+            'route'
+        ]);
         // The explicit-cursor per-page path is filtered too (the spec's MUST
         // has no carve-out for paginated reads).
         const page = await client.listTools({ cursor: '0' });
@@ -161,18 +165,78 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => {
         const { clientTx, callHeaders, listCount } = await scriptedModernServer([[REGION_TOOL]], /* rejectFirstCall */ true);
         const client = modernClient(store);
         await client.connect(clientTx);
-        // Seed a STALE entry (no declarations) so callTool reads it and the
-        // first send carries no param headers — server rejects
+        // Seed a STALE entry at the connected-server partition (a STALE
+        // declaration on `region`) so callTool reads IT and the first send
+        // carries the stale `Mcp-Param-Stale-Region` header — server rejects
         // HEADER_MISMATCH, client evicts, refetches via listTools()
-        // (the live REGION_TOOL), and retries with the headers.
-        store.set({ method: 'tools/list' }, { value: { tools: [{ name: 'route', inputSchema: { type: 'object', properties: {} } }] } });
+        // (the live REGION_TOOL), and retries with the correct header.
+        store.set(
+            { method: 'tools/list', partition: PART },
+            {
+                value: {
+                    tools: [
+                        {
+                            name: 'route',
+                            inputSchema: { type: 'object', properties: { region: { type: 'string', 'x-mcp-header': 'Stale-Region' } } }
+                        }
+                    ]
+                }
+            }
+        );
 
         const result = await client.callTool({ name: 'route', arguments: { region: 'ap' } });
         expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' });
         expect(listCount()).toBe(1);
-        expect(callHeaders).toEqual([undefined, { 'Mcp-Param-Region': 'ap' }]);
+        // First send mirrored the SEEDED stale declaration (proves the
+        // stale-cache read path, not cold-cache); retry mirrored the live one.
+        expect(callHeaders).toEqual([{ 'Mcp-Param-Stale-Region': 'ap' }, { 'Mcp-Param-Region': 'ap' }]);
         // The recovery refetch wrote a fresh cache entry (REGION_TOOL, with the declaration).
-        expect((store.get({ method: 'tools/list' })?.value as { tools: Tool[] }).tools[0]?.inputSchema.properties).toHaveProperty('region');
+        expect(
+            (store.get({ method: 'tools/list', partition: PART })?.value as { tools: Tool[] }).tools[0]?.inputSchema.properties
+        ).toHaveProperty('region');
+    });
+
+    it("HEADER_MISMATCH recovery refetch reaches the wire even when the store's delete() no-ops (cacheMode:'refresh' bypasses the stale entry)", async () => {
+        // A custom store whose `delete()` is a no-op (or rejects) leaves the
+        // stale `tools/list` entry in place after `evict()`. The recovery
+        // refetch must NOT be cache-served that stale entry — it carries
+        // `cacheMode: 'refresh'` so it always reaches the wire and overwrites.
+        const store = new InMemoryResponseCacheStore();
+        (store as ResponseCacheStore).delete = () => undefined;
+        const { clientTx, callHeaders, listCount } = await scriptedModernServer([[REGION_TOOL]], /* rejectFirstCall */ true);
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        // Seed a STALE-and-fresh entry (the declaration mirrors as
+        // `Stale-Region`; expiresAt in the future so a default-mode
+        // `listTools()` WOULD serve it if not for `'refresh'`).
+        store.set(
+            { method: 'tools/list', partition: PART },
+            {
+                value: {
+                    tools: [
+                        {
+                            name: 'route',
+                            inputSchema: { type: 'object', properties: { region: { type: 'string', 'x-mcp-header': 'Stale-Region' } } }
+                        }
+                    ]
+                },
+                expiresAt: Date.now() + 60_000,
+                scope: 'public'
+            }
+        );
+
+        const result = await client.callTool({ name: 'route', arguments: { region: 'ap' } });
+        expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' });
+        // The refetch hit the wire (delete() no-op did NOT short-circuit it
+        // into a cache serve of the stale seed).
+        expect(listCount()).toBe(1);
+        // Retry mirrored the LIVE declaration, not the stale seed.
+        expect(callHeaders).toEqual([{ 'Mcp-Param-Stale-Region': 'ap' }, { 'Mcp-Param-Region': 'ap' }]);
+        // The refetch's write overwrote the stale entry (the no-op delete
+        // never dropped it; the `'refresh'` write replaced it).
+        expect(
+            (store.get({ method: 'tools/list', partition: PART })?.value as { tools: Tool[] }).tools[0]?.inputSchema.properties
+        ).toHaveProperty(['region', 'x-mcp-header'], 'Region');
     });
 
     it('callTool() with a cold cache issues NO tools/list and sends without Mcp-Param-* headers (cache reads only)', async () => {
@@ -233,15 +297,32 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => {
         const { clientTx, callHeaders, listCount } = await scriptedModernServer([[PAGE1], [REGION_TOOL]], /* rejectFirstCall */ true);
         const client = modernClient(store);
         await client.connect(clientTx);
-        // Seed a STALE entry so the first send goes without headers; the
-        // recovery refetch (via listTools()) then walks BOTH pages.
-        store.set({ method: 'tools/list' }, { value: { tools: [PAGE1] } });
+        // Seed a STALE entry at the connected-server partition (one stale
+        // page; `route` carries a STALE declaration) so callTool reads it,
+        // mirrors the stale header on the first send, and the recovery
+        // refetch (via listTools()) then walks BOTH live pages.
+        store.set(
+            { method: 'tools/list', partition: PART },
+            {
+                value: {
+                    tools: [
+                        PAGE1,
+                        {
+                            name: 'route',
+                            inputSchema: { type: 'object', properties: { region: { type: 'string', 'x-mcp-header': 'Stale-Region' } } }
+                        }
+                    ]
+                }
+            }
+        );
 
         const result = await client.callTool({ name: 'route', arguments: { region: 'us-west1' } });
         expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' });
         // The recovery refetch walked both pages.
         expect(listCount()).toBe(2);
-        expect(callHeaders).toEqual([undefined, { 'Mcp-Param-Region': 'us-west1' }]);
+        // First send mirrored the SEEDED stale declaration (proves the
+        // stale-cache read path); retry mirrored the live page-2 declaration.
+        expect(callHeaders).toEqual([{ 'Mcp-Param-Stale-Region': 'us-west1' }, { 'Mcp-Param-Region': 'us-west1' }]);
         // A follow-up call still mirrors from the cached entry (no extra list).
         await client.callTool({ name: 'route', arguments: { region: 'eu' } });
         expect(callHeaders[2]).toEqual({ 'Mcp-Param-Region': 'eu' });
@@ -253,12 +334,16 @@ describe('SEP-2243 Mcp-Param-* mirroring (modern era)', () => {
         const { clientTx, serverTx, callHeaders, listCount } = await scriptedModernServer([[REGION_TOOL]]);
         const client = modernClient(store);
         await client.connect(clientTx);
-        // Seed a STALE entry (no declarations); list_changed evicts it; the
-        // next callTool reads cold and sends without headers — callTool
-        // never refetches on its own.
-        store.set({ method: 'tools/list' }, { value: { tools: [{ name: 'route', inputSchema: { type: 'object', properties: {} } }] } });
+        // Seed a STALE entry at the connected-server partition; list_changed
+        // evicts it (partition-scoped delete); the next callTool reads cold
+        // and sends without headers — callTool never refetches on its own.
+        store.set(
+            { method: 'tools/list', partition: PART },
+            { value: { tools: [{ name: 'route', inputSchema: { type: 'object', properties: {} } }] } }
+        );
+        expect(store.get({ method: 'tools/list', partition: PART })).toBeDefined();
         await serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage);
-        expect(store.get({ method: 'tools/list' })).toBeUndefined();
+        expect(store.get({ method: 'tools/list', partition: PART })).toBeUndefined();
 
         const result = await client.callTool({ name: 'route', arguments: { region: 'us' } });
         expect(result.content?.[0]).toEqual({ type: 'text', text: 'ok' });
diff --git a/packages/client/test/client/responseCache.test.ts b/packages/client/test/client/responseCache.test.ts
index 127be04bf8..c10cb521a3 100644
--- a/packages/client/test/client/responseCache.test.ts
+++ b/packages/client/test/client/responseCache.test.ts
@@ -12,7 +12,7 @@ import { InMemoryTransport, SdkError, SdkErrorCode } from '@modelcontextprotocol
 import { describe, expect, it } from 'vitest';
 
 import { Client } from '../../src/client/client.js';
-import type { ResponseCacheStore } from '../../src/client/responseCache.js';
+import type { CacheEntry, ResponseCacheStore } from '../../src/client/responseCache.js';
 import { ClientResponseCache, InMemoryResponseCacheStore } from '../../src/client/responseCache.js';
 
 const MODERN = '2026-07-28';
@@ -20,6 +20,16 @@ const MODERN = '2026-07-28';
 const TOOL_A: Tool = { name: 'a', inputSchema: { type: 'object', properties: {} } };
 const TOOL_B: Tool = { name: 'b', inputSchema: { type: 'object', properties: {} } };
 
+/**
+ * Partition the `Client` derives for the scripted server (`serverInfo:
+ * {name:'scripted', version:'1.0.0'}`) and `principal` (default `''` ⇒ the
+ * server's shared/public slot). The encoding is the same JSON-array form
+ * `ClientResponseCache._partitionFor` produces.
+ */
+const part = (principal = '', serverIdentity = 'scripted@1.0.0'): string => JSON.stringify([serverIdentity, principal]);
+/** The pre-connect / direct-`ClientResponseCache` sentinel partition (`['', '']`). */
+const PRE = JSON.stringify(['', '']);
+
 describe('InMemoryResponseCacheStore', () => {
     it('get/set/evict/clear round-trip; evict is method-scoped; set returns the store-generated stamp', () => {
         const store = new InMemoryResponseCacheStore();
@@ -30,7 +40,7 @@ describe('InMemoryResponseCacheStore', () => {
         expect(s2).toBeGreaterThan(s1);
         expect(s3).toBeGreaterThan(s2);
         expect(store.get({ method: 'tools/list' })).toEqual({ value: 1, stamp: s1 });
-        // Store persists caller-supplied freshness metadata (#39 wires population; the slot exists today).
+        // Store persists caller-supplied freshness metadata.
         expect(store.get({ method: 'resources/read', params: 'file:///a' })).toEqual({
             value: 3,
             stamp: s3,
@@ -46,19 +56,96 @@ describe('InMemoryResponseCacheStore', () => {
         expect(store.get({ method: 'prompts/list' })).toBeUndefined();
     });
 
-    it('partition is part of the key serialization (always empty today; #39 wires population)', () => {
+    it('partition is part of the key serialization; evict(method) is partition-agnostic', () => {
         const store = new InMemoryResponseCacheStore();
         store.set({ method: 'tools/list', partition: 'p1' }, { value: 'a' });
         store.set({ method: 'tools/list', partition: 'p2' }, { value: 'b' });
         expect(store.get({ method: 'tools/list', partition: 'p1' })?.value).toBe('a');
         expect(store.get({ method: 'tools/list', partition: 'p2' })?.value).toBe('b');
-        // The Client never populates partition today, so the default-partition slot is distinct.
+        // The default-partition slot is distinct.
         expect(store.get({ method: 'tools/list' })).toBeUndefined();
         // evict(method) is partition-agnostic.
         store.evict('tools/list');
         expect(store.get({ method: 'tools/list', partition: 'p1' })).toBeUndefined();
         expect(store.get({ method: 'tools/list', partition: 'p2' })).toBeUndefined();
     });
+
+    it('keyOf is collision-free for NUL / quote / delimiter characters in partition and params', () => {
+        const store = new InMemoryResponseCacheStore();
+        // A NUL in `partition` cannot smuggle into `params` (and vice versa) —
+        // the `[partition, params]` JSON-array encoding escapes every control
+        // and quote character.
+        store.set({ method: 'resources/read', partition: 'a\0b', params: 'c' }, { value: 1 });
+        store.set({ method: 'resources/read', partition: 'a', params: 'b\0c' }, { value: 2 });
+        expect(store.get({ method: 'resources/read', partition: 'a\0b', params: 'c' })?.value).toBe(1);
+        expect(store.get({ method: 'resources/read', partition: 'a', params: 'b\0c' })?.value).toBe(2);
+        // Same for the partition's own JSON-shaped content: the outer
+        // JSON.stringify escapes the inner quotes.
+        store.set({ method: 'tools/list', partition: '["x",""]' }, { value: 'real' });
+        store.set({ method: 'tools/list', partition: '["x","' }, { value: 'spoof' });
+        expect(store.get({ method: 'tools/list', partition: '["x",""]' })?.value).toBe('real');
+    });
+
+    it('maxEntries cap: oldest-first eviction; re-set of an existing key never evicts; 0 disables the bound', () => {
+        const small = new InMemoryResponseCacheStore({ maxEntries: 2 });
+        small.set({ method: 'resources/read', params: 'a' }, { value: 'a' });
+        small.set({ method: 'resources/read', params: 'b' }, { value: 'b' });
+        // Re-set of an existing key updates in place without consuming
+        // capacity (Map preserves the original insertion position).
+        small.set({ method: 'resources/read', params: 'a' }, { value: 'a2' });
+        expect(small.size).toBe(2);
+        expect(small.get({ method: 'resources/read', params: 'a' })?.value).toBe('a2');
+        // A NEW key at capacity evicts the oldest insertion ('a').
+        small.set({ method: 'resources/read', params: 'c' }, { value: 'c' });
+        expect(small.get({ method: 'resources/read', params: 'a' })).toBeUndefined();
+        expect(small.get({ method: 'resources/read', params: 'b' })?.value).toBe('b');
+        expect(small.get({ method: 'resources/read', params: 'c' })?.value).toBe('c');
+        expect(small.size).toBe(2);
+
+        // 0 disables the bound.
+        const unbounded = new InMemoryResponseCacheStore({ maxEntries: 0 });
+        for (let i = 0; i < 1000; i++) unbounded.set({ method: 'resources/read', params: String(i) }, { value: i });
+        expect(unbounded.size).toBe(1000);
+    });
+
+    it('maxEntries cap exempts list-singleton methods: a resources/read flood never evicts tools/list', () => {
+        const store = new InMemoryResponseCacheStore({ maxEntries: 3 });
+        // List singletons are exempt: never counted, never evicted by the cap.
+        store.set({ method: 'tools/list' }, { value: 'T' });
+        store.set({ method: 'prompts/list' }, { value: 'P' });
+        store.set({ method: 'resources/list' }, { value: 'R' });
+        store.set({ method: 'resources/templates/list' }, { value: 'RT' });
+        store.set({ method: 'server/discover' }, { value: 'D' });
+        // Five exempt entries already exceed maxEntries=3; a resources/read
+        // write does NOT evict any of them — the cap counts only non-exempt
+        // keys.
+        for (let i = 0; i < 5; i++) store.set({ method: 'resources/read', params: String(i) }, { value: i });
+        expect(store.get({ method: 'tools/list' })?.value).toBe('T');
+        expect(store.get({ method: 'prompts/list' })?.value).toBe('P');
+        expect(store.get({ method: 'resources/list' })?.value).toBe('R');
+        expect(store.get({ method: 'resources/templates/list' })?.value).toBe('RT');
+        expect(store.get({ method: 'server/discover' })?.value).toBe('D');
+        // Only 3 resources/read entries survive (oldest two evicted).
+        expect(store.get({ method: 'resources/read', params: '0' })).toBeUndefined();
+        expect(store.get({ method: 'resources/read', params: '1' })).toBeUndefined();
+        expect(store.get({ method: 'resources/read', params: '2' })?.value).toBe(2);
+        expect(store.get({ method: 'resources/read', params: '4' })?.value).toBe(4);
+        expect(store.size).toBe(8);
+        // An exempt-method write at capacity never evicts a resources/read entry.
+        store.set({ method: 'tools/list', partition: 'p2' }, { value: 'T2' });
+        expect(store.get({ method: 'resources/read', params: '2' })?.value).toBe(2);
+    });
+
+    it('delete(key) drops the single entry; no-op when absent', () => {
+        const store = new InMemoryResponseCacheStore();
+        store.set({ method: 'resources/read', params: 'a', partition: 'p' }, { value: 1 });
+        store.set({ method: 'resources/read', params: 'b', partition: 'p' }, { value: 2 });
+        store.delete({ method: 'resources/read', params: 'a', partition: 'p' });
+        expect(store.get({ method: 'resources/read', params: 'a', partition: 'p' })).toBeUndefined();
+        expect(store.get({ method: 'resources/read', params: 'b', partition: 'p' })?.value).toBe(2);
+        // Absent key: no-op.
+        store.delete({ method: 'resources/read', params: 'a', partition: 'p' });
+    });
 });
 
 describe('ClientResponseCache', () => {
@@ -69,11 +156,11 @@ describe('ClientResponseCache', () => {
         await cache.evict('tools/list');
         await cache.write('tools/list', { tools: [TOOL_A] }, gen);
         // Generation moved between capture and write → the stale aggregate is dropped.
-        expect(store.get({ method: 'tools/list' })).toBeUndefined();
+        expect(store.get({ method: 'tools/list', partition: PRE })).toBeUndefined();
         // A fresh capture after the evict writes through.
         const gen2 = cache.captureGeneration('tools/list');
         await cache.write('tools/list', { tools: [TOOL_A] }, gen2);
-        expect(store.get({ method: 'tools/list' })).toBeDefined();
+        expect(store.get({ method: 'tools/list', partition: PRE })).toBeDefined();
     });
 
     it('resetForReconnect: clears the default store, leaves a user-supplied store, ALWAYS drops generation + indices', async () => {
@@ -85,7 +172,7 @@ describe('ClientResponseCache', () => {
         await userCache.evict('prompts/list');
         expect(userCache.captureGeneration('prompts/list')).toBe(1);
         userCache.resetForReconnect();
-        expect(userStore.get({ method: 'tools/list' })).toBeDefined();
+        expect(userStore.get({ method: 'tools/list', partition: PRE })).toBeDefined();
         expect(userCache.captureGeneration('prompts/list')).toBe(0);
         // Index dropped → re-derived from the (still-populated) store on next read.
         expect((userCache as unknown as { _toolIndex?: unknown })._toolIndex).toBeUndefined();
@@ -96,7 +183,7 @@ describe('ClientResponseCache', () => {
         const defCache = new ClientResponseCache(defStore, false);
         await defCache.write('tools/list', { tools: [TOOL_A] }, defCache.captureGeneration('tools/list'));
         defCache.resetForReconnect();
-        expect(defStore.get({ method: 'tools/list' })).toBeUndefined();
+        expect(defStore.get({ method: 'tools/list', partition: PRE })).toBeUndefined();
         expect(await defCache.toolDefinition('a')).toBeUndefined();
     });
 
@@ -109,11 +196,85 @@ describe('ClientResponseCache', () => {
         value.tools.length = 0;
         // The cached entry is a structuredClone, so the store and the
         // stamp-memoized index are unaffected.
-        expect((store.get({ method: 'tools/list' })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual(['a', 'b']);
+        expect((store.get({ method: 'tools/list', partition: PRE })?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual([
+            'a',
+            'b'
+        ]);
         expect((await cache.toolDefinition('a'))?.name).toBe('a');
         expect((await cache.toolDefinition('b'))?.name).toBe('b');
     });
 
+    it('evictKey bumps the per-key generation so an in-flight write for the same {method, params} is suppressed', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const cache = new ClientResponseCache(store, false);
+        // Capture BEFORE the request; evictKey lands mid-flight; the stale
+        // write is dropped (mirrors the list_changed-during-walk guard,
+        // keyed per URI).
+        const gen = cache.captureGeneration('resources/read', 'res://a');
+        await cache.evictKey('resources/read', 'res://a');
+        await cache.write('resources/read', { contents: [] }, gen, { expiresAt: 1e9, scope: 'private', params: 'res://a' });
+        expect(store.get({ method: 'resources/read', params: 'res://a', partition: PRE })).toBeUndefined();
+        // A sibling URI's generation is independent: evictKey('a') does not
+        // suppress a write for 'b'.
+        const genB = cache.captureGeneration('resources/read', 'res://b');
+        await cache.evictKey('resources/read', 'res://a');
+        await cache.write('resources/read', { contents: [] }, genB, { expiresAt: 1e9, scope: 'private', params: 'res://b' });
+        expect(store.get({ method: 'resources/read', params: 'res://b', partition: PRE })).toBeDefined();
+        // A fresh capture after the evictKey writes through.
+        const gen2 = cache.captureGeneration('resources/read', 'res://a');
+        await cache.write('resources/read', { contents: [] }, gen2, { expiresAt: 1e9, scope: 'private', params: 'res://a' });
+        expect(store.get({ method: 'resources/read', params: 'res://a', partition: PRE })).toBeDefined();
+    });
+
+    it('evictKey: own-partition store.delete rejecting does not skip the shared-partition delete', async () => {
+        const deleted: string[] = [];
+        const store: ResponseCacheStore = {
+            get: () => undefined,
+            set: () => 0,
+            evict: () => {},
+            clear: () => {},
+            delete: key => {
+                if (key.partition === JSON.stringify(['srv', 'alice'])) return Promise.reject(new Error('own boom'));
+                deleted.push(key.partition ?? '');
+                return undefined;
+            }
+        };
+        const reported: unknown[] = [];
+        const cache = new ClientResponseCache(store, true, e => reported.push(e), 'alice');
+        cache.setServerIdentity('srv');
+        await cache.evictKey('resources/read', 'res://x');
+        // Own-partition rejected → reported; shared-partition delete still ran.
+        expect((reported[0] as Error).message).toBe('own boom');
+        expect(deleted).toEqual([JSON.stringify(['srv', ''])]);
+    });
+
+    it("write/read/evict address the list singletons consistently as params: '' on a non-normalizing custom store", async () => {
+        // A custom store that keys on the raw CacheKey without normalizing
+        // omitted/undefined `params` to '' (e.g. JSON.stringify, which drops
+        // undefined members). Every SDK→store call must therefore send the
+        // SAME params shape so write/read/evict address one backend key.
+        const entries = new Map<string, CacheEntry>();
+        let stamp = 0;
+        const store: ResponseCacheStore = {
+            get: k => entries.get(JSON.stringify(k)),
+            set: (k, e) => (entries.set(JSON.stringify(k), { ...e, stamp: ++stamp }), stamp),
+            delete: k => void entries.delete(JSON.stringify(k)),
+            evict: () => {},
+            clear: () => entries.clear()
+        };
+        const cache = new ClientResponseCache(store, true);
+        await cache.write('tools/list', { tools: [TOOL_A] }, cache.captureGeneration('tools/list'), {
+            expiresAt: 1e9,
+            scope: 'private'
+        });
+        // The read path finds the entry the write path stored.
+        expect((await cache.read('tools/list'))?.value).toEqual({ tools: [TOOL_A] });
+        // The list_changed eviction path deletes the SAME backend key — gone.
+        await cache.evict('tools/list');
+        expect(await cache.read('tools/list')).toBeUndefined();
+        expect(entries.size).toBe(0);
+    });
+
     it('a custom store whose set() rejects is routed to reportError and write still resolves', async () => {
         const store: ResponseCacheStore = new InMemoryResponseCacheStore();
         store.set = () => Promise.reject(new Error('redis down'));
@@ -131,7 +292,7 @@ describe('ClientResponseCache', () => {
         const cache = new ClientResponseCache(store, true);
         expect(await cache.toolDefinition('a')).toBeUndefined();
 
-        store.set({ method: 'tools/list' }, { value: { tools: [TOOL_A, TOOL_B] } });
+        store.set({ method: 'tools/list', partition: PRE }, { value: { tools: [TOOL_A, TOOL_B] } });
         const hit = await cache.toolDefinition('a');
         expect(hit?.name).toBe('a');
         // Same backing entry → identical reference (memoized index, not re-derived).
@@ -139,7 +300,7 @@ describe('ClientResponseCache', () => {
 
         // A fresh write bumps the store stamp → the index re-derives (the new
         // entry's tool instance is what comes back, not the memoized one).
-        store.set({ method: 'tools/list' }, { value: { tools: [{ ...TOOL_A }, { ...TOOL_B }] } });
+        store.set({ method: 'tools/list', partition: PRE }, { value: { tools: [{ ...TOOL_A }, { ...TOOL_B }] } });
         const hit2 = await cache.toolDefinition('a');
         expect(hit2?.name).toBe('a');
         expect(hit2).not.toBe(hit);
@@ -151,15 +312,24 @@ interface Scripted {
     serverTx: InMemoryTransport;
     listCount: () => number;
     listParams: () => ({ cursor?: string; _meta?: unknown } | undefined)[];
+    wireCount: (method: string) => number;
 }
 
-async function scriptedModernServer(pages: Tool[][]): Promise<Scripted> {
+interface ScriptOptions {
+    listHint?: { ttlMs?: number; cacheScope?: 'public' | 'private' };
+    readHint?: { ttlMs?: number; cacheScope?: 'public' | 'private' };
+    serverInfo?: { name: string; version: string };
+}
+
+async function scriptedModernServer(pages: Tool[][], opts: ScriptOptions = {}): Promise<Scripted> {
     const [clientTx, serverTx] = InMemoryTransport.createLinkedPair();
     let lists = 0;
+    const wireCounts = new Map<string, number>();
     const params: ({ cursor?: string; _meta?: unknown } | undefined)[] = [];
     serverTx.onmessage = m => {
         const r = m as JSONRPCRequest;
         if (r.id === undefined) return;
+        wireCounts.set(r.method, (wireCounts.get(r.method) ?? 0) + 1);
         if (r.method === 'server/discover') {
             void serverTx.send({
                 jsonrpc: '2.0',
@@ -168,7 +338,7 @@ async function scriptedModernServer(pages: Tool[][]): Promise<Scripted> {
                     resultType: 'complete',
                     supportedVersions: [MODERN],
                     capabilities: { tools: { listChanged: true }, prompts: {}, resources: {} },
-                    serverInfo: { name: 'scripted', version: '1.0.0' }
+                    serverInfo: opts.serverInfo ?? { name: 'scripted', version: '1.0.0' }
                 }
             });
         } else if (r.method === 'tools/list') {
@@ -182,8 +352,8 @@ async function scriptedModernServer(pages: Tool[][]): Promise<Scripted> {
                 id: r.id,
                 result: {
                     resultType: 'complete',
-                    ttlMs: 0,
-                    cacheScope: 'private',
+                    ttlMs: opts.listHint?.ttlMs ?? 0,
+                    cacheScope: opts.listHint?.cacheScope ?? 'private',
                     tools: pages[idx] ?? [],
                     ...(next !== undefined && { nextCursor: next })
                 }
@@ -193,18 +363,41 @@ async function scriptedModernServer(pages: Tool[][]): Promise<Scripted> {
             void serverTx.send({
                 jsonrpc: '2.0',
                 id: r.id,
-                result: { resultType: 'complete', ttlMs: 0, cacheScope: 'private', [key]: [] }
+                result: {
+                    resultType: 'complete',
+                    ttlMs: opts.listHint?.ttlMs ?? 0,
+                    cacheScope: opts.listHint?.cacheScope ?? 'private',
+                    [key]: []
+                }
+            });
+        } else if (r.method === 'resources/read') {
+            const uri = (r.params as { uri: string }).uri;
+            void serverTx.send({
+                jsonrpc: '2.0',
+                id: r.id,
+                result: {
+                    resultType: 'complete',
+                    ...(opts.readHint?.ttlMs !== undefined && { ttlMs: opts.readHint.ttlMs }),
+                    ...(opts.readHint?.cacheScope !== undefined && { cacheScope: opts.readHint.cacheScope }),
+                    contents: [{ uri, mimeType: 'text/plain', text: `body:${uri}` }]
+                }
             });
         }
     };
     await serverTx.start();
-    return { clientTx, serverTx, listCount: () => lists, listParams: () => params };
+    return {
+        clientTx,
+        serverTx,
+        listCount: () => lists,
+        listParams: () => params,
+        wireCount: m => wireCounts.get(m) ?? 0
+    };
 }
 
-function modernClient(store?: InMemoryResponseCacheStore): Client {
+function modernClient(store?: InMemoryResponseCacheStore, extra?: { cachePartition?: string; defaultCacheTtlMs?: number }): Client {
     return new Client(
         { name: 'cache-client', version: '1.0.0' },
-        { versionNegotiation: { mode: { pin: MODERN } }, ...(store && { responseCacheStore: store }) }
+        { versionNegotiation: { mode: { pin: MODERN } }, ...(store && { responseCacheStore: store }), ...extra }
     );
 }
 
@@ -223,7 +416,7 @@ describe('Client response-cache substrate', () => {
         const page = await client.listTools({ cursor: '1' });
         expect(page.tools.map(t => t.name)).toEqual(['b']);
         expect(page.nextCursor).toBeUndefined();
-        expect(store.get({ method: 'tools/list' })).toBeUndefined();
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined();
         expect(listCount()).toBe(1);
 
         // No cursor → aggregates every page and writes one entry.
@@ -232,7 +425,7 @@ describe('Client response-cache substrate', () => {
         expect(nextCursor).toBeUndefined();
         expect(listCount()).toBe(3);
 
-        const entry = store.get({ method: 'tools/list' });
+        const entry = store.get({ method: 'tools/list', partition: part() });
         expect((entry?.value as { tools: Tool[] }).tools.map(t => t.name)).toEqual(['a', 'b']);
     });
 
@@ -287,7 +480,7 @@ describe('Client response-cache substrate', () => {
         expect((error as SdkError).message).toMatch(/exceeded listMaxPages \(2\); server pagination did not terminate/);
         expect((error as SdkError).data).toEqual({ method: 'tools/list', listMaxPages: 2 });
         // Aggregate-then-write: the throw happens before the cache write, so nothing is cached.
-        expect(store.get({ method: 'tools/list' })).toBeUndefined();
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined();
         // The per-page path is never capped.
         const page = await client.listTools({ cursor: '2' });
         expect(page.tools.map(t => t.name)).toEqual(['a']);
@@ -302,9 +495,9 @@ describe('Client response-cache substrate', () => {
         await client.listPrompts();
         await client.listResources();
         await client.listResourceTemplates();
-        expect(store.get({ method: 'prompts/list' })).toBeDefined();
-        expect(store.get({ method: 'resources/list' })).toBeDefined();
-        expect(store.get({ method: 'resources/templates/list' })).toBeDefined();
+        expect(store.get({ method: 'prompts/list', partition: part() })).toBeDefined();
+        expect(store.get({ method: 'resources/list', partition: part() })).toBeDefined();
+        expect(store.get({ method: 'resources/templates/list', partition: part() })).toBeDefined();
     });
 
     it('toolDefinition through the Client wiring: miss before any list, hit after', async () => {
@@ -325,13 +518,13 @@ describe('Client response-cache substrate', () => {
         const client = modernClient(store);
         await client.connect(clientTx);
         await client.listTools();
-        expect(store.get({ method: 'tools/list' })).toBeDefined();
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined();
         expect(await toolDef(client, 'a')).toBeDefined();
 
         const before = listCount();
         await serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage);
         // Evicted, not refetched.
-        expect(store.get({ method: 'tools/list' })).toBeUndefined();
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined();
         expect(await toolDef(client, 'a')).toBeUndefined();
         expect(listCount()).toBe(before);
     });
@@ -343,12 +536,12 @@ describe('Client response-cache substrate', () => {
         await client.connect(clientTx);
         await client.listResources();
         await client.listResourceTemplates();
-        expect(store.get({ method: 'resources/list' })).toBeDefined();
-        expect(store.get({ method: 'resources/templates/list' })).toBeDefined();
+        expect(store.get({ method: 'resources/list', partition: part() })).toBeDefined();
+        expect(store.get({ method: 'resources/templates/list', partition: part() })).toBeDefined();
 
         await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/list_changed' } as JSONRPCMessage);
-        expect(store.get({ method: 'resources/list' })).toBeUndefined();
-        expect(store.get({ method: 'resources/templates/list' })).toBeUndefined();
+        expect(store.get({ method: 'resources/list', partition: part() })).toBeUndefined();
+        expect(store.get({ method: 'resources/templates/list', partition: part() })).toBeUndefined();
     });
 
     it('_resetConnectionState leaves a user-supplied store untouched and drops the derived index', async () => {
@@ -357,12 +550,12 @@ describe('Client response-cache substrate', () => {
         const client = modernClient(store);
         await client.connect(clientTx);
         await client.listTools();
-        expect(store.get({ method: 'tools/list' })).toBeDefined();
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined();
 
         await client.close();
         // A user-supplied store is NOT cleared on close/reconnect (defeats the
         // only reason to supply one); the per-instance default IS cleared.
-        expect(store.get({ method: 'tools/list' })).toBeDefined();
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined();
         // The derived index is connection-scoped regardless: it is dropped, and
         // the next read re-derives from the (still-populated) store.
         expect((cacheOf(client) as unknown as { _toolIndex?: unknown })._toolIndex).toBeUndefined();
@@ -407,9 +600,9 @@ describe('Client response-cache substrate', () => {
         expect(errors.map(e => e.message)).toContain('redis down');
     });
 
-    it('a custom store whose evict() throws is routed to onerror and dispatch still runs', async () => {
+    it('a custom store whose delete() throws on the list_changed eviction path is routed to onerror and dispatch still runs', async () => {
         const store = new InMemoryResponseCacheStore();
-        store.evict = () => {
+        store.delete = () => {
             throw new Error('boom');
         };
         const { clientTx, serverTx } = await scriptedModernServer([[TOOL_A]]);
@@ -427,3 +620,649 @@ describe('Client response-cache substrate', () => {
         expect(dispatched).toBe(true);
     });
 });
+
+/** Freeze the cache's clock at `t` for deterministic freshness assertions. */
+const setNow = (client: Client, t: number): void => {
+    (cacheOf(client) as unknown as { _now: () => number })._now = () => t;
+};
+
+describe('Client honours cacheHints (SEP-2549)', () => {
+    it('listTools(): within TTL → no wire request; after TTL → refetch', async () => {
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A, TOOL_B]], { listHint: { ttlMs: 30_000 } });
+        const client = modernClient();
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        const first = await client.listTools();
+        expect(first.tools.map(t => t.name)).toEqual(['a', 'b']);
+        expect(listCount()).toBe(1);
+
+        // Within TTL → cache hit, no wire request.
+        setNow(client, 1_020_000);
+        const second = await client.listTools();
+        expect(second.tools.map(t => t.name)).toEqual(['a', 'b']);
+        expect(listCount()).toBe(1);
+        // Clone-on-serve: hit is a fresh copy, not the stored object.
+        expect(second).not.toBe(first);
+
+        // After TTL → stale, refetch.
+        setNow(client, 1_040_000);
+        await client.listTools();
+        expect(listCount()).toBe(2);
+    });
+
+    it("cacheMode: 'refresh' always fetches and re-stores; 'bypass' fetches without read or write", async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000 } });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.listTools();
+        expect(listCount()).toBe(1);
+        const stamp1 = store.get({ method: 'tools/list', partition: part() })?.stamp;
+
+        // 'refresh' ignores the still-fresh entry, fetches, and re-stores (new stamp).
+        await client.listTools(undefined, { cacheMode: 'refresh' });
+        expect(listCount()).toBe(2);
+        const stamp2 = store.get({ method: 'tools/list', partition: part() })?.stamp;
+        expect(stamp2).toBeGreaterThan(stamp1!);
+
+        // 'bypass' fetches but neither reads nor writes the cache.
+        await client.listTools(undefined, { cacheMode: 'bypass' });
+        expect(listCount()).toBe(3);
+        expect(store.get({ method: 'tools/list', partition: part() })?.stamp).toBe(stamp2);
+
+        // Default 'use' still serves the entry 'refresh' wrote.
+        await client.listTools();
+        expect(listCount()).toBe(3);
+    });
+
+    it('listChanged eviction beats TTL: a still-fresh entry is dropped on the relevant notification', async () => {
+        const { clientTx, serverTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000 } });
+        const client = modernClient();
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.listTools();
+        await client.listTools();
+        expect(listCount()).toBe(1);
+
+        // Relevant notification ⇒ entry immediately stale (spec): the next call refetches even within TTL.
+        await serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage);
+        await client.listTools();
+        expect(listCount()).toBe(2);
+    });
+
+    it('defaultCacheTtlMs: 0 (the default) means always-fetch but mirroring still works', async () => {
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 0 } });
+        const client = modernClient();
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.listTools();
+        // ttlMs:0 ⇒ expiresAt === now ⇒ never served from cache.
+        await client.listTools();
+        expect(listCount()).toBe(2);
+        // …but the entry IS stored (retain-for-schema), so the derived index works.
+        expect((await toolDef(client, 'a'))?.name).toBe('a');
+    });
+
+    it('an explicit server ttlMs:0 is honoured as immediately stale (server hint wins over defaultCacheTtlMs)', async () => {
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 0 } });
+        // defaultCacheTtlMs only applies when the result lacks ttlMs (e.g. a
+        // legacy-era response); a 2026 server's explicit 0 is the spec's
+        // "immediately stale" and is honoured as-is.
+        const client = modernClient(undefined, { defaultCacheTtlMs: 60_000 });
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.listTools();
+        await client.listTools();
+        expect(listCount()).toBe(2);
+    });
+
+    it("same serverIdentity, different cachePartition: 'public' entries shared; 'private' entries isolated", async () => {
+        const store = new InMemoryResponseCacheStore();
+        // Public scope: alice writes, bob (different cachePartition, SAME server) reads from the server's shared partition.
+        {
+            const a = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'public' } });
+            const alice = modernClient(store, { cachePartition: 'alice' });
+            await alice.connect(a.clientTx);
+            setNow(alice, 1_000_000);
+            await alice.listTools();
+            expect(a.listCount()).toBe(1);
+            // Stored under the server's shared partition (`[serverIdentity, '']`).
+            expect(store.get({ method: 'tools/list', partition: part() })?.scope).toBe('public');
+            expect(store.get({ method: 'tools/list', partition: part('alice') })).toBeUndefined();
+
+            const b = await scriptedModernServer([[TOOL_B]], { listHint: { ttlMs: 60_000, cacheScope: 'public' } });
+            const bob = modernClient(store, { cachePartition: 'bob' });
+            await bob.connect(b.clientTx);
+            setNow(bob, 1_000_000);
+            const { tools } = await bob.listTools();
+            // Public-share across two clients of the SAME server on one store: bob is served alice's entry without a wire request.
+            expect(tools.map(t => t.name)).toEqual(['a']);
+            expect(b.listCount()).toBe(0);
+        }
+        store.clear();
+        // Private scope: alice writes under her own partition; bob misses and fetches his own.
+        {
+            const a = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } });
+            const alice = modernClient(store, { cachePartition: 'alice' });
+            await alice.connect(a.clientTx);
+            setNow(alice, 1_000_000);
+            await alice.listTools();
+            expect(store.get({ method: 'tools/list', partition: part('alice') })?.scope).toBe('private');
+            expect(store.get({ method: 'tools/list', partition: part() })).toBeUndefined();
+
+            const b = await scriptedModernServer([[TOOL_B]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } });
+            const bob = modernClient(store, { cachePartition: 'bob' });
+            await bob.connect(b.clientTx);
+            setNow(bob, 1_000_000);
+            const { tools } = await bob.listTools();
+            // Own-partition miss + shared-partition miss ⇒ bob fetches; alice's private entry never crosses.
+            expect(tools.map(t => t.name)).toEqual(['b']);
+            expect(b.listCount()).toBe(1);
+            // toolDefinition (mirroring source) reads from each client's own partition.
+            expect((await toolDef(alice, 'a'))?.name).toBe('a');
+            expect((await toolDef(bob, 'b'))?.name).toBe('b');
+            expect(await toolDef(bob, 'a')).toBeUndefined();
+        }
+    });
+
+    it("different serverIdentity on a shared store: no cross-talk even for 'public' entries", async () => {
+        const store = new InMemoryResponseCacheStore();
+        // Server X stamps public; client x writes under [x@1.0.0, ''].
+        const sx = await scriptedModernServer([[TOOL_A]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'public' },
+            serverInfo: { name: 'x', version: '1.0.0' }
+        });
+        const x = modernClient(store);
+        await x.connect(sx.clientTx);
+        setNow(x, 1_000_000);
+        await x.listTools();
+        expect(store.get({ method: 'tools/list', partition: part('', 'x@1.0.0') })?.scope).toBe('public');
+
+        // Server Y on the SAME store: y misses x's entry (different serverIdentity) and fetches its own.
+        const sy = await scriptedModernServer([[TOOL_B]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'public' },
+            serverInfo: { name: 'y', version: '1.0.0' }
+        });
+        const y = modernClient(store);
+        await y.connect(sy.clientTx);
+        setNow(y, 1_000_000);
+        const { tools } = await y.listTools();
+        expect(tools.map(t => t.name)).toEqual(['b']);
+        expect(sy.listCount()).toBe(1);
+        // Both entries co-exist under their own server namespaces.
+        expect(store.get({ method: 'tools/list', partition: part('', 'y@1.0.0') })?.scope).toBe('public');
+        expect((await toolDef(x, 'a'))?.name).toBe('a');
+        expect(await toolDef(y, 'a')).toBeUndefined();
+    });
+
+    it("list_changed eviction is partition-scoped on a shared store: one server's notification leaves co-tenants' entries intact", async () => {
+        const store = new InMemoryResponseCacheStore();
+        // Two clients on DIFFERENT servers share one store. Each has a fresh
+        // public tools/list entry under its own server-identity partition.
+        const sx = await scriptedModernServer([[TOOL_A]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'public' },
+            serverInfo: { name: 'x', version: '1.0.0' }
+        });
+        const x = modernClient(store);
+        await x.connect(sx.clientTx);
+        setNow(x, 1_000_000);
+        await x.listTools();
+
+        const sy = await scriptedModernServer([[TOOL_B]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'public' },
+            serverInfo: { name: 'y', version: '1.0.0' }
+        });
+        const y = modernClient(store);
+        await y.connect(sy.clientTx);
+        setNow(y, 1_000_000);
+        await y.listTools();
+        expect(store.get({ method: 'tools/list', partition: part('', 'x@1.0.0') })).toBeDefined();
+        expect(store.get({ method: 'tools/list', partition: part('', 'y@1.0.0') })).toBeDefined();
+
+        // Server X sends list_changed → only x's entry is dropped; y's
+        // co-tenant entry survives (evict() targets the connected server's
+        // two partition singletons, never the method-wide store.evict()).
+        await sx.serverTx.send({ jsonrpc: '2.0', method: 'notifications/tools/list_changed' } as JSONRPCMessage);
+        expect(store.get({ method: 'tools/list', partition: part('', 'x@1.0.0') })).toBeUndefined();
+        expect(store.get({ method: 'tools/list', partition: part('', 'y@1.0.0') })).toBeDefined();
+        // y still cache-serves its own entry without a wire request.
+        await y.listTools();
+        expect(sy.listCount()).toBe(1);
+        expect((await toolDef(y, 'b'))?.name).toBe('b');
+    });
+
+    it("a malicious serverInfo cannot bleed into another server's principal slot (JSON encoding is collision-free)", async () => {
+        const store = new InMemoryResponseCacheStore();
+        // The legitimate server B with principal 'victim'. Under naive
+        // `${name}@${version}|${cachePartition}` concat its private partition
+        // would be `realServer@1.0|victim`.
+        const sb = await scriptedModernServer([[TOOL_B]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'private' },
+            serverInfo: { name: 'realServer', version: '1.0' }
+        });
+        const victim = modernClient(store, { cachePartition: 'victim' });
+        await victim.connect(sb.clientTx);
+        setNow(victim, 1_000_000);
+        await victim.listTools();
+        expect(sb.listCount()).toBe(1);
+
+        // A malicious server A whose `name` embeds `@`/`|` to target B's
+        // naive-concat private slot. With JSON encoding the partition is
+        // `["realServer@1.0|victim@",""]` ≠ `["realServer@1.0","victim"]` —
+        // no collision possible regardless of what characters the
+        // server-controlled string carries.
+        const sa = await scriptedModernServer([[TOOL_A]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'public' },
+            serverInfo: { name: 'realServer@1.0|victim', version: '' }
+        });
+        const attacker = modernClient(store);
+        await attacker.connect(sa.clientTx);
+        setNow(attacker, 1_000_000);
+        await attacker.listTools();
+
+        // B's private entry is unreachable from A (and vice versa): victim
+        // still cache-serves its own entry, attacker never observed it.
+        const again = await victim.listTools();
+        expect(again.tools.map(t => t.name)).toEqual(['b']);
+        expect(sb.listCount()).toBe(1);
+        expect(await toolDef(attacker, 'b')).toBeUndefined();
+        expect((await toolDef(victim, 'b'))?.name).toBe('b');
+    });
+
+    it("a server flipping cacheScope private→public on a 'refresh' deletes the shadowing private-partition entry", async () => {
+        const store = new InMemoryResponseCacheStore();
+        const pages: Tool[][] = [[TOOL_A]];
+        const opts: ScriptOptions = { listHint: { ttlMs: 60_000, cacheScope: 'private' } };
+        const a = await scriptedModernServer(pages, opts);
+        const alice = modernClient(store, { cachePartition: 'alice' });
+        await alice.connect(a.clientTx);
+        setNow(alice, 1_000_000);
+        // Warm: private-scoped → stored under [serverIdentity, 'alice'].
+        await alice.listTools();
+        expect(store.get({ method: 'tools/list', partition: part('alice') })?.scope).toBe('private');
+        // Server flips the same key's scope to 'public' AND changes the body.
+        opts.listHint = { ttlMs: 60_000, cacheScope: 'public' };
+        pages[0] = [TOOL_B];
+        await alice.listTools(undefined, { cacheMode: 'refresh' });
+        // Fresh body stored at the shared partition; the now-stale private
+        // entry is DELETED so it cannot shadow the public one on the
+        // own-first probe.
+        expect(store.get({ method: 'tools/list', partition: part() })?.scope).toBe('public');
+        expect(store.get({ method: 'tools/list', partition: part('alice') })).toBeUndefined();
+        // Next default-mode read serves the FRESH public body from cache (no wire).
+        const { tools } = await alice.listTools();
+        expect(tools.map(t => t.name)).toEqual(['b']);
+        expect(a.listCount()).toBe(2);
+    });
+
+    it("the shared-partition fallback drops entries whose stored scope is not 'public' (misconfigured-co-tenant guard)", async () => {
+        const store = new InMemoryResponseCacheStore();
+        // A misconfigured co-tenant (omits cachePartition, default '') writes a
+        // PRIVATE-scoped entry — which lands at the server's shared partition.
+        const a = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } });
+        const misconfigured = modernClient(store);
+        await misconfigured.connect(a.clientTx);
+        setNow(misconfigured, 1_000_000);
+        await misconfigured.listTools();
+        expect(store.get({ method: 'tools/list', partition: part() })?.scope).toBe('private');
+
+        // A correctly-partitioned client probes own partition (miss), then the
+        // shared one — which holds the misconfigured client's PRIVATE entry.
+        // The `entry.scope === 'public'` gate drops it; bob fetches over the
+        // wire instead of leaking the private body.
+        const b = await scriptedModernServer([[TOOL_B]], { listHint: { ttlMs: 60_000, cacheScope: 'private' } });
+        const bob = modernClient(store, { cachePartition: 'bob' });
+        await bob.connect(b.clientTx);
+        setNow(bob, 1_000_000);
+        const { tools } = await bob.listTools();
+        expect(tools.map(t => t.name)).toEqual(['b']);
+        expect(b.listCount()).toBe(1);
+    });
+
+    it('readResource(): keyed by uri, partitioned by scope, absent cacheScope is private; ttl≤0 is not stored', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, wireCount } = await scriptedModernServer([[TOOL_A]], {
+            readHint: { ttlMs: 60_000, cacheScope: 'private' }
+        });
+        const client = modernClient(store, { cachePartition: 'alice' });
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        const r1 = await client.readResource({ uri: 'res://one' });
+        expect(r1.contents[0]).toMatchObject({ text: 'body:res://one' });
+        expect(wireCount('resources/read')).toBe(1);
+        // Within TTL → cache hit on the same uri.
+        const r2 = await client.readResource({ uri: 'res://one' });
+        expect(r2.contents[0]).toMatchObject({ text: 'body:res://one' });
+        expect(wireCount('resources/read')).toBe(1);
+        // Different uri → distinct key, fetch.
+        await client.readResource({ uri: 'res://two' });
+        expect(wireCount('resources/read')).toBe(2);
+        // 'refresh' on the first uri → fetch.
+        await client.readResource({ uri: 'res://one' }, { cacheMode: 'refresh' });
+        expect(wireCount('resources/read')).toBe(3);
+        // Stored under alice's partition only (private).
+        expect(store.get({ method: 'resources/read', params: 'res://one', partition: part('alice') })).toBeDefined();
+        expect(store.get({ method: 'resources/read', params: 'res://one', partition: part() })).toBeUndefined();
+
+        // bob on a shared store cannot read alice's private resource body.
+        const b = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 60_000, cacheScope: 'private' } });
+        const bob = modernClient(store, { cachePartition: 'bob' });
+        await bob.connect(b.clientTx);
+        setNow(bob, 1_000_000);
+        await bob.readResource({ uri: 'res://one' });
+        expect(b.wireCount('resources/read')).toBe(1);
+    });
+
+    // The wire codec rejects a 2026-07-28 cacheable result without `cacheScope`
+    // (it is a required field), so the absent-scope path is unreachable through
+    // `request()`. The `_freshness` private-default is defence-in-depth only;
+    // the partition test above asserts the explicit-`'private'` storage slot.
+
+    it('readResource(): ttl≤0 is not stored (unbounded URI keyspace) but the result still returns', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, wireCount } = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 0, cacheScope: 'public' } });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+        const r = await client.readResource({ uri: 'res://x' });
+        expect(r.contents[0]).toMatchObject({ text: 'body:res://x' });
+        expect(store.get({ method: 'resources/read', params: 'res://x', partition: part() })).toBeUndefined();
+        await client.readResource({ uri: 'res://x' });
+        expect(wireCount('resources/read')).toBe(2);
+    });
+
+    it('readResource(): 600 distinct ttl=0 URIs issue zero store.delete() calls (evictKey skipped on a cold default-mode miss)', async () => {
+        // Regression: every ttl≤0 default-mode read used to call
+        // `evictKey('resources/read', uri)` unconditionally, which issued 1–2
+        // `store.delete()` calls against a cold key — wasted round trips on
+        // an async store across a ttl≤0 working set. The evict is now skipped
+        // when `_serveFromCache` already proved nothing fresh is held.
+        const store = new InMemoryResponseCacheStore();
+        let deletes = 0;
+        const realDelete = store.delete.bind(store);
+        (store as ResponseCacheStore).delete = key => {
+            deletes++;
+            return realDelete(key);
+        };
+        const { clientTx } = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 0, cacheScope: 'public' } });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        for (let i = 0; i < 600; i++) await client.readResource({ uri: `res://cold/${i}` });
+        expect(store.size).toBe(0);
+        // No store.delete() issued for any of the 600 cold-miss ttl≤0 reads.
+        expect(deletes).toBe(0);
+        // `captureGeneration` recorded one entry per read URI (the in-flight
+        // guard's presence record); none was bumped — `evictKey` was never
+        // reached. The map is bounded by keys the CLIENT chose to read.
+        const gen = (cacheOf(client) as unknown as { _evictionGeneration: Map<string, number> })._evictionGeneration;
+        expect(gen.size).toBe(600);
+        expect([...gen.values()].every(v => v === 0)).toBe(true);
+    });
+
+    it('600 distinct-URI notifications/resources/updated with no prior readResource do not grow the eviction-generation map; a read URI is still guarded', async () => {
+        // Regression: `evictKey` used to bump (and therefore record) the
+        // per-URI generation unconditionally, so a server streaming
+        // `resources/updated` for distinct URIs grew `_evictionGeneration`
+        // without bound — server-controlled heap growth. `evictKey` now only
+        // bumps a key the client has captured.
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, serverTx } = await scriptedModernServer([[TOOL_A]], {
+            readHint: { ttlMs: 60_000, cacheScope: 'private' }
+        });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+        const gen = (cacheOf(client) as unknown as { _evictionGeneration: Map<string, number> })._evictionGeneration;
+
+        for (let i = 0; i < 600; i++) {
+            await serverTx.send({
+                jsonrpc: '2.0',
+                method: 'notifications/resources/updated',
+                params: { uri: `res://never-read/${i}` }
+            } as JSONRPCMessage);
+        }
+        expect(gen.size).toBe(0);
+
+        // A URI the client HAS read is recorded by captureGeneration; an
+        // `updated` for it bumps (the in-flight guard still works).
+        await client.readResource({ uri: 'res://hot' });
+        expect(cacheOf(client).captureGeneration('resources/read', 'res://hot')).toBe(0);
+        await serverTx.send({
+            jsonrpc: '2.0',
+            method: 'notifications/resources/updated',
+            params: { uri: 'res://hot' }
+        } as JSONRPCMessage);
+        expect(cacheOf(client).captureGeneration('resources/read', 'res://hot')).toBe(1);
+        expect(gen.size).toBe(1);
+    });
+
+    it("readResource(): a 'refresh' that returns ttl≤0 evicts the previously-warm entry; the next default-mode read fetches fresh", async () => {
+        const store = new InMemoryResponseCacheStore();
+        const opts: ScriptOptions = { readHint: { ttlMs: 60_000, cacheScope: 'private' } };
+        const { clientTx, wireCount } = await scriptedModernServer([[TOOL_A]], opts);
+        const client = modernClient(store, { cachePartition: 'alice' });
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        // Warm: ttl=60s.
+        await client.readResource({ uri: 'res://x' });
+        expect(wireCount('resources/read')).toBe(1);
+        await client.readResource({ uri: 'res://x' });
+        expect(wireCount('resources/read')).toBe(1);
+        expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeDefined();
+
+        // Server flips to ttl=0; a 'refresh' fetch returns ttl≤0 → the held
+        // positive-TTL entry is evicted, not left stale-but-fresh.
+        opts.readHint = { ttlMs: 0, cacheScope: 'private' };
+        await client.readResource({ uri: 'res://x' }, { cacheMode: 'refresh' });
+        expect(wireCount('resources/read')).toBe(2);
+        expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeUndefined();
+
+        // The next default-mode read fetches fresh (the entry was evicted).
+        await client.readResource({ uri: 'res://x' });
+        expect(wireCount('resources/read')).toBe(3);
+    });
+
+    it('a pre-aborted signal on a warm-cache hit rejects with SdkError(RequestTimeout) — the abort is not swallowed by the cache serve', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000, cacheScope: 'public' } });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.listTools();
+        expect(listCount()).toBe(1);
+        // Warm — a plain second call would be cache-served. With a pre-aborted
+        // signal it must reject the same way the wire path would.
+        const ac = new AbortController();
+        ac.abort('user cancelled');
+        const error = await client.listTools(undefined, { signal: ac.signal }).catch(e => e as SdkError);
+        expect(error).toBeInstanceOf(SdkError);
+        expect((error as SdkError).code).toBe(SdkErrorCode.RequestTimeout);
+        expect((error as SdkError).message).toContain('user cancelled');
+        // The aborted call did not reach the wire.
+        expect(listCount()).toBe(1);
+    });
+
+    it('notifications/resources/updated evicts the cached resources/read entry for that URI from both partitions', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, serverTx, wireCount } = await scriptedModernServer([[TOOL_A]], {
+            readHint: { ttlMs: 60_000, cacheScope: 'private' }
+        });
+        const client = modernClient(store, { cachePartition: 'alice' });
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.readResource({ uri: 'res://one' });
+        await client.readResource({ uri: 'res://two' });
+        expect(wireCount('resources/read')).toBe(2);
+        // Within TTL → cache hit.
+        await client.readResource({ uri: 'res://one' });
+        expect(wireCount('resources/read')).toBe(2);
+
+        // Subscribe → updated → re-read flow: the per-URI eviction drops the
+        // cached body from BOTH partitions; the next read for THAT uri
+        // refetches even within TTL; the sibling uri is untouched.
+        await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/updated', params: { uri: 'res://one' } } as JSONRPCMessage);
+        expect(store.get({ method: 'resources/read', params: 'res://one', partition: part('alice') })).toBeUndefined();
+        await client.readResource({ uri: 'res://one' });
+        expect(wireCount('resources/read')).toBe(3);
+        await client.readResource({ uri: 'res://two' });
+        expect(wireCount('resources/read')).toBe(3);
+
+        // A `resources/updated` without a string `uri` is a no-op (matches the
+        // mcp.d guard).
+        await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/updated', params: {} } as JSONRPCMessage);
+        await client.readResource({ uri: 'res://one' });
+        expect(wireCount('resources/read')).toBe(3);
+    });
+
+    it('ttlMs is clamped at 24h (MAX_CACHE_TTL_MS) so a server cannot pin an entry indefinitely', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], {
+            listHint: { ttlMs: Number.MAX_SAFE_INTEGER, cacheScope: 'public' }
+        });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+        await client.listTools();
+        const entry = store.get({ method: 'tools/list', partition: part() });
+        // expiresAt = now + min(ttlMs, 24h)
+        expect(entry?.expiresAt).toBe(1_000_000 + 86_400_000);
+        // Just under 24h → still served from cache.
+        setNow(client, 1_000_000 + 86_400_000 - 1);
+        await client.listTools();
+        expect(listCount()).toBe(1);
+        // Past 24h → refetch.
+        setNow(client, 1_000_000 + 86_400_000 + 1);
+        await client.listTools();
+        expect(listCount()).toBe(2);
+    });
+
+    it('the default in-memory store is bounded: 600 distinct readResource URIs cap at 512 with oldest-first eviction', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx } = await scriptedModernServer([[TOOL_A]], { readHint: { ttlMs: 60_000, cacheScope: 'public' } });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        for (let i = 0; i < 600; i++) await client.readResource({ uri: `res://${i}` });
+        expect(store.size).toBe(512);
+        // The first 88 URIs (oldest insertions) were evicted; the tail survived.
+        expect(store.get({ method: 'resources/read', params: 'res://0', partition: part() })).toBeUndefined();
+        expect(store.get({ method: 'resources/read', params: 'res://87', partition: part() })).toBeUndefined();
+        expect(store.get({ method: 'resources/read', params: 'res://88', partition: part() })).toBeDefined();
+        expect(store.get({ method: 'resources/read', params: 'res://599', partition: part() })).toBeDefined();
+    });
+
+    it('the maxEntries cap never evicts the tools/list singleton: 600 readResource URIs leave the derived index intact', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], {
+            listHint: { ttlMs: 60_000, cacheScope: 'public' },
+            readHint: { ttlMs: 60_000, cacheScope: 'public' }
+        });
+        const client = modernClient(store);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        await client.listTools();
+        expect((await toolDef(client, 'a'))?.name).toBe('a');
+        for (let i = 0; i < 600; i++) await client.readResource({ uri: `res://${i}` });
+        // 512 capped resources/read entries + the exempt tools/list singleton.
+        expect(store.size).toBe(513);
+        // The list singleton survived the FIFO churn → derived index still hits;
+        // a fresh listTools() within TTL is still cache-served.
+        expect(store.get({ method: 'tools/list', partition: part() })).toBeDefined();
+        expect((await toolDef(client, 'a'))?.name).toBe('a');
+        await client.listTools();
+        expect(listCount()).toBe(1);
+    });
+
+    it('an in-flight readResource() does not re-cache a stale body when resources/updated for that URI lands mid-request', async () => {
+        const store = new InMemoryResponseCacheStore();
+        const [clientTx, serverTx] = InMemoryTransport.createLinkedPair();
+        let reads = 0;
+        let pendingId: string | number | undefined;
+        serverTx.onmessage = m => {
+            const r = m as JSONRPCRequest;
+            if (r.id === undefined) return;
+            if (r.method === 'server/discover') {
+                void serverTx.send({
+                    jsonrpc: '2.0',
+                    id: r.id,
+                    result: {
+                        resultType: 'complete',
+                        supportedVersions: [MODERN],
+                        capabilities: { resources: {} },
+                        serverInfo: { name: 'scripted', version: '1.0.0' }
+                    }
+                });
+            } else if (r.method === 'resources/read') {
+                reads++;
+                pendingId = r.id; // defer — the test drives the response
+            }
+        };
+        await serverTx.start();
+        const client = modernClient(store, { cachePartition: 'alice' });
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        const respond = (text: string): void =>
+            void serverTx.send({
+                jsonrpc: '2.0',
+                id: pendingId!,
+                result: {
+                    resultType: 'complete',
+                    ttlMs: 60_000,
+                    cacheScope: 'private',
+                    contents: [{ uri: 'res://x', mimeType: 'text/plain', text }]
+                }
+            });
+
+        // Kick off the read; let the request reach the server.
+        const inflight = client.readResource({ uri: 'res://x' });
+        await new Promise(resolve => setTimeout(resolve, 0));
+        expect(reads).toBe(1);
+        // resources/updated for THIS uri lands while the read is in flight →
+        // bumps the per-URI generation; the eventual write is suppressed.
+        await serverTx.send({ jsonrpc: '2.0', method: 'notifications/resources/updated', params: { uri: 'res://x' } } as JSONRPCMessage);
+        respond('stale');
+        const r1 = await inflight;
+        expect(r1.contents[0]).toMatchObject({ text: 'stale' });
+        expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeUndefined();
+
+        // The next read for the same URI refetches (no stale cache hit) and
+        // its write goes through (fresh capture).
+        const next = client.readResource({ uri: 'res://x' });
+        await new Promise(resolve => setTimeout(resolve, 0));
+        expect(reads).toBe(2);
+        respond('fresh');
+        expect((await next).contents[0]).toMatchObject({ text: 'fresh' });
+        expect(store.get({ method: 'resources/read', params: 'res://x', partition: part('alice') })).toBeDefined();
+    });
+
+    it('a custom store whose get() rejects degrades to a miss; the request still reaches the wire', async () => {
+        const store = new InMemoryResponseCacheStore();
+        (store as ResponseCacheStore).get = () => Promise.reject(new Error('redis down'));
+        const { clientTx, listCount } = await scriptedModernServer([[TOOL_A]], { listHint: { ttlMs: 60_000 } });
+        const client = modernClient(store);
+        const errors: Error[] = [];
+        client.onerror = e => errors.push(e);
+        await client.connect(clientTx);
+        setNow(client, 1_000_000);
+
+        const { tools } = await client.listTools();
+        expect(tools.map(t => t.name)).toEqual(['a']);
+        expect(listCount()).toBe(1);
+        expect(errors.map(e => e.message)).toContain('redis down');
+    });
+});
diff --git a/packages/core/src/shared/protocol.ts b/packages/core/src/shared/protocol.ts
index 46064bc938..c49ed22994 100644
--- a/packages/core/src/shared/protocol.ts
+++ b/packages/core/src/shared/protocol.ts
@@ -1298,7 +1298,16 @@ export abstract class Protocol<ContextT extends BaseContext> {
                 }
             }
 
-            options?.signal?.throwIfAborted();
+            // An already-aborted caller signal must surface the same way an
+            // in-flight abort does (`SdkError(RequestTimeout, reason)` via
+            // `cancel()` below). Bare `throwIfAborted()` would propagate the
+            // raw `signal.reason` instead, so callers that introduce an async
+            // hop before `request()` (e.g. a cache freshness check) would see
+            // a different rejection type depending on where the abort lands.
+            if (options?.signal?.aborted) {
+                const reason = options.signal.reason;
+                throw reason instanceof SdkError ? reason : new SdkError(SdkErrorCode.RequestTimeout, String(reason));
+            }
 
             // Spec basic/patterns/cancellation §Transport-Specific (2026-07-28):
             // on Streamable HTTP, closing the per-request SSE stream IS the

From bf3bc858429f77b47263ff92feb287851c85f9f1 Mon Sep 17 00:00:00 2001
From: Felix Weinberger <fweinberger@anthropic.com>
Date: Mon, 22 Jun 2026 20:41:55 +0000
Subject: [PATCH 2/2] docs(examples): caching story asserts client-side
 honouring; README adds cacheMode + custom-store sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The client now calls listTools() and readResource() twice each and asserts the
second of each pair is cache-served — the server's resource handler counts how
many times it ran and exposes that via a read-count tool, so the example
verifies (server-side) that the cache hit never reached the wire. Demonstrates
cacheMode:'refresh' and the post-refresh return to cache-serving.

README drops the follow-up note (honouring is shipped), adds a §cacheMode
section, and adds a §Custom store section showing the four-method
ResponseCacheStore interface shape with the cachePartition guidance for shared
stores.
---
 examples/caching/README.md | 48 +++++++++++++++++++++++++++++++++++---
 examples/caching/client.ts | 43 +++++++++++++++++++++++++++++++---
 examples/caching/server.ts | 39 ++++++++++++++++++++++++++++++-
 3 files changed, 123 insertions(+), 7 deletions(-)

diff --git a/examples/caching/README.md b/examples/caching/README.md
index 665ee3cc7e..840602a815 100644
--- a/examples/caching/README.md
+++ b/examples/caching/README.md
@@ -1,10 +1,52 @@
 # caching
 
 `CacheableResult` freshness hints (protocol revision 2026-07-28). The server declares hints at two layers — a per-registration `cacheHint` on the resource and server-level `ServerOptions.cacheHints` — and the SDK resolves most-specific-author-first (handler-return fields would
-take precedence over both) and stamps `ttlMs`/`cacheScope` on the wire toward modern clients only. The client reads the stamped values back.
-
-> Full client-side cache **honouring** (re-using a still-fresh result instead of re-requesting) is a follow-up; this example reads what the server emits today.
+take precedence over both) and stamps `ttlMs`/`cacheScope` on the wire toward modern clients only. The client honours the stamped values: a still-fresh held entry is served without a round trip.
 
 ```bash
 pnpm tsx examples/caching/client.ts
 ```
+
+The client calls `listTools()` and `readResource()` twice each; the second of each pair is served from the response cache. The server exposes a `request-count` tool (how many `tools/list` requests reached it) and a `read-count` tool (how many times the resource handler ran), so the example asserts each counter is unchanged after the cache-served call and increments after `cacheMode: 'refresh'`.
+
+## `cacheMode`
+
+Per-call control on the cacheable verbs (`listTools()` / `listPrompts()` / `listResources()` / `listResourceTemplates()` / `readResource()`):
+
+```ts
+await client.readResource({ uri: 'config://app' }); // 'use' (default): serve from cache if fresh
+await client.readResource({ uri: 'config://app' }, { cacheMode: 'refresh' }); // always fetch, then re-store
+await client.readResource({ uri: 'config://app' }, { cacheMode: 'bypass' }); // fetch; do not read or write the cache
+```
+
+A `list_changed` notification still evicts immediately regardless of TTL.
+
+## Custom store
+
+The default per-client `InMemoryResponseCacheStore` (bounded at 512 entries by default) is enough for most hosts. To back the cache with something persistent (Redis, KV, IndexedDB), implement the five-method `ResponseCacheStore` interface — the store is a dumb keyed-value carrier; freshness and partitioning are the client's job:
+
+```ts
+import type { CacheEntry, CacheKey, CacheScope, ResponseCacheStore } from '@modelcontextprotocol/client';
+
+class MyStore implements ResponseCacheStore {
+    async get(key: CacheKey): Promise<CacheEntry | undefined> {
+        /* read {value, stamp, expiresAt, scope} from your backend */
+    }
+    async set(key: CacheKey, entry: { value: unknown; expiresAt?: number; scope?: CacheScope }): Promise<number> {
+        /* write entry under key; return a monotonically-increasing stamp */
+    }
+    async delete(key: CacheKey): Promise<void> {
+        /* drop the single entry under key (no-op if absent) */
+    }
+    async evict(method: string): Promise<void> {
+        /* drop every entry whose key.method === method (across every partition) */
+    }
+    async clear(): Promise<void> {
+        /* drop everything */
+    }
+}
+
+const client = new Client({ name: 'host', version: '1.0.0' }, { responseCacheStore: new MyStore(), cachePartition: principalId });
+```
+
+The SDK scopes every entry by the connected server's identity automatically — you do not encode server identity into `cachePartition` or the store key yourself. When one store backs several principals against the same server, set `ClientOptions.cachePartition` to a stable identity of the authorization context (e.g. the auth subject) so `'private'`-scoped entries are isolated per principal; `'public'`-scoped entries are shared within the connected server's namespace automatically. Note `serverInfo` is self-reported, so a server that deliberately impersonates another's `name`/`version` shares its `'public'` slot; the per-principal isolation holds regardless.
diff --git a/examples/caching/client.ts b/examples/caching/client.ts
index f7137edf89..6354dbe48d 100644
--- a/examples/caching/client.ts
+++ b/examples/caching/client.ts
@@ -1,8 +1,7 @@
 /**
  * Reads the cache hints emitted on cacheable results (2026-07-28 connections
- * only) and asserts the configured values reached the wire. Full client-side
- * cache *honouring* (re-using a fresh result instead of re-requesting) is a
- * follow-up — see the SDK's tracking issue for client cache support.
+ * only) and asserts the client honours them: a still-fresh cached entry is
+ * served without a round trip.
  */
 import { check, connectFromArgs, runClient } from '../harness.js';
 
@@ -11,22 +10,60 @@ interface Cacheable {
     cacheScope?: 'public' | 'private';
 }
 
+async function callCount(client: Awaited<ReturnType<typeof connectFromArgs>>, name: 'read-count' | 'request-count'): Promise<number> {
+    const r = await client.callTool({ name });
+    return Number((r.content[0] as { text: string }).text);
+}
+
 runClient('caching', async () => {
     // connectFromArgs picks transport (default: spawn ./server.ts over stdio; --http <url>) and era (--legacy) from argv. Your code would construct a Client and connect over your chosen transport directly.
     const client = await connectFromArgs(import.meta.dirname);
     check.equal(client.getNegotiatedProtocolVersion(), '2026-07-28');
 
+    // The server stamps `tools/list` with `ttlMs: 30_000, cacheScope: 'public'`.
     const tools = (await client.listTools()) as Cacheable & Awaited<ReturnType<typeof client.listTools>>;
     check.equal(tools.ttlMs, 30_000);
     check.equal(tools.cacheScope, 'public');
+    // `request-count` proves the wire was reached exactly once.
+    check.equal(await callCount(client, 'request-count'), 1);
+
+    // The second call is served from the response cache: the server-side
+    // `tools/list` counter is unchanged, and the result is a fresh copy of the
+    // held entry (so mutating it cannot reach the cache).
+    const toolsAgain = await client.listTools();
+    check.deepEqual(
+        toolsAgain.tools.map(t => t.name),
+        tools.tools.map(t => t.name)
+    );
+    check.equal(await callCount(client, 'request-count'), 1);
+
+    // `cacheMode: 'refresh'` always fetches and re-stores: the counter moves.
+    await client.listTools(undefined, { cacheMode: 'refresh' });
+    check.equal(await callCount(client, 'request-count'), 2);
 
     const resources = (await client.listResources()) as Cacheable & Awaited<ReturnType<typeof client.listResources>>;
     check.equal(resources.ttlMs, 5000);
     check.equal(resources.cacheScope, 'public');
 
+    // `readResource`: the resource handler counts how many times it ran, and
+    // the `read-count` tool exposes that counter.
     const read = (await client.readResource({ uri: 'config://app' })) as Cacheable & Awaited<ReturnType<typeof client.readResource>>;
     check.equal(read.ttlMs, 60_000);
     check.equal(read.cacheScope, 'private');
+    check.equal(await callCount(client, 'read-count'), 1);
+
+    // Within TTL, default `cacheMode: 'use'` → served from cache; the server
+    // handler does not run.
+    await client.readResource({ uri: 'config://app' });
+    check.equal(await callCount(client, 'read-count'), 1);
+
+    // `cacheMode: 'refresh'` always fetches and re-stores.
+    await client.readResource({ uri: 'config://app' }, { cacheMode: 'refresh' });
+    check.equal(await callCount(client, 'read-count'), 2);
+
+    // After the refresh the entry is fresh again — back to cache-served.
+    await client.readResource({ uri: 'config://app' });
+    check.equal(await callCount(client, 'read-count'), 2);
 
     await client.close();
 });
diff --git a/examples/caching/server.ts b/examples/caching/server.ts
index 4b33f6fec7..b680355736 100644
--- a/examples/caching/server.ts
+++ b/examples/caching/server.ts
@@ -17,6 +17,13 @@ import { McpServer } from '@modelcontextprotocol/server';
 
 import { runServerFromArgs } from '../harness.js';
 
+// Module-level (process-wide) counters so the values survive the harness's
+// stateless HTTP leg (fresh `buildServer()` per request) as well as stdio's
+// single per-connection instance. The client asserts against these to prove a
+// cache-served call never reached the server.
+let readCount = 0;
+let listCount = 0;
+
 function buildServer(): McpServer {
     const server = new McpServer(
         { name: 'caching-example', version: '1.0.0' },
@@ -40,12 +47,42 @@ function buildServer(): McpServer {
             description: 'Static application config (rarely changes)',
             cacheHint: { ttlMs: 60_000, cacheScope: 'private' }
         },
-        async uri => ({ contents: [{ uri: uri.href, mimeType: 'application/json', text: '{"feature":true}' }] })
+        async uri => {
+            readCount++;
+            return { contents: [{ uri: uri.href, mimeType: 'application/json', text: '{"feature":true}' }] };
+        }
     );
 
     // A tool, so tools/list has something to cache.
     server.registerTool('noop', { description: 'no-op' }, async () => ({ content: [{ type: 'text', text: 'ok' }] }));
 
+    // Exposes the server-side `resources/read` invocation count so the client
+    // can assert that a cache-served call did not reach the wire.
+    server.registerTool('read-count', { description: 'Number of resources/read calls that reached this server' }, async () => ({
+        content: [{ type: 'text', text: String(readCount) }]
+    }));
+
+    // Exposes the server-side `tools/list` invocation count.
+    server.registerTool('request-count', { description: 'Number of tools/list requests that reached this server' }, async () => ({
+        content: [{ type: 'text', text: String(listCount) }]
+    }));
+
+    // Wrap the auto-generated `tools/list` handler so the example can prove a
+    // cache-served `listTools()` never reached the wire. `McpServer` registers
+    // the handler lazily on the first `registerTool()`; we re-seat it here so
+    // every dispatch increments `listCount` before delegating to the original.
+    // (Reaches the underlying request-handler map directly — there is no public
+    // wrapper hook; acceptable for an instrumentation example.)
+    const handlers = (server.server as unknown as { _requestHandlers: Map<string, (...a: unknown[]) => Promise<unknown>> })
+        ._requestHandlers;
+    const original = handlers.get('tools/list');
+    if (original) {
+        handlers.set('tools/list', (...a) => {
+            listCount++;
+            return original(...a);
+        });
+    }
+
     return server;
 }