diff --git a/Directory.Build.props b/Directory.Build.props
index 2648829..395d55d 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -9,7 +9,7 @@
-0.10.34
+0.10.35
diff --git a/src/RockBot.A2A/AgentCardSummarizer.cs b/src/RockBot.A2A/AgentCardSummarizer.cs
index 992967e..5ccf9d4 100644
--- a/src/RockBot.A2A/AgentCardSummarizer.cs
+++ b/src/RockBot.A2A/AgentCardSummarizer.cs
@@ -58,7 +58,7 @@ The summary must give enough detail that another agent can confidently decide "t
""";
var messages = new[] { new ChatMessage(ChatRole.User, prompt) };
- var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, cancellationToken: ct);
+ var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, options: null, cancellationToken: ct);
return response.Text?.Trim() ?? fallback;
}
catch (Exception ex)
diff --git a/src/RockBot.Agent/McpBridge/McpBridgeService.cs b/src/RockBot.Agent/McpBridge/McpBridgeService.cs
index b1b1b2f..8e6c42c 100644
--- a/src/RockBot.Agent/McpBridge/McpBridgeService.cs
+++ b/src/RockBot.Agent/McpBridge/McpBridgeService.cs
@@ -563,7 +563,7 @@ The summary must give enough detail that an agent can confidently decide "this i
""";
var messages = new[] { new ChatMessage(ChatRole.User, prompt) };
- var response = await _llmClient.GetResponseAsync(messages, cancellationToken: ct);
+ var response = await _llmClient.GetResponseAsync(messages, options: null, cancellationToken: ct);
summaryText = response.Text?.Trim();
}
catch (Exception ex)
diff --git a/src/RockBot.Host.Abstractions/ILlmClient.cs b/src/RockBot.Host.Abstractions/ILlmClient.cs
index 6b2ce46..eb0f7af 100644
--- a/src/RockBot.Host.Abstractions/ILlmClient.cs
+++ b/src/RockBot.Host.Abstractions/ILlmClient.cs
@@ -18,17 +18,27 @@ public interface ILlmClient
/// <summary>
/// Calls the LLM using the client.
/// </summary>
+ /// <remarks>
+ /// <paramref name="cancellationToken"/> is mandatory: the gateway uses it to
+ /// drain queued and in-flight calls when the caller is preempted (e.g. when
+ /// a user message cancels the dream cycle). Callers without a natural ct
+ /// MUST pass <see cref="CancellationToken.None"/> explicitly so the choice
+ /// is intentional and visible in code review. See design/llm-gateway.md.
+ /// </remarks>
Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default);
+ ChatOptions? options,
+ CancellationToken cancellationToken);
/// <summary>
/// Calls the LLM using the client for the specified <paramref name="tier"/>.
/// </summary>
+ /// <remarks>
+ /// <paramref name="cancellationToken"/> is mandatory: see the single-arg overload.
+ /// </remarks>
Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
ModelTier tier,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default);
+ ChatOptions? options,
+ CancellationToken cancellationToken);
}
diff --git a/src/RockBot.Host/LlmClient.cs b/src/RockBot.Host/LlmClient.cs
index 0e2aafa..6d24b13 100644
--- a/src/RockBot.Host/LlmClient.cs
+++ b/src/RockBot.Host/LlmClient.cs
@@ -20,16 +20,16 @@ internal sealed class LlmClient(
/// Calls the LLM using the Balanced tier.
public Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default)
+ ChatOptions? options,
+ CancellationToken cancellationToken)
=> GetResponseAsync(messages, ModelTier.Balanced, options, cancellationToken);
/// Calls the LLM using the specified tier, falling back to Balanced on failure for Low/High tiers.
public async Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
ModelTier tier,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default)
+ ChatOptions? options,
+ CancellationToken cancellationToken)
{
try
{
diff --git a/src/RockBot.Host/SessionSummaryService.cs b/src/RockBot.Host/SessionSummaryService.cs
index f42a96d..03b8c65 100644
--- a/src/RockBot.Host/SessionSummaryService.cs
+++ b/src/RockBot.Host/SessionSummaryService.cs
@@ -149,7 +149,11 @@ private async Task EvaluateSessionAsync(string sessionId, IReadOnlyList> ExpandToMemoryEntriesAsync(
try
{
- var response = await _llmClient.GetResponseAsync(messages, options);
+ // Detached background work: SaveMemory queues this via Task.Run with no
+ // caller-supplied ct, so the LLM call has no cancellation source. A future
+ // refactor could plumb IHostApplicationLifetime.ApplicationStopping for
+ // graceful shutdown of in-flight extraction; for now the work is tied to
+ // the agent process lifetime.
+ var response = await _llmClient.GetResponseAsync(messages, options, CancellationToken.None);
var raw = response.Text?.Trim() ?? string.Empty;
var json = ExtractJsonArray(raw);
diff --git a/src/RockBot.Skills/SkillTools.cs b/src/RockBot.Skills/SkillTools.cs
index 294acd0..027e30c 100644
--- a/src/RockBot.Skills/SkillTools.cs
+++ b/src/RockBot.Skills/SkillTools.cs
@@ -190,7 +190,12 @@ private async Task GenerateSummaryAsync(string name, string content)
new(ChatRole.User, content)
};
- var response = await _llmClient.GetResponseAsync(messages, new ChatOptions());
+ // Detached background work: skill save queues this via Task.Run with no
+ // caller-supplied ct, so the LLM call has no cancellation source. The
+ // summary refresh is best-effort; if the agent shuts down mid-call the
+ // task is orphaned. A future refactor could use ApplicationStopping.
+ var response = await _llmClient.GetResponseAsync(
+ messages, new ChatOptions(), CancellationToken.None);
var summary = response.Text?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(summary))
diff --git a/tests/RockBot.Host.Tests/LlmGatewayTests.cs b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
index 9620288..10d4f3e 100644
--- a/tests/RockBot.Host.Tests/LlmGatewayTests.cs
+++ b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
@@ -191,6 +191,43 @@ await WaitUntilAsync(
Assert.AreEqual(99, followup);
}
+ [TestMethod]
+ public async Task ExecuteAsync_CancellationWhileInFlight_AbortsAndReleasesSlot()
+ {
+ using var gateway = CreateGateway(low: 1);
+ using var cts = new CancellationTokenSource();
+ var operationStarted = new TaskCompletionSource();
+
+ // Operation respects ct: it parks until ct fires, then throws.
+ var task = gateway.ExecuteAsync(ModelTier.Low, async ct =>
+ {
+ operationStarted.SetResult();
+ await Task.Delay(Timeout.Infinite, ct);
+ return 0;
+ }, cts.Token);
+
+ await operationStarted.Task;
+ await WaitUntilAsync(
+ () => gateway.GetInFlightCount(ModelTier.Low) == 1,
+ TimeSpan.FromSeconds(5));
+
+ cts.Cancel();
+
+ await Assert.ThrowsAsync<TaskCanceledException>(async () => await task);
+
+ // Slot should have been released; in-flight back to zero
+ await WaitUntilAsync(
+ () => gateway.GetInFlightCount(ModelTier.Low) == 0,
+ TimeSpan.FromSeconds(5));
+
+ // And a follow-up call should proceed
+ var followup = await gateway.ExecuteAsync(
+ ModelTier.Low,
+ ct => Task.FromResult(123),
+ CancellationToken.None);
+ Assert.AreEqual(123, followup);
+ }
+
[TestMethod]
public async Task ExecuteAsync_ExceptionInOperation_ReleasesSlot()
{