From 4a79172753237a174b511dbe9a032954c6908c67 Mon Sep 17 00:00:00 2001 From: Rockford Lhotka Date: Thu, 7 May 2026 20:37:54 -0500 Subject: [PATCH 1/2] LLM gateway: ct audit + mandatory ct on ILlmClient (phase 4 of #352) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final phase: makes the cancellation contract enforceable at compile time and fixes the call sites that were silently dropping ct. Audit results across the codebase: 16 ILlmClient.GetResponseAsync call sites. 13 already passed ct correctly. 3 violations found: - src/RockBot.Memory/MemoryTools.cs:352 (background SaveMemory worker via Task.Run — no caller-supplied ct) - src/RockBot.Skills/SkillTools.cs:193 (background skill summary generation via Task.Run — no caller-supplied ct) - src/RockBot.Host/SessionSummaryService.cs:152 (timer-driven evaluation — no caller-supplied ct) All three are detached background work. They now pass CancellationToken.None explicitly with comments documenting the design choice and a note that future work could plumb IHostApplicationLifetime.ApplicationStopping for graceful shutdown of in-flight calls. The audit forces this to be intentional rather than accidental. Compile-time enforcement: - ILlmClient.GetResponseAsync (both overloads) drop the "= default" on cancellationToken. Callers must now pass it explicitly; the compiler catches future violations. - LlmClient implementation matched (defaults removed for consistency). - Two existing call sites (AgentCardSummarizer, McpBridgeService) used the named-arg pattern `cancellationToken: ct` which silently picked up the default for `options`; these are made explicit with `options: null`. Tests: - Adds ExecuteAsync_CancellationWhileInFlight_AbortsAndReleasesSlot, closing the in-flight cancellation gap the design called out for this phase. Verifies that cancelling ct mid-operation aborts, releases the slot, and that follow-up calls succeed (no leaked slots). - 13 LlmGatewayTests pass; 629 Host tests pass; full solution test run clean. This completes #352. Phase 1 (concurrency cap), phase 3 (bounded queue), and phase 4 (ct audit + mandatory ct) all landed; phase 2 (gateway retry) was dropped in favor of keeping SDK retry enabled. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/RockBot.A2A/AgentCardSummarizer.cs | 2 +- .../McpBridge/McpBridgeService.cs | 2 +- src/RockBot.Host.Abstractions/ILlmClient.cs | 18 +++++++-- src/RockBot.Host/LlmClient.cs | 8 ++-- src/RockBot.Host/SessionSummaryService.cs | 6 ++- src/RockBot.Memory/MemoryTools.cs | 7 +++- src/RockBot.Skills/SkillTools.cs | 7 +++- tests/RockBot.Host.Tests/LlmGatewayTests.cs | 37 +++++++++++++++++++ 8 files changed, 74 insertions(+), 13 deletions(-) diff --git a/src/RockBot.A2A/AgentCardSummarizer.cs b/src/RockBot.A2A/AgentCardSummarizer.cs index 992967ec..5ccf9d43 100644 --- a/src/RockBot.A2A/AgentCardSummarizer.cs +++ b/src/RockBot.A2A/AgentCardSummarizer.cs @@ -58,7 +58,7 @@ The summary must give enough detail that another agent can confidently decide "t """; var messages = new[] { new ChatMessage(ChatRole.User, prompt) }; - var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, cancellationToken: ct); + var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, options: null, cancellationToken: ct); return response.Text?.Trim() ?? fallback; } catch (Exception ex) diff --git a/src/RockBot.Agent/McpBridge/McpBridgeService.cs b/src/RockBot.Agent/McpBridge/McpBridgeService.cs index b1b1b2f3..8e6c42c5 100644 --- a/src/RockBot.Agent/McpBridge/McpBridgeService.cs +++ b/src/RockBot.Agent/McpBridge/McpBridgeService.cs @@ -563,7 +563,7 @@ The summary must give enough detail that an agent can confidently decide "this i """; var messages = new[] { new ChatMessage(ChatRole.User, prompt) }; - var response = await _llmClient.GetResponseAsync(messages, cancellationToken: ct); + var response = await _llmClient.GetResponseAsync(messages, options: null, cancellationToken: ct); summaryText = response.Text?.Trim(); } catch (Exception ex) diff --git a/src/RockBot.Host.Abstractions/ILlmClient.cs b/src/RockBot.Host.Abstractions/ILlmClient.cs index 6b2ce467..eb0f7afc 100644 --- a/src/RockBot.Host.Abstractions/ILlmClient.cs +++ b/src/RockBot.Host.Abstractions/ILlmClient.cs @@ -18,17 +18,27 @@ public interface ILlmClient /// /// Calls the LLM using the client. /// + /// + /// is mandatory: the gateway uses it to + /// drain queued and in-flight calls when the caller is preempted (e.g. when + /// a user message cancels the dream cycle). Callers without a natural ct + /// MUST pass explicitly so the choice + /// is intentional and visible in code review. See design/llm-gateway.md. + /// Task GetResponseAsync( IEnumerable messages, - ChatOptions? options = null, - CancellationToken cancellationToken = default); + ChatOptions? options, + CancellationToken cancellationToken); /// /// Calls the LLM using the client for the specified . /// + /// + /// is mandatory: see the single-arg overload. + /// Task GetResponseAsync( IEnumerable messages, ModelTier tier, - ChatOptions? options = null, - CancellationToken cancellationToken = default); + ChatOptions? options, + CancellationToken cancellationToken); } diff --git a/src/RockBot.Host/LlmClient.cs b/src/RockBot.Host/LlmClient.cs index 0e2aafaa..6d24b134 100644 --- a/src/RockBot.Host/LlmClient.cs +++ b/src/RockBot.Host/LlmClient.cs @@ -20,16 +20,16 @@ internal sealed class LlmClient( /// Calls the LLM using the Balanced tier. public Task GetResponseAsync( IEnumerable messages, - ChatOptions? options = null, - CancellationToken cancellationToken = default) + ChatOptions? options, + CancellationToken cancellationToken) => GetResponseAsync(messages, ModelTier.Balanced, options, cancellationToken); /// Calls the LLM using the specified tier, falling back to Balanced on failure for Low/High tiers. public async Task GetResponseAsync( IEnumerable messages, ModelTier tier, - ChatOptions? options = null, - CancellationToken cancellationToken = default) + ChatOptions? options, + CancellationToken cancellationToken) { try { diff --git a/src/RockBot.Host/SessionSummaryService.cs b/src/RockBot.Host/SessionSummaryService.cs index f42a96d7..03b8c65f 100644 --- a/src/RockBot.Host/SessionSummaryService.cs +++ b/src/RockBot.Host/SessionSummaryService.cs @@ -149,7 +149,11 @@ private async Task EvaluateSessionAsync(string sessionId, IReadOnlyList> ExpandToMemoryEntriesAsync( try { - var response = await _llmClient.GetResponseAsync(messages, options); + // Detached background work: SaveMemory queues this via Task.Run with no + // caller-supplied ct, so the LLM call has no cancellation source. A future + // refactor could plumb IHostApplicationLifetime.ApplicationStopping for + // graceful shutdown of in-flight extraction; for now the work is tied to + // the agent process lifetime. + var response = await _llmClient.GetResponseAsync(messages, options, CancellationToken.None); var raw = response.Text?.Trim() ?? string.Empty; var json = ExtractJsonArray(raw); diff --git a/src/RockBot.Skills/SkillTools.cs b/src/RockBot.Skills/SkillTools.cs index 294acd07..027e30cc 100644 --- a/src/RockBot.Skills/SkillTools.cs +++ b/src/RockBot.Skills/SkillTools.cs @@ -190,7 +190,12 @@ private async Task GenerateSummaryAsync(string name, string content) new(ChatRole.User, content) }; - var response = await _llmClient.GetResponseAsync(messages, new ChatOptions()); + // Detached background work: skill save queues this via Task.Run with no + // caller-supplied ct, so the LLM call has no cancellation source. The + // summary refresh is best-effort; if the agent shuts down mid-call the + // task is orphaned. A future refactor could use ApplicationStopping. + var response = await _llmClient.GetResponseAsync( + messages, new ChatOptions(), CancellationToken.None); var summary = response.Text?.Trim() ?? string.Empty; if (string.IsNullOrWhiteSpace(summary)) diff --git a/tests/RockBot.Host.Tests/LlmGatewayTests.cs b/tests/RockBot.Host.Tests/LlmGatewayTests.cs index 96202885..10d4f3e6 100644 --- a/tests/RockBot.Host.Tests/LlmGatewayTests.cs +++ b/tests/RockBot.Host.Tests/LlmGatewayTests.cs @@ -191,6 +191,43 @@ await WaitUntilAsync( Assert.AreEqual(99, followup); } + [TestMethod] + public async Task ExecuteAsync_CancellationWhileInFlight_AbortsAndReleasesSlot() + { + using var gateway = CreateGateway(low: 1); + using var cts = new CancellationTokenSource(); + var operationStarted = new TaskCompletionSource(); + + // Operation respects ct: it parks until ct fires, then throws. + var task = gateway.ExecuteAsync(ModelTier.Low, async ct => + { + operationStarted.SetResult(); + await Task.Delay(Timeout.Infinite, ct); + return 0; + }, cts.Token); + + await operationStarted.Task; + await WaitUntilAsync( + () => gateway.GetInFlightCount(ModelTier.Low) == 1, + TimeSpan.FromSeconds(5)); + + cts.Cancel(); + + await Assert.ThrowsAsync(async () => await task); + + // Slot should have been released; in-flight back to zero + await WaitUntilAsync( + () => gateway.GetInFlightCount(ModelTier.Low) == 0, + TimeSpan.FromSeconds(5)); + + // And a follow-up call should proceed + var followup = await gateway.ExecuteAsync( + ModelTier.Low, + ct => Task.FromResult(123), + CancellationToken.None); + Assert.AreEqual(123, followup); + } + [TestMethod] public async Task ExecuteAsync_ExceptionInOperation_ReleasesSlot() { From c965a1e206afdf43c0c24f7650f7e32efd891a81 Mon Sep 17 00:00:00 2001 From: Rockford Lhotka Date: Thu, 7 May 2026 20:45:13 -0500 Subject: [PATCH 2/2] Bump version to 0.10.35 for K8s test deploy of gateway ct-audit branch Co-Authored-By: Claude Opus 4.7 (1M context) --- Directory.Build.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Directory.Build.props b/Directory.Build.props index 26488294..395d55d8 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -9,7 +9,7 @@ -0.10.34 +0.10.35