diff --git a/Directory.Build.props b/Directory.Build.props index 2648829..395d55d 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -9,7 +9,7 @@ -0.10.34 +0.10.35 diff --git a/src/RockBot.A2A/AgentCardSummarizer.cs b/src/RockBot.A2A/AgentCardSummarizer.cs index 992967e..5ccf9d4 100644 --- a/src/RockBot.A2A/AgentCardSummarizer.cs +++ b/src/RockBot.A2A/AgentCardSummarizer.cs @@ -58,7 +58,7 @@ The summary must give enough detail that another agent can confidently decide "t """; var messages = new[] { new ChatMessage(ChatRole.User, prompt) }; - var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, cancellationToken: ct); + var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, options: null, cancellationToken: ct); return response.Text?.Trim() ?? fallback; } catch (Exception ex) diff --git a/src/RockBot.Agent/McpBridge/McpBridgeService.cs b/src/RockBot.Agent/McpBridge/McpBridgeService.cs index b1b1b2f..8e6c42c 100644 --- a/src/RockBot.Agent/McpBridge/McpBridgeService.cs +++ b/src/RockBot.Agent/McpBridge/McpBridgeService.cs @@ -563,7 +563,7 @@ The summary must give enough detail that an agent can confidently decide "this i """; var messages = new[] { new ChatMessage(ChatRole.User, prompt) }; - var response = await _llmClient.GetResponseAsync(messages, cancellationToken: ct); + var response = await _llmClient.GetResponseAsync(messages, options: null, cancellationToken: ct); summaryText = response.Text?.Trim(); } catch (Exception ex) diff --git a/src/RockBot.Host.Abstractions/ILlmClient.cs b/src/RockBot.Host.Abstractions/ILlmClient.cs index 6b2ce46..eb0f7af 100644 --- a/src/RockBot.Host.Abstractions/ILlmClient.cs +++ b/src/RockBot.Host.Abstractions/ILlmClient.cs @@ -18,17 +18,27 @@ public interface ILlmClient /// /// Calls the LLM using the client. /// + /// + /// is mandatory: the gateway uses it to + /// drain queued and in-flight calls when the caller is preempted (e.g. when + /// a user message cancels the dream cycle). Callers without a natural ct + /// MUST pass explicitly so the choice + /// is intentional and visible in code review. See design/llm-gateway.md. + /// Task GetResponseAsync( IEnumerable messages, - ChatOptions? options = null, - CancellationToken cancellationToken = default); + ChatOptions? options, + CancellationToken cancellationToken); /// /// Calls the LLM using the client for the specified . /// + /// + /// is mandatory: see the single-arg overload. + /// Task GetResponseAsync( IEnumerable messages, ModelTier tier, - ChatOptions? options = null, - CancellationToken cancellationToken = default); + ChatOptions? options, + CancellationToken cancellationToken); } diff --git a/src/RockBot.Host/LlmClient.cs b/src/RockBot.Host/LlmClient.cs index 0e2aafa..6d24b13 100644 --- a/src/RockBot.Host/LlmClient.cs +++ b/src/RockBot.Host/LlmClient.cs @@ -20,16 +20,16 @@ internal sealed class LlmClient( /// Calls the LLM using the Balanced tier. public Task GetResponseAsync( IEnumerable messages, - ChatOptions? options = null, - CancellationToken cancellationToken = default) + ChatOptions? options, + CancellationToken cancellationToken) => GetResponseAsync(messages, ModelTier.Balanced, options, cancellationToken); /// Calls the LLM using the specified tier, falling back to Balanced on failure for Low/High tiers. public async Task GetResponseAsync( IEnumerable messages, ModelTier tier, - ChatOptions? options = null, - CancellationToken cancellationToken = default) + ChatOptions? options, + CancellationToken cancellationToken) { try { diff --git a/src/RockBot.Host/SessionSummaryService.cs b/src/RockBot.Host/SessionSummaryService.cs index f42a96d..03b8c65 100644 --- a/src/RockBot.Host/SessionSummaryService.cs +++ b/src/RockBot.Host/SessionSummaryService.cs @@ -149,7 +149,11 @@ private async Task EvaluateSessionAsync(string sessionId, IReadOnlyList> ExpandToMemoryEntriesAsync( try { - var response = await _llmClient.GetResponseAsync(messages, options); + // Detached background work: SaveMemory queues this via Task.Run with no + // caller-supplied ct, so the LLM call has no cancellation source. A future + // refactor could plumb IHostApplicationLifetime.ApplicationStopping for + // graceful shutdown of in-flight extraction; for now the work is tied to + // the agent process lifetime. + var response = await _llmClient.GetResponseAsync(messages, options, CancellationToken.None); var raw = response.Text?.Trim() ?? string.Empty; var json = ExtractJsonArray(raw); diff --git a/src/RockBot.Skills/SkillTools.cs b/src/RockBot.Skills/SkillTools.cs index 294acd0..027e30c 100644 --- a/src/RockBot.Skills/SkillTools.cs +++ b/src/RockBot.Skills/SkillTools.cs @@ -190,7 +190,12 @@ private async Task GenerateSummaryAsync(string name, string content) new(ChatRole.User, content) }; - var response = await _llmClient.GetResponseAsync(messages, new ChatOptions()); + // Detached background work: skill save queues this via Task.Run with no + // caller-supplied ct, so the LLM call has no cancellation source. The + // summary refresh is best-effort; if the agent shuts down mid-call the + // task is orphaned. A future refactor could use ApplicationStopping. + var response = await _llmClient.GetResponseAsync( + messages, new ChatOptions(), CancellationToken.None); var summary = response.Text?.Trim() ?? string.Empty; if (string.IsNullOrWhiteSpace(summary)) diff --git a/tests/RockBot.Host.Tests/LlmGatewayTests.cs b/tests/RockBot.Host.Tests/LlmGatewayTests.cs index 9620288..10d4f3e 100644 --- a/tests/RockBot.Host.Tests/LlmGatewayTests.cs +++ b/tests/RockBot.Host.Tests/LlmGatewayTests.cs @@ -191,6 +191,43 @@ await WaitUntilAsync( Assert.AreEqual(99, followup); } + [TestMethod] + public async Task ExecuteAsync_CancellationWhileInFlight_AbortsAndReleasesSlot() + { + using var gateway = CreateGateway(low: 1); + using var cts = new CancellationTokenSource(); + var operationStarted = new TaskCompletionSource(); + + // Operation respects ct: it parks until ct fires, then throws. + var task = gateway.ExecuteAsync(ModelTier.Low, async ct => + { + operationStarted.SetResult(); + await Task.Delay(Timeout.Infinite, ct); + return 0; + }, cts.Token); + + await operationStarted.Task; + await WaitUntilAsync( + () => gateway.GetInFlightCount(ModelTier.Low) == 1, + TimeSpan.FromSeconds(5)); + + cts.Cancel(); + + await Assert.ThrowsAsync(async () => await task); + + // Slot should have been released; in-flight back to zero + await WaitUntilAsync( + () => gateway.GetInFlightCount(ModelTier.Low) == 0, + TimeSpan.FromSeconds(5)); + + // And a follow-up call should proceed + var followup = await gateway.ExecuteAsync( + ModelTier.Low, + ct => Task.FromResult(123), + CancellationToken.None); + Assert.AreEqual(123, followup); + } + [TestMethod] public async Task ExecuteAsync_ExceptionInOperation_ReleasesSlot() {