diff --git a/Directory.Build.props b/Directory.Build.props
index 2648829..395d55d 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -9,7 +9,7 @@
-0.10.34
+0.10.35
diff --git a/src/RockBot.A2A/AgentCardSummarizer.cs b/src/RockBot.A2A/AgentCardSummarizer.cs
index 992967e..5ccf9d4 100644
--- a/src/RockBot.A2A/AgentCardSummarizer.cs
+++ b/src/RockBot.A2A/AgentCardSummarizer.cs
@@ -58,7 +58,7 @@ The summary must give enough detail that another agent can confidently decide "t
""";
var messages = new[] { new ChatMessage(ChatRole.User, prompt) };
- var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, cancellationToken: ct);
+ var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, options: null, cancellationToken: ct);
return response.Text?.Trim() ?? fallback;
}
catch (Exception ex)
diff --git a/src/RockBot.Agent/McpBridge/McpBridgeService.cs b/src/RockBot.Agent/McpBridge/McpBridgeService.cs
index b1b1b2f..8e6c42c 100644
--- a/src/RockBot.Agent/McpBridge/McpBridgeService.cs
+++ b/src/RockBot.Agent/McpBridge/McpBridgeService.cs
@@ -563,7 +563,7 @@ The summary must give enough detail that an agent can confidently decide "this i
""";
var messages = new[] { new ChatMessage(ChatRole.User, prompt) };
- var response = await _llmClient.GetResponseAsync(messages, cancellationToken: ct);
+ var response = await _llmClient.GetResponseAsync(messages, options: null, cancellationToken: ct);
summaryText = response.Text?.Trim();
}
catch (Exception ex)
diff --git a/src/RockBot.Host.Abstractions/ILlmClient.cs b/src/RockBot.Host.Abstractions/ILlmClient.cs
index 6b2ce46..eb0f7af 100644
--- a/src/RockBot.Host.Abstractions/ILlmClient.cs
+++ b/src/RockBot.Host.Abstractions/ILlmClient.cs
@@ -18,17 +18,27 @@ public interface ILlmClient
/// <summary>
/// Calls the LLM using the client.
/// </summary>
+ /// <remarks>
+ /// <paramref name="cancellationToken"/> is mandatory: the gateway uses it to
+ /// drain queued and in-flight calls when the caller is preempted (e.g. when
+ /// a user message cancels the dream cycle). Callers without a natural ct
+ /// MUST pass <see cref="CancellationToken.None"/> explicitly so the choice
+ /// is intentional and visible in code review. See design/llm-gateway.md.
+ /// </remarks>
Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default);
+ ChatOptions? options,
+ CancellationToken cancellationToken);
/// <summary>
/// Calls the LLM using the client for the specified <paramref name="tier"/>.
/// </summary>
+ /// <remarks>
+ /// <paramref name="cancellationToken"/> is mandatory: see the single-arg overload.
+ /// </remarks>
Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
ModelTier tier,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default);
+ ChatOptions? options,
+ CancellationToken cancellationToken);
}
diff --git a/src/RockBot.Host/LlmClient.cs b/src/RockBot.Host/LlmClient.cs
index 0e2aafa..6d24b13 100644
--- a/src/RockBot.Host/LlmClient.cs
+++ b/src/RockBot.Host/LlmClient.cs
@@ -20,16 +20,16 @@ internal sealed class LlmClient(
/// Calls the LLM using the Balanced tier.
public Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default)
+ ChatOptions? options,
+ CancellationToken cancellationToken)
=> GetResponseAsync(messages, ModelTier.Balanced, options, cancellationToken);
/// Calls the LLM using the specified tier, falling back to Balanced on failure for Low/High tiers.
public async Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
ModelTier tier,
- ChatOptions? options = null,
- CancellationToken cancellationToken = default)
+ ChatOptions? options,
+ CancellationToken cancellationToken)
{
try
{
diff --git a/src/RockBot.Host/SessionSummaryService.cs b/src/RockBot.Host/SessionSummaryService.cs
index f42a96d..03b8c65 100644
--- a/src/RockBot.Host/SessionSummaryService.cs
+++ b/src/RockBot.Host/SessionSummaryService.cs
@@ -149,7 +149,11 @@ private async Task EvaluateSessionAsync(string sessionId, IReadOnlyList> ExpandToMemoryEntriesAsync(
try
{
- var response = await _llmClient.GetResponseAsync(messages, options);
+ // Detached background work: SaveMemory queues this via Task.Run with no
+ // caller-supplied ct, so the LLM call has no cancellation source. A future
+ // refactor could plumb IHostApplicationLifetime.ApplicationStopping for
+ // graceful shutdown of in-flight extraction; for now the work is tied to
+ // the agent process lifetime.
+ var response = await _llmClient.GetResponseAsync(messages, options, CancellationToken.None);
var raw = response.Text?.Trim() ?? string.Empty;
var json = ExtractJsonArray(raw);
diff --git a/src/RockBot.Skills/SkillTools.cs b/src/RockBot.Skills/SkillTools.cs
index 294acd0..027e30c 100644
--- a/src/RockBot.Skills/SkillTools.cs
+++ b/src/RockBot.Skills/SkillTools.cs
@@ -190,7 +190,12 @@ private async Task GenerateSummaryAsync(string name, string content)
new(ChatRole.User, content)
};
- var response = await _llmClient.GetResponseAsync(messages, new ChatOptions());
+ // Detached background work: skill save queues this via Task.Run with no
+ // caller-supplied ct, so the LLM call has no cancellation source. The
+ // summary refresh is best-effort; if the agent shuts down mid-call the
+ // task is orphaned. A future refactor could use ApplicationStopping.
+ var response = await _llmClient.GetResponseAsync(
+ messages, new ChatOptions(), CancellationToken.None);
var summary = response.Text?.Trim() ?? string.Empty;
if (string.IsNullOrWhiteSpace(summary))
diff --git a/tests/RockBot.Host.Tests/LlmGatewayTests.cs b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
index 9620288..10d4f3e 100644
--- a/tests/RockBot.Host.Tests/LlmGatewayTests.cs
+++ b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
@@ -191,6 +191,43 @@ await WaitUntilAsync(
Assert.AreEqual(99, followup);
}
+ [TestMethod]
+ public async Task ExecuteAsync_CancellationWhileInFlight_AbortsAndReleasesSlot()
+ {
+ using var gateway = CreateGateway(low: 1);
+ using var cts = new CancellationTokenSource();
+ var operationStarted = new TaskCompletionSource();
+
+ // Operation respects ct: it parks until ct fires, then throws.
+ var task = gateway.ExecuteAsync(ModelTier.Low, async ct =>
+ {
+ operationStarted.SetResult();
+ await Task.Delay(Timeout.Infinite, ct);
+ return 0;
+ }, cts.Token);
+
+ await operationStarted.Task;
+ await WaitUntilAsync(
+ () => gateway.GetInFlightCount(ModelTier.Low) == 1,
+ TimeSpan.FromSeconds(5));
+
+ cts.Cancel();
+
+ await Assert.ThrowsAsync<TaskCanceledException>(async () => await task);
+
+ // Slot should have been released; in-flight back to zero
+ await WaitUntilAsync(
+ () => gateway.GetInFlightCount(ModelTier.Low) == 0,
+ TimeSpan.FromSeconds(5));
+
+ // And a follow-up call should proceed
+ var followup = await gateway.ExecuteAsync(
+ ModelTier.Low,
+ ct => Task.FromResult(123),
+ CancellationToken.None);
+ Assert.AreEqual(123, followup);
+ }
+
[TestMethod]
public async Task ExecuteAsync_ExceptionInOperation_ReleasesSlot()
{