Merged
2 changes: 1 addition & 1 deletion Directory.Build.props
@@ -9,7 +9,7 @@
<!-- Version can be overridden from the command line: -p:Version=0.3.1
AssemblyVersion and FileVersion are derived automatically by the SDK
(prerelease suffixes like -beta001 are stripped for assembly versions). -->
-<Version>0.10.34</Version>
+<Version>0.10.35</Version>
</PropertyGroup>

<!-- NuGet package metadata (shared across all packable projects) -->
2 changes: 1 addition & 1 deletion src/RockBot.A2A/AgentCardSummarizer.cs
@@ -58,7 +58,7 @@ The summary must give enough detail that another agent can confidently decide "t
""";

var messages = new[] { new ChatMessage(ChatRole.User, prompt) };
-var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, cancellationToken: ct);
+var response = await llmClient.GetResponseAsync(messages, ModelTier.Low, options: null, cancellationToken: ct);
return response.Text?.Trim() ?? fallback;
}
catch (Exception ex)
2 changes: 1 addition & 1 deletion src/RockBot.Agent/McpBridge/McpBridgeService.cs
@@ -563,7 +563,7 @@ The summary must give enough detail that an agent can confidently decide "this i
""";

var messages = new[] { new ChatMessage(ChatRole.User, prompt) };
-var response = await _llmClient.GetResponseAsync(messages, cancellationToken: ct);
+var response = await _llmClient.GetResponseAsync(messages, options: null, cancellationToken: ct);
summaryText = response.Text?.Trim();
}
catch (Exception ex)
18 changes: 14 additions & 4 deletions src/RockBot.Host.Abstractions/ILlmClient.cs
@@ -18,17 +18,27 @@ public interface ILlmClient
/// <summary>
/// Calls the LLM using the <see cref="ModelTier.Balanced"/> client.
/// </summary>
+/// <remarks>
+/// <paramref name="cancellationToken"/> is mandatory: the gateway uses it to
+/// drain queued and in-flight calls when the caller is preempted (e.g. when
+/// a user message cancels the dream cycle). Callers without a natural ct
+/// MUST pass <see cref="CancellationToken.None"/> explicitly so the choice
+/// is intentional and visible in code review. See <c>design/llm-gateway.md</c>.
+/// </remarks>
Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
-ChatOptions? options = null,
-CancellationToken cancellationToken = default);
+ChatOptions? options,
+CancellationToken cancellationToken);

/// <summary>
/// Calls the LLM using the client for the specified <paramref name="tier"/>.
/// </summary>
+/// <remarks>
+/// <paramref name="cancellationToken"/> is mandatory: see the remarks on the tierless overload above.
+/// </remarks>
Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
ModelTier tier,
-ChatOptions? options = null,
-CancellationToken cancellationToken = default);
+ChatOptions? options,
+CancellationToken cancellationToken);
}
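
To make the new contract concrete, a caller-side sketch (not part of this diff — ExampleCaller is hypothetical; ILlmClient is the interface above, and ChatMessage, ChatRole, ChatResponse come from Microsoft.Extensions.AI):

using Microsoft.Extensions.AI;

// Sketch only: the two call shapes the mandatory token parameter forces.
public sealed class ExampleCaller(ILlmClient llm)
{
    // Request-scoped work: flow the caller's token so the gateway can drain
    // this call if the caller is preempted.
    public async Task<string> SummarizeAsync(string text, CancellationToken ct)
    {
        var messages = new[] { new ChatMessage(ChatRole.User, text) };
        var response = await llm.GetResponseAsync(messages, options: null, ct);
        return response.Text?.Trim() ?? string.Empty;
    }

    // Background work with no natural token: CancellationToken.None must now
    // be written out, making the choice visible in code review.
    public Task<ChatResponse> RefreshAsync(IEnumerable<ChatMessage> messages)
        => llm.GetResponseAsync(messages, options: null, CancellationToken.None);
}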
8 changes: 4 additions & 4 deletions src/RockBot.Host/LlmClient.cs
@@ -20,16 +20,16 @@ internal sealed class LlmClient(
/// <summary>Calls the LLM using the Balanced tier.</summary>
public Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
-ChatOptions? options = null,
-CancellationToken cancellationToken = default)
+ChatOptions? options,
+CancellationToken cancellationToken)
=> GetResponseAsync(messages, ModelTier.Balanced, options, cancellationToken);

/// <summary>Calls the LLM using the specified tier, falling back to Balanced on failure for Low/High tiers.</summary>
public async Task<ChatResponse> GetResponseAsync(
IEnumerable<ChatMessage> messages,
ModelTier tier,
-ChatOptions? options = null,
-CancellationToken cancellationToken = default)
+ChatOptions? options,
+CancellationToken cancellationToken)
{
try
{
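
The tiered overload's body is collapsed above; per its summary, Low/High-tier failures fall back to Balanced. A sketch of that shape under stated assumptions — _gateway, GetClient and the exact catch filter are guesses, not the actual implementation:

// Assumed shape of the collapsed method body, for illustration only.
try
{
    return await _gateway.ExecuteAsync(
        tier,
        ct => GetClient(tier).GetResponseAsync(messages, options, ct),
        cancellationToken);
}
catch (Exception ex) when (tier != ModelTier.Balanced &&
                           ex is not OperationCanceledException)
{
    // Retry once on Balanced; cancellation is deliberately not swallowed.
    return await _gateway.ExecuteAsync(
        ModelTier.Balanced,
        ct => GetClient(ModelTier.Balanced).GetResponseAsync(messages, options, ct),
        cancellationToken);
}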
6 changes: 5 additions & 1 deletion src/RockBot.Host/SessionSummaryService.cs
@@ -149,7 +149,11 @@ private async Task EvaluateSessionAsync(string sessionId, IReadOnlyList<Conversa

try
{
-var response = await _llmClient.GetResponseAsync(messages, new ChatOptions());
+// Timer-driven background evaluation: no caller-supplied ct. A future
+// refactor could expose IHostApplicationLifetime.ApplicationStopping so
+// agent shutdown cancels in-flight evaluation. For now the work is tied
+// to the agent process lifetime.
+var response = await _llmClient.GetResponseAsync(messages, new ChatOptions(), CancellationToken.None);
var raw = response.Text?.Trim() ?? string.Empty;
var json = ExtractJsonObject(raw);

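
The comment above names the likely future cancellation source. A sketch of that refactor — hypothetical and not in this PR, though IHostApplicationLifetime.ApplicationStopping is the real Microsoft.Extensions.Hosting token that fires when graceful shutdown begins:

using Microsoft.Extensions.AI;
using Microsoft.Extensions.Hosting;

// Hypothetical refactor: inject the host lifetime so timer-driven evaluation
// is drained on shutdown instead of running to an orphaned completion.
internal sealed class SessionSummaryService(
    ILlmClient llmClient,
    IHostApplicationLifetime lifetime)
{
    private async Task EvaluateAsync(IReadOnlyList<ChatMessage> messages)
    {
        var response = await llmClient.GetResponseAsync(
            messages, new ChatOptions(), lifetime.ApplicationStopping);
        // ... parse response.Text as before ...
    }
}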
7 changes: 6 additions & 1 deletion src/RockBot.Memory/MemoryTools.cs
@@ -349,7 +349,12 @@ private async Task<List<MemoryEntry>> ExpandToMemoryEntriesAsync(

try
{
-var response = await _llmClient.GetResponseAsync(messages, options);
+// Detached background work: SaveMemory queues this via Task.Run with no
+// caller-supplied ct, so the LLM call has no cancellation source. A future
+// refactor could plumb IHostApplicationLifetime.ApplicationStopping for
+// graceful shutdown of in-flight extraction; for now the work is tied to
+// the agent process lifetime.
+var response = await _llmClient.GetResponseAsync(messages, options, CancellationToken.None);
var raw = response.Text?.Trim() ?? string.Empty;
var json = ExtractJsonArray(raw);

7 changes: 6 additions & 1 deletion src/RockBot.Skills/SkillTools.cs
@@ -190,7 +190,12 @@ private async Task GenerateSummaryAsync(string name, string content)
new(ChatRole.User, content)
};

-var response = await _llmClient.GetResponseAsync(messages, new ChatOptions());
+// Detached background work: skill save queues this via Task.Run with no
+// caller-supplied ct, so the LLM call has no cancellation source. The
+// summary refresh is best-effort; if the agent shuts down mid-call the
+// task is orphaned. A future refactor could use ApplicationStopping.
+var response = await _llmClient.GetResponseAsync(
+messages, new ChatOptions(), CancellationToken.None);
var summary = response.Text?.Trim() ?? string.Empty;

if (string.IsNullOrWhiteSpace(summary))
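
For context on "queues this via Task.Run": a sketch of the detached call site the comments in MemoryTools and SkillTools describe (the real save path may differ):

// The save returns immediately; the refresh runs detached with no caller
// token, which is why the LLM call above passes CancellationToken.None.
_ = Task.Run(() => GenerateSummaryAsync(name, content));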
37 changes: 37 additions & 0 deletions tests/RockBot.Host.Tests/LlmGatewayTests.cs
@@ -191,6 +191,43 @@ await WaitUntilAsync(
Assert.AreEqual(99, followup);
}

+[TestMethod]
+public async Task ExecuteAsync_CancellationWhileInFlight_AbortsAndReleasesSlot()
+{
+using var gateway = CreateGateway(low: 1);
+using var cts = new CancellationTokenSource();
+var operationStarted = new TaskCompletionSource();
+
+// Operation respects ct: it parks until ct fires, then throws.
+var task = gateway.ExecuteAsync<int>(ModelTier.Low, async ct =>
+{
+operationStarted.SetResult();
+await Task.Delay(Timeout.Infinite, ct);
+return 0;
+}, cts.Token);
+
+await operationStarted.Task;
+await WaitUntilAsync(
+() => gateway.GetInFlightCount(ModelTier.Low) == 1,
+TimeSpan.FromSeconds(5));
+
+cts.Cancel();
+
+await Assert.ThrowsAsync<OperationCanceledException>(async () => await task);
+
+// Slot should have been released; in-flight back to zero
+await WaitUntilAsync(
+() => gateway.GetInFlightCount(ModelTier.Low) == 0,
+TimeSpan.FromSeconds(5));
+
+// And a follow-up call should proceed
+var followup = await gateway.ExecuteAsync(
+ModelTier.Low,
+ct => Task.FromResult(123),
+CancellationToken.None);
+Assert.AreEqual(123, followup);
+}
+
[TestMethod]
public async Task ExecuteAsync_ExceptionInOperation_ReleasesSlot()
{