Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/RockBot.Agent/Program.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.ClientModel;
using System.ClientModel.Primitives;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
Expand Down Expand Up @@ -127,7 +128,10 @@ IChatClient BuildOpenAIClient(LlmTierConfig config)
Endpoint = new Uri(config.Endpoint!),
// Extend from the 100s default — subagents with large tool sets generate
// longer responses that can exceed the default before the body is fully read.
NetworkTimeout = TimeSpan.FromMinutes(5)
NetworkTimeout = TimeSpan.FromMinutes(5),
// Disable SDK retry: the LlmGateway owns rate-limit retry policy. Without
// this, gateway and SDK both retry on 429 and silently double-retry.
RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
})
.GetChatClient(config.ModelId!).AsIChatClient();
}
Expand Down
16 changes: 16 additions & 0 deletions src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,20 @@ public sealed class LlmGatewayOptions
/// Expensive judgment calls; lower cap.
/// </summary>
public int HighMaxConcurrent { get; set; } = 2;

/// <summary>
/// Maximum number of retry attempts on rate-limit (HTTP 429) responses before
/// the call surfaces the failure to the caller. Each retry honors any
/// <c>Retry-After</c> response header supplied by the provider; in its absence,
/// exponential backoff (1s, 2s, 4s, 8s, ...) is used, capped by
/// <see cref="MaxBackoffSeconds"/>. Set to zero to disable retry on
/// rate-limit errors entirely.
/// </summary>
public int MaxRateLimitRetries { get; set; } = 5;

/// <summary>
/// Maximum backoff (in seconds) between retry attempts when no
/// <c>Retry-After</c> header is supplied by the provider. Caps the
/// exponential growth of the fallback backoff; has no effect when the
/// provider supplies an explicit <c>Retry-After</c> hint.
/// </summary>
public int MaxBackoffSeconds { get; set; } = 16;
}
67 changes: 67 additions & 0 deletions src/RockBot.Host/DefaultLlmRateLimitClassifier.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
using System.ClientModel;
using System.ClientModel.Primitives;
using System.Globalization;
using System.Net;

namespace RockBot.Host;

/// <summary>
/// Default <see cref="ILlmRateLimitClassifier"/> implementation. Recognizes
/// rate-limit errors surfaced by the OpenAI SDK (<see cref="ClientResultException"/>
/// with status 429) as well as generic <see cref="HttpRequestException"/>s that
/// carry <see cref="HttpStatusCode.TooManyRequests"/>. The full inner-exception
/// chain is examined so wrapped errors are still detected.
/// </summary>
internal sealed class DefaultLlmRateLimitClassifier : ILlmRateLimitClassifier
{
    public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
    {
        retryAfter = null;

        // Walk the chain outermost-first so the first rate-limit shape wins.
        for (var ex = exception; ex is not null; ex = ex.InnerException)
        {
            switch (ex)
            {
                case ClientResultException { Status: 429 } sdkError:
                    retryAfter = ParseRetryAfter(sdkError.GetRawResponse());
                    return true;

                case HttpRequestException { StatusCode: HttpStatusCode.TooManyRequests }:
                    // HttpRequestException exposes no response headers, so a
                    // Retry-After hint cannot be read here; the gateway will
                    // use its exponential-backoff fallback instead.
                    return true;
            }
        }

        return false;
    }

    private static TimeSpan? ParseRetryAfter(PipelineResponse? response)
    {
        if (response is null
            || !response.Headers.TryGetValue("retry-after", out var headerValue)
            || string.IsNullOrEmpty(headerValue))
        {
            return null;
        }

        // Delta-seconds form: a non-negative integer number of seconds.
        if (int.TryParse(headerValue, NumberStyles.Integer, CultureInfo.InvariantCulture, out var deltaSeconds)
            && deltaSeconds >= 0)
        {
            return TimeSpan.FromSeconds(deltaSeconds);
        }

        // HTTP-date form (RFC 7231): convert to a wait relative to now,
        // clamping dates already in the past to zero.
        if (DateTimeOffset.TryParse(headerValue, CultureInfo.InvariantCulture,
                DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                out var retryAt))
        {
            var remaining = retryAt - DateTimeOffset.UtcNow;
            return remaining > TimeSpan.Zero ? remaining : TimeSpan.Zero;
        }

        return null;
    }
}
12 changes: 12 additions & 0 deletions src/RockBot.Host/HostDiagnostics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,18 @@ public static class HostDiagnostics
unit: "ms",
description: "Time spent waiting for a per-tier LLM gateway slot");

/// <summary>
/// Number of rate-limit retries performed by the LLM gateway. Tagged by tier
/// and by retry-after source: <c>"header"</c> when the provider supplied a
/// <c>Retry-After</c> hint that the gateway honored, <c>"backoff"</c> when the
/// gateway fell back to its exponential-backoff schedule. A sustained rise in
/// this counter indicates the provider is throttling the deployment.
/// </summary>
public static readonly Counter<long> LlmGatewayRateLimitRetries =
    Meter.CreateCounter<long>(
        "rockbot.llm.gateway.rate_limit_retries",
        unit: "{retry}",
        description: "Number of rate-limit (429) retries performed by the LLM gateway");

// ── Agent turn metrics — recorded at architectural boundaries ─────────────

/// <summary>Duration from user message receipt to final reply published.</summary>
Expand Down
22 changes: 22 additions & 0 deletions src/RockBot.Host/ILlmRateLimitClassifier.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
namespace RockBot.Host;

/// <summary>
/// Classifies exceptions thrown by underlying LLM SDK calls to determine whether
/// they represent a rate-limit (HTTP 429) condition that the gateway should retry.
/// </summary>
/// <remarks>
/// Implementations walk the exception chain to find a rate-limit indicator and,
/// where possible, extract the provider's <c>Retry-After</c> hint so the gateway
/// can honor it precisely instead of falling back to exponential backoff.
/// Pluggable so different providers (OpenAI, Anthropic-direct, Copilot, etc.)
/// can surface their own rate-limit shapes.
/// </remarks>
internal interface ILlmRateLimitClassifier
{
    /// <summary>
    /// Returns <c>true</c> if <paramref name="exception"/> indicates a rate-limit
    /// condition that should be retried. <paramref name="retryAfter"/> is set to
    /// the provider-supplied wait duration when available.
    /// </summary>
    /// <param name="exception">The exception thrown by the underlying SDK call; its inner-exception chain is also examined.</param>
    /// <param name="retryAfter">The provider-supplied <c>Retry-After</c> duration, or <c>null</c> when none was available (callers fall back to their own backoff).</param>
    bool TryClassify(Exception exception, out TimeSpan? retryAfter);
}
89 changes: 85 additions & 4 deletions src/RockBot.Host/LlmGateway.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,15 @@ namespace RockBot.Host;
internal sealed class LlmGateway : ILlmGateway, IDisposable
{
private readonly TierSlot[] _slots;
private readonly ILlmRateLimitClassifier _classifier;
private readonly ILogger<LlmGateway> _logger;
private readonly int _maxRetries;
private readonly int _maxBackoffSeconds;

public LlmGateway(IOptions<LlmGatewayOptions> options, ILogger<LlmGateway> logger)
public LlmGateway(
IOptions<LlmGatewayOptions> options,
ILlmRateLimitClassifier classifier,
ILogger<LlmGateway> logger)
{
var opts = options.Value;

Expand All @@ -53,11 +59,26 @@ public LlmGateway(IOptions<LlmGatewayOptions> options, ILogger<LlmGateway> logge
_slots[(int)tier] = new TierSlot(cap);
}

if (opts.MaxRateLimitRetries < 0)
throw new ArgumentOutOfRangeException(
nameof(options),
$"LlmGatewayOptions.MaxRateLimitRetries must be >= 0 (was {opts.MaxRateLimitRetries}).");

if (opts.MaxBackoffSeconds < 1)
throw new ArgumentOutOfRangeException(
nameof(options),
$"LlmGatewayOptions.MaxBackoffSeconds must be >= 1 (was {opts.MaxBackoffSeconds}).");

_classifier = classifier;
_logger = logger;
_maxRetries = opts.MaxRateLimitRetries;
_maxBackoffSeconds = opts.MaxBackoffSeconds;

_logger.LogInformation(
"LlmGateway: per-tier concurrency caps Low={Low} Balanced={Balanced} High={High}",
opts.LowMaxConcurrent, opts.BalancedMaxConcurrent, opts.HighMaxConcurrent);
"LlmGateway: per-tier concurrency caps Low={Low} Balanced={Balanced} High={High}, " +
"rate-limit retries Max={MaxRetries} backoff cap={MaxBackoff}s",
opts.LowMaxConcurrent, opts.BalancedMaxConcurrent, opts.HighMaxConcurrent,
_maxRetries, _maxBackoffSeconds);
}

/// <summary>
Expand Down Expand Up @@ -99,7 +120,8 @@ public async Task<T> ExecuteAsync<T>(
Interlocked.Increment(ref slot.InFlight);
try
{
return await operation(cancellationToken).ConfigureAwait(false);
return await ExecuteWithRetryAsync(tier, tierTag, operation, cancellationToken)
.ConfigureAwait(false);
}
finally
{
Expand All @@ -108,6 +130,65 @@ public async Task<T> ExecuteAsync<T>(
}
}

/// <summary>
/// Invokes <paramref name="operation"/> and, on rate-limit (HTTP 429) failures
/// identified by the injected classifier, retries up to <c>MaxRateLimitRetries</c>
/// times. The tier slot is held throughout — releasing it during retry waits
/// does not help, since rate limits are per-tier, so any other call in the same
/// tier would hit the same limit.
/// </summary>
/// <param name="tier">Model tier whose slot is held; used for logging and metric tags.</param>
/// <param name="tierTag">Pre-built metric tag identifying the tier.</param>
/// <param name="operation">The LLM call to execute; receives the caller's cancellation token.</param>
/// <param name="cancellationToken">Aborts both the operation and any retry wait.</param>
private async Task<T> ExecuteWithRetryAsync<T>(
    ModelTier tier,
    KeyValuePair<string, object?> tierTag,
    Func<CancellationToken, Task<T>> operation,
    CancellationToken cancellationToken)
{
    var attempt = 0;
    while (true)
    {
        try
        {
            return await operation(cancellationToken).ConfigureAwait(false);
        }
        catch (Exception ex) when (
            attempt < _maxRetries
            && !cancellationToken.IsCancellationRequested
            && _classifier.TryClassify(ex, out var classifierRetryAfter))
        {
            attempt++;

            // Prefer the provider's explicit Retry-After hint; otherwise fall
            // back to capped exponential backoff.
            var source = classifierRetryAfter.HasValue ? "header" : "backoff";
            var wait = classifierRetryAfter ?? ComputeBackoff(attempt);

            _logger.LogWarning(
                "LlmGateway: rate-limit on tier {Tier} (attempt {Attempt}/{Max}); " +
                "waiting {WaitSeconds}s ({Source}) before retry",
                tier, attempt, _maxRetries, wait.TotalSeconds, source);

            HostDiagnostics.LlmGatewayRateLimitRetries.Add(
                1,
                tierTag,
                new KeyValuePair<string, object?>("rockbot.llm.gateway.retry_after_source", source));

            // If cancellation fires during the wait, Task.Delay throws
            // OperationCanceledException, which propagates to the caller.
            // (A catch-and-rethrow here would be a no-op.)
            await Task.Delay(wait, cancellationToken).ConfigureAwait(false);
        }
    }
}

/// <summary>
/// Fallback wait used when the provider supplies no <c>Retry-After</c> hint:
/// 2^(attempt−1) seconds — 1s, 2s, 4s, 8s, ... — never exceeding the
/// configured <c>MaxBackoffSeconds</c> cap. <paramref name="attempt"/> is 1-based.
/// </summary>
private TimeSpan ComputeBackoff(int attempt)
{
    // Math.Pow avoids integer-shift overflow for large attempt counts.
    var uncappedSeconds = Math.Pow(2, attempt - 1);
    var cappedSeconds = Math.Min(uncappedSeconds, _maxBackoffSeconds);
    return TimeSpan.FromSeconds(cappedSeconds);
}

public void Dispose()
{
foreach (var slot in _slots)
Expand Down
1 change: 1 addition & 0 deletions src/RockBot.Host/ServiceCollectionExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public static IServiceCollection AddRockBotHost(
services.AddSingleton<LlmCostEstimator>();
services.Configure<LlmPricingOptions>(_ => { });
services.Configure<LlmGatewayOptions>(_ => { });
services.AddSingleton<ILlmRateLimitClassifier, DefaultLlmRateLimitClassifier>();
services.AddSingleton<ILlmGateway, LlmGateway>();
services.AddTransient<ILlmClient, LlmClient>();
services.AddSingleton<IToolProgressNotifier, ToolProgressNotifier>();
Expand Down
9 changes: 8 additions & 1 deletion src/RockBot.ResearchAgent/Program.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.ClientModel;
using System.ClientModel.Primitives;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
Expand Down Expand Up @@ -49,7 +50,13 @@ IChatClient BuildClient(LlmTierConfig config)
{
return new OpenAIClient(
new ApiKeyCredential(config.ApiKey!),
new OpenAIClientOptions { Endpoint = new Uri(config.Endpoint!) })
new OpenAIClientOptions
{
Endpoint = new Uri(config.Endpoint!),
// Disable SDK retry: the LlmGateway owns rate-limit retry policy.
// Without this, gateway and SDK both retry on 429 and silently double-retry.
RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
})
.GetChatClient(config.ModelId!).AsIChatClient();
}

Expand Down
8 changes: 7 additions & 1 deletion src/RockBot.SampleAgent/Program.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.ClientModel;
using System.ClientModel.Primitives;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
Expand Down Expand Up @@ -26,7 +27,12 @@
{
var openAiClient = new OpenAIClient(
new ApiKeyCredential(apiKey),
new OpenAIClientOptions { Endpoint = new Uri(endpoint) });
new OpenAIClientOptions
{
Endpoint = new Uri(endpoint),
// Disable SDK retry: the LlmGateway owns rate-limit retry policy.
RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
});

builder.Services.AddRockBotChatClient(
openAiClient.GetChatClient(modelId).AsIChatClient());
Expand Down
Loading
Loading