diff --git a/src/RockBot.Agent/Program.cs b/src/RockBot.Agent/Program.cs
index 3119d815..1899f4a0 100644
--- a/src/RockBot.Agent/Program.cs
+++ b/src/RockBot.Agent/Program.cs
@@ -1,4 +1,5 @@
using System.ClientModel;
+using System.ClientModel.Primitives;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -127,7 +128,10 @@ IChatClient BuildOpenAIClient(LlmTierConfig config)
Endpoint = new Uri(config.Endpoint!),
// Extend from the 100s default — subagents with large tool sets generate
// longer responses that can exceed the default before the body is fully read.
- NetworkTimeout = TimeSpan.FromMinutes(5)
+ NetworkTimeout = TimeSpan.FromMinutes(5),
+ // Disable SDK retry: the LlmGateway owns rate-limit retry policy. Without
+ // this, gateway and SDK both retry on 429 and silently double-retry.
+ RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
})
.GetChatClient(config.ModelId!).AsIChatClient();
}
diff --git a/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs b/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
index 36b705cb..f2583676 100644
--- a/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
+++ b/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
@@ -27,4 +27,20 @@ public sealed class LlmGatewayOptions
/// Expensive judgment calls; lower cap.
/// </summary>
public int HighMaxConcurrent { get; set; } = 2;
+
+ /// <summary>
+ /// Maximum number of retry attempts on rate-limit (HTTP 429) responses before
+ /// the call surfaces the failure to the caller. Each retry honors any
+ /// <c>Retry-After</c> response header; in its absence, exponential backoff
+ /// (1s, 2s, 4s, 8s, ...) is used, capped by <see cref="MaxBackoffSeconds"/>.
+ /// Set to zero to disable retry on rate-limit errors.
+ /// </summary>
+ public int MaxRateLimitRetries { get; set; } = 5;
+
+ /// <summary>
+ /// Maximum backoff (in seconds) between retry attempts when no
+ /// <c>Retry-After</c> header is supplied by the provider. Caps the
+ /// exponential growth of fallback backoff.
+ /// </summary>
+ public int MaxBackoffSeconds { get; set; } = 16;
}
diff --git a/src/RockBot.Host/DefaultLlmRateLimitClassifier.cs b/src/RockBot.Host/DefaultLlmRateLimitClassifier.cs
new file mode 100644
index 00000000..29a6bc21
--- /dev/null
+++ b/src/RockBot.Host/DefaultLlmRateLimitClassifier.cs
@@ -0,0 +1,67 @@
+using System.ClientModel;
+using System.ClientModel.Primitives;
+using System.Globalization;
+using System.Net;
+
+namespace RockBot.Host;
+
+/// <summary>
+/// Default <see cref="ILlmRateLimitClassifier"/>: detects rate-limit errors
+/// surfaced by the OpenAI SDK (via <see cref="ClientResultException"/>) and
+/// generic <see cref="HttpRequestException"/>s carrying a 429 status. Walks the
+/// exception chain so wrapped errors are caught.
+/// </summary>
+internal sealed class DefaultLlmRateLimitClassifier : ILlmRateLimitClassifier
+{
+ public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
+ {
+ retryAfter = null;
+
+ var current = exception;
+ while (current is not null)
+ {
+ if (current is ClientResultException cre && cre.Status == 429)
+ {
+ retryAfter = ParseRetryAfter(cre.GetRawResponse());
+ return true;
+ }
+
+ if (current is HttpRequestException hre && hre.StatusCode == HttpStatusCode.TooManyRequests)
+ {
+ // HttpRequestException does not carry response headers, so we
+ // cannot extract Retry-After here. The gateway falls back to
+ // exponential backoff.
+ return true;
+ }
+
+ current = current.InnerException;
+ }
+
+ return false;
+ }
+
+ private static TimeSpan? ParseRetryAfter(PipelineResponse? response)
+ {
+ if (response is null) return null;
+ if (!response.Headers.TryGetValue("retry-after", out var raw) || string.IsNullOrEmpty(raw))
+ return null;
+
+ // Numeric form: integer seconds.
+ if (int.TryParse(raw, NumberStyles.Integer, CultureInfo.InvariantCulture, out var seconds)
+ && seconds >= 0)
+ {
+ return TimeSpan.FromSeconds(seconds);
+ }
+
+ // HTTP-date form (RFC 7231).
+ if (DateTimeOffset.TryParse(raw, CultureInfo.InvariantCulture,
+ DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
+ out var when))
+ {
+ var diff = when - DateTimeOffset.UtcNow;
+ return diff > TimeSpan.Zero ? diff : TimeSpan.Zero;
+ }
+
+ return null;
+ }
+}
diff --git a/src/RockBot.Host/HostDiagnostics.cs b/src/RockBot.Host/HostDiagnostics.cs
index 9b4f81d0..3fcf5272 100644
--- a/src/RockBot.Host/HostDiagnostics.cs
+++ b/src/RockBot.Host/HostDiagnostics.cs
@@ -58,6 +58,18 @@ public static class HostDiagnostics
unit: "ms",
description: "Time spent waiting for a per-tier LLM gateway slot");
+ ///
+ /// Number of rate-limit retries performed by the LLM gateway. Tagged by tier
+ /// and by retry-after source ("header" when the provider supplied a
+ /// Retry-After hint, "backoff" when the gateway used its
+ /// exponential-backoff fallback).
+ ///
+ public static readonly Counter LlmGatewayRateLimitRetries =
+ Meter.CreateCounter(
+ "rockbot.llm.gateway.rate_limit_retries",
+ unit: "{retry}",
+ description: "Number of rate-limit (429) retries performed by the LLM gateway");
+
// ── Agent turn metrics — recorded at architectural boundaries ─────────────
/// Duration from user message receipt to final reply published.
diff --git a/src/RockBot.Host/ILlmRateLimitClassifier.cs b/src/RockBot.Host/ILlmRateLimitClassifier.cs
new file mode 100644
index 00000000..46799805
--- /dev/null
+++ b/src/RockBot.Host/ILlmRateLimitClassifier.cs
@@ -0,0 +1,22 @@
+namespace RockBot.Host;
+
+/// <summary>
+/// Classifies exceptions thrown by underlying LLM SDK calls to determine whether
+/// they represent a rate-limit (HTTP 429) condition that the gateway should retry.
+/// </summary>
+/// <remarks>
+/// Implementations walk the exception chain to find a rate-limit indicator and,
+/// where possible, extract the provider's <c>Retry-After</c> hint so the gateway
+/// can honor it precisely instead of falling back to exponential backoff.
+/// Pluggable so different providers (OpenAI, Anthropic-direct, Copilot, etc.)
+/// can surface their own rate-limit shapes.
+/// </remarks>
+internal interface ILlmRateLimitClassifier
+{
+ /// <summary>
+ /// Returns <see langword="true"/> if <paramref name="exception"/> indicates a rate-limit
+ /// condition that should be retried. <paramref name="retryAfter"/> is set to
+ /// the provider-supplied wait duration when available.
+ /// </summary>
+ bool TryClassify(Exception exception, out TimeSpan? retryAfter);
+}
diff --git a/src/RockBot.Host/LlmGateway.cs b/src/RockBot.Host/LlmGateway.cs
index 595db073..0fb622fb 100644
--- a/src/RockBot.Host/LlmGateway.cs
+++ b/src/RockBot.Host/LlmGateway.cs
@@ -28,9 +28,15 @@ namespace RockBot.Host;
internal sealed class LlmGateway : ILlmGateway, IDisposable
{
private readonly TierSlot[] _slots;
+ private readonly ILlmRateLimitClassifier _classifier;
private readonly ILogger _logger;
+ private readonly int _maxRetries;
+ private readonly int _maxBackoffSeconds;
- public LlmGateway(IOptions<LlmGatewayOptions> options, ILogger logger)
+ public LlmGateway(
+ IOptions<LlmGatewayOptions> options,
+ ILlmRateLimitClassifier classifier,
+ ILogger logger)
{
var opts = options.Value;
@@ -53,11 +59,26 @@ public LlmGateway(IOptions options, ILogger logge
_slots[(int)tier] = new TierSlot(cap);
}
+ if (opts.MaxRateLimitRetries < 0)
+ throw new ArgumentOutOfRangeException(
+ nameof(options),
+ $"LlmGatewayOptions.MaxRateLimitRetries must be >= 0 (was {opts.MaxRateLimitRetries}).");
+
+ if (opts.MaxBackoffSeconds < 1)
+ throw new ArgumentOutOfRangeException(
+ nameof(options),
+ $"LlmGatewayOptions.MaxBackoffSeconds must be >= 1 (was {opts.MaxBackoffSeconds}).");
+
+ _classifier = classifier;
_logger = logger;
+ _maxRetries = opts.MaxRateLimitRetries;
+ _maxBackoffSeconds = opts.MaxBackoffSeconds;
_logger.LogInformation(
- "LlmGateway: per-tier concurrency caps Low={Low} Balanced={Balanced} High={High}",
- opts.LowMaxConcurrent, opts.BalancedMaxConcurrent, opts.HighMaxConcurrent);
+ "LlmGateway: per-tier concurrency caps Low={Low} Balanced={Balanced} High={High}, " +
+ "rate-limit retries Max={MaxRetries} backoff cap={MaxBackoff}s",
+ opts.LowMaxConcurrent, opts.BalancedMaxConcurrent, opts.HighMaxConcurrent,
+ _maxRetries, _maxBackoffSeconds);
}
///
@@ -99,7 +120,8 @@ public async Task ExecuteAsync(
Interlocked.Increment(ref slot.InFlight);
try
{
- return await operation(cancellationToken).ConfigureAwait(false);
+ return await ExecuteWithRetryAsync(tier, tierTag, operation, cancellationToken)
+ .ConfigureAwait(false);
}
finally
{
@@ -108,6 +130,65 @@ public async Task ExecuteAsync(
}
}
+ /// <summary>
+ /// Invokes <paramref name="operation"/> and, on rate-limit (HTTP 429) failures,
+ /// retries up to MaxRateLimitRetries times. The slot is held throughout
+ /// — releasing during retry waits does not help, since rate limits are per-tier
+ /// so any other call in the same tier would hit the same limit.
+ /// </summary>
+ private async Task<T> ExecuteWithRetryAsync<T>(
+ ModelTier tier,
+ KeyValuePair<string, object?> tierTag,
+ Func<CancellationToken, Task<T>> operation,
+ CancellationToken cancellationToken)
+ {
+ var attempt = 0;
+ while (true)
+ {
+ try
+ {
+ return await operation(cancellationToken).ConfigureAwait(false);
+ }
+ catch (Exception ex) when (
+ attempt < _maxRetries
+ && !cancellationToken.IsCancellationRequested
+ && _classifier.TryClassify(ex, out var classifierRetryAfter))
+ {
+ attempt++;
+
+ var source = classifierRetryAfter.HasValue ? "header" : "backoff";
+ var wait = classifierRetryAfter ?? ComputeBackoff(attempt);
+
+ _logger.LogWarning(
+ "LlmGateway: rate-limit on tier {Tier} (attempt {Attempt}/{Max}); " +
+ "waiting {WaitSeconds}s ({Source}) before retry",
+ tier, attempt, _maxRetries, wait.TotalSeconds, source);
+
+ HostDiagnostics.LlmGatewayRateLimitRetries.Add(
+ 1,
+ tierTag,
+ new KeyValuePair<string, object?>("rockbot.llm.gateway.retry_after_source", source));
+
+ try
+ {
+ await Task.Delay(wait, cancellationToken).ConfigureAwait(false);
+ }
+ catch (OperationCanceledException)
+ {
+ throw;
+ }
+ }
+ }
+ }
+
+ private TimeSpan ComputeBackoff(int attempt)
+ {
+ // 1s, 2s, 4s, 8s, ..., capped at MaxBackoffSeconds.
+ // attempt is 1-based.
+ var seconds = Math.Min(Math.Pow(2, attempt - 1), _maxBackoffSeconds);
+ return TimeSpan.FromSeconds(seconds);
+ }
+
public void Dispose()
{
foreach (var slot in _slots)
diff --git a/src/RockBot.Host/ServiceCollectionExtensions.cs b/src/RockBot.Host/ServiceCollectionExtensions.cs
index 0de383d6..90b675a4 100644
--- a/src/RockBot.Host/ServiceCollectionExtensions.cs
+++ b/src/RockBot.Host/ServiceCollectionExtensions.cs
@@ -35,6 +35,7 @@ public static IServiceCollection AddRockBotHost(
services.AddSingleton();
services.Configure(_ => { });
services.Configure(_ => { });
+ services.AddSingleton<ILlmRateLimitClassifier, DefaultLlmRateLimitClassifier>();
services.AddSingleton();
services.AddTransient();
services.AddSingleton();
diff --git a/src/RockBot.ResearchAgent/Program.cs b/src/RockBot.ResearchAgent/Program.cs
index f4079cf3..a5b23148 100644
--- a/src/RockBot.ResearchAgent/Program.cs
+++ b/src/RockBot.ResearchAgent/Program.cs
@@ -1,4 +1,5 @@
using System.ClientModel;
+using System.ClientModel.Primitives;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -49,7 +50,13 @@ IChatClient BuildClient(LlmTierConfig config)
{
return new OpenAIClient(
new ApiKeyCredential(config.ApiKey!),
- new OpenAIClientOptions { Endpoint = new Uri(config.Endpoint!) })
+ new OpenAIClientOptions
+ {
+ Endpoint = new Uri(config.Endpoint!),
+ // Disable SDK retry: the LlmGateway owns rate-limit retry policy.
+ // Without this, gateway and SDK both retry on 429 and silently double-retry.
+ RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
+ })
.GetChatClient(config.ModelId!).AsIChatClient();
}
diff --git a/src/RockBot.SampleAgent/Program.cs b/src/RockBot.SampleAgent/Program.cs
index 78c9136f..1343f0fb 100644
--- a/src/RockBot.SampleAgent/Program.cs
+++ b/src/RockBot.SampleAgent/Program.cs
@@ -1,4 +1,5 @@
using System.ClientModel;
+using System.ClientModel.Primitives;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
@@ -26,7 +27,12 @@
{
var openAiClient = new OpenAIClient(
new ApiKeyCredential(apiKey),
- new OpenAIClientOptions { Endpoint = new Uri(endpoint) });
+ new OpenAIClientOptions
+ {
+ Endpoint = new Uri(endpoint),
+ // Disable SDK retry: the LlmGateway owns rate-limit retry policy.
+ RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
+ });
builder.Services.AddRockBotChatClient(
openAiClient.GetChatClient(modelId).AsIChatClient());
diff --git a/tests/RockBot.Host.Tests/DefaultLlmRateLimitClassifierTests.cs b/tests/RockBot.Host.Tests/DefaultLlmRateLimitClassifierTests.cs
new file mode 100644
index 00000000..15c1e8d0
--- /dev/null
+++ b/tests/RockBot.Host.Tests/DefaultLlmRateLimitClassifierTests.cs
@@ -0,0 +1,81 @@
+using System.Net;
+
+namespace RockBot.Host.Tests;
+
+[TestClass]
+public class DefaultLlmRateLimitClassifierTests
+{
+ private readonly DefaultLlmRateLimitClassifier _classifier = new();
+
+ [TestMethod]
+ public void TryClassify_NonRateLimitException_ReturnsFalse()
+ {
+ var ex = new InvalidOperationException("boom");
+ var result = _classifier.TryClassify(ex, out var retryAfter);
+ Assert.IsFalse(result);
+ Assert.IsNull(retryAfter);
+ }
+
+ [TestMethod]
+ public void TryClassify_HttpRequestException_429_ReturnsTrue()
+ {
+ var ex = new HttpRequestException(
+ HttpRequestError.Unknown,
+ "rate limited",
+ inner: null,
+ statusCode: HttpStatusCode.TooManyRequests);
+
+ var result = _classifier.TryClassify(ex, out var retryAfter);
+ Assert.IsTrue(result);
+ // HttpRequestException carries no headers, so no Retry-After is extracted.
+ Assert.IsNull(retryAfter);
+ }
+
+ [TestMethod]
+ public void TryClassify_HttpRequestException_NotRateLimit_ReturnsFalse()
+ {
+ var ex = new HttpRequestException(
+ HttpRequestError.Unknown,
+ "server error",
+ inner: null,
+ statusCode: HttpStatusCode.InternalServerError);
+
+ var result = _classifier.TryClassify(ex, out var retryAfter);
+ Assert.IsFalse(result);
+ Assert.IsNull(retryAfter);
+ }
+
+ [TestMethod]
+ public void TryClassify_WrappedRateLimit_WalksInnerExceptions()
+ {
+ var inner = new HttpRequestException(
+ HttpRequestError.Unknown,
+ "rate limited",
+ inner: null,
+ statusCode: HttpStatusCode.TooManyRequests);
+ var outer = new InvalidOperationException("wrapper", inner);
+
+ var result = _classifier.TryClassify(outer, out _);
+ Assert.IsTrue(result, "Classifier must walk the inner-exception chain");
+ }
+
+ [TestMethod]
+ public void TryClassify_Aggregate_DoesNotWalkInnerExceptionsCollection()
+ {
+ // Document current behavior: AggregateException's InnerExceptions collection is
+ // not walked. Only AggregateException.InnerException (the first inner) is.
+ // If a future caller wraps via AggregateException, this test will fail and force
+ // an explicit decision.
+ var rateLimit = new HttpRequestException(
+ HttpRequestError.Unknown,
+ "rate limited",
+ inner: null,
+ statusCode: HttpStatusCode.TooManyRequests);
+ var agg = new AggregateException(new InvalidOperationException("first"), rateLimit);
+
+ var result = _classifier.TryClassify(agg, out _);
+ // agg.InnerException is the first one (InvalidOperationException), not the
+ // rate-limit one. Classifier walks only the linear chain.
+ Assert.IsFalse(result);
+ }
+}
diff --git a/tests/RockBot.Host.Tests/LlmGatewayTests.cs b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
index 1c1fe9df..5852cb7f 100644
--- a/tests/RockBot.Host.Tests/LlmGatewayTests.cs
+++ b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
@@ -6,15 +6,65 @@ namespace RockBot.Host.Tests;
[TestClass]
public class LlmGatewayTests
{
- private static LlmGateway CreateGateway(int low = 2, int balanced = 2, int high = 2)
+ private static LlmGateway CreateGateway(
+ int low = 2,
+ int balanced = 2,
+ int high = 2,
+ int maxRetries = 0,
+ int maxBackoffSeconds = 16,
+ ILlmRateLimitClassifier? classifier = null)
{
var options = Options.Create(new LlmGatewayOptions
{
LowMaxConcurrent = low,
BalancedMaxConcurrent = balanced,
HighMaxConcurrent = high,
+ MaxRateLimitRetries = maxRetries,
+ MaxBackoffSeconds = maxBackoffSeconds,
});
- return new LlmGateway(options, NullLogger.Instance);
+ return new LlmGateway(
+ options,
+ classifier ?? new NeverRateLimitClassifier(),
+ NullLogger.Instance);
+ }
+
+ /// <summary>Stub classifier that never reports rate-limit conditions.</summary>
+ private sealed class NeverRateLimitClassifier : ILlmRateLimitClassifier
+ {
+ public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
+ {
+ retryAfter = null;
+ return false;
+ }
+ }
+
+ /// <summary>
+ /// Stub classifier that recognises a custom marker exception as rate-limit and
+ /// surfaces an optional Retry-After hint carried on the exception.
+ /// </summary>
+ private sealed class FakeRateLimitException(TimeSpan? retryAfter = null) : Exception("simulated 429")
+ {
+ public TimeSpan? RetryAfter { get; } = retryAfter;
+ }
+
+ private sealed class FakeRateLimitClassifier : ILlmRateLimitClassifier
+ {
+ public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
+ {
+ // Walk the inner-exception chain so wrapped throws are still recognised.
+ var current = exception;
+ while (current is not null)
+ {
+ if (current is FakeRateLimitException frle)
+ {
+ retryAfter = frle.RetryAfter;
+ return true;
+ }
+ current = current.InnerException;
+ }
+ retryAfter = null;
+ return false;
+ }
}
[TestMethod]
@@ -219,7 +269,169 @@ public void Constructor_CapBelowOne_Throws()
{
var bad = Options.Create(new LlmGatewayOptions { LowMaxConcurrent = 0 });
Assert.ThrowsExactly<ArgumentOutOfRangeException>(() =>
- new LlmGateway(bad, NullLogger.Instance));
+ new LlmGateway(bad, new NeverRateLimitClassifier(), NullLogger.Instance));
+ }
+
+ [TestMethod]
+ public void Constructor_NegativeMaxRetries_Throws()
+ {
+ var bad = Options.Create(new LlmGatewayOptions { MaxRateLimitRetries = -1 });
+ Assert.ThrowsExactly<ArgumentOutOfRangeException>(() =>
+ new LlmGateway(bad, new NeverRateLimitClassifier(), NullLogger.Instance));
+ }
+
+ [TestMethod]
+ public void Constructor_MaxBackoffBelowOne_Throws()
+ {
+ var bad = Options.Create(new LlmGatewayOptions { MaxBackoffSeconds = 0 });
+ Assert.ThrowsExactly<ArgumentOutOfRangeException>(() =>
+ new LlmGateway(bad, new NeverRateLimitClassifier(), NullLogger.Instance));
+ }
+
+ [TestMethod]
+ public async Task ExecuteAsync_NonRateLimitError_NotRetried()
+ {
+ using var gateway = CreateGateway(maxRetries: 5, classifier: new FakeRateLimitClassifier());
+ var attempts = 0;
+
+ await Assert.ThrowsExactlyAsync<InvalidOperationException>(async () =>
+ await gateway.ExecuteAsync(
+ ModelTier.Balanced,
+ ct =>
+ {
+ attempts++;
+ throw new InvalidOperationException("not a 429");
+ },
+ CancellationToken.None));
+
+ Assert.AreEqual(1, attempts, "Non-rate-limit errors must not be retried");
+ }
+
+ [TestMethod]
+ public async Task ExecuteAsync_RateLimit_RetriesAndSucceeds()
+ {
+ using var gateway = CreateGateway(
+ maxRetries: 3,
+ classifier: new FakeRateLimitClassifier());
+ var attempts = 0;
+
+ var result = await gateway.ExecuteAsync(
+ ModelTier.Balanced,
+ ct =>
+ {
+ attempts++;
+ if (attempts < 3)
+ throw new FakeRateLimitException(retryAfter: TimeSpan.FromMilliseconds(1));
+ return Task.FromResult(42);
+ },
+ CancellationToken.None);
+
+ Assert.AreEqual(42, result);
+ Assert.AreEqual(3, attempts, "Should have retried twice before success");
+ }
+
+ [TestMethod]
+ public async Task ExecuteAsync_RateLimit_ExhaustsRetriesAndThrows()
+ {
+ using var gateway = CreateGateway(
+ maxRetries: 2,
+ classifier: new FakeRateLimitClassifier());
+ var attempts = 0;
+
+ await Assert.ThrowsExactlyAsync<FakeRateLimitException>(async () =>
+ await gateway.ExecuteAsync(
+ ModelTier.Balanced,
+ ct =>
+ {
+ attempts++;
+ throw new FakeRateLimitException(retryAfter: TimeSpan.FromMilliseconds(1));
+ },
+ CancellationToken.None));
+
+ // Initial attempt + 2 retries = 3 attempts total.
+ Assert.AreEqual(3, attempts);
+ }
+
+ [TestMethod]
+ public async Task ExecuteAsync_RateLimit_HonorsRetryAfter()
+ {
+ using var gateway = CreateGateway(
+ maxRetries: 1,
+ classifier: new FakeRateLimitClassifier());
+ var sw = System.Diagnostics.Stopwatch.StartNew();
+ var attempts = 0;
+
+ await gateway.ExecuteAsync(
+ ModelTier.Balanced,
+ ct =>
+ {
+ attempts++;
+ if (attempts == 1)
+ throw new FakeRateLimitException(retryAfter: TimeSpan.FromMilliseconds(200));
+ return Task.FromResult(0);
+ },
+ CancellationToken.None);
+
+ sw.Stop();
+ Assert.AreEqual(2, attempts);
+ Assert.IsTrue(sw.ElapsedMilliseconds >= 180,
+ $"Expected at least ~200ms wait honoring Retry-After, but only {sw.ElapsedMilliseconds}ms elapsed");
+ }
+
+ [TestMethod]
+ public async Task ExecuteAsync_RateLimit_CancelDuringWait_Aborts()
+ {
+ using var gateway = CreateGateway(
+ maxRetries: 5,
+ classifier: new FakeRateLimitClassifier());
+ using var cts = new CancellationTokenSource();
+ var attempts = 0;
+
+ var task = gateway.ExecuteAsync(
+ ModelTier.Balanced,
+ ct =>
+ {
+ attempts++;
+ throw new FakeRateLimitException(retryAfter: TimeSpan.FromSeconds(30));
+ },
+ cts.Token);
+
+ // Let the first attempt run and start the retry wait.
+ await WaitUntilAsync(() => attempts == 1, TimeSpan.FromSeconds(5));
+
+ cts.Cancel();
+
+ await Assert.ThrowsAsync<OperationCanceledException>(async () => await task);
+ Assert.AreEqual(1, attempts, "Cancellation should abort during the retry wait");
+ }
+
+ [TestMethod]
+ public async Task ExecuteAsync_RateLimit_NoRetryAfter_UsesExponentialBackoff()
+ {
+ var maxBackoffSeconds = 1; // Cap backoff at 1s so the test runs quickly.
+ using var gateway = CreateGateway(
+ maxRetries: 1,
+ maxBackoffSeconds: maxBackoffSeconds,
+ classifier: new FakeRateLimitClassifier());
+ var sw = System.Diagnostics.Stopwatch.StartNew();
+ var attempts = 0;
+
+ await gateway.ExecuteAsync(
+ ModelTier.Balanced,
+ ct =>
+ {
+ attempts++;
+ if (attempts == 1)
+ throw new FakeRateLimitException(retryAfter: null);
+ return Task.FromResult(0);
+ },
+ CancellationToken.None);
+
+ sw.Stop();
+ Assert.AreEqual(2, attempts);
+ // Attempt 1 backoff is 2^0 = 1 second; capped at maxBackoff so still 1s.
+ Assert.IsTrue(sw.ElapsedMilliseconds >= 900,
+ $"Expected at least ~1s exponential backoff, but only {sw.ElapsedMilliseconds}ms elapsed");
}
private static async Task WaitUntilAsync(Func<bool> predicate, TimeSpan timeout)