diff --git a/src/RockBot.Agent/Program.cs b/src/RockBot.Agent/Program.cs
index 3119d815..1899f4a0 100644
--- a/src/RockBot.Agent/Program.cs
+++ b/src/RockBot.Agent/Program.cs
@@ -1,4 +1,5 @@
 using System.ClientModel;
+using System.ClientModel.Primitives;
 using Microsoft.Extensions.AI;
 using Microsoft.Extensions.Configuration;
 using Microsoft.Extensions.DependencyInjection;
@@ -127,7 +128,10 @@ IChatClient BuildOpenAIClient(LlmTierConfig config)
             Endpoint = new Uri(config.Endpoint!),
             // Extend from the 100s default — subagents with large tool sets generate
             // longer responses that can exceed the default before the body is fully read.
-            NetworkTimeout = TimeSpan.FromMinutes(5)
+            NetworkTimeout = TimeSpan.FromMinutes(5),
+            // Disable SDK retry: the LlmGateway owns rate-limit retry policy. Without
+            // this, gateway and SDK both retry on 429 and silently double-retry.
+            RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
         })
         .GetChatClient(config.ModelId!).AsIChatClient();
 }
diff --git a/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs b/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
index 36b705cb..f2583676 100644
--- a/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
+++ b/src/RockBot.Host.Abstractions/LlmGatewayOptions.cs
@@ -27,4 +27,20 @@ public sealed class LlmGatewayOptions
     /// Expensive judgment calls; lower cap.
     /// </summary>
     public int HighMaxConcurrent { get; set; } = 2;
+
+    /// <summary>
+    /// Maximum number of retry attempts on rate-limit (HTTP 429) responses before
+    /// the call surfaces the failure to the caller. Each retry honors any
+    /// <c>Retry-After</c> response header; in its absence, exponential backoff
+    /// (1s, 2s, 4s, 8s, ...) is used, capped by <see cref="MaxBackoffSeconds"/>.
+    /// Set to zero to disable retry on rate-limit errors.
+    /// </summary>
+    public int MaxRateLimitRetries { get; set; } = 5;
+
+    /// <summary>
+    /// Maximum backoff (in seconds) between retry attempts when no
+    /// <c>Retry-After</c> header is supplied by the provider. Caps the
+    /// exponential growth of fallback backoff.
+    /// </summary>
+    public int MaxBackoffSeconds { get; set; } = 16;
 }
diff --git a/src/RockBot.Host/DefaultLlmRateLimitClassifier.cs b/src/RockBot.Host/DefaultLlmRateLimitClassifier.cs
new file mode 100644
index 00000000..29a6bc21
--- /dev/null
+++ b/src/RockBot.Host/DefaultLlmRateLimitClassifier.cs
@@ -0,0 +1,67 @@
+using System.ClientModel;
+using System.ClientModel.Primitives;
+using System.Globalization;
+using System.Net;
+
+namespace RockBot.Host;
+
+/// <summary>
+/// Default <see cref="ILlmRateLimitClassifier"/>: detects rate-limit errors
+/// surfaced by the OpenAI SDK (via <see cref="ClientResultException"/>) and
+/// generic <see cref="HttpRequestException"/>s carrying a 429 status. Walks the
+/// exception chain so wrapped errors are caught.
+/// </summary>
+internal sealed class DefaultLlmRateLimitClassifier : ILlmRateLimitClassifier
+{
+    public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
+    {
+        retryAfter = null;
+
+        var current = exception;
+        while (current is not null)
+        {
+            if (current is ClientResultException cre && cre.Status == 429)
+            {
+                retryAfter = ParseRetryAfter(cre.GetRawResponse());
+                return true;
+            }
+
+            if (current is HttpRequestException hre && hre.StatusCode == HttpStatusCode.TooManyRequests)
+            {
+                // HttpRequestException does not carry response headers, so we
+                // cannot extract Retry-After here. The gateway falls back to
+                // exponential backoff.
+                return true;
+            }
+
+            current = current.InnerException;
+        }
+
+        return false;
+    }
+
+    private static TimeSpan? ParseRetryAfter(PipelineResponse? response)
+    {
+        if (response is null) return null;
+        if (!response.Headers.TryGetValue("retry-after", out var raw) || string.IsNullOrEmpty(raw))
+            return null;
+
+        // Numeric form: integer seconds.
+        if (int.TryParse(raw, NumberStyles.Integer, CultureInfo.InvariantCulture, out var seconds)
+            && seconds >= 0)
+        {
+            return TimeSpan.FromSeconds(seconds);
+        }
+
+        // HTTP-date form (RFC 7231).
+        if (DateTimeOffset.TryParse(raw, CultureInfo.InvariantCulture,
+                DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
+                out var when))
+        {
+            var diff = when - DateTimeOffset.UtcNow;
+            return diff > TimeSpan.Zero ? diff : TimeSpan.Zero;
+        }
+
+        return null;
+    }
+}
diff --git a/src/RockBot.Host/HostDiagnostics.cs b/src/RockBot.Host/HostDiagnostics.cs
index 9b4f81d0..3fcf5272 100644
--- a/src/RockBot.Host/HostDiagnostics.cs
+++ b/src/RockBot.Host/HostDiagnostics.cs
@@ -58,6 +58,18 @@ public static class HostDiagnostics
             unit: "ms",
             description: "Time spent waiting for a per-tier LLM gateway slot");
 
+    /// <summary>
+    /// Number of rate-limit retries performed by the LLM gateway. Tagged by tier
+    /// and by retry-after source ("header" when the provider supplied a
+    /// <c>Retry-After</c> hint, "backoff" when the gateway used its
+    /// exponential-backoff fallback).
+    /// </summary>
+    public static readonly Counter<long> LlmGatewayRateLimitRetries =
+        Meter.CreateCounter<long>(
+            "rockbot.llm.gateway.rate_limit_retries",
+            unit: "{retry}",
+            description: "Number of rate-limit (429) retries performed by the LLM gateway");
+
     // ── Agent turn metrics — recorded at architectural boundaries ─────────────
 
     /// <summary>Duration from user message receipt to final reply published.</summary>
diff --git a/src/RockBot.Host/ILlmRateLimitClassifier.cs b/src/RockBot.Host/ILlmRateLimitClassifier.cs
new file mode 100644
index 00000000..46799805
--- /dev/null
+++ b/src/RockBot.Host/ILlmRateLimitClassifier.cs
@@ -0,0 +1,22 @@
+namespace RockBot.Host;
+
+/// <summary>
+/// Classifies exceptions thrown by underlying LLM SDK calls to determine whether
+/// they represent a rate-limit (HTTP 429) condition that the gateway should retry.
+/// </summary>
+/// <remarks>
+/// Implementations walk the exception chain to find a rate-limit indicator and,
+/// where possible, extract the provider's <c>Retry-After</c> hint so the gateway
+/// can honor it precisely instead of falling back to exponential backoff.
+/// Pluggable so different providers (OpenAI, Anthropic-direct, Copilot, etc.)
+/// can surface their own rate-limit shapes.
+/// </remarks>
+internal interface ILlmRateLimitClassifier
+{
+    /// <summary>
+    /// Returns true if <paramref name="exception"/> indicates a rate-limit
+    /// condition that should be retried. <paramref name="retryAfter"/> is set to
+    /// the provider-supplied wait duration when available.
+    /// </summary>
+    bool TryClassify(Exception exception, out TimeSpan? retryAfter);
+}
diff --git a/src/RockBot.Host/LlmGateway.cs b/src/RockBot.Host/LlmGateway.cs
index 595db073..0fb622fb 100644
--- a/src/RockBot.Host/LlmGateway.cs
+++ b/src/RockBot.Host/LlmGateway.cs
@@ -28,9 +28,15 @@ namespace RockBot.Host;
 internal sealed class LlmGateway : ILlmGateway, IDisposable
 {
     private readonly TierSlot[] _slots;
+    private readonly ILlmRateLimitClassifier _classifier;
     private readonly ILogger<LlmGateway> _logger;
+    private readonly int _maxRetries;
+    private readonly int _maxBackoffSeconds;
 
-    public LlmGateway(IOptions<LlmGatewayOptions> options, ILogger<LlmGateway> logger)
+    public LlmGateway(
+        IOptions<LlmGatewayOptions> options,
+        ILlmRateLimitClassifier classifier,
+        ILogger<LlmGateway> logger)
     {
         var opts = options.Value;
 
@@ -53,11 +59,26 @@ public LlmGateway(
             _slots[(int)tier] = new TierSlot(cap);
         }
 
+        if (opts.MaxRateLimitRetries < 0)
+            throw new ArgumentOutOfRangeException(
+                nameof(options),
+                $"LlmGatewayOptions.MaxRateLimitRetries must be >= 0 (was {opts.MaxRateLimitRetries}).");
+
+        if (opts.MaxBackoffSeconds < 1)
+            throw new ArgumentOutOfRangeException(
+                nameof(options),
+                $"LlmGatewayOptions.MaxBackoffSeconds must be >= 1 (was {opts.MaxBackoffSeconds}).");
+
+        _classifier = classifier;
         _logger = logger;
+        _maxRetries = opts.MaxRateLimitRetries;
+        _maxBackoffSeconds = opts.MaxBackoffSeconds;
 
         _logger.LogInformation(
-            "LlmGateway: per-tier concurrency caps Low={Low} Balanced={Balanced} High={High}",
-            opts.LowMaxConcurrent, opts.BalancedMaxConcurrent, opts.HighMaxConcurrent);
+            "LlmGateway: per-tier concurrency caps Low={Low} Balanced={Balanced} High={High}, " +
+            "rate-limit retries Max={MaxRetries} backoff cap={MaxBackoff}s",
+            opts.LowMaxConcurrent, opts.BalancedMaxConcurrent, opts.HighMaxConcurrent,
+            _maxRetries, _maxBackoffSeconds);
     }
 
     /// <inheritdoc />
@@ -99,7 +120,8 @@ public async Task<T> ExecuteAsync<T>(
         Interlocked.Increment(ref slot.InFlight);
         try
         {
-            return await operation(cancellationToken).ConfigureAwait(false);
+            return await ExecuteWithRetryAsync(tier, tierTag, operation, cancellationToken)
+                .ConfigureAwait(false);
         }
         finally
         {
@@ -108,6 +130,61 @@ public async Task<T> ExecuteAsync<T>(
         }
     }
 
+    /// <summary>
+    /// Invokes <paramref name="operation"/> and, on rate-limit (HTTP 429) failures,
+    /// retries up to MaxRateLimitRetries times. The slot is held throughout
+    /// — releasing during retry waits does not help, since rate limits are per-tier
+    /// so any other call in the same tier would hit the same limit.
+    /// </summary>
+    private async Task<T> ExecuteWithRetryAsync<T>(
+        ModelTier tier,
+        KeyValuePair<string, object?> tierTag,
+        Func<CancellationToken, Task<T>> operation,
+        CancellationToken cancellationToken)
+    {
+        var attempt = 0;
+        while (true)
+        {
+            try
+            {
+                return await operation(cancellationToken).ConfigureAwait(false);
+            }
+            catch (Exception ex) when (
+                attempt < _maxRetries
+                && !cancellationToken.IsCancellationRequested
+                && _classifier.TryClassify(ex, out var classifierRetryAfter))
+            {
+                attempt++;
+
+                var source = classifierRetryAfter.HasValue ? "header" : "backoff";
+                var wait = classifierRetryAfter ?? ComputeBackoff(attempt);
+
+                _logger.LogWarning(
+                    "LlmGateway: rate-limit on tier {Tier} (attempt {Attempt}/{Max}); " +
+                    "waiting {WaitSeconds}s ({Source}) before retry",
+                    tier, attempt, _maxRetries, wait.TotalSeconds, source);
+
+                HostDiagnostics.LlmGatewayRateLimitRetries.Add(
+                    1,
+                    tierTag,
+                    new KeyValuePair<string, object?>("rockbot.llm.gateway.retry_after_source", source));
+
+                // Cancellation during the wait surfaces naturally: Task.Delay throws
+                // an OperationCanceledException that propagates to the caller.
+                await Task.Delay(wait, cancellationToken).ConfigureAwait(false);
+            }
+        }
+    }
+
+    private TimeSpan ComputeBackoff(int attempt)
+    {
+        // 1s, 2s, 4s, 8s, ..., capped at MaxBackoffSeconds.
+        // attempt is 1-based.
+        var seconds = Math.Min(Math.Pow(2, attempt - 1), _maxBackoffSeconds);
+        return TimeSpan.FromSeconds(seconds);
+    }
+
     public void Dispose()
     {
         foreach (var slot in _slots)
diff --git a/src/RockBot.Host/ServiceCollectionExtensions.cs b/src/RockBot.Host/ServiceCollectionExtensions.cs
index 0de383d6..90b675a4 100644
--- a/src/RockBot.Host/ServiceCollectionExtensions.cs
+++ b/src/RockBot.Host/ServiceCollectionExtensions.cs
@@ -35,6 +35,7 @@ public static IServiceCollection AddRockBotHost(
         services.AddSingleton();
         services.Configure(_ => { });
         services.Configure(_ => { });
+        services.AddSingleton<ILlmRateLimitClassifier, DefaultLlmRateLimitClassifier>();
         services.AddSingleton();
         services.AddTransient();
         services.AddSingleton();
diff --git a/src/RockBot.ResearchAgent/Program.cs b/src/RockBot.ResearchAgent/Program.cs
index f4079cf3..a5b23148 100644
--- a/src/RockBot.ResearchAgent/Program.cs
+++ b/src/RockBot.ResearchAgent/Program.cs
@@ -1,4 +1,5 @@
 using System.ClientModel;
+using System.ClientModel.Primitives;
 using Microsoft.Extensions.AI;
 using Microsoft.Extensions.Configuration;
 using Microsoft.Extensions.DependencyInjection;
@@ -49,7 +50,13 @@ IChatClient BuildClient(LlmTierConfig config)
 {
     return new OpenAIClient(
         new ApiKeyCredential(config.ApiKey!),
-        new OpenAIClientOptions { Endpoint = new Uri(config.Endpoint!) })
+        new OpenAIClientOptions
+        {
+            Endpoint = new Uri(config.Endpoint!),
+            // Disable SDK retry: the LlmGateway owns rate-limit retry policy.
+            // Without this, gateway and SDK both retry on 429 and silently double-retry.
+            RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
+        })
         .GetChatClient(config.ModelId!).AsIChatClient();
 }
diff --git a/src/RockBot.SampleAgent/Program.cs b/src/RockBot.SampleAgent/Program.cs
index 78c9136f..1343f0fb 100644
--- a/src/RockBot.SampleAgent/Program.cs
+++ b/src/RockBot.SampleAgent/Program.cs
@@ -1,4 +1,5 @@
 using System.ClientModel;
+using System.ClientModel.Primitives;
 using Microsoft.Extensions.AI;
 using Microsoft.Extensions.Configuration;
 using Microsoft.Extensions.DependencyInjection;
@@ -26,7 +27,12 @@
 {
     var openAiClient = new OpenAIClient(
         new ApiKeyCredential(apiKey),
-        new OpenAIClientOptions { Endpoint = new Uri(endpoint) });
+        new OpenAIClientOptions
+        {
+            Endpoint = new Uri(endpoint),
+            // Disable SDK retry: the LlmGateway owns rate-limit retry policy.
+            RetryPolicy = new ClientRetryPolicy(maxRetries: 0)
+        });
 
     builder.Services.AddRockBotChatClient(
         openAiClient.GetChatClient(modelId).AsIChatClient());
diff --git a/tests/RockBot.Host.Tests/DefaultLlmRateLimitClassifierTests.cs b/tests/RockBot.Host.Tests/DefaultLlmRateLimitClassifierTests.cs
new file mode 100644
index 00000000..15c1e8d0
--- /dev/null
+++ b/tests/RockBot.Host.Tests/DefaultLlmRateLimitClassifierTests.cs
@@ -0,0 +1,81 @@
+using System.Net;
+
+namespace RockBot.Host.Tests;
+
+[TestClass]
+public class DefaultLlmRateLimitClassifierTests
+{
+    private readonly DefaultLlmRateLimitClassifier _classifier = new();
+
+    [TestMethod]
+    public void TryClassify_NonRateLimitException_ReturnsFalse()
+    {
+        var ex = new InvalidOperationException("boom");
+        var result = _classifier.TryClassify(ex, out var retryAfter);
+        Assert.IsFalse(result);
+        Assert.IsNull(retryAfter);
+    }
+
+    [TestMethod]
+    public void TryClassify_HttpRequestException_429_ReturnsTrue()
+    {
+        var ex = new HttpRequestException(
+            HttpRequestError.Unknown,
+            "rate limited",
+            inner: null,
+            statusCode: HttpStatusCode.TooManyRequests);
+
+        var result = _classifier.TryClassify(ex, out var retryAfter);
+        Assert.IsTrue(result);
+        // HttpRequestException carries no headers, so no Retry-After is extracted.
+        Assert.IsNull(retryAfter);
+    }
+
+    [TestMethod]
+    public void TryClassify_HttpRequestException_NotRateLimit_ReturnsFalse()
+    {
+        var ex = new HttpRequestException(
+            HttpRequestError.Unknown,
+            "server error",
+            inner: null,
+            statusCode: HttpStatusCode.InternalServerError);
+
+        var result = _classifier.TryClassify(ex, out var retryAfter);
+        Assert.IsFalse(result);
+        Assert.IsNull(retryAfter);
+    }
+
+    [TestMethod]
+    public void TryClassify_WrappedRateLimit_WalksInnerExceptions()
+    {
+        var inner = new HttpRequestException(
+            HttpRequestError.Unknown,
+            "rate limited",
+            inner: null,
+            statusCode: HttpStatusCode.TooManyRequests);
+        var outer = new InvalidOperationException("wrapper", inner);
+
+        var result = _classifier.TryClassify(outer, out _);
+        Assert.IsTrue(result, "Classifier must walk the inner-exception chain");
+    }
+
+    [TestMethod]
+    public void TryClassify_Aggregate_DoesNotWalkInnerExceptionsCollection()
+    {
+        // Document current behavior: AggregateException's InnerExceptions collection is
+        // not walked. Only AggregateException.InnerException (the first inner) is.
+        // If a future caller wraps via AggregateException, this test will fail and force
+        // an explicit decision.
+        var rateLimit = new HttpRequestException(
+            HttpRequestError.Unknown,
+            "rate limited",
+            inner: null,
+            statusCode: HttpStatusCode.TooManyRequests);
+        var agg = new AggregateException(new InvalidOperationException("first"), rateLimit);
+
+        var result = _classifier.TryClassify(agg, out _);
+        // agg.InnerException is the first one (InvalidOperationException), not the
+        // rate-limit one. Classifier walks only the linear chain.
+        Assert.IsFalse(result);
+    }
+}
diff --git a/tests/RockBot.Host.Tests/LlmGatewayTests.cs b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
index 1c1fe9df..5852cb7f 100644
--- a/tests/RockBot.Host.Tests/LlmGatewayTests.cs
+++ b/tests/RockBot.Host.Tests/LlmGatewayTests.cs
@@ -6,15 +6,65 @@ namespace RockBot.Host.Tests;
 [TestClass]
 public class LlmGatewayTests
 {
-    private static LlmGateway CreateGateway(int low = 2, int balanced = 2, int high = 2)
+    private static LlmGateway CreateGateway(
+        int low = 2,
+        int balanced = 2,
+        int high = 2,
+        int maxRetries = 0,
+        int maxBackoffSeconds = 16,
+        ILlmRateLimitClassifier? classifier = null)
     {
         var options = Options.Create(new LlmGatewayOptions
         {
             LowMaxConcurrent = low,
             BalancedMaxConcurrent = balanced,
             HighMaxConcurrent = high,
+            MaxRateLimitRetries = maxRetries,
+            MaxBackoffSeconds = maxBackoffSeconds,
         });
-        return new LlmGateway(options, NullLogger<LlmGateway>.Instance);
+        return new LlmGateway(
+            options,
+            classifier ?? new NeverRateLimitClassifier(),
+            NullLogger<LlmGateway>.Instance);
+    }
+
+    /// <summary>Stub classifier that never reports rate-limit conditions.</summary>
+    private sealed class NeverRateLimitClassifier : ILlmRateLimitClassifier
+    {
+        public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
+        {
+            retryAfter = null;
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Marker exception simulating a provider 429, optionally carrying a
+    /// Retry-After hint for the classifier to surface.
+    /// </summary>
+    private sealed class FakeRateLimitException(TimeSpan? retryAfter = null) : Exception("simulated 429")
+    {
+        public TimeSpan? RetryAfter { get; } = retryAfter;
+    }
+
+    /// <summary>
+    /// Stub classifier that recognises <see cref="FakeRateLimitException"/> as
+    /// rate-limit and surfaces the optional Retry-After hint carried on it.
+    /// </summary>
+    private sealed class FakeRateLimitClassifier : ILlmRateLimitClassifier
+    {
+        public bool TryClassify(Exception exception, out TimeSpan? retryAfter)
+        {
+            // Walk the inner-exception chain so wrapped throws are still recognised.
+            var current = exception;
+            while (current is not null)
+            {
+                if (current is FakeRateLimitException frle)
+                {
+                    retryAfter = frle.RetryAfter;
+                    return true;
+                }
+                current = current.InnerException;
+            }
+            retryAfter = null;
+            return false;
+        }
     }
 
     [TestMethod]
@@ -219,7 +269,169 @@ public void Constructor_CapBelowOne_Throws()
     {
         var bad = Options.Create(new LlmGatewayOptions { LowMaxConcurrent = 0 });
         Assert.ThrowsExactly<ArgumentOutOfRangeException>(() =>
-            new LlmGateway(bad, NullLogger<LlmGateway>.Instance));
+            new LlmGateway(bad, new NeverRateLimitClassifier(), NullLogger<LlmGateway>.Instance));
+    }
+
+    [TestMethod]
+    public void Constructor_NegativeMaxRetries_Throws()
+    {
+        var bad = Options.Create(new LlmGatewayOptions { MaxRateLimitRetries = -1 });
+        Assert.ThrowsExactly<ArgumentOutOfRangeException>(() =>
+            new LlmGateway(bad, new NeverRateLimitClassifier(), NullLogger<LlmGateway>.Instance));
+    }
+
+    [TestMethod]
+    public void Constructor_MaxBackoffBelowOne_Throws()
+    {
+        var bad = Options.Create(new LlmGatewayOptions { MaxBackoffSeconds = 0 });
+        Assert.ThrowsExactly<ArgumentOutOfRangeException>(() =>
+            new LlmGateway(bad, new NeverRateLimitClassifier(), NullLogger<LlmGateway>.Instance));
+    }
+
+    [TestMethod]
+    public async Task ExecuteAsync_NonRateLimitError_NotRetried()
+    {
+        using var gateway = CreateGateway(maxRetries: 5, classifier: new FakeRateLimitClassifier());
+        var attempts = 0;
+
+        await Assert.ThrowsExactlyAsync<InvalidOperationException>(async () =>
+            await gateway.ExecuteAsync<int>(
+                ModelTier.Balanced,
+                ct =>
+                {
+                    attempts++;
+                    throw new InvalidOperationException("not a 429");
+                },
+                CancellationToken.None));
+
+        Assert.AreEqual(1, attempts, "Non-rate-limit errors must not be retried");
+    }
+
+    [TestMethod]
+    public async Task ExecuteAsync_RateLimit_RetriesAndSucceeds()
+    {
+        using var gateway = CreateGateway(
+            maxRetries: 3,
+            classifier: new FakeRateLimitClassifier());
+        var attempts = 0;
+
+        var result = await gateway.ExecuteAsync(
+            ModelTier.Balanced,
+            ct =>
+            {
+                attempts++;
+                if (attempts < 3)
+                    throw new FakeRateLimitException(retryAfter: TimeSpan.FromMilliseconds(1));
+                return Task.FromResult(42);
+            },
+            CancellationToken.None);
+
+        Assert.AreEqual(42, result);
+        Assert.AreEqual(3, attempts, "Should have retried twice before success");
+    }
+
+    [TestMethod]
+    public async Task ExecuteAsync_RateLimit_ExhaustsRetriesAndThrows()
+    {
+        using var gateway = CreateGateway(
+            maxRetries: 2,
+            classifier: new FakeRateLimitClassifier());
+        var attempts = 0;
+
+        await Assert.ThrowsExactlyAsync<FakeRateLimitException>(async () =>
+            await gateway.ExecuteAsync<int>(
+                ModelTier.Balanced,
+                ct =>
+                {
+                    attempts++;
+                    throw new FakeRateLimitException(retryAfter: TimeSpan.FromMilliseconds(1));
+                },
+                CancellationToken.None));
+
+        // Initial attempt + 2 retries = 3 attempts total.
+        Assert.AreEqual(3, attempts);
+    }
+
+    [TestMethod]
+    public async Task ExecuteAsync_RateLimit_HonorsRetryAfter()
+    {
+        using var gateway = CreateGateway(
+            maxRetries: 1,
+            classifier: new FakeRateLimitClassifier());
+        var sw = System.Diagnostics.Stopwatch.StartNew();
+        var attempts = 0;
+
+        await gateway.ExecuteAsync(
+            ModelTier.Balanced,
+            ct =>
+            {
+                attempts++;
+                if (attempts == 1)
+                    throw new FakeRateLimitException(retryAfter: TimeSpan.FromMilliseconds(200));
+                return Task.FromResult(0);
+            },
+            CancellationToken.None);
+
+        sw.Stop();
+        Assert.AreEqual(2, attempts);
+        Assert.IsTrue(sw.ElapsedMilliseconds >= 180,
+            $"Expected at least ~200ms wait honoring Retry-After, but only {sw.ElapsedMilliseconds}ms elapsed");
+    }
+
+    [TestMethod]
+    public async Task ExecuteAsync_RateLimit_CancelDuringWait_Aborts()
+    {
+        using var gateway = CreateGateway(
+            maxRetries: 5,
+            classifier: new FakeRateLimitClassifier());
+        using var cts = new CancellationTokenSource();
+        var attempts = 0;
+
+        var task = gateway.ExecuteAsync<int>(
+            ModelTier.Balanced,
+            ct =>
+            {
+                attempts++;
+                throw new FakeRateLimitException(retryAfter: TimeSpan.FromSeconds(30));
+            },
+            cts.Token);
+
+        // Let the first attempt run and start the retry wait.
+        await WaitUntilAsync(() => attempts == 1, TimeSpan.FromSeconds(5));
+
+        cts.Cancel();
+
+        await Assert.ThrowsAsync<TaskCanceledException>(async () => await task);
+        Assert.AreEqual(1, attempts, "Cancellation should abort during the retry wait");
+    }
+
+    [TestMethod]
+    public async Task ExecuteAsync_RateLimit_NoRetryAfter_UsesExponentialBackoff()
+    {
+        var maxBackoffSeconds = 1; // Cap backoff at 1s so the test runs quickly.
+        using var gateway = CreateGateway(
+            maxRetries: 1,
+            maxBackoffSeconds: maxBackoffSeconds,
+            classifier: new FakeRateLimitClassifier());
+        var sw = System.Diagnostics.Stopwatch.StartNew();
+        var attempts = 0;
+
+        await gateway.ExecuteAsync(
+            ModelTier.Balanced,
+            ct =>
+            {
+                attempts++;
+                if (attempts == 1)
+                    throw new FakeRateLimitException(retryAfter: null);
+                return Task.FromResult(0);
+            },
+            CancellationToken.None);
+
+        sw.Stop();
+        Assert.AreEqual(2, attempts);
+        // Attempt 1 backoff is 2^0 = 1 second; capped at maxBackoff so still 1s.
+        Assert.IsTrue(sw.ElapsedMilliseconds >= 900,
+            $"Expected at least ~1s exponential backoff, but only {sw.ElapsedMilliseconds}ms elapsed");
     }
 
     private static async Task WaitUntilAsync(Func<bool> predicate, TimeSpan timeout)