Kill switch provides manual control to immediately disable problematic experiments or conditions without code deployment. Use it for emergency shutdowns when automated circuit breakers aren't fast enough or when you need explicit control.
Kill switch enables:
- Emergency shutdown: Instantly disable failing experiments
- Surgical control: Disable specific conditions while keeping others running
- Gradual rollback: Disable conditions incrementally during issues
- Manual override: Take control when automation isn't appropriate
No additional package required - built into core ExperimentFramework.
using ExperimentFramework.KillSwitch;
// Create the provider once: this same instance is handed to WithKillSwitch() below
// and is what application code later calls to flip switches at runtime.
var killSwitch = new InMemoryKillSwitchProvider();
var experiments = ExperimentFrameworkBuilder.Create()
.Trial<IDatabase>(t => t
.UsingFeatureFlag("UseCloudDb")
// Condition keys are "false" (LocalDb, control) and "true" (CloudDb, variant).
.AddControl<LocalDb>("false")
.AddVariant<CloudDb>("true")
.OnErrorRedirectAndReplayDefault())
// Attach the kill switch to the experiment configuration.
.WithKillSwitch(killSwitch)
.UseDispatchProxy();
builder.Services.AddExperimentFramework(experiments);
// Make kill switch accessible
builder.Services.AddSingleton<IKillSwitchProvider>(killSwitch);Disables all conditions, throws ExperimentDisabledException:
killSwitch.DisableExperiment(typeof(IDatabase));
// All calls to IDatabase now throw ExperimentDisabledException
await database.GetDataAsync(); // ThrowsDisables one condition, falls back according to error policy:
// The condition key must match the key used when the condition was registered:
// the CloudDb variant was added with AddVariant<CloudDb>("true").
killSwitch.DisableTrial(typeof(IDatabase), "true");
// Calls to the "true" (CloudDb) condition fall back to control
// (if OnErrorRedirectAndReplayDefault is configured)// Re-enable specific condition
// Use the registered condition key ("true" is the CloudDb variant).
killSwitch.EnableTrial(typeof(IDatabase), "true");
// Re-enable entire experiment
killSwitch.EnableExperiment(typeof(IDatabase));bool isDisabled = killSwitch.IsExperimentDisabled(typeof(IDatabase));
bool isConditionDisabled = killSwitch.IsTrialDisabled(typeof(IDatabase), "cloud");Create HTTP endpoints for operational control:
// SECURITY: Create a whitelist of allowed experiment types to prevent type injection attacks.
// The admin endpoints resolve the caller-supplied experimentName through this map only,
// so a request can never name an arbitrary CLR type. Lookups ignore case
// (StringComparer.OrdinalIgnoreCase); unknown names yield a 404 from the endpoints.
var experimentRegistry = new Dictionary<string, Type>(StringComparer.OrdinalIgnoreCase)
{
["IDatabase"] = typeof(IDatabase),
["ITaxProvider"] = typeof(ITaxProvider),
["IPaymentProcessor"] = typeof(IPaymentProcessor)
// Add all your experiment service types here
};
var app = builder.Build();
// Disable an entire experiment, addressed by its registry name
app.MapPost("/admin/experiments/disable", (
    [FromQuery] string experimentName,
    IKillSwitchProvider killSwitch) =>
{
    // Only names present in the registry resolve to a service type.
    if (experimentRegistry.TryGetValue(experimentName, out var serviceType))
    {
        killSwitch.DisableExperiment(serviceType);
        return Results.Ok($"Experiment {experimentName} disabled");
    }

    return Results.NotFound($"Experiment '{experimentName}' not found");
})
.RequireAuthorization("Admin");
// Disable a single condition of an experiment, leaving the others running
app.MapPost("/admin/experiments/disable-trial", (
    [FromQuery] string experimentName,
    [FromQuery] string trialKey,
    IKillSwitchProvider killSwitch) =>
{
    // Only names present in the registry resolve to a service type.
    if (experimentRegistry.TryGetValue(experimentName, out var serviceType))
    {
        killSwitch.DisableTrial(serviceType, trialKey);
        return Results.Ok($"Condition '{trialKey}' of {experimentName} disabled");
    }

    return Results.NotFound($"Experiment '{experimentName}' not found");
})
.RequireAuthorization("Admin");
// Re-enable a previously disabled experiment
app.MapPost("/admin/experiments/enable", (
    [FromQuery] string experimentName,
    IKillSwitchProvider killSwitch) =>
{
    // Only names present in the registry resolve to a service type.
    if (experimentRegistry.TryGetValue(experimentName, out var serviceType))
    {
        killSwitch.EnableExperiment(serviceType);
        return Results.Ok($"Experiment {experimentName} enabled");
    }

    return Results.NotFound($"Experiment '{experimentName}' not found");
})
.RequireAuthorization("Admin");
// Get status
// Reports whether the experiment, and each of its conditions, is currently disabled.
app.MapGet("/admin/experiments/status", (
    [FromQuery] string experimentName,
    IKillSwitchProvider killSwitch) =>
{
    if (!experimentRegistry.TryGetValue(experimentName, out var type))
        return Results.NotFound($"Experiment '{experimentName}' not found");

    return Results.Ok(new
    {
        experiment = experimentName,
        experimentDisabled = killSwitch.IsExperimentDisabled(type),
        trials = new
        {
            // Look conditions up by their registered keys: the IDatabase trial was
            // declared with AddControl<LocalDb>("false") / AddVariant<CloudDb>("true").
            // Querying "cloud"/"local" would never match and always report false.
            cloud = killSwitch.IsTrialDisabled(type, "true"),
            local = killSwitch.IsTrialDisabled(type, "false")
        }
    });
})
.RequireAuthorization("Admin");# Disable cloud condition
# trialKey is the registered condition key ("true" = CloudDb variant)
curl -X POST "https://api.example.com/admin/experiments/disable-trial?experimentName=IDatabase&trialKey=true" \
-H "Authorization: Bearer $ADMIN_TOKEN"
# Check status
curl "https://api.example.com/admin/experiments/status?experimentName=IDatabase" \
-H "Authorization: Bearer $ADMIN_TOKEN"
# Re-enable
curl -X POST "https://api.example.com/admin/experiments/enable?experimentName=IDatabase" \
-H "Authorization: Bearer $ADMIN_TOKEN"For multi-instance deployments, use Redis for shared state:
dotnet add package StackExchange.Redisusing StackExchange.Redis;
/// <summary>
/// <see cref="IKillSwitchProvider"/> that stores kill-switch state in Redis so that
/// every application instance sharing the Redis server sees the same switches.
/// </summary>
/// <remarks>
/// A switch is considered "disabled" while its Redis key exists. Keys are written
/// with a 24-hour expiry, so a forgotten switch re-enables itself automatically.
/// </remarks>
public class RedisKillSwitchProvider : IKillSwitchProvider
{
    // Value stored under each key; only the key's existence is ever checked.
    private const string DisabledMarker = "disabled";

    // Auto-expiry applied to every disable, preventing "forgotten" switches.
    private static readonly TimeSpan DisableDuration = TimeSpan.FromHours(24);

    private readonly IConnectionMultiplexer _redis;

    /// <summary>Creates a provider backed by the given Redis connection.</summary>
    /// <param name="redis">Shared Redis connection multiplexer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="redis"/> is null.</exception>
    public RedisKillSwitchProvider(IConnectionMultiplexer redis)
    {
        ArgumentNullException.ThrowIfNull(redis);
        _redis = redis;
    }

    /// <summary>Returns true while the given condition of the experiment is disabled.</summary>
    public bool IsTrialDisabled(Type serviceType, string trialKey) =>
        _redis.GetDatabase().KeyExists(BuildTrialKey(serviceType, trialKey));

    /// <summary>Returns true while the whole experiment is disabled.</summary>
    public bool IsExperimentDisabled(Type serviceType) =>
        _redis.GetDatabase().KeyExists(BuildExperimentKey(serviceType));

    /// <summary>Disables one condition of the experiment for the next 24 hours.</summary>
    public void DisableTrial(Type serviceType, string trialKey) =>
        _redis.GetDatabase().StringSet(BuildTrialKey(serviceType, trialKey), DisabledMarker, DisableDuration);

    /// <summary>Disables the whole experiment for the next 24 hours.</summary>
    public void DisableExperiment(Type serviceType) =>
        _redis.GetDatabase().StringSet(BuildExperimentKey(serviceType), DisabledMarker, DisableDuration);

    /// <summary>Re-enables one condition by removing its key.</summary>
    public void EnableTrial(Type serviceType, string trialKey) =>
        _redis.GetDatabase().KeyDelete(BuildTrialKey(serviceType, trialKey));

    /// <summary>Re-enables the whole experiment by removing its key.</summary>
    public void EnableExperiment(Type serviceType) =>
        _redis.GetDatabase().KeyDelete(BuildExperimentKey(serviceType));

    // Single source of truth for the key layout, so reads, writes and deletes
    // can never drift apart.
    private static string BuildTrialKey(Type serviceType, string trialKey) =>
        $"killswitch:trial:{TypeName(serviceType)}:{trialKey}";

    private static string BuildExperimentKey(Type serviceType) =>
        $"killswitch:experiment:{TypeName(serviceType)}";

    // Type.FullName is null for some types (e.g. generic type parameters); fall back
    // to Name so such types do not all collapse onto one key with an empty segment.
    private static string TypeName(Type serviceType)
    {
        ArgumentNullException.ThrowIfNull(serviceType);
        return serviceType.FullName ?? serviceType.Name;
    }
}
// Registration
// Create the Redis connection and the provider once, up front, and register those
// exact instances. Calling builder.Services.BuildServiceProvider() here instead would
// build a second container with its own copy of every singleton (including a second
// Redis connection) that is never disposed with the application.
var redis = ConnectionMultiplexer.Connect("localhost:6379");
var killSwitch = new RedisKillSwitchProvider(redis);
builder.Services.AddSingleton<IConnectionMultiplexer>(redis);
builder.Services.AddSingleton<IKillSwitchProvider>(killSwitch);
var experiments = ExperimentFrameworkBuilder.Create()
    .Trial<IDatabase>(t => t.UsingFeatureFlag("UseCloudDb")...)
    .WithKillSwitch(killSwitch)
    .UseDispatchProxy();
builder.Services.AddExperimentFramework(experiments);Use both for defense in depth:
// Kill switch instance used for manual overrides.
var killSwitch = new InMemoryKillSwitchProvider();
var experiments = ExperimentFrameworkBuilder.Create()
.Trial<IPaymentGateway>(t => t
.UsingFeatureFlag("UseNewGateway")
.AddControl<StableGateway>("false")
.AddVariant<NewGateway>("true")
.OnErrorRedirectAndReplayDefault())
// Automatic protection: the circuit breaker is configured alongside the kill switch.
.WithCircuitBreaker(options =>
{
options.FailureRatioThreshold = 0.5; // ratio, i.e. 50% failures
options.MinimumThroughput = 10; // presumably the minimum call count before the ratio is evaluated - confirm
options.OnCircuitOpen = CircuitBreakerAction.FallbackToDefault;
})
// Manual protection: operator-controlled override.
.WithKillSwitch(killSwitch)
.UseDispatchProxy();
.UseDispatchProxy();Decision tree:
- Kill switch checks first (fastest)
- If not killed, circuit breaker evaluates
- If circuit open, falls back
- If circuit closed, executes condition
When to use each:
- Circuit breaker: Automatic protection against failing conditions
- Kill switch: Manual intervention when automation insufficient
Payment gateway with kill switch and monitoring:
using ExperimentFramework.KillSwitch;
using ExperimentFramework.Metrics.Exporters;
// Setup
// One kill switch and one metrics exporter are created here and shared by the
// experiment configuration and the HTTP endpoints defined after app is built.
var killSwitch = new InMemoryKillSwitchProvider();
var metrics = new PrometheusExperimentMetrics();
var experiments = ExperimentFrameworkBuilder.Create()
.Trial<IPaymentGateway>(t => t
.UsingFeatureFlag("UseNewPaymentGateway")
// Condition keys: "false" = StableGateway (control), "true" = NewGateway (variant).
.AddControl<StableGateway>("false")
.AddVariant<NewGateway>("true")
.OnErrorRedirectAndReplayDefault())
.WithCircuitBreaker(options =>
{
options.FailureRatioThreshold = 0.2; // ratio, i.e. 20% failures
options.MinimumThroughput = 5;
options.OnCircuitOpen = CircuitBreakerAction.FallbackToDefault;
})
.WithMetrics(metrics)
.WithKillSwitch(killSwitch)
.UseDispatchProxy();
builder.Services.AddExperimentFramework(experiments);
// Register the same kill switch instance so endpoints can receive it via DI.
builder.Services.AddSingleton<IKillSwitchProvider>(killSwitch);
var app = builder.Build();
// Admin endpoints
// Turns off the NewGateway variant (condition key "true") of IPaymentGateway.
app.MapPost("/admin/kill-switch/disable-new-gateway",
    (IKillSwitchProvider provider) =>
    {
        var gatewayExperiment = typeof(IPaymentGateway);
        provider.DisableTrial(gatewayExperiment, "true");
        return Results.Ok("New gateway disabled");
    })
    .RequireAuthorization("Admin");
// Reports the kill switch state of the payment gateway experiment.
// Lives under /admin and exposes operational state, so it carries the same
// "Admin" authorization requirement as the other admin endpoints.
app.MapGet("/admin/kill-switch/status",
    (IKillSwitchProvider ks) =>
    {
        return Results.Ok(new
        {
            newGatewayDisabled = ks.IsTrialDisabled(typeof(IPaymentGateway), "true"),
            experimentDisabled = ks.IsExperimentDisabled(typeof(IPaymentGateway))
        });
    })
    .RequireAuthorization("Admin");
// Metrics endpoint
app.MapGet("/metrics", () => metrics.GeneratePrometheusOutput());
app.Run();Scenario:
- New payment gateway deployed at 10% traffic
- Error rate spikes to 15% (metrics show issue)
- Circuit breaker hasn't opened yet (< 20% threshold)
- Admin manually disables new gateway via kill switch
- All traffic routes to stable gateway immediately
- Team investigates issue
- After fix, admin re-enables new gateway
- Gradual rollout resumes
Always protect admin endpoints:
app.MapPost("/admin/experiments/disable", ...)
.RequireAuthorization("Admin")
.RequireHost("internal.example.com"); // Extra safetyTrack who disabled what and when:
// Disable-trial endpoint that also records who triggered the kill switch.
app.MapPost("/admin/experiments/disable-trial", (
string experimentName,
string trialKey,
IKillSwitchProvider killSwitch,
ILogger<Program> logger,
HttpContext context) =>
{
// Resolve the name through the whitelist registry; unknown names return 404.
if (!experimentRegistry.TryGetValue(experimentName, out var type))
return Results.NotFound();
// Falls back to "Unknown" when the identity or its name is null.
var user = context.User.Identity?.Name ?? "Unknown";
killSwitch.DisableTrial(type, trialKey);
// Logged at Warning level with structured fields (User, Condition, Experiment).
logger.LogWarning(
"Kill switch activated: User {User} disabled condition {Condition} of {Experiment}",
user, trialKey, experimentName);
return Results.Ok();
})
.RequireAuthorization("Admin");Prevent "forgotten" disabled experiments:
public void DisableTrial(Type serviceType, string trialKey)
{
var db = _redis.GetDatabase();
var key = $"killswitch:trial:{serviceType.FullName}:{trialKey}";
db.StringSet(key, "disabled", TimeSpan.FromHours(24)); // Auto-expire
}Document when to use kill switch:
# Kill Switch Runbook
## When to Use
- Error rate > 10% and increasing
- Circuit breaker not opening fast enough
- Security incident involving experiment
- Data corruption from experimental condition
## How to Use
1. Verify metrics confirm issue
2. Run: `curl -X POST .../disable-trial?experimentName=...&trialKey=...`
3. Verify traffic shifted via metrics
4. Create incident ticket
5. Investigate root cause
6. After fix, re-enable gradually
## Contacts
- On-call: pager duty rotation
- Experiment owner: @team-experimentsVerify it works before you need it:
// Verifies that disabling the "true" (CloudDb) condition routes calls to the control.
[Fact]
public async Task KillSwitch_DisablesCondition()
{
// Arrange
var killSwitch = new InMemoryKillSwitchProvider();
var experiments = ExperimentFrameworkBuilder.Create()
.Trial<IDatabase>(t => t
.UsingFeatureFlag("UseCloudDb")
.AddControl<LocalDb>("false")
.AddVariant<CloudDb>("true")
.OnErrorRedirectAndReplayDefault())
.WithKillSwitch(killSwitch)
.UseDispatchProxy();
// Act
killSwitch.DisableTrial(typeof(IDatabase), "true");
// Assert - falls back to control
// NOTE(review): `database` is not defined in this snippet. It presumably has to be an
// IDatabase resolved from a service provider that registered `experiments` - confirm
// and add that setup before relying on this test.
var result = await database.GetDataAsync();
Assert.Equal("Local", result.Source);
}Track kill switch usage:
// Disable-trial endpoint that also counts kill switch activations.
// NOTE(review): unlike the other admin endpoints in this document, this snippet has no
// .RequireAuthorization("Admin") - confirm and add it before using this in production.
app.MapPost("/admin/experiments/disable-trial", (
string experimentName,
string trialKey,
IKillSwitchProvider killSwitch,
PrometheusExperimentMetrics metrics) =>
{
// Resolve the name through the whitelist registry; unknown names return 404.
if (!experimentRegistry.TryGetValue(experimentName, out var type))
return Results.NotFound();
killSwitch.DisableTrial(type, trialKey);
// Increment custom metric
// Tagged with the experiment name and trial key supplied by the caller.
metrics.IncrementCounter("killswitch_activations_total",
tags: new[]
{
new KeyValuePair<string, object>("experiment", experimentName),
new KeyValuePair<string, object>("trial", trialKey)
});
return Results.Ok();
});Symptom: Condition still executes after disabling.
Solutions:
- Verify the same `IKillSwitchProvider` instance is used in experiments and the admin API
- Check that `WithKillSwitch()` is called before `UseDispatchProxy()`
- Ensure correct service type and condition key (case-sensitive)
- For distributed: verify Redis connection
Symptom: Can't re-enable experiment.
Solutions:
- Call `EnableExperiment()` or `EnableTrial()`
- For distributed: check Redis keys manually
- Verify no conflicting feature flag settings
- Check if auto-expiration set (distributed mode)
Symptom: 401 when calling admin endpoints.
Solutions:
- Verify authentication configured correctly
- Check the user satisfies the "Admin" authorization policy (an authenticated user who fails the policy gets 403, not 401)
- Ensure bearer token valid and not expired
- Circuit Breaker - Automatic failure protection
- Metrics - Monitor experiment health
- Error Handling - Fallback strategies