Skip to content

Commit ce3c8e3

Browse files
jchrostek-dd and claude authored
feat: add AWS delegated authentication support (#1112)
## Summary

Add support for AWS delegated authentication, allowing Lambda functions to authenticate with Datadog using their IAM role instead of static API keys. This mirrors the implementation in the main Datadog agent ([PR #46272](https://github.com/DataDog/datadog-agent/pull/46272)).

**How it works:**

1. The Lambda function's IAM role signs an STS `GetCallerIdentity` request.
2. The signed request is sent to Datadog's `/api/v2/intake-key` endpoint as authentication proof.
3. If the role is configured in Datadog's intake mapping, a managed API key is returned.
4. If delegated auth fails, the extension falls back to the other API key methods (Secrets Manager, KMS, SSM, static).

**Note**: This feature is in preview; customers currently need to request access to use it.

## Tests

- [x] Added an integration test that uses this new auth flow and verifies that logs from the new Lambda arrive in our Datadog Serverless account. Note that the IAM role must be added to a Datadog account mapping, so the IAM role used for this integration test is hardcoded to be the same regardless of who runs it.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 5e92189 commit ce3c8e3

14 files changed

Lines changed: 752 additions & 42 deletions

File tree

.gitlab/datasources/test-suites.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ test_suites:
33
- name: otlp
44
- name: snapstart
55
- name: lmi
6+
- name: auth

bottlecap/src/bin/bottlecap/main.rs

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,11 @@ async fn main() -> anyhow::Result<()> {
150150
let config = Arc::new(config::get_config(Path::new(&lambda_directory)));
151151

152152
let aws_config = Arc::new(aws_config);
153-
let api_key_factory = create_api_key_factory(&config, &aws_config);
153+
// Build one shared reqwest::Client for metrics, logs, trace proxy flushing, and calls to
154+
// Datadog APIs (e.g. delegated auth). reqwest::Client is Arc-based internally, so cloning
155+
// just increments a refcount and shares the connection pool.
156+
let shared_client = bottlecap::http::get_client(&config);
157+
let api_key_factory = create_api_key_factory(&config, &aws_config, &shared_client);
154158

155159
let r = response
156160
.await
@@ -161,6 +165,7 @@ async fn main() -> anyhow::Result<()> {
161165
Arc::clone(&aws_config),
162166
&config,
163167
&client,
168+
shared_client,
164169
&r,
165170
Arc::clone(&api_key_factory),
166171
start_time,
@@ -246,17 +251,23 @@ fn get_flush_strategy_for_mode(
246251
}
247252
}
248253

249-
fn create_api_key_factory(config: &Arc<Config>, aws_config: &Arc<AwsConfig>) -> Arc<ApiKeyFactory> {
254+
fn create_api_key_factory(
255+
config: &Arc<Config>,
256+
aws_config: &Arc<AwsConfig>,
257+
client: &reqwest::Client,
258+
) -> Arc<ApiKeyFactory> {
250259
let config = Arc::clone(config);
251260
let aws_config = Arc::clone(aws_config);
261+
let client = client.clone();
252262
let api_key_secret_reload_interval = config.api_key_secret_reload_interval;
253263

254264
Arc::new(ApiKeyFactory::new_from_resolver(
255265
Arc::new(move || {
256266
let config = Arc::clone(&config);
257267
let aws_config = Arc::clone(&aws_config);
268+
let client = client.clone();
258269

259-
Box::pin(async move { resolve_secrets(config, aws_config).await })
270+
Box::pin(async move { resolve_secrets(config, aws_config, client).await })
260271
}),
261272
api_key_secret_reload_interval,
262273
))
@@ -285,6 +296,7 @@ async fn extension_loop_active(
285296
aws_config: Arc<AwsConfig>,
286297
config: &Arc<Config>,
287298
client: &Client,
299+
shared_client: reqwest::Client,
288300
r: &RegisterResponse,
289301
api_key_factory: Arc<ApiKeyFactory>,
290302
start_time: Instant,
@@ -294,11 +306,6 @@ async fn extension_loop_active(
294306
let account_id = r.account_id.as_ref().unwrap_or(&"none".to_string()).clone();
295307
let tags_provider = setup_tag_provider(&Arc::clone(&aws_config), config, &account_id);
296308

297-
// Build one shared reqwest::Client for metrics, logs, and trace proxy flushing.
298-
// reqwest::Client is Arc-based internally, so cloning just increments a refcount
299-
// and shares the connection pool.
300-
let shared_client = bottlecap::http::get_client(config);
301-
302309
let (
303310
logs_agent_channel,
304311
logs_flusher,

bottlecap/src/config/env.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,12 @@ pub struct EnvConfig {
482482
/// The delay between two samples of the API Security schema collection, in seconds.
483483
#[serde(deserialize_with = "deserialize_optional_duration_from_seconds")]
484484
pub api_security_sample_delay: Option<Duration>,
485+
486+
/// @env `DD_ORG_UUID`
487+
///
488+
/// The Datadog organization UUID. When set, delegated auth is auto-enabled.
489+
#[serde(deserialize_with = "deserialize_string_or_int")]
490+
pub org_uuid: Option<String>,
485491
}
486492

487493
#[allow(clippy::too_many_lines)]
@@ -684,6 +690,8 @@ fn merge_config(config: &mut Config, env_config: &EnvConfig) {
684690
merge_option_to_value!(config, env_config, appsec_waf_timeout);
685691
merge_option_to_value!(config, env_config, api_security_enabled);
686692
merge_option_to_value!(config, env_config, api_security_sample_delay);
693+
694+
merge_string!(config, dd_org_uuid, env_config, org_uuid);
687695
}
688696

689697
#[derive(Debug, PartialEq, Clone, Copy)]
@@ -1044,6 +1052,8 @@ mod tests {
10441052
appsec_waf_timeout: Duration::from_secs(1),
10451053
api_security_enabled: false,
10461054
api_security_sample_delay: Duration::from_secs(60),
1055+
1056+
dd_org_uuid: String::default(),
10471057
};
10481058

10491059
assert_eq!(config, expected_config);

bottlecap/src/config/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,8 @@ pub struct Config {
364364
pub span_dedup_timeout: Option<Duration>,
365365
pub api_key_secret_reload_interval: Option<Duration>,
366366

367+
pub dd_org_uuid: String,
368+
367369
pub serverless_appsec_enabled: bool,
368370
pub appsec_rules: Option<String>,
369371
pub appsec_waf_timeout: Duration,
@@ -479,6 +481,8 @@ impl Default for Config {
479481
span_dedup_timeout: None,
480482
api_key_secret_reload_interval: None,
481483

484+
dd_org_uuid: String::default(),
485+
482486
serverless_appsec_enabled: false,
483487
appsec_rules: None,
484488
appsec_waf_timeout: Duration::from_millis(5),

bottlecap/src/config/yaml.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,8 @@ api_security_sample_delay: 60 # Seconds
10361036
dogstatsd_so_rcvbuf: Some(1_048_576),
10371037
dogstatsd_buffer_size: Some(65507),
10381038
dogstatsd_queue_size: Some(2048),
1039+
1040+
dd_org_uuid: String::default(),
10391041
};
10401042

10411043
// Assert that

bottlecap/src/secrets/decrypt.rs

Lines changed: 52 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,19 @@ use sha2::{Digest, Sha256};
1414
use std::io::Error;
1515
use std::sync::Arc;
1616
use tokio::time::Instant;
17-
use tracing::debug;
18-
use tracing::error;
17+
use tracing::{debug, error};
1918

20-
pub async fn resolve_secrets(config: Arc<Config>, aws_config: Arc<AwsConfig>) -> Option<String> {
19+
use crate::secrets::delegated_auth;
20+
21+
pub async fn resolve_secrets(
22+
config: Arc<Config>,
23+
aws_config: Arc<AwsConfig>,
24+
shared_client: Client,
25+
) -> Option<String> {
2126
let api_key_candidate = if !config.api_key_secret_arn.is_empty()
2227
|| !config.kms_api_key.is_empty()
2328
|| !config.api_key_ssm_arn.is_empty()
29+
|| !config.dd_org_uuid.is_empty()
2430
{
2531
let before_decrypt = Instant::now();
2632

@@ -40,38 +46,17 @@ pub async fn resolve_secrets(config: Arc<Config>, aws_config: Arc<AwsConfig>) ->
4046
}
4147
};
4248

43-
let mut aws_credentials = AwsCredentials::from_env();
44-
45-
if aws_credentials.aws_secret_access_key.is_empty()
46-
&& aws_credentials.aws_access_key_id.is_empty()
47-
&& !aws_credentials
48-
.aws_container_credentials_full_uri
49-
.is_empty()
50-
&& !aws_credentials.aws_container_authorization_token.is_empty()
51-
{
52-
// We're in Snap Start
53-
let credentials = match get_snapstart_credentials(&aws_credentials, &client).await {
54-
Ok(credentials) => credentials,
55-
Err(err) => {
56-
error!("Error getting Snap Start credentials: {}", err);
57-
return None;
58-
}
59-
};
60-
aws_credentials.aws_access_key_id = credentials["AccessKeyId"]
61-
.as_str()
62-
.unwrap_or_default()
63-
.to_string();
64-
aws_credentials.aws_secret_access_key = credentials["SecretAccessKey"]
65-
.as_str()
66-
.unwrap_or_default()
67-
.to_string();
68-
aws_credentials.aws_session_token = credentials["Token"]
69-
.as_str()
70-
.unwrap_or_default()
71-
.to_string();
72-
}
49+
let aws_credentials = get_aws_credentials(&client).await?;
7350

74-
let decrypted_key = if !config.kms_api_key.is_empty() {
51+
let decrypted_key = if !config.dd_org_uuid.is_empty() {
52+
delegated_auth::get_delegated_api_key(
53+
&config,
54+
&aws_config,
55+
&shared_client,
56+
&aws_credentials,
57+
)
58+
.await
59+
} else if !config.kms_api_key.is_empty() {
7560
decrypt_aws_kms(
7661
&client,
7762
config.kms_api_key.clone(),
@@ -258,6 +243,39 @@ async fn decrypt_aws_ssm(
258243
Err(Error::new(std::io::ErrorKind::InvalidData, v.to_string()).into())
259244
}
260245

246+
async fn get_aws_credentials(client: &Client) -> Option<AwsCredentials> {
247+
let mut aws_credentials = AwsCredentials::from_env();
248+
// We're in SnapStart — fetch short-lived credentials from the container endpoint
249+
if aws_credentials.aws_secret_access_key.is_empty()
250+
&& aws_credentials.aws_access_key_id.is_empty()
251+
&& !aws_credentials
252+
.aws_container_credentials_full_uri
253+
.is_empty()
254+
&& !aws_credentials.aws_container_authorization_token.is_empty()
255+
{
256+
let credentials = match get_snapstart_credentials(&aws_credentials, client).await {
257+
Ok(credentials) => credentials,
258+
Err(err) => {
259+
error!("Error getting SnapStart credentials: {}", err);
260+
return None;
261+
}
262+
};
263+
aws_credentials.aws_access_key_id = credentials["AccessKeyId"]
264+
.as_str()
265+
.unwrap_or_default()
266+
.to_string();
267+
aws_credentials.aws_secret_access_key = credentials["SecretAccessKey"]
268+
.as_str()
269+
.unwrap_or_default()
270+
.to_string();
271+
aws_credentials.aws_session_token = credentials["Token"]
272+
.as_str()
273+
.unwrap_or_default()
274+
.to_string();
275+
}
276+
Some(aws_credentials)
277+
}
278+
261279
async fn get_snapstart_credentials(
262280
aws_credentials: &AwsCredentials,
263281
client: &Client,

0 commit comments

Comments
 (0)