DataDog · tobz · Mar 31, 2026 · Mar 31, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/.cargo/config.toml b/.cargo/config.toml
@@ -1,5 +1,11 @@
 [build]
-rustflags = ["--cfg", "tokio_unstable"]
+rustflags = "--cfg tokio_unstable"
+
+[target.x86_64-unknown-linux-gnu]
+rustflags = "--cfg tokio_unstable -C target-feature=+sse,+sse2,+sse3,+sse4.1,+sse4.2,+popcnt"
+
+[target.x86_64-unknown-linux-musl]
+rustflags = "--cfg tokio_unstable -C target-feature=+sse,+sse2,+sse3,+sse4.1,+sse4.2,+popcnt"
 
 [env]
 JEMALLOC_SYS_WITH_MALLOC_CONF = "abort_conf:true,max_background_threads:1,narenas:1,tcache:false,thp:never,oversize_threshold:32768,dirty_decay_ms:1000,muzzy_decay_ms:0"
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -78,7 +78,7 @@ serde = { version = "1", default-features = false, features = [
   "std",
 ] }
 snafu = { version = "0.9", default-features = false, features = ["std"] }
-tokio = { version = "1.50", default-features = false }
+tokio = { version = "1.49", default-features = false }
 tracing = { version = "0.1", default-features = false, features = ["std"] }
 async-compression = { version = "0.4.13", default-features = false, features = [
   "gzip",
@@ -137,7 +137,7 @@ url = { version = "2", default-features = false }
 ndarray = { version = "0.16", default-features = false }
 ndarray-stats = { version = "0.6", default-features = false }
 noisy_float = { version = "0.2", default-features = false }
-libc = { version = "0.2.183", default-features = false }
+libc = { version = "0.2.169", default-features = false }
 socket2 = { version = "0.6", default-features = false }
 tonic-prost-build = { version = "0.14", default-features = false }
 tonic-prost = { version = "0.14", default-features = false }
@@ -220,6 +220,7 @@ num-traits = { version = "0.2", default-features = false }
 chumsky = { version = "0.12", default-features = false }
 logos = { version = "0.16", default-features = false }
 lru-slab = { version = "0.1.2", default-features = false }
+trie-hard = { git = "https://github.com/tobz/trie-hard.git", branch = "tobz/optimize", default-features = false } # TODO: pin to an immutable `rev` instead of a moving branch
 
 [patch.crates-io]
 # Forked version of `hyper-http-proxy` that removes an unused dependency on `rustls-native-certs`, which transitively depends

diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv
@@ -332,6 +332,7 @@ tracing-rolling-file,https://github.com/cavivie/tracing-rolling-file,MIT OR Apac
 tracing-serde,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <team@tokio.rs>
 tracing-subscriber,https://github.com/tokio-rs/tracing,MIT,"Eliza Weisman <eliza@buoyant.io>, David Barsky <me@davidbarsky.com>, Tokio Contributors <team@tokio.rs>"
 treediff,https://github.com/Byron/treediff-rs,MIT OR Apache-2.0,Sebastian Thiel <byronimo@gmail.com>
+trie-hard,https://github.com/cloudflare/trie-hard,Apache-2.0,The trie-hard Authors
 triomphe,https://github.com/Manishearth/triomphe,MIT OR Apache-2.0,"Manish Goregaokar <manishsmail@gmail.com>, The Servo Project Developers"
 try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur <sean@seanmonstar.com>
 twox-hash,https://github.com/shepmaster/twox-hash,MIT,Jake Goulding <jake.goulding@gmail.com>

diff --git a/bin/agent-data-plane/Cargo.toml b/bin/agent-data-plane/Cargo.toml
@@ -20,11 +20,14 @@ chrono = { workspace = true }
 colored = { workspace = true }
 comfy-table = { workspace = true }
 datadog-protos = { workspace = true }
+foldhash = { workspace = true }
 futures = { workspace = true }
+hashbrown = { workspace = true }
 http = { workspace = true }
 http-body-util = { workspace = true }
 hyper = { workspace = true }
 memory-accounting = { workspace = true }
+metrics = { workspace = true }
 ottl = { workspace = true }
 papaya = { workspace = true }
 prometheus-exposition = { workspace = true }
@@ -40,6 +43,7 @@ saluki-error = { workspace = true }
 saluki-health = { workspace = true }
 saluki-io = { workspace = true }
 saluki-metadata = { workspace = true }
+saluki-metrics = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 serde_yaml = { workspace = true }
@@ -52,6 +56,7 @@ tokio = { workspace = true, features = [
 ] }
 tonic = { workspace = true }
 tracing = { workspace = true }
+trie-hard = { workspace = true }
 uuid = { workspace = true }
 
 [target.'cfg(target_os = "linux")'.dependencies]
@@ -63,3 +68,11 @@ tikv-jemallocator = { workspace = true, features = [
 
 [build-dependencies]
 chrono = { workspace = true }
+
+[dev-dependencies]
+criterion = { workspace = true }
+saluki-metrics = { workspace = true, features = ["test"] }
+
+[[bench]]
+name = "tag_filterlist"
+harness = false
diff --git a/bin/agent-data-plane/benches/tag_filterlist.rs b/bin/agent-data-plane/benches/tag_filterlist.rs
@@ -0,0 +1,199 @@
+use std::collections::HashSet as StdHashSet;
+
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use foldhash::fast::RandomState as FoldHashState;
+use hashbrown::HashSet as HbHashSet;
+use trie_hard::TrieHard;
+
+// Tag-name sets of increasing size (5, 10 and 20 names) used as the filter contents in the lookup benchmarks.
+// Each larger set begins with every entry of the previous one.
+const SMALL_TAGS: &[&str] = &["env", "host", "service", "region", "version"];
+
+const MEDIUM_TAGS: &[&str] = &[
+    "env",
+    "host",
+    "service",
+    "region",
+    "version",
+    "availability_zone",
+    "instance_type",
+    "cluster",
+    "namespace",
+    "pod_name",
+];
+
+const LARGE_TAGS: &[&str] = &[
+    "env",
+    "host",
+    "service",
+    "region",
+    "version",
+    "availability_zone",
+    "instance_type",
+    "cluster",
+    "namespace",
+    "pod_name",
+    "container_name",
+    "image_tag",
+    "team",
+    "cost_center",
+    "deployment",
+    "shard",
+    "replica",
+    "datacenter",
+    "cloud_provider",
+    "account_id",
+];
+
+// `(label, tag names)` pairs iterated by the hit/miss benchmarks. The label is the number of names in the set and
+// is passed to `BenchmarkId::new` as the benchmark parameter.
+const TAG_SETS: &[(&str, &[&str])] = &[("5", SMALL_TAGS), ("10", MEDIUM_TAGS), ("20", LARGE_TAGS)];
+
+// A key present in all sets.
+const HIT_KEY: &str = "env";
+// A key not present in any set.
+const MISS_KEY: &str = "nonexistent_tag_name";
+
+// Realistic `name:value` metric tags for the full-scan benchmark.
+// There are 20 tags; 5 of them have a name listed in `EXCLUDE_FILTER_NAMES`, so ~75% of the lookups are misses.
+const METRIC_TAGS: &[&str] = &[
+    "env:prod",
+    "host:i-abc123def456",
+    "service:web-frontend",
+    "region:us-east-1",
+    "version:3.14.1",
+    "availability_zone:us-east-1a",
+    "instance_type:c5.2xlarge",
+    "cluster:main-prod",
+    "namespace:default",
+    "pod_name:web-frontend-6f8b9c7d4-x2k9m",
+    "container_name:app",
+    "image_tag:sha-a1b2c3d",
+    "team:platform",
+    "cost_center:eng-1234",
+    "deployment:canary",
+    "shard:03",
+    "replica:2",
+    "datacenter:dc1",
+    "cloud_provider:aws",
+    "account_id:123456789012",
+];
+
+// The 5 tag names to exclude in the full-scan benchmark. Each one is the name of a tag in `METRIC_TAGS`:
+// env, host, region, availability_zone, instance_type.
+const EXCLUDE_FILTER_NAMES: &[&str] = &["env", "host", "region", "availability_zone", "instance_type"];
+
+/// Builds a standard-library `HashSet` (default hasher) holding an owned copy of every name in `names`.
+fn build_std_hashset(names: &[&str]) -> StdHashSet<String> {
+    let mut owned_names = StdHashSet::new();
+    owned_names.extend(names.iter().copied().map(String::from));
+    owned_names
+}
+
+/// Builds a hashbrown `HashSet` that uses the foldhash `RandomState` hasher, pre-sized to `names.len()` and
+/// holding an owned copy of every name in `names`.
+fn build_hb_hashset(names: &[&str]) -> HbHashSet<String, FoldHashState> {
+    let hasher = FoldHashState::default();
+    let mut owned_names = HbHashSet::with_capacity_and_hasher(names.len(), hasher);
+    for name in names {
+        owned_names.insert(name.to_string());
+    }
+    owned_names
+}
+
+/// Builds a `TrieHard` keyed by the bytes of each name in `names`, with `()` as the value of every entry.
+fn build_trie(names: &[&'static str]) -> TrieHard<'static, ()> {
+    let mut entries: Vec<(&'static [u8], ())> = Vec::with_capacity(names.len());
+    entries.extend(names.iter().map(|name| (name.as_bytes(), ())));
+    TrieHard::new(entries)
+}
+
+/// Returns the name portion of a `name:value` tag, i.e. everything before the first `:`.
+///
+/// A tag that contains no `:` is returned unchanged.
+fn tag_name(tag: &str) -> &str {
+    match tag.split_once(':') {
+        Some((name, _value)) => name,
+        None => tag,
+    }
+}
+
+/// Benchmarks a single membership lookup for a key that every tag set contains.
+///
+/// For each `(label, tags)` entry in `TAG_SETS`, the same `HIT_KEY` lookup is measured against a std `HashSet`,
+/// a hashbrown `HashSet` hashed with foldhash, and a `TrieHard`, all under the `should_keep_tag/hit` group.
+fn bench_lookup_hit(c: &mut Criterion) {
+    let mut group = c.benchmark_group("should_keep_tag/hit");
+
+    for &(label, tags) in TAG_SETS {
+        let std_set = build_std_hashset(tags);
+        let hb_set = build_hb_hashset(tags);
+        let trie = build_trie(tags);
+
+        // The key goes through `black_box` so the optimizer cannot treat the lookup as a loop-invariant
+        // computation on a compile-time constant and hoist or specialize it out of the measured loop.
+        group.bench_with_input(BenchmarkId::new("std_hashset", label), &std_set, |b, set| {
+            b.iter(|| set.contains(std::hint::black_box(HIT_KEY)));
+        });
+
+        group.bench_with_input(BenchmarkId::new("hb_hashset", label), &hb_set, |b, set| {
+            b.iter(|| set.contains(std::hint::black_box(HIT_KEY)));
+        });
+
+        group.bench_with_input(BenchmarkId::new("trie", label), &trie, |b, trie| {
+            b.iter(|| trie.get(std::hint::black_box(HIT_KEY)).is_some());
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks a single membership lookup for a key that no tag set contains.
+///
+/// For each `(label, tags)` entry in `TAG_SETS`, the same `MISS_KEY` lookup is measured against a std `HashSet`,
+/// a hashbrown `HashSet` hashed with foldhash, and a `TrieHard`, all under the `should_keep_tag/miss` group.
+fn bench_lookup_miss(c: &mut Criterion) {
+    let mut group = c.benchmark_group("should_keep_tag/miss");
+
+    for &(label, tags) in TAG_SETS {
+        let std_set = build_std_hashset(tags);
+        let hb_set = build_hb_hashset(tags);
+        let trie = build_trie(tags);
+
+        // The key goes through `black_box` so the optimizer cannot treat the lookup as a loop-invariant
+        // computation on a compile-time constant and hoist or specialize it out of the measured loop.
+        group.bench_with_input(BenchmarkId::new("std_hashset", label), &std_set, |b, set| {
+            b.iter(|| set.contains(std::hint::black_box(MISS_KEY)));
+        });
+
+        group.bench_with_input(BenchmarkId::new("hb_hashset", label), &hb_set, |b, set| {
+            b.iter(|| set.contains(std::hint::black_box(MISS_KEY)));
+        });
+
+        group.bench_with_input(BenchmarkId::new("trie", label), &trie, |b, trie| {
+            b.iter(|| trie.get(std::hint::black_box(MISS_KEY)).is_some());
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks filtering a whole metric's tags against an exclusion list.
+///
+/// The names of all `METRIC_TAGS` are looked up in a filter built from `EXCLUDE_FILTER_NAMES`, and the number of
+/// names that are NOT in the filter (the tags that would be kept) is counted. The same scan is measured for a std
+/// `HashSet`, a hashbrown `HashSet` hashed with foldhash, and a `TrieHard`, under `should_keep_tag/full_scan`.
+fn bench_full_scan(c: &mut Criterion) {
+    let mut group = c.benchmark_group("should_keep_tag/full_scan");
+
+    // Pre-extract tag names so we only measure the set membership check.
+    let metric_tag_names: Vec<&str> = METRIC_TAGS.iter().map(|t| tag_name(t)).collect();
+
+    let std_set = build_std_hashset(EXCLUDE_FILTER_NAMES);
+    let hb_set = build_hb_hashset(EXCLUDE_FILTER_NAMES);
+    let trie = build_trie(EXCLUDE_FILTER_NAMES);
+
+    // In every variant the name list goes through `black_box` so the optimizer cannot treat the scan as a
+    // loop-invariant computation over data it can see, and `kept` is returned so the count is not discarded.
+    group.bench_function("std_hashset", |b| {
+        b.iter(|| {
+            let mut kept = 0u32;
+            for name in std::hint::black_box(&metric_tag_names) {
+                if !std_set.contains(*name) {
+                    kept += 1;
+                }
+            }
+            kept
+        });
+    });
+
+    group.bench_function("hb_hashset", |b| {
+        b.iter(|| {
+            let mut kept = 0u32;
+            for name in std::hint::black_box(&metric_tag_names) {
+                if !hb_set.contains(*name) {
+                    kept += 1;
+                }
+            }
+            kept
+        });
+    });
+
+    group.bench_function("trie", |b| {
+        b.iter(|| {
+            let mut kept = 0u32;
+            for name in std::hint::black_box(&metric_tag_names) {
+                if trie.get(name).is_none() {
+                    kept += 1;
+                }
+            }
+            kept
+        });
+    });
+
+    group.finish();
+}
+
+// Group the three benchmark functions under the name `benches` and hand that group to `criterion_main!`.
+criterion_group!(benches, bench_lookup_hit, bench_lookup_miss, bench_full_scan);
+criterion_main!(benches);