Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .cargo/config.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
[build]
rustflags = ["--cfg", "tokio_unstable"]
rustflags = "--cfg tokio_unstable"

[target.x86_64-unknown-linux-gnu]
rustflags = "--cfg tokio_unstable -C target-feature=+sse,+sse2,+sse3,+sse4.1,+sse4.2,+popcnt"

[target.x86_64-unknown-linux-musl]
rustflags = "--cfg tokio_unstable -C target-feature=+sse,+sse2,+sse3,+sse4.1,+sse4.2,+popcnt"
Comment on lines 1 to +8
Copy link

Copilot AI Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .cargo/config.toml has been significantly modified: the rustflags format changed from an array to a string, and target-specific rustflags with SSE features were added. The PR description is empty and doesn't explain these infrastructure changes. Verify that these changes are intentional and compatible with the build system.

Copilot uses AI. Check for mistakes.

[env]
JEMALLOC_SYS_WITH_MALLOC_CONF = "abort_conf:true,max_background_threads:1,narenas:1,tcache:false,thp:never,oversize_threshold:32768,dirty_decay_ms:1000,muzzy_decay_ms:0"
45 changes: 44 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ serde = { version = "1", default-features = false, features = [
"std",
] }
snafu = { version = "0.9", default-features = false, features = ["std"] }
tokio = { version = "1.50", default-features = false }
tokio = { version = "1.49", default-features = false }
Copy link

Copilot AI Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tokio version has been downgraded from 1.50 to 1.49. This appears to be intentional, but the PR description is empty and doesn't explain why. Verify that this downgrade is intentional and doesn't cause any compatibility issues with other dependencies.

Copilot uses AI. Check for mistakes.
tracing = { version = "0.1", default-features = false, features = ["std"] }
async-compression = { version = "0.4.13", default-features = false, features = [
"gzip",
Expand Down Expand Up @@ -137,7 +137,7 @@ url = { version = "2", default-features = false }
ndarray = { version = "0.16", default-features = false }
ndarray-stats = { version = "0.6", default-features = false }
noisy_float = { version = "0.2", default-features = false }
libc = { version = "0.2.183", default-features = false }
libc = { version = "0.2.169", default-features = false }
Copy link

Copilot AI Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The libc version has been downgraded from 0.2.183 to 0.2.169. The PR description is empty and doesn't explain why this downgrade was made. Verify that this is intentional and understand the reason for this version change.

Copilot uses AI. Check for mistakes.
socket2 = { version = "0.6", default-features = false }
tonic-prost-build = { version = "0.14", default-features = false }
tonic-prost = { version = "0.14", default-features = false }
Expand Down Expand Up @@ -220,6 +220,7 @@ num-traits = { version = "0.2", default-features = false }
chumsky = { version = "0.12", default-features = false }
logos = { version = "0.16", default-features = false }
lru-slab = { version = "0.1.2", default-features = false }
trie-hard = { git = "https://github.com/tobz/trie-hard.git", branch = "tobz/optimize", default-features = false }
Copy link

Copilot AI Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR adds a dependency on trie-hard from a custom git branch (tobz/optimize). Using a git branch dependency instead of a published release version can create reproducibility and maintenance issues. Consider either publishing this as a proper release, using a specific commit hash, or waiting for the changes to be merged upstream and released officially.

Suggested change
trie-hard = { git = "https://github.com/tobz/trie-hard.git", branch = "tobz/optimize", default-features = false }
trie-hard = { git = "https://github.com/tobz/trie-hard.git", rev = "<current-commit-sha>", default-features = false }

Copilot uses AI. Check for mistakes.

[patch.crates-io]
# Forked version of `hyper-http-proxy` that removes an unused dependency on `rustls-native-certs`, which transitively depends
Expand Down
1 change: 1 addition & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ tracing-rolling-file,https://github.com/cavivie/tracing-rolling-file,MIT OR Apac
tracing-serde,https://github.com/tokio-rs/tracing,MIT,Tokio Contributors <team@tokio.rs>
tracing-subscriber,https://github.com/tokio-rs/tracing,MIT,"Eliza Weisman <eliza@buoyant.io>, David Barsky <me@davidbarsky.com>, Tokio Contributors <team@tokio.rs>"
treediff,https://github.com/Byron/treediff-rs,MIT OR Apache-2.0,Sebastian Thiel <byronimo@gmail.com>
trie-hard,https://github.com/cloudflare/trie-hard,Apache-2.0,The trie-hard Authors
triomphe,https://github.com/Manishearth/triomphe,MIT OR Apache-2.0,"Manish Goregaokar <manishsmail@gmail.com>, The Servo Project Developers"
try-lock,https://github.com/seanmonstar/try-lock,MIT,Sean McArthur <sean@seanmonstar.com>
twox-hash,https://github.com/shepmaster/twox-hash,MIT,Jake Goulding <jake.goulding@gmail.com>
Expand Down
13 changes: 13 additions & 0 deletions bin/agent-data-plane/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ chrono = { workspace = true }
colored = { workspace = true }
comfy-table = { workspace = true }
datadog-protos = { workspace = true }
foldhash = { workspace = true }
futures = { workspace = true }
hashbrown = { workspace = true }
http = { workspace = true }
http-body-util = { workspace = true }
hyper = { workspace = true }
memory-accounting = { workspace = true }
metrics = { workspace = true }
ottl = { workspace = true }
papaya = { workspace = true }
prometheus-exposition = { workspace = true }
Expand All @@ -40,6 +43,7 @@ saluki-error = { workspace = true }
saluki-health = { workspace = true }
saluki-io = { workspace = true }
saluki-metadata = { workspace = true }
saluki-metrics = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
serde_yaml = { workspace = true }
Expand All @@ -52,6 +56,7 @@ tokio = { workspace = true, features = [
] }
tonic = { workspace = true }
tracing = { workspace = true }
trie-hard = { workspace = true }
uuid = { workspace = true }

[target.'cfg(target_os = "linux")'.dependencies]
Expand All @@ -63,3 +68,11 @@ tikv-jemallocator = { workspace = true, features = [

[build-dependencies]
chrono = { workspace = true }

[dev-dependencies]
criterion = { workspace = true }
saluki-metrics = { workspace = true, features = ["test"] }

[[bench]]
name = "tag_filterlist"
harness = false
199 changes: 199 additions & 0 deletions bin/agent-data-plane/benches/tag_filterlist.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
use std::collections::HashSet as StdHashSet;
use std::hint::black_box;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use foldhash::fast::RandomState as FoldHashState;
use hashbrown::HashSet as HbHashSet;
use trie_hard::TrieHard;

// Filter set of 5 common tag names (small filterlist case).
const SMALL_TAGS: &[&str] = &["env", "host", "service", "region", "version"];

// Filter set of 10 tag names (medium filterlist case); superset of SMALL_TAGS.
const MEDIUM_TAGS: &[&str] = &[
    "env",
    "host",
    "service",
    "region",
    "version",
    "availability_zone",
    "instance_type",
    "cluster",
    "namespace",
    "pod_name",
];

// Filter set of 20 tag names (large filterlist case); superset of MEDIUM_TAGS.
const LARGE_TAGS: &[&str] = &[
    "env",
    "host",
    "service",
    "region",
    "version",
    "availability_zone",
    "instance_type",
    "cluster",
    "namespace",
    "pod_name",
    "container_name",
    "image_tag",
    "team",
    "cost_center",
    "deployment",
    "shard",
    "replica",
    "datacenter",
    "cloud_provider",
    "account_id",
];

// (label, tag set) pairs iterated by the hit/miss benchmarks; labels are the set sizes.
const TAG_SETS: &[(&str, &[&str])] = &[("5", SMALL_TAGS), ("10", MEDIUM_TAGS), ("20", LARGE_TAGS)];

// A key present in all sets.
const HIT_KEY: &str = "env";
// A key not present in any set.
const MISS_KEY: &str = "nonexistent_tag_name";

// Realistic metric tags for the full-scan benchmark.
// 20 tags, of which 5 keys match the EXCLUDE_FILTER_NAMES (~75% miss rate).
const METRIC_TAGS: &[&str] = &[
    "env:prod",
    "host:i-abc123def456",
    "service:web-frontend",
    "region:us-east-1",
    "version:3.14.1",
    "availability_zone:us-east-1a",
    "instance_type:c5.2xlarge",
    "cluster:main-prod",
    "namespace:default",
    "pod_name:web-frontend-6f8b9c7d4-x2k9m",
    "container_name:app",
    "image_tag:sha-a1b2c3d",
    "team:platform",
    "cost_center:eng-1234",
    "deployment:canary",
    "shard:03",
    "replica:2",
    "datacenter:dc1",
    "cloud_provider:aws",
    "account_id:123456789012",
];

// 5 tag names to exclude — matches env, host, region, availability_zone, instance_type.
const EXCLUDE_FILTER_NAMES: &[&str] = &["env", "host", "region", "availability_zone", "instance_type"];

/// Builds a standard-library `HashSet` (SipHash hasher) holding an owned copy of each tag name.
fn build_std_hashset(names: &[&str]) -> StdHashSet<String> {
    let mut set = StdHashSet::new();
    for name in names {
        set.insert((*name).to_string());
    }
    set
}

/// Builds a `hashbrown` `HashSet` using the `foldhash` fast hasher, pre-sized for `names`
/// so the benchmark does not include table growth.
fn build_hb_hashset(names: &[&str]) -> HbHashSet<String, FoldHashState> {
    let mut set = HbHashSet::with_capacity_and_hasher(names.len(), FoldHashState::default());
    for name in names {
        set.insert((*name).to_string());
    }
    set
}

/// Builds a `TrieHard` keyed on the raw bytes of each tag name, with unit values
/// (only membership matters for the filterlist check).
fn build_trie(names: &[&'static str]) -> TrieHard<'static, ()> {
    let entries = names
        .iter()
        .map(|name| (name.as_bytes(), ()))
        .collect::<Vec<(&'static [u8], ())>>();
    TrieHard::new(entries)
}

/// Extract the tag name (part before ':') from a "key:value" tag string.
/// Extracts the tag name (the part before the first ':') from a "key:value" tag string.
///
/// Tags with no ':' separator are returned unchanged.
fn tag_name(tag: &str) -> &str {
    match tag.find(':') {
        Some(idx) => &tag[..idx],
        None => tag,
    }
}

fn bench_lookup_hit(c: &mut Criterion) {
let mut group = c.benchmark_group("should_keep_tag/hit");

for &(label, tags) in TAG_SETS {
let std_set = build_std_hashset(tags);
let hb_set = build_hb_hashset(tags);
let trie = build_trie(tags);

group.bench_with_input(BenchmarkId::new("std_hashset", label), &std_set, |b, set| {
b.iter(|| set.contains(HIT_KEY));
});

group.bench_with_input(BenchmarkId::new("hb_hashset", label), &hb_set, |b, set| {
b.iter(|| set.contains(HIT_KEY));
});

group.bench_with_input(BenchmarkId::new("trie", label), &trie, |b, trie| {
b.iter(|| trie.get(HIT_KEY).is_some());
});
}

group.finish();
}

fn bench_lookup_miss(c: &mut Criterion) {
let mut group = c.benchmark_group("should_keep_tag/miss");

for &(label, tags) in TAG_SETS {
let std_set = build_std_hashset(tags);
let hb_set = build_hb_hashset(tags);
let trie = build_trie(tags);

group.bench_with_input(BenchmarkId::new("std_hashset", label), &std_set, |b, set| {
b.iter(|| set.contains(MISS_KEY));
});

group.bench_with_input(BenchmarkId::new("hb_hashset", label), &hb_set, |b, set| {
b.iter(|| set.contains(MISS_KEY));
});

group.bench_with_input(BenchmarkId::new("trie", label), &trie, |b, trie| {
b.iter(|| trie.get(MISS_KEY).is_some());
});
}

group.finish();
}

fn bench_full_scan(c: &mut Criterion) {
let mut group = c.benchmark_group("should_keep_tag/full_scan");

// Pre-extract tag names so we only measure the set membership check.
let metric_tag_names: Vec<&str> = METRIC_TAGS.iter().map(|t| tag_name(t)).collect();

let std_set = build_std_hashset(EXCLUDE_FILTER_NAMES);
let hb_set = build_hb_hashset(EXCLUDE_FILTER_NAMES);
let trie = build_trie(EXCLUDE_FILTER_NAMES);

group.bench_function("std_hashset", |b| {
b.iter(|| {
let mut kept = 0u32;
for name in &metric_tag_names {
if !std_set.contains(*name) {
kept += 1;
}
}
kept
});
});

group.bench_function("hb_hashset", |b| {
b.iter(|| {
let mut kept = 0u32;
for name in &metric_tag_names {
if !hb_set.contains(*name) {
kept += 1;
}
}
kept
});
});

group.bench_function("trie", |b| {
b.iter(|| {
let mut kept = 0u32;
for name in &metric_tag_names {
if trie.get(name).is_none() {
kept += 1;
}
}
kept
});
});

group.finish();
}

// Register the benchmark functions with Criterion and generate the `main` entry point
// (the Cargo.toml `[[bench]]` entry sets `harness = false` so Criterion supplies it).
criterion_group!(benches, bench_lookup_hit, bench_lookup_miss, bench_full_scan);
criterion_main!(benches);
Loading
Loading