From c2aaaa61636851f721f5a84f21127b2c92f0bac8 Mon Sep 17 00:00:00 2001 From: Travis Thieman Date: Fri, 10 Apr 2026 14:33:00 -0400 Subject: [PATCH 1/4] test(correctness): add otlp-traces-probabilistic correctness test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tests that ADP's probabilistic sampler (at 50%) makes identical keep/drop decisions and produces the same APM stats as the Datadog Agent baseline. The probabilistic sampler is deterministic (FNV1a hash of trace ID), so both sides receiving the same millstone corpus should keep exactly the same traces. Any divergence in span counts or stats indicates a bug in ADP's pipeline — notably, ADP currently computes APM stats post-sampling while the Agent computes them pre-sampling, which this test is expected to surface. Co-Authored-By: Claude Sonnet 4.6 --- Makefile | 8 ++- .../otlp-traces-probabilistic/config.yaml | 23 +++++++ .../otlp-traces-probabilistic/datadog.yaml | 28 ++++++++ .../otlp-traces-probabilistic/millstone.yaml | 69 +++++++++++++++++++ 4 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 test/correctness/otlp-traces-probabilistic/config.yaml create mode 100644 test/correctness/otlp-traces-probabilistic/datadog.yaml create mode 100644 test/correctness/otlp-traces-probabilistic/millstone.yaml diff --git a/Makefile b/Makefile index 1d34113344..2d6ef23735 100644 --- a/Makefile +++ b/Makefile @@ -534,7 +534,7 @@ test-all: test test-property test-docs test-miri test-loom .PHONY: test-correctness test-correctness: ## Runs the complete correctness suite -test-correctness: test-correctness-dsd-plain test-correctness-dsd-origin-detection test-correctness-otlp-metrics test-correctness-otlp-traces test-correctness-otlp-traces-ets test-correctness-otlp-traces-ottl-filtering test-correctness-otlp-traces-ottl-transform +test-correctness: test-correctness-dsd-plain test-correctness-dsd-origin-detection test-correctness-otlp-metrics test-correctness-otlp-traces test-correctness-otlp-traces-ets test-correctness-otlp-traces-ottl-filtering test-correctness-otlp-traces-ottl-transform test-correctness-otlp-traces-probabilistic .PHONY: test-correctness-dsd-plain test-correctness-dsd-plain: build-ground-truth @@ -578,6 +578,12 @@ test-correctness-otlp-traces-ottl-transform: ## Runs the 'otlp-traces-ottl-trans @echo "[*] Running 'otlp-traces-ottl-transform' correctness test case..." @target/release/ground-truth $(shell pwd)/test/correctness/otlp-traces-ottl-transform/config.yaml +.PHONY: test-correctness-otlp-traces-probabilistic +test-correctness-otlp-traces-probabilistic: build-ground-truth +test-correctness-otlp-traces-probabilistic: ## Runs the 'otlp-traces-probabilistic' correctness test (probabilistic sampler at 50%) + @echo "[*] Running 'otlp-traces-probabilistic' correctness test case..." + @target/release/ground-truth $(shell pwd)/test/correctness/otlp-traces-probabilistic/config.yaml + .PHONY: build-panoramic build-panoramic: check-rust-build-tools build-panoramic: ## Builds the panoramic binary (ADP integration test runner) diff --git a/test/correctness/otlp-traces-probabilistic/config.yaml b/test/correctness/otlp-traces-probabilistic/config.yaml new file mode 100644 index 0000000000..258b77774e --- /dev/null +++ b/test/correctness/otlp-traces-probabilistic/config.yaml @@ -0,0 +1,23 @@ +analysis_mode: traces +millstone: + image: saluki-images/millstone:latest + config_path: millstone.yaml +datadog_intake: + image: saluki-images/datadog-intake:latest + config_path: ../datadog-intake.yaml +baseline: + image: saluki-images/datadog-agent:testing-release + files: + - datadog.yaml:/etc/datadog-agent/datadog.yaml + additional_env_vars: + - DD_API_KEY=correctness-test + - DD_APM_PROBABILISTIC_SAMPLER_ENABLED=true +comparison: + image: saluki-images/datadog-agent:testing-release + files: + - datadog.yaml:/etc/datadog-agent/datadog.yaml + additional_env_vars: + - DD_API_KEY=correctness-test + - DD_DATA_PLANE_ENABLED=true + - DD_DATA_PLANE_OTLP_ENABLED=true + - DD_APM_PROBABILISTIC_SAMPLER_ENABLED=true diff --git a/test/correctness/otlp-traces-probabilistic/datadog.yaml b/test/correctness/otlp-traces-probabilistic/datadog.yaml new file mode 100644 index 0000000000..db091e8b2f --- /dev/null +++ b/test/correctness/otlp-traces-probabilistic/datadog.yaml @@ -0,0 +1,28 @@ +hostname: "correctness-testing" +api_key: dummy-api-key-correctness-testing +health_port: 5555 +log_level: debug + +process_config: + process_collection: + enabled: false + container_collection: + enabled: false + +dd_url: "http://datadog-intake:2049" + +apm_config: + enabled: true + apm_dd_url: "http://datadog-intake:2049" + features: ["enable_otlp_compute_top_level_by_span_kind"] + probabilistic_sampler: + enabled: true + sampling_percentage: 50 + +otlp_config: + receiver: + protocols: + grpc: + endpoint: "0.0.0.0:4317" + traces: + enable_otlp_compute_top_level_by_span_kind: true diff --git a/test/correctness/otlp-traces-probabilistic/millstone.yaml b/test/correctness/otlp-traces-probabilistic/millstone.yaml new file mode 100644 index 0000000000..4f8d4643db --- /dev/null +++ b/test/correctness/otlp-traces-probabilistic/millstone.yaml @@ -0,0 +1,69 @@ +seed: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131] +target: "grpc://target:4317/opentelemetry.proto.collector.trace.v1.TraceService/Export" +aggregation_bucket_width_secs: 10 +volume: 1000 +corpus: + size: 1000 + payload: + opentelemetry_traces: + error_rate: 0.01 + services: + - name: api-gateway + service_type: http + scope_name: com.example.gateway + resource_attributes: + - key: deployment.environment + value: production + - key: cloud.region + value: + dictionary: cloud_regions + operations: + - id: get-product + method: GET + route: /api/v1/products/{id} + suboperations: + - to: product-service/get-product + - id: list-products + method: GET + route: /api/v1/products + suboperations: + - to: product-service/list-products + - name: product-service + service_type: grpc + grpc: + service: ProductService + scope_name: com.example.products + operations: + - id: get-product + method: GetProduct + suboperations: + - to: product-cache/get-product-by-id + - to: product-db/select-product-by-id + rate: 0.1 + - id: list-products + method: ListProducts + suboperations: + - to: product-cache/get-products + - to: product-db/select-products + rate: 0.1 + - name: product-cache + service_type: database + database: + system: redis + operations: + - id: get-product-by-id + query: GET products:by_id:$1 + - id: get-products + query: GET products:full + - name: product-db + service_type: database + database: + system: postgresql + name: products + operations: + - id: select-product-by-id + table: products + query: SELECT * FROM products WHERE id = $1 + - id: select-products + table: products + query: SELECT * FROM products LIMIT 50 From 9022f5ce1bcc59b9ccabe24f07c3f0b4c23c21f3 Mon Sep 17 00:00:00 2001 From: Travis Thieman Date: Fri, 10 Apr 2026 14:34:57 -0400 Subject: [PATCH 2/4] test(correctness): enable otlp_direct_analysis_mode for probabilistic sampler test ADP intentionally computes APM stats only on sampled traces (post-sampling), whereas the Agent computes stats on all traces pre-sampling. Enable otlp_direct_analysis_mode to skip stats comparison and focus the test on verifying that both sides make identical probabilistic sampling decisions and produce identical span output for kept traces. Co-Authored-By: Claude Sonnet 4.6 --- test/correctness/otlp-traces-probabilistic/config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/correctness/otlp-traces-probabilistic/config.yaml b/test/correctness/otlp-traces-probabilistic/config.yaml index 258b77774e..05c10c0464 100644 --- a/test/correctness/otlp-traces-probabilistic/config.yaml +++ b/test/correctness/otlp-traces-probabilistic/config.yaml @@ -1,4 +1,5 @@ analysis_mode: traces +otlp_direct_analysis_mode: true millstone: image: saluki-images/millstone:latest config_path: millstone.yaml From 987e9a1241530f466ba7095859776f89971c2e40 Mon Sep 17 00:00:00 2001 From: Travis Thieman Date: Fri, 10 Apr 2026 15:20:35 -0400 Subject: [PATCH 3/4] fix(trace-sampler): write _dd.p.dm to chunk tags (not span meta) for OTLP traces with probabilistic sampler When the APM-level probabilistic sampler is enabled, the DD Agent writes `_dd.p.dm` to trace chunk metadata tags only, not to individual span meta. ADP was incorrectly writing it to span meta for all traces regardless of path. Fix: skip the span meta write for OTLP traces when probabilistic_sampler is enabled, while still passing the DM value through TraceSampling so the encoder writes it to chunk tags. The legacy OTLP pre-sampling path (without probabilistic_sampler_enabled) continues to write DM to span meta, matching existing agent behavior. Also fix the SSI metadata check in ground-truth: when otlp_direct_analysis_mode is true, sampling may legitimately drop the first-per-service spans that _dd.install.id is attached to. Wrap both baseline and comparison SSI checks in the same !otlp_direct_analysis_mode gate (matching what the comment already described). Co-Authored-By: Claude Sonnet 4.6 --- .../ground-truth/src/analysis/traces/mod.rs | 19 +++++++++++-------- .../src/transforms/trace_sampler/mod.rs | 8 ++++++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/bin/correctness/ground-truth/src/analysis/traces/mod.rs b/bin/correctness/ground-truth/src/analysis/traces/mod.rs index ce0b9489b0..7c37baa9b0 100644 --- a/bin/correctness/ground-truth/src/analysis/traces/mod.rs +++ b/bin/correctness/ground-truth/src/analysis/traces/mod.rs @@ -299,14 +299,17 @@ impl TracesAnalyzer { } // Ensure that we observe at least one span on each side where Single Step Instrumentation-related metadata is - // present (when not in OTLP-direct mode). - if !self.options.otlp_direct_analysis_mode && !self.baseline_ssi_metadata_present { - error!("No Single Step Instrumentation metadata found in baseline spans."); - error_count += 1; - } - if !self.comparison_ssi_metadata_present { - error!("No Single Step Instrumentation metadata found in comparison spans."); - error_count += 1; + // present (when not in OTLP-direct mode). In OTLP-direct mode, probabilistic sampling may legitimately drop + // the first-per-service spans that SSI metadata is attached to, so the check is skipped on both sides. + if !self.options.otlp_direct_analysis_mode { + if !self.baseline_ssi_metadata_present { + error!("No Single Step Instrumentation metadata found in baseline spans."); + error_count += 1; + } + if !self.comparison_ssi_metadata_present { + error!("No Single Step Instrumentation metadata found in comparison spans."); + error_count += 1; + } } if error_count > 0 { diff --git a/lib/saluki-components/src/transforms/trace_sampler/mod.rs b/lib/saluki-components/src/transforms/trace_sampler/mod.rs index 33674501a8..3341e169ab 100644 --- a/lib/saluki-components/src/transforms/trace_sampler/mod.rs +++ b/lib/saluki-components/src/transforms/trace_sampler/mod.rs @@ -364,7 +364,7 @@ impl TraceSampler { // Run probabilistic sampler - use root span's trace ID let root_trace_id = trace.spans()[root_span_idx].trace_id(); if self.sample_probabilistic(root_trace_id) { - decision_maker = DECISION_MAKER_PROBABILISTIC; // probabilistic sampling + decision_maker = DECISION_MAKER_PROBABILISTIC; prob_keep = true; if let Some(root_span) = trace.spans_mut().get_mut(root_span_idx) { @@ -464,7 +464,11 @@ impl TraceSampler { }; let meta = root_span_value.meta_mut(); - if priority > 0 { + // When the APM-level probabilistic sampler is used with OTLP traces, the DD Agent writes + // _dd.p.dm to trace chunk tags only (not span meta). For the legacy OTLP sampling path, + // it is written to both. We match that behavior by skipping the span meta write only when + // both conditions hold; the DM value still flows through TraceSampling to the encoder. + if priority > 0 && !(is_otlp && self.probabilistic_sampler_enabled) { if let Some(dm) = decision_maker_meta.as_ref() { meta.insert(MetaString::from(TAG_DECISION_MAKER), dm.clone()); } From 575052dfba6964f393f3ba258ad5771d9cbccff2 Mon Sep 17 00:00:00 2001 From: Travis Thieman Date: Fri, 10 Apr 2026 15:23:43 -0400 Subject: [PATCH 4/4] chore(ground-truth): revert added comment on SSI metadata check Co-Authored-By: Claude Sonnet 4.6 --- bin/correctness/ground-truth/src/analysis/traces/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/correctness/ground-truth/src/analysis/traces/mod.rs b/bin/correctness/ground-truth/src/analysis/traces/mod.rs index 7c37baa9b0..6260dfd9b2 100644 --- a/bin/correctness/ground-truth/src/analysis/traces/mod.rs +++ b/bin/correctness/ground-truth/src/analysis/traces/mod.rs @@ -299,8 +299,7 @@ impl TracesAnalyzer { } // Ensure that we observe at least one span on each side where Single Step Instrumentation-related metadata is - // present (when not in OTLP-direct mode). In OTLP-direct mode, probabilistic sampling may legitimately drop - // the first-per-service spans that SSI metadata is attached to, so the check is skipped on both sides. + // present (when not in OTLP-direct mode). if !self.options.otlp_direct_analysis_mode { if !self.baseline_ssi_metadata_present { error!("No Single Step Instrumentation metadata found in baseline spans.");