From a16e4d7f26cdfc65142b6814a2c60aee812baa13 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 25 May 2026 10:46:08 +0000
Subject: [PATCH 1/2] feat(audit): add --max-cov flag with correct statistical
 design
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements CoV (Coefficient of Variation = σ/μ × 100%) flagging for
`git perf audit`. Two complementary metrics are computed:

- Tail CoV: computed from per-commit aggregated values, measuring
  cross-run baseline stability (is the historical baseline reliable?).
- Head CoV: computed from raw measurements at HEAD, measuring
  within-run repeatability (is the current CI run reliable?).

A warning is emitted when either exceeds the configured threshold.
Configurable via --max-cov CLI flag or max_cov in .gitperfconfig
with standard CLI > per-measurement config > default precedence.

This fixes the statistically incorrect approach of computing CoV from
the single aggregated head value (which is always a scalar and cannot
have a CoV). Raw HEAD measurements are now threaded through
audit_with_commits → audit_with_data for this purpose.

Closes #326

https://claude.ai/code/session_019VcYg5BHPM5JHGevzwHrta
---
 cli_types/src/lib.rs      |   9 +
 docs/manpage.md           |   1 +
 git_perf/src/audit.rs     | 493 ++++++++++++++++++++++++++++++++++++--
 git_perf/src/cli.rs       |   2 +
 git_perf/src/config.rs    |  51 ++++
 man/man1/git-perf-audit.1 |   5 +-
 6 files changed, 542 insertions(+), 19 deletions(-)
diff --git a/cli_types/src/lib.rs b/cli_types/src/lib.rs
index fe67f70383..a42c61a1f4 100644
--- a/cli_types/src/lib.rs
+++ b/cli_types/src/lib.rs
@@ -467,6 +467,15 @@ pub enum Commands {
         #[arg(short = 'D', long, value_enum)]
         dispersion_method: Option<DispersionMethod>,
 
+        /// Flag measurements with high Coefficient of Variation (CoV = σ/μ × 100%).
+        /// Tail CoV is computed from per-commit aggregated values and reflects
+        /// cross-run baseline stability. Head CoV is computed from the raw
+        /// measurements at HEAD and reflects within-run repeatability.
+        /// A warning is emitted when either exceeds this percentage threshold.
+        /// If not specified, uses the value from .gitperfconfig file, or no flagging.
+        #[arg(long)]
+        max_cov: Option<f64>,
+
         /// Suppress warning when change points are detected in the current epoch.
         /// By default, audit will warn if a change point (regime shift) is detected
         /// within the current measurement epoch, as this may affect z-score reliability.
diff --git a/docs/manpage.md b/docs/manpage.md
index 07ea2a9aec..96743c758b 100644
--- a/docs/manpage.md
+++ b/docs/manpage.md
@@ -275,6 +275,7 @@ The sparkline visualization shows the range of measurements relative to the tail
 
   Possible values: `stddev`, `mad`
 
+* `--max-cov <MAX_COV>` — Flag measurements with high Coefficient of Variation (CoV = σ/μ × 100%). Tail CoV is computed from per-commit aggregated values and reflects cross-run baseline stability. Head CoV is computed from the raw measurements at HEAD and reflects within-run repeatability. A warning is emitted when either exceeds this percentage threshold. If not specified, uses the value from .gitperfconfig file, or no flagging
 * `--no-change-point-warning` — Suppress warning when change points are detected in the current epoch. By default, audit will warn if a change point (regime shift) is detected within the current measurement epoch, as this may affect z-score reliability
 
 
diff --git a/git_perf/src/audit.rs b/git_perf/src/audit.rs
index 1255b4ec8b..d43a2355f1 100644
--- a/git_perf/src/audit.rs
+++ b/git_perf/src/audit.rs
@@ -48,6 +48,7 @@ pub(crate) struct ResolvedAuditParams {
     pub summarize_by: ReductionFunc,
     pub sigma: f64,
     pub dispersion_method: DispersionMethod,
+    pub max_cov: Option<f64>,
 }
 
 /// Resolves audit parameters for a specific measurement with proper precedence:
@@ -61,6 +62,7 @@ pub(crate) fn resolve_audit_params(
     cli_summarize_by: Option<ReductionFunc>,
     cli_sigma: Option<f64>,
     cli_dispersion_method: Option<DispersionMethod>,
+    cli_max_cov: Option<f64>,
 ) -> ResolvedAuditParams {
     let min_count = cli_min_count
         .or_else(|| config::audit_min_measurements(measurement))
@@ -82,11 +84,14 @@ pub(crate) fn resolve_audit_params(
         })
         .unwrap_or(DispersionMethod::StandardDeviation);
 
+    let max_cov = cli_max_cov.or_else(|| config::audit_max_cov(measurement));
+
     ResolvedAuditParams {
         min_count,
         summarize_by,
         sigma,
         dispersion_method,
+        max_cov,
     }
 }
 
@@ -202,6 +207,7 @@ pub fn audit_multiple(
     summarize_by: Option<ReductionFunc>,
     sigma: Option<f64>,
     dispersion_method: Option<DispersionMethod>,
+    max_cov: Option<f64>,
     combined_patterns: &[String],
     separate_by: &[String],
     _no_change_point_warning: bool, // TODO: Implement change point warning in Phase 2
@@ -271,6 +277,7 @@ pub fn audit_multiple(
             summarize_by,
             sigma,
             dispersion_method,
+            max_cov,
         );
 
         // Warn if max_count limits historical data below min_measurements requirement
@@ -310,6 +317,7 @@ pub fn audit_multiple(
                 params.summarize_by,
                 params.sigma,
                 params.dispersion_method,
+                params.max_cov,
             )?;
 
             // TODO(Phase 2): Add change point detection warning here
@@ -370,6 +378,7 @@ pub fn audit_multiple(
 /// Audits a measurement using pre-loaded commit data.
 /// This is more efficient than the old `audit` function when auditing multiple measurements,
 /// as it reuses the same commit data instead of walking commits multiple times.
+#[allow(clippy::too_many_arguments)]
 fn audit_with_commits(
     measurement: &str,
     commits: &[Result<Commit>],
@@ -378,6 +387,7 @@ fn audit_with_commits(
     summarize_by: ReductionFunc,
     sigma: f64,
     dispersion_method: DispersionMethod,
+    max_cov: Option<f64>,
 ) -> Result<AuditResult> {
     // Convert Vec<Result<Commit>> into an iterator of Result<Commit> by cloning references
     // This is necessary because summarize_measurements expects an iterator of Result<Commit>
@@ -395,6 +405,20 @@ fn audit_with_commits(
     let filter_by =
         |m: &MeasurementData| m.name == measurement && m.key_values_is_superset_of(selectors);
 
+    // Extract raw HEAD measurements before aggregation for head CoV computation.
+    // Each measurement data point at HEAD is a separate sample (e.g., repeated benchmark runs).
+    let head_raw: Vec<f64> = commits
+        .first()
+        .and_then(|r| r.as_ref().ok())
+        .map(|c| {
+            c.measurements
+                .iter()
+                .filter(|m| filter_by(m))
+                .map(|m| m.val)
+                .collect()
+        })
+        .unwrap_or_default();
+
     let mut aggregates = measurement_retrieval::take_while_same_epoch(summarize_measurements(
         commits_iter,
         &summarize_by,
@@ -419,24 +443,29 @@ fn audit_with_commits(
     audit_with_data(
         measurement,
         head,
+        head_raw,
         tail,
         min_count,
         sigma,
         dispersion_method,
         summarize_by,
+        max_cov,
     )
 }
 
 /// Core audit logic that can be tested with mock data
 /// This function contains all the mutation-tested logic paths
+#[allow(clippy::too_many_arguments)]
 fn audit_with_data(
     measurement: &str,
     head: f64,
+    head_raw: Vec<f64>,
     tail: Vec<f64>,
     min_count: u16,
     sigma: f64,
     dispersion_method: DispersionMethod,
     summarize_by: ReductionFunc,
+    max_cov: Option<f64>,
 ) -> Result<AuditResult> {
     // Note: CLI enforces min_count >= 2 via clap::value_parser!(u16).range(2..)
     // Tests may use lower values for edge case testing, but production code
@@ -450,6 +479,30 @@ fn audit_with_data(
     let head_summary = stats::aggregate_measurements(iter::once(&head));
     let tail_summary = stats::aggregate_measurements(tail.iter());
 
+    // Compute CoV before tail is consumed. Tail CoV uses per-commit aggregated values
+    // (cross-run baseline stability). Head CoV uses the raw measurements at HEAD
+    // (within-run repeatability). Require ≥2 samples and a non-zero mean for each.
+    let cov_warning = max_cov.and_then(|threshold| {
+        let tail_cov = (tail_summary.len >= 2 && tail_summary.mean.abs() > f64::EPSILON)
+            .then(|| tail_summary.stddev / tail_summary.mean * 100.0);
+        let head_raw_summary = stats::aggregate_measurements(head_raw.iter());
+        let head_cov = (head_raw.len() >= 2 && head_raw_summary.mean.abs() > f64::EPSILON)
+            .then(|| head_raw_summary.stddev / head_raw_summary.mean * 100.0);
+
+        let tail_exceeds = tail_cov.is_some_and(|cov| cov > threshold);
+        let head_exceeds = head_cov.is_some_and(|cov| cov > threshold);
+
+        if tail_exceeds || head_exceeds {
+            let tail_str = tail_cov.map_or("n/a".to_string(), |c| format!("{c:.1}%"));
+            let head_str = head_cov.map_or("n/a".to_string(), |c| format!("{c:.1}%"));
+            Some(format!(
+                "\n⚠️ High CoV: tail={tail_str}, head={head_str} (threshold: {threshold:.1}%)"
+            ))
+        } else {
+            None
+        }
+    });
+
     // Generate sparkline and calculate range for all measurements - used in both skip and normal paths
     let all_measurements = tail.into_iter().chain(iter::once(head)).collect::<Vec<_>>();
 
@@ -536,6 +589,9 @@ fn audit_with_data(
             summary.push_str(&format!("Head: {}\n", head_display));
             summary.push_str(&format!("Tail: {}\n", tail_display));
             summary.push_str(&sparkline);
+            if let Some(ref warning) = cov_warning {
+                summary.push_str(warning);
+            }
         }
         // If 0 total measurements, return empty summary
 
@@ -748,8 +804,9 @@ mod test {
                 Some(ReductionFunc::Mean),
                 Some(2.0),
                 Some(DispersionMethod::StandardDeviation),
-                &[], // Empty combined_patterns
-                &[], // Empty separate_by
+                None, // max_cov
+                &[],  // Empty combined_patterns
+                &[],  // Empty separate_by
                 false,
             );
 
@@ -770,11 +827,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0, 12.0], // Exactly 3 measurements
             3,                      // min_count = 3
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -786,11 +845,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0], // Only 2 measurements
             3,                // min_count = 3
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -806,11 +867,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![], // 0 measurements
             5,      // min_count > 0 to trigger skip
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -822,11 +885,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0], // 1 measurement
             5,          // min_count > 1 to trigger skip
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -837,11 +902,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0], // 2 measurements
             5,                // min_count > 2 to trigger skip
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -859,11 +926,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![], // 0 tail measurements = 1 total measurement
             5,      // min_count > 0 to trigger skip
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -878,11 +947,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0], // 1 tail measurement = 2 total measurements
             5,          // min_count > 1 to trigger skip
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -905,11 +976,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0], // 2 tail measurements = 3 total measurements
             5,                // min_count > 2 to trigger skip
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -932,11 +1005,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0], // 2 tail measurements = 3 total measurements
             5,                // min_count > 2 to trigger skip
             2.0,
             DispersionMethod::MedianAbsoluteDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -950,12 +1025,14 @@ mod test {
         // Use values where division and modulo produce very different results
         let result = audit_with_data(
             "test_measurement",
-            25.0,                   // head
+            25.0, // head
+            vec![],
             vec![10.0, 10.0, 10.0], // tail, median = 10.0
             2,
             10.0, // High sigma to avoid z-score failures
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -985,13 +1062,15 @@ mod test {
 
         // Case 1: z_score exceeds sigma, no threshold bypass (should fail)
         let result = audit_with_data(
-            "test_measurement",                 // No config threshold for this name
-            100.0,                              // Very high head value
+            "test_measurement", // No config threshold for this name
+            100.0,              // Very high head value
+            vec![],
             vec![10.0, 10.0, 10.0, 10.0, 10.0], // Low tail values
             2,
             0.5, // Low sigma threshold
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1002,12 +1081,14 @@ mod test {
         // Case 2: z_score within sigma (should pass)
         let result = audit_with_data(
             "test_measurement",
-            10.2,                               // Close to tail values
+            10.2, // Close to tail values
+            vec![],
             vec![10.0, 10.1, 10.0, 10.1, 10.0], // Some variance to avoid zero stddev
             2,
             100.0, // Very high sigma threshold
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1025,11 +1106,13 @@ mod test {
         let result = audit_with_data(
             "test_measurement",
             1000.0, // Extreme outlier
+            vec![],
             vec![10.0, 10.0, 10.0, 10.0, 10.0],
             2,
             0.1, // Very strict sigma
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1041,12 +1124,14 @@ mod test {
         // Test passing case (should get success message)
         let result = audit_with_data(
             "test_measurement",
-            10.01,                              // Very close to tail
+            10.01, // Very close to tail
+            vec![],
             vec![10.0, 10.1, 10.0, 10.1, 10.0], // Varied values to avoid zero variance
             2,
             100.0, // Very lenient sigma
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1065,21 +1150,25 @@ mod test {
         let result_stddev = audit_with_data(
             "test_measurement",
             head,
+            vec![],
             tail.clone(),
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         let result_mad = audit_with_data(
             "test_measurement",
             head,
+            vec![],
             tail,
             2,
             2.0,
             DispersionMethod::MedianAbsoluteDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result_stddev.is_ok());
@@ -1113,11 +1202,13 @@ unit = "ms"
         let result = audit_with_data(
             "build_time",
             head,
+            vec![],
             tail,
             2,
             10.0, // High sigma to ensure it passes
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1213,12 +1304,14 @@ min_relative_deviation = 10.0
         // Should pass without showing the note
         let result = audit_with_data(
             "build_time",
-            10.1,                               // Very close to tail values
+            10.1, // Very close to tail values
+            vec![],
             vec![10.0, 10.1, 10.0, 10.1, 10.0], // Low variance
             2,
             100.0, // Very high sigma threshold - won't be exceeded
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1238,11 +1331,13 @@ min_relative_deviation = 10.0
         let result = audit_with_data(
             "build_time",
             1002.0, // High z-score outlier but low relative deviation
+            vec![],
             vec![1000.0, 1000.1, 1000.0, 1000.1, 1000.0], // Very low variance
             2,
             0.5, // Low sigma threshold - will be exceeded
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1263,11 +1358,13 @@ min_relative_deviation = 10.0
         let result = audit_with_data(
             "build_time",
             1200.0, // High z-score AND high relative deviation
+            vec![],
             vec![1000.0, 1000.1, 1000.0, 1000.1, 1000.0], // Very low variance
             2,
             0.5, // Low sigma threshold - will be exceeded
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1307,12 +1404,14 @@ min_absolute_deviation = 50.0
         // This catches the - vs / mutation
         let result = audit_with_data(
             "build_time",
-            100.0,                              // head value
+            100.0, // head value
+            vec![],
             vec![10.0, 10.0, 10.0, 10.0, 10.0], // tail values, median=10
             2,
             0.5, // Low sigma - will be exceeded
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1330,12 +1429,14 @@ min_absolute_deviation = 50.0
         // With <= : 50 <= 50 is true => passes (wrong)
         let result = audit_with_data(
             "build_time",
-            1050.0,                                       // head value
+            1050.0, // head value
+            vec![],
             vec![1000.0, 1000.0, 1000.0, 1000.0, 1000.0], // tail values, median=1000
             2,
             0.5, // Low sigma - will be exceeded
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1351,12 +1452,14 @@ min_absolute_deviation = 50.0
         // head=1049, tail_median=1000, absolute_deviation=49, threshold=50
         let result = audit_with_data(
             "build_time",
-            1049.0,                                       // head value
+            1049.0, // head value
+            vec![],
             vec![1000.0, 1000.0, 1000.0, 1000.0, 1000.0], // tail values, median=1000
             2,
             0.5, // Low sigma - will be exceeded
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1543,6 +1646,7 @@ dispersion_method = "mad"
                 Some(ReductionFunc::Min),                  // CLI aggregate
                 Some(3.0),                                 // CLI sigma
                 Some(DispersionMethod::StandardDeviation), // CLI dispersion
+                None,                                      // CLI max_cov
             );
 
             assert_eq!(
@@ -1581,6 +1685,7 @@ dispersion_method = "mad"
                 None,
                 None,
                 None,
+                None, // max_cov
             );
 
             assert_eq!(
@@ -1611,6 +1716,7 @@ dispersion_method = "mad"
                 None,
                 None,
                 None,
+                None, // max_cov
             );
 
             assert_eq!(
@@ -1884,12 +1990,14 @@ dispersion_method = "mad"
         // causing division by zero in sparkline calculation
         let result = audit_with_data(
             "test_measurement",
-            10.0,   // head
+            10.0, // head
+            vec![],
             vec![], // empty tail - triggers the bug
             2,      // min_count
             2.0,    // sigma
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         // Should succeed and skip (not crash with division by zero)
@@ -1911,12 +2019,14 @@ dispersion_method = "mad"
         // This tests the edge case where median is 0.0 even with measurements
         let result = audit_with_data(
             "test_measurement",
-            5.0,                 // non-zero head
+            5.0, // non-zero head
+            vec![],
             vec![0.0, 0.0, 0.0], // all zeros in tail
             2,                   // min_count
             2.0,                 // sigma
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         // Should succeed (not crash with division by zero)
@@ -1937,12 +2047,14 @@ dispersion_method = "mad"
         // Case 1: Median is non-zero - use percentages (default behavior)
         let result = audit_with_data(
             "test_measurement",
-            15.0,                   // head
+            15.0, // head
+            vec![],
             vec![10.0, 11.0, 12.0], // median=11.0 (non-zero)
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1954,12 +2066,14 @@ dispersion_method = "mad"
         // Case 2: Median is zero with non-zero head - use absolute values
         let result = audit_with_data(
             "test_measurement",
-            5.0,                 // head (non-zero)
+            5.0, // head (non-zero)
+            vec![],
             vec![0.0, 0.0, 0.0], // median=0
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1974,12 +2088,14 @@ dispersion_method = "mad"
         // Case 3: Everything is zero - show absolute values [0 - 0]
         let result = audit_with_data(
             "test_measurement",
-            0.0,                 // head
+            0.0, // head
+            vec![],
             vec![0.0, 0.0, 0.0], // median=0
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -1995,12 +2111,14 @@ dispersion_method = "mad"
         // This should skip the audit since we have 0 < 2 tail measurements.
         let result = audit_with_data(
             "test_measurement",
-            15.0,   // head
+            15.0, // head
+            vec![],
             vec![], // no tail measurements
             2,      // min_count = 2 (minimum allowed by CLI)
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2028,12 +2146,14 @@ dispersion_method = "mad"
         // This should skip since we have 1 < 2 tail measurements.
         let result = audit_with_data(
             "test_measurement",
-            15.0,       // head
+            15.0, // head
+            vec![],
             vec![10.0], // single tail measurement
             2,          // min_count = 2 (minimum allowed by CLI)
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2062,11 +2182,13 @@ dispersion_method = "mad"
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0, 12.0],
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Min,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2080,11 +2202,13 @@ dispersion_method = "mad"
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0, 12.0],
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Max,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2098,11 +2222,13 @@ dispersion_method = "mad"
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0, 12.0],
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Median,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2116,11 +2242,13 @@ dispersion_method = "mad"
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![10.0, 11.0, 12.0],
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Mean,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2134,11 +2262,13 @@ dispersion_method = "mad"
         let result = audit_with_data(
             "test_measurement",
             15.0,
+            vec![],
             vec![], // No tail measurements, total = 1
             2,
             2.0,
             DispersionMethod::StandardDeviation,
             ReductionFunc::Median,
+            None,
         );
 
         assert!(result.is_ok());
@@ -2148,4 +2278,331 @@ dispersion_method = "mad"
         // But should show Head summary
         assert!(audit_result.message.contains("Head:"));
     }
+
+    // --- CoV warning tests ---
+
+    #[test]
+    fn test_tail_cov_warning_fires_above_threshold() {
+        // tail = [50, 100, 150, 100, 100]: mean=100, sample stddev≈35.4 → CoV≈35.4% > 30%
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![],
+            vec![50.0, 100.0, 150.0, 100.0, 100.0],
+            2,
+            10.0, // high sigma so audit passes on z-score
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(30.0),
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            msg.contains("⚠️ High CoV"),
+            "Should warn when tail CoV exceeds threshold, got: {msg}"
+        );
+        assert!(msg.contains("threshold: 30.0%"), "got: {msg}");
+    }
+
+    #[test]
+    fn test_tail_cov_warning_absent_below_threshold() {
+        // tail = [100, 100, 100]: mean=100, stddev=0 → CoV=0% < 30%
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![],
+            vec![100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(30.0),
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not warn when tail CoV is below threshold, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_head_cov_warning_fires_above_threshold() {
+        // head_raw = [50, 150]: mean=100, sample stddev≈70.7 → CoV≈70.7% > 50%
+        // tail is stable so tail CoV is low; only head CoV triggers the warning
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![50.0, 150.0], // raw head measurements — high variance
+            vec![100.0, 100.0, 100.0, 100.0, 100.0], // stable tail
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(50.0),
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            msg.contains("⚠️ High CoV"),
+            "Should warn when head CoV exceeds threshold, got: {msg}"
+        );
+        assert!(
+            msg.contains("head="),
+            "Warning should include head CoV value, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_head_cov_warning_absent_below_threshold() {
+        // head_raw = [100, 100]: mean=100, stddev=0 → CoV=0% < 50%
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![100.0, 100.0],
+            vec![100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(50.0),
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not warn when head CoV is below threshold, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_warning_absent_with_no_threshold() {
+        // High-variance data but no threshold → no warning
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![10.0, 200.0, 50.0],               // high head variance
+            vec![10.0, 200.0, 50.0, 300.0, 100.0], // high tail variance
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            None, // no max_cov
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not warn when no threshold is set, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_warning_absent_for_single_tail_measurement() {
+        // Only 1 tail measurement → tail_summary.len = 1, so tail CoV skipped
+        // Also head_raw empty → head CoV also skipped → no warning
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![],
+            vec![100.0], // single tail — tail_summary.len = 1 < 2
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(0.0), // very strict threshold, would fire if CoV computed
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not warn when tail has only 1 measurement, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_warning_absent_for_single_head_raw_measurement() {
+        // head_raw has only 1 value → head CoV skipped; tail stable → tail CoV not triggered
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![100.0], // single raw head measurement — head CoV skipped
+            vec![100.0, 100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(0.0), // would fire if CoV computed
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not compute head CoV from a single raw measurement, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_shows_correct_tail_cov_value() {
+        // tail = [0, 200]: mean=100, sample stddev=141.4… → CoV≈141.4%
+        // Verifies stddev / mean (not stddev * mean or stddev + mean) and * 100.0
+        let result = audit_with_data(
+            "test_measurement",
+            105.0,
+            vec![],
+            vec![0.0, 200.0],
+            2,
+            100.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(100.0), // threshold=100%, CoV≈141.4% exceeds it
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            msg.contains("tail=141.4%"),
+            "Should show correct tail CoV percentage, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_shows_correct_head_cov_value() {
+        // head_raw = [0, 200]: mean=100, sample stddev=141.4… → CoV≈141.4%
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![0.0, 200.0],                 // raw head measurements
+            vec![100.0, 100.0, 100.0, 100.0], // stable tail
+            2,
+            100.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(100.0), // threshold=100%, head CoV≈141.4% exceeds it
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            msg.contains("head=141.4%"),
+            "Should show correct head CoV percentage, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_warning_shown_on_failing_audit() {
+        // CoV warning is informational and appears even when z-score causes a fail
+        let result = audit_with_data(
+            "test_measurement",
+            500.0, // extreme outlier — will fail z-score
+            vec![],
+            vec![50.0, 100.0, 150.0, 100.0, 100.0], // CoV≈35.4% > 30%
+            2,
+            0.5, // strict sigma — audit fails
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(30.0),
+        );
+        assert!(result.is_ok());
+        let audit_result = result.unwrap();
+        assert!(!audit_result.passed, "Should fail audit");
+        assert!(
+            audit_result.message.contains("⚠️ High CoV"),
+            "CoV warning should appear even on failure, got: {}",
+            audit_result.message
+        );
+    }
+
+    #[test]
+    fn test_cov_no_warning_when_cov_equals_threshold() {
+        // tail = [100, 100, 100]: CoV = 0%, threshold = 0%
+        // Strict > means 0 > 0 is false → no warning.
+        // With >= mutation: 0 >= 0 is true → warning fires → mutation caught.
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![],
+            vec![100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(0.0), // threshold = 0%
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not warn when CoV (0%) equals threshold (0%) with strict >, got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_skips_near_zero_mean() {
+        // tail mean ≈ EPSILON: the guard "mean.abs() > EPSILON" prevents CoV computation.
+        // With >= mutation: EPSILON >= EPSILON → CoV computed → warning fires → caught.
+        let eps = f64::EPSILON;
+        let result = audit_with_data(
+            "test_measurement",
+            1.0,
+            vec![],
+            vec![0.0, 2.0 * eps], // mean = EPSILON, sample stddev = EPSILON*sqrt(2)
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Mean,
+            Some(100.0), // CoV would be ~141% if computed from near-zero mean
+        );
+        assert!(result.is_ok());
+        assert!(
+            !result.unwrap().message.contains("⚠️ High CoV"),
+            "Should not compute CoV when tail mean equals EPSILON"
+        );
+    }
+
+    #[test]
+    fn test_cov_warning_fires_when_only_head_exceeds_threshold() {
+        // tail CoV is below threshold, head CoV is above → warning fires (|| not &&)
+        // head_raw = [0, 200]: CoV≈141.4%; tail = [100, 100]: CoV=0%
+        // threshold = 50%: only head exceeds. With && instead of ||, no warning → mutation caught.
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![0.0, 200.0], // high head CoV
+            vec![100.0, 100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(50.0),
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            msg.contains("⚠️ High CoV"),
+            "Should warn when head CoV alone exceeds threshold (|| not &&), got: {msg}"
+        );
+    }
+
+    #[test]
+    fn test_cov_tail_len_boundary_two() {
+        // Verify tail CoV is computed when tail has exactly 2 measurements (len >= 2 boundary).
+        // With > instead of >= mutation: len=2 is NOT > 2 → CoV skipped → no warning → caught.
+        // tail = [0, 200]: mean=100, CoV≈141.4% > 50%
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![],
+            vec![0.0, 200.0], // exactly 2 tail values, high CoV
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(50.0),
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            msg.contains("⚠️ High CoV"),
+            "Should compute tail CoV with exactly 2 tail measurements (>= 2), got: {msg}"
+        );
+    }
 }
diff --git a/git_perf/src/cli.rs b/git_perf/src/cli.rs
index 4664ece153..c9a6aab6fa 100644
--- a/git_perf/src/cli.rs
+++ b/git_perf/src/cli.rs
@@ -137,6 +137,7 @@ pub fn handle_calls() -> Result<()> {
             aggregate_by,
             sigma,
             dispersion_method,
+            max_cov,
             filter,
             no_change_point_warning,
         } => {
@@ -173,6 +174,7 @@ pub fn handle_calls() -> Result<()> {
                 aggregate_by.map(ReductionFunc::from),
                 sigma,
                 dispersion_method.map(crate::stats::DispersionMethod::from),
+                max_cov,
                 &combined_patterns,
                 &separate_by,
                 no_change_point_warning,
diff --git a/git_perf/src/config.rs b/git_perf/src/config.rs
index f1d15da454..bd095e83bf 100644
--- a/git_perf/src/config.rs
+++ b/git_perf/src/config.rs
@@ -328,6 +328,22 @@ pub fn audit_min_relative_deviation(measurement: &str) -> Option<f64> {
     None
 }
 
+/// Returns the maximum CoV (Coefficient of Variation = σ/μ × 100%) threshold from
+/// config, or None if not set. When tail or head CoV exceeds this value, a warning
+/// is emitted in the audit output.
+#[must_use]
+pub fn audit_max_cov(measurement: &str) -> Option<f64> {
+    let config = read_hierarchical_config().ok()?;
+
+    if let Some(s) = config.get_with_parent_fallback("measurement", measurement, "max_cov") {
+        if let Ok(v) = s.parse::<f64>() {
+            return Some(v);
+        }
+    }
+
+    None
+}
+
 /// Returns the minimum absolute deviation threshold from config, or None if not set.
 #[must_use]
 pub fn audit_min_absolute_deviation(measurement: &str) -> Option<f64> {
@@ -696,6 +712,41 @@ min_relative_deviation = 10.0
         });
     }
 
+    #[test]
+    fn test_audit_max_cov() {
+        with_isolated_home(|temp_dir| {
+            env::set_current_dir(temp_dir).unwrap();
+            init_repo(temp_dir);
+
+            let workspace_config_path = temp_dir.join(".gitperfconfig");
+            let local_config = r#"
+[measurement]
+max_cov = 30.0
+
+[measurement."build_time"]
+max_cov = 50.0
+
+[measurement."memory_usage"]
+max_cov = 20.0
+"#;
+            fs::write(&workspace_config_path, local_config).unwrap();
+
+            assert_eq!(super::audit_max_cov("build_time"), Some(50.0));
+            assert_eq!(super::audit_max_cov("memory_usage"), Some(20.0));
+            assert_eq!(super::audit_max_cov("other_measurement"), Some(30.0));
+
+            let global_config = r#"
+[measurement]
+max_cov = 30.0
+"#;
+            fs::write(&workspace_config_path, global_config).unwrap();
+            assert_eq!(super::audit_max_cov("any_measurement"), Some(30.0));
+
+            fs::remove_file(&workspace_config_path).unwrap();
+            assert_eq!(super::audit_max_cov("any_measurement"), None);
+        });
+    }
+
     #[test]
     fn test_audit_min_absolute_deviation() {
         with_isolated_home(|temp_dir| {
diff --git a/man/man1/git-perf-audit.1 b/man/man1/git-perf-audit.1
index 49c9c3027e..f0a4d5eb73 100644
--- a/man/man1/git-perf-audit.1
+++ b/man/man1/git-perf-audit.1
@@ -4,7 +4,7 @@
 .SH NAME
 audit \- For given measurements, check perfomance deviations of the HEAD commit against `<n>` previous commits. Group previous results and aggregate their results before comparison
 .SH SYNOPSIS
-\fBaudit\fR [\fB\-m\fR|\fB\-\-measurement\fR] [\fB\-n\fR|\fB\-\-max\-count\fR] [\fB\-\-since\fR] [\fB\-\-until\fR] [\fB\-s\fR|\fB\-\-selectors\fR] [\fB\-f\fR|\fB\-\-filter\fR] [\fB\-S\fR|\fB\-\-separate\-by\fR] [\fB\-\-min\-measurements\fR] [\fB\-a\fR|\fB\-\-aggregate\-by\fR] [\fB\-d\fR|\fB\-\-sigma\fR] [\fB\-D\fR|\fB\-\-dispersion\-method\fR] [\fB\-\-no\-change\-point\-warning\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fICOMMIT\fR] 
+\fBaudit\fR [\fB\-m\fR|\fB\-\-measurement\fR] [\fB\-n\fR|\fB\-\-max\-count\fR] [\fB\-\-since\fR] [\fB\-\-until\fR] [\fB\-s\fR|\fB\-\-selectors\fR] [\fB\-f\fR|\fB\-\-filter\fR] [\fB\-S\fR|\fB\-\-separate\-by\fR] [\fB\-\-min\-measurements\fR] [\fB\-a\fR|\fB\-\-aggregate\-by\fR] [\fB\-d\fR|\fB\-\-sigma\fR] [\fB\-D\fR|\fB\-\-dispersion\-method\fR] [\fB\-\-max\-cov\fR] [\fB\-\-no\-change\-point\-warning\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fICOMMIT\fR] 
 .SH DESCRIPTION
 For given measurements, check perfomance deviations of the HEAD commit against `<n>` previous commits. Group previous results and aggregate their results before comparison.
 .PP
@@ -102,6 +102,9 @@ stddev
 mad
 .RE
 .TP
+\fB\-\-max\-cov\fR \fI<MAX_COV>\fR
+Flag measurements with high Coefficient of Variation (CoV = σ/μ × 100%). Tail CoV is computed from per\-commit aggregated values and reflects cross\-run baseline stability. Head CoV is computed from the raw measurements at HEAD and reflects within\-run repeatability. A warning is emitted when either exceeds this percentage threshold. If not specified, uses the value from .gitperfconfig file, or no flagging
+.TP
 \fB\-\-no\-change\-point\-warning\fR
 Suppress warning when change points are detected in the current epoch. By default, audit will warn if a change point (regime shift) is detected within the current measurement epoch, as this may affect z\-score reliability
 .TP

From aa0252bc9aa65ce92fb5eb808dcca789033bbfe3 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 25 May 2026 10:54:02 +0000
Subject: [PATCH 2/2] test(audit): add mutation-catching tests for head CoV
 edge cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three tests to cover missed mutants on the head CoV computation guards:
- test_head_cov_skips_near_zero_mean: catches `> → >=` on EPSILON guard
- test_head_cov_skips_when_mean_near_zero_despite_valid_len: catches `&& → ||`
- test_head_cov_no_warning_when_cov_equals_threshold: catches `> → >=` on threshold

https://claude.ai/code/session_019VcYg5BHPM5JHGevzwHrta
---
 git_perf/src/audit.rs | 76 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/git_perf/src/audit.rs b/git_perf/src/audit.rs
index d43a2355f1..4a227fe53a 100644
--- a/git_perf/src/audit.rs
+++ b/git_perf/src/audit.rs
@@ -2605,4 +2605,80 @@ dispersion_method = "mad"
             "Should compute tail CoV with exactly 2 tail measurements (>= 2), got: {msg}"
         );
     }
+
+    #[test]
+    fn test_head_cov_skips_near_zero_mean() {
+        // head_raw mean = EPSILON: the guard "mean.abs() > EPSILON" must prevent CoV computation.
+        // head_raw = [0.0, 2*EPSILON]: mean = EPSILON, sample stddev = EPSILON*sqrt(2)
+        // If guard used >= instead of >: EPSILON >= EPSILON → CoV = sqrt(2)*100% ≈ 141.4% > 100%
+        // → warning fires → mutation caught.
+        let eps = f64::EPSILON;
+        let result = audit_with_data(
+            "test_measurement",
+            1.0,
+            vec![0.0, 2.0 * eps], // head_raw: mean = EPSILON, not > EPSILON
+            vec![100.0, 100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Mean,
+            Some(100.0),
+        );
+        assert!(result.is_ok());
+        assert!(
+            !result.unwrap().message.contains("⚠️ High CoV"),
+            "Should not compute head CoV when mean equals EPSILON"
+        );
+    }
+
+    #[test]
+    fn test_head_cov_skips_when_mean_near_zero_despite_valid_len() {
+        // Catches && → || mutation on head guard (line 489:45).
+        // head_raw has len=2 (passes len >= 2) but mean = EPSILON/2 ≤ EPSILON (fails mean guard).
+        // Original (&&): true && false = false → no CoV.
+        // Mutated (||): true || false = true → CoV = stddev/(EPSILON/2)*100 ≈ 141.4% > threshold
+        // → warning fires → mutation caught.
+        let eps = f64::EPSILON;
+        let result = audit_with_data(
+            "test_measurement",
+            1.0,
+            vec![eps, 0.0], // mean = eps/2 < EPSILON, but len = 2
+            vec![100.0, 100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Mean,
+            Some(100.0),
+        );
+        assert!(result.is_ok());
+        assert!(
+            !result.unwrap().message.contains("⚠️ High CoV"),
+            "Should not compute head CoV when mean is near-zero, even with len >= 2"
+        );
+    }
+
+    #[test]
+    fn test_head_cov_no_warning_when_cov_equals_threshold() {
+        // head_raw = [100, 100]: stddev=0, CoV=0%, threshold=0%.
+        // Strict > means 0 > 0 is false → no warning.
+        // With >= mutation: 0 >= 0 → warning fires → mutation caught.
+        // Tail is stable (CoV=0%) so only the head threshold comparison is exercised.
+        let result = audit_with_data(
+            "test_measurement",
+            100.0,
+            vec![100.0, 100.0], // head_raw: CoV = 0%
+            vec![100.0, 100.0, 100.0],
+            2,
+            10.0,
+            DispersionMethod::StandardDeviation,
+            ReductionFunc::Min,
+            Some(0.0), // threshold = 0%
+        );
+        assert!(result.is_ok());
+        let msg = result.unwrap().message;
+        assert!(
+            !msg.contains("⚠️ High CoV"),
+            "Should not warn when head CoV (0%) equals threshold (0%) with strict >, got: {msg}"
+        );
+    }
 }