diff --git a/Cargo.lock b/Cargo.lock index 7ec43b492..ddefc3f08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -399,6 +399,7 @@ dependencies = [ "anyhow", "benchlib", "cargo_metadata", + "cfg-if", "chrono", "clap", "console", diff --git a/collector/Cargo.toml b/collector/Cargo.toml index 021be7aaf..4e35ce576 100644 --- a/collector/Cargo.toml +++ b/collector/Cargo.toml @@ -18,6 +18,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tokio = { workspace = true, features = ["rt", "process"] } +cfg-if = "1" thiserror = "2" tempfile = "3" libc = "0.2" diff --git a/collector/src/compile/benchmark/target.rs b/collector/src/compile/benchmark/target.rs index 9903c3add..0a69833d1 100644 --- a/collector/src/compile/benchmark/target.rs +++ b/collector/src/compile/benchmark/target.rs @@ -5,6 +5,7 @@ use std::{fmt, str::FromStr}; /// https://doc.rust-lang.org/nightly/rustc/platform-support.html /// /// Presently we only support x86_64 +/// FIXME: we actually support Windows and aarch64, but that isn't captured here. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, serde::Deserialize)] pub enum Target { /// `x86_64-unknown-linux-gnu` diff --git a/collector/src/compile/execute/mod.rs b/collector/src/compile/execute/mod.rs index ff10bafe6..1792af944 100644 --- a/collector/src/compile/execute/mod.rs +++ b/collector/src/compile/execute/mod.rs @@ -134,66 +134,137 @@ pub struct CargoProcess<'a> { pub target: Target, pub workspace_package: Option, } -/// Returns an optional list of Performance CPU cores, if the system has P and E cores. -/// This list *should* be in a format suitable for the `taskset` command. -#[cfg(target_os = "linux")] -fn performance_cores() -> Option<&'static String> { - use std::sync::LazyLock; - static PERFORMANCE_CORES: LazyLock> = LazyLock::new(|| { - if std::fs::exists("/sys/devices/cpu").expect("Could not check the CPU architecture details: could not check if `/sys/devices/cpu` exists!") { - // If /sys/devices/cpu exists, then this is not a "Performance-hybrid" CPU. - None - } - else if std::fs::exists("/sys/devices/cpu_core").expect("Could not check the CPU architecture detali: could not check if `/sys/devices/cpu_core` exists!") { - // If /sys/devices/cpu_core exists, then this is a "Performance-hybrid" CPU. - eprintln!("WARNING: Performance-Hybrid CPU detected. `rustc-perf` can't run properly on Efficency cores: test suite will only use Performance cores!"); - Some(std::fs::read_to_string("/sys/devices/cpu_core/cpus").unwrap().trim().to_string()) - } else { - // If neither dir exists, then something is wrong - `/sys/devices/cpu` has been in Linux for over a decade. - eprintln!("WARNING: neither `/sys/devices/cpu` nor `/sys/devices/cpu_core` present, unable to determine if this CPU has a Performance-Hybrid architecture."); - None - } - }); - (*PERFORMANCE_CORES).as_ref() -} -#[cfg(not(target_os = "linux"))] -// Modify this stub if you want to add support for P/E cores on more OSs -fn performance_cores() -> Option<&'static String> { - None +// Some CPUs have a hybrid architecture with a mixture of P-cores (power) and E-cores (efficiency). +// When benchmarking we use `taskset` to restrict execution to P-cores. Why? +// 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of events +// is lost. +// 2. The performance characteristics of E-cores are less reliable, so excluding them from the +// benchmark makes things easier. +// 3. An unpredictable mix of P-core and E-core execution can give inconsistent results. +// +// If a hybrid architecture is detected, this type is used to hold information about the P-cores. +// The detection method used varies across platforms. +#[derive(Debug)] +struct PCores { + /// The number of P-cores. + len: usize, + /// The list of P-cores, in a form suitable for passing to `taskset`. + list: String, } -#[cfg(target_os = "linux")] -/// Makes the benchmark run only on Performance cores. -fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command { - // Parse CPU list to extract the number of P cores! - // This assumes the P core id's are countinus, in format `fisrt_id-last_id` - let (core_start, core_end) = cpu_list - .split_once("-") - .unwrap_or_else(|| panic!("Unsuported P core list format: {cpu_list:?}.")); - let core_start: u32 = core_start - .parse() - .expect("Expected a number when parsing the start of the P core list!"); - let core_end: u32 = core_end - .parse() - .expect("Expected a number when parsing the end of the P core list!"); - let core_count = core_end - core_start; - let mut cmd = Command::new("taskset"); - // Set job count to P core count - this is done for 2 reasons: - // 1. The instruction count info for E core is often very incompleate - a substantial chunk of events is lost. - // 2. The performance charcteristics of E cores are less reliable, so excluding them from the benchmark makes things easier. - cmd.env("CARGO_BUILD_JOBS", format!("{core_count}")); - // pass the P core list to taskset to pin task to the P core. - cmd.arg("--cpu-list"); - cmd.arg(cpu_list); - cmd.arg(path); - cmd -} +static P_CORES: LazyLock> = LazyLock::new(p_cores); + +cfg_if::cfg_if! { + if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { + // On x86-64/Linux we look for the presence of `/sys/devices/cpu_core/` which indicates a + // hybrid architecture. + fn p_cores() -> Option { + if std::fs::exists("/sys/devices/cpu").unwrap() { + // `/sys/devices/cpu` exists: this is not a hybrid CPU. + None + } else if std::fs::exists("/sys/devices/cpu_core").unwrap() { + // `/sys/devices/cpu_core/` exists: this is a hybrid CPU, and the `cpus` file + // within contains the list of P-cores. (`sys/devices/cpu_atom/cpus` contains + // the list of E-cores). + let list = + std::fs::read_to_string("/sys/devices/cpu_core/cpus") + .unwrap() + .trim() + .to_string(); + eprintln!( + "WARNING: hybrid Intel CPU detected; test suite will only use P-cores: {list}" + ); + // Parse CPU list to extract the number of P-cores. This assumes the P-core ids are + // continuous, in format `m-n`. + let (first, last) = list + .split_once("-") + .unwrap_or_else(|| panic!("unsupported P-core list format: {list:?}.")); + let first = first + .parse::() + .expect("expected a number at the start of the P-core list"); + let last = last + .parse::() + .expect("expected a number at the end of the P-core list"); + let len = last - first + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3] + Some(PCores { len, list }) + } else { + // Neither dir exists: something is wrong, because `/sys/devices/cpu` has been + // in Linux (on x86-64, at least) for over a decade. + eprintln!( + "WARNING: `/sys/devices/{{cpu,cpu_core}}` not found; \ + unable to determine if CPU has a hybrid architecture" + ); + None + } + } + } else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] { + // On ARM64/Linux there is no definitive way to distinguish P-cores from E-cores, so we + // must use a heuristic. + // + // Each core has a listed "capacity", a performance estimate relative to the most powerful + // core in the system (scaled 0-1024). For example, an ASUS GX10 Ascent has a Cortex-X925 + // with 10 P-cores and a Cortex-A725 with 10 E-cores. The reported capacities are: + // * Cores 0- 4: 718 (E-cores in cluster 1 with 8MiB L3 cache) + // * Cores 5- 9: 997 (P-cores in cluster 1 with 8MiB L3 cache) + // * Cores 10-14: 731 (E-cores in cluster 2 with 16MiB L3 cache) + // * Cores 15-18: 1017 (P-cores in cluster 2 with 16MiB L3 cache) + // * Core 19: 1024 (P-core in cluster 2 with 16MiB L3 cache)) + // + // The heuristic is that any core with a capacity at least 90% of the maximum capacity is + // considered a P-core, and any other core is considered an E-core. (The 718/731 and + // 997/1017 differences are presumably due to the L3 cache size. The reason for the + // 1017/1024 difference is unclear. Even though the P-cores are not all identical, they are + // close enough for our purposes.) + fn p_cores() -> Option { + let mut caps = vec![]; + for i in 0.. { + let path = format!("/sys/devices/system/cpu/cpu{i}/cpu_capacity"); + if !std::fs::exists(&path).unwrap() { + break; + } + let cap = std::fs::read_to_string(&path).unwrap().trim().parse::().unwrap(); + caps.push((i, cap)); + } + + if let Some(max_cap) = caps.iter().map(|(_, cap)| cap).max() { + // Filter out cores that fail the 90% capacity check. + let cap_threshold = *max_cap as f64 * 0.9; + let p_cores: Vec<_> = caps.iter().filter_map(|(i, cap)| { + if *cap as f64 >= cap_threshold { + Some(i.to_string()) + } else { + None + } + }).collect(); -#[cfg(not(target_os = "linux"))] -// Modify this stub if you want to add support for P/E cores on more OSs -fn run_on_p_cores(_path: &Path, _cpu_list: &str) -> Command { - todo!("Can't run commands on the P cores on this platform"); + if p_cores.len() == caps.len() { + // All cores have roughly the same capacity; this is not a hybrid CPU. + None + } else { + let list = p_cores.join(","); + eprintln!( + "WARNING: hybrid ARM CPU detected; test suite will only use P-cores: {list}" + ); + Some(PCores { + len: p_cores.len(), + list, + }) + } + } else { + eprintln!( + "WARNING: `/sys/devices/system/cpu/cpu*/cpu_capacity` not found; \ + unable to determine if CPU has a hybrid architecture" + ); + None + } + } + } else { + // Modify this stub if you want to add support for hybrid architectures on more platforms. + fn p_cores() -> Option { + None + } + } } impl<'a> CargoProcess<'a> { @@ -214,11 +285,17 @@ impl<'a> CargoProcess<'a> { } fn base_command(&self, cwd: &Path, subcommand: &str) -> Command { - // Processors with P and E cores require special handling - let mut cmd = if let Some(p_cores) = performance_cores() { - run_on_p_cores(Path::new(&self.toolchain.components.cargo), p_cores) + let cargo_path = Path::new(&self.toolchain.components.cargo); + let mut cmd = if let Some(p_cores) = (*P_CORES).as_ref() { + // Processors with P-cores and E-cores require special handling. + let mut cmd = Command::new("taskset"); + cmd.env("CARGO_BUILD_JOBS", p_cores.len.to_string()); + cmd.arg("--cpu-list"); + cmd.arg(&p_cores.list); + cmd.arg(cargo_path); + cmd } else { - Command::new(Path::new(&self.toolchain.components.cargo)) + Command::new(cargo_path) }; cmd // Not all cargo invocations (e.g. `cargo clean`) need all of these @@ -604,6 +681,11 @@ fn process_stat_output( let stdout = String::from_utf8(output.stdout.clone()).expect("utf8 output"); let mut stats = Stats::new(); + // ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and + // `armv8_pmuv3_1/branche-misses/`. + #[cfg(all(target_os = "linux", target_arch = "aarch64"))] + let arm_p_core_events_re = regex::Regex::new(r"armv[0-9]_pmuv[0-9]_[0-9]/([^/]*)/").unwrap(); + let mut self_profile_dir: Option = None; let mut self_profile_crate: Option = None; for line in stdout.lines() { @@ -654,24 +736,43 @@ fn process_stat_output( } }; } + let mut parts = line.split(';').map(|s| s.trim()); let cnt = get!(parts.next()); + if cnt == "" || cnt == "" || cnt.is_empty() { + continue; + } + let _unit = get!(parts.next()); - let mut name = get!(parts.next()); - // Map P-core events to normal events - if name == "cpu_core/instructions:u/" { - name = "instructions:u"; + + #[allow(unused_mut)] + let mut name = get!(parts.next()).to_string(); + // Map P-core event name to normal event names. + cfg_if::cfg_if! { + if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] { + if name == "cpu_core/instructions:u/" { + name = "instructions:u".to_string(); + } + } else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] { + // ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and + // `armv8_pmuv3_1/branche-misses/`. + if let Some(event) = arm_p_core_events_re.captures(&name) { + name = event[1].to_string(); + } + } } + let _time = get!(parts.next()); + let pct = get!(parts.next()); - if cnt == "" || cnt == "" || cnt.is_empty() { - continue; - } if !pct.starts_with("100.") { + // If this fails, it's probably because the CPU has a hybrid architecture and the + // metric is split across P-cores and E-cores. See `PCores`. panic!("measurement of `{name}` only active for {pct}% of the time"); } + stats.insert( - name.to_owned(), + name, cnt.parse() .map_err(|e| DeserializeStatError::ParseError(cnt.to_string(), e))?, ); diff --git a/site/frontend/package-lock.json b/site/frontend/package-lock.json index 714223450..e68e5ed6e 100644 --- a/site/frontend/package-lock.json +++ b/site/frontend/package-lock.json @@ -592,7 +592,6 @@ "resolved": "https://registry.npmjs.org/@parcel/core/-/core-2.16.0.tgz", "integrity": "sha512-erH9GdLe8Boie0mCO8hXn8Qt/pCACsOFlKp8UHNMlPaizUtCDkCOQqwmSi+VyrJ3dMMCOc/qBwTSGAJaJE8/Kw==", "license": "MIT", - "peer": true, "dependencies": { "@mischnic/json-sourcemap": "^0.1.1", "@parcel/cache": "2.16.0", @@ -2614,7 +2613,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.9", "caniuse-lite": "^1.0.30001746", @@ -3737,7 +3735,6 @@ "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -3807,7 +3804,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.22.tgz", "integrity": "sha512-toaZjQ3a/G/mYaLSbV+QsQhIdMo9x5rrqIpYRObsJ6T/J+RyCSFwN2LHNVH9v8uIcljDNa3QzPVdv3Y6b9hAJQ==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.22", "@vue/compiler-sfc": "3.5.22",