Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions collector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["rt", "process"] }

cfg-if = "1"
thiserror = "2"
tempfile = "3"
libc = "0.2"
Expand Down
1 change: 1 addition & 0 deletions collector/src/compile/benchmark/target.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::{fmt, str::FromStr};
/// https://doc.rust-lang.org/nightly/rustc/platform-support.html
///
/// Presently we only support x86_64
/// FIXME: we actually support Windows and aarch64, but that isn't captured here.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, serde::Deserialize)]
pub enum Target {
/// `x86_64-unknown-linux-gnu`
Expand Down
237 changes: 169 additions & 68 deletions collector/src/compile/execute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,66 +134,137 @@ pub struct CargoProcess<'a> {
pub target: Target,
pub workspace_package: Option<String>,
}
/// Returns an optional list of Performance CPU cores, if the system has P and E cores.
/// This list *should* be in a format suitable for the `taskset` command.
#[cfg(target_os = "linux")]
fn performance_cores() -> Option<&'static String> {
use std::sync::LazyLock;
static PERFORMANCE_CORES: LazyLock<Option<String>> = LazyLock::new(|| {
if std::fs::exists("/sys/devices/cpu").expect("Could not check the CPU architecture details: could not check if `/sys/devices/cpu` exists!") {
// If /sys/devices/cpu exists, then this is not a "Performance-hybrid" CPU.
None
}
else if std::fs::exists("/sys/devices/cpu_core").expect("Could not check the CPU architecture detali: could not check if `/sys/devices/cpu_core` exists!") {
// If /sys/devices/cpu_core exists, then this is a "Performance-hybrid" CPU.
eprintln!("WARNING: Performance-Hybrid CPU detected. `rustc-perf` can't run properly on Efficency cores: test suite will only use Performance cores!");
Some(std::fs::read_to_string("/sys/devices/cpu_core/cpus").unwrap().trim().to_string())
} else {
// If neither dir exists, then something is wrong - `/sys/devices/cpu` has been in Linux for over a decade.
eprintln!("WARNING: neither `/sys/devices/cpu` nor `/sys/devices/cpu_core` present, unable to determine if this CPU has a Performance-Hybrid architecture.");
None
}
});
(*PERFORMANCE_CORES).as_ref()
}

#[cfg(not(target_os = "linux"))]
// Modify this stub if you want to add support for P/E cores on more OSs
fn performance_cores() -> Option<&'static String> {
None
// Some CPUs have a hybrid architecture with a mixture of P-cores (power) and E-cores (efficiency).
// When benchmarking we use `taskset` to restrict execution to P-cores. Why?
// 1. The instruction count info for E-cores is often incomplete, and a substantial chunk of events
// is lost.
// 2. The performance characteristics of E-cores are less reliable, so excluding them from the
// benchmark makes things easier.
// 3. An unpredictable mix of P-core and E-core execution can give inconsistent results.
//
// If a hybrid architecture is detected, this type is used to hold information about the P-cores.
// The detection method used varies across platforms.
#[derive(Debug)]
struct PCores {
/// The number of P-cores.
len: usize,
/// The list of P-cores, in a form suitable for passing to `taskset`.
list: String,
}

#[cfg(target_os = "linux")]
/// Makes the benchmark run only on Performance cores.
fn run_on_p_cores(path: &Path, cpu_list: &str) -> Command {
// Parse CPU list to extract the number of P cores!
// This assumes the P core id's are countinus, in format `fisrt_id-last_id`
let (core_start, core_end) = cpu_list
.split_once("-")
.unwrap_or_else(|| panic!("Unsuported P core list format: {cpu_list:?}."));
let core_start: u32 = core_start
.parse()
.expect("Expected a number when parsing the start of the P core list!");
let core_end: u32 = core_end
.parse()
.expect("Expected a number when parsing the end of the P core list!");
let core_count = core_end - core_start;
let mut cmd = Command::new("taskset");
// Set job count to P core count - this is done for 2 reasons:
// 1. The instruction count info for E core is often very incompleate - a substantial chunk of events is lost.
// 2. The performance charcteristics of E cores are less reliable, so excluding them from the benchmark makes things easier.
cmd.env("CARGO_BUILD_JOBS", format!("{core_count}"));
// pass the P core list to taskset to pin task to the P core.
cmd.arg("--cpu-list");
cmd.arg(cpu_list);
cmd.arg(path);
cmd
}
static P_CORES: LazyLock<Option<PCores>> = LazyLock::new(p_cores);

cfg_if::cfg_if! {
if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] {
// On x86-64/Linux we look for the presence of `/sys/devices/cpu_core/` which indicates a
// hybrid architecture.
fn p_cores() -> Option<PCores> {
if std::fs::exists("/sys/devices/cpu").unwrap() {
// `/sys/devices/cpu` exists: this is not a hybrid CPU.
None
} else if std::fs::exists("/sys/devices/cpu_core").unwrap() {
// `/sys/devices/cpu_core/` exists: this is a hybrid CPU, and the `cpus` file
// within contains the list of P-cores. (`sys/devices/cpu_atom/cpus` contains
// the list of E-cores).
let list =
std::fs::read_to_string("/sys/devices/cpu_core/cpus")
.unwrap()
.trim()
.to_string();
eprintln!(
"WARNING: hybrid Intel CPU detected; test suite will only use P-cores: {list}"
);
// Parse CPU list to extract the number of P-cores. This assumes the P-core ids are
// continuous, in format `m-n`.
let (first, last) = list
.split_once("-")
.unwrap_or_else(|| panic!("unsupported P-core list format: {list:?}."));
let first = first
.parse::<usize>()
.expect("expected a number at the start of the P-core list");
let last = last
.parse::<usize>()
.expect("expected a number at the end of the P-core list");
let len = last - first + 1; // e.g. "0-3" is four cores: [0, 1, 2, 3]
Some(PCores { len, list })
} else {
// Neither dir exists: something is wrong, because `/sys/devices/cpu` has been
// in Linux (on x86-64, at least) for over a decade.
eprintln!(
"WARNING: `/sys/devices/{{cpu,cpu_core}}` not found; \
unable to determine if CPU has a hybrid architecture"
);
None
}
}
} else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] {
// On ARM64/Linux there is no definitive way to distinguish P-cores from E-cores, so we
// must use a heuristic.
//
// Each core has a listed "capacity", a performance estimate relative to the most powerful
// core in the system (scaled 0-1024). For example, an ASUS GX10 Ascent has a Cortex-X925
// with 10 P-cores and a Cortex-A725 with 10 E-cores. The reported capacities are:
// * Cores 0- 4: 718 (E-cores in cluster 1 with 8MiB L3 cache)
// * Cores 5- 9: 997 (P-cores in cluster 1 with 8MiB L3 cache)
// * Cores 10-14: 731 (E-cores in cluster 2 with 16MiB L3 cache)
// * Cores 15-18: 1017 (P-cores in cluster 2 with 16MiB L3 cache)
// * Core 19: 1024 (P-core in cluster 2 with 16MiB L3 cache))
//
// The heuristic is that any core with a capacity at least 90% of the maximum capacity is
// considered a P-core, and any other core is considered an E-core. (The 718/731 and
// 997/1017 differences are presumably due to the L3 cache size. The reason for the
// 1017/1024 difference is unclear. Even though the P-cores are not all identical, they are
// close enough for our purposes.)
fn p_cores() -> Option<PCores> {
let mut caps = vec![];
for i in 0.. {
let path = format!("/sys/devices/system/cpu/cpu{i}/cpu_capacity");
if !std::fs::exists(&path).unwrap() {
break;
}
let cap = std::fs::read_to_string(&path).unwrap().trim().parse::<usize>().unwrap();
caps.push((i, cap));
}

if let Some(max_cap) = caps.iter().map(|(_, cap)| cap).max() {
// Filter out cores that fail the 90% capacity check.
let cap_threshold = *max_cap as f64 * 0.9;
let p_cores: Vec<_> = caps.iter().filter_map(|(i, cap)| {
if *cap as f64 >= cap_threshold {
Some(i.to_string())
} else {
None
}
}).collect();

#[cfg(not(target_os = "linux"))]
// Modify this stub if you want to add support for P/E cores on more OSs
fn run_on_p_cores(_path: &Path, _cpu_list: &str) -> Command {
todo!("Can't run commands on the P cores on this platform");
if p_cores.len() == caps.len() {
// All cores have roughly the same capacity; this is not a hybrid CPU.
None
} else {
let list = p_cores.join(",");
eprintln!(
"WARNING: hybrid ARM CPU detected; test suite will only use P-cores: {list}"
);
Some(PCores {
len: p_cores.len(),
list,
})
}
} else {
eprintln!(
"WARNING: `/sys/devices/system/cpu/cpu*/cpu_capacity` not found; \
unable to determine if CPU has a hybrid architecture"
);
None
}
}
} else {
// Modify this stub if you want to add support for hybrid architectures on more platforms.
fn p_cores() -> Option<PCores> {
None
}
}
}

impl<'a> CargoProcess<'a> {
Expand All @@ -214,11 +285,17 @@ impl<'a> CargoProcess<'a> {
}

fn base_command(&self, cwd: &Path, subcommand: &str) -> Command {
// Processors with P and E cores require special handling
let mut cmd = if let Some(p_cores) = performance_cores() {
run_on_p_cores(Path::new(&self.toolchain.components.cargo), p_cores)
let cargo_path = Path::new(&self.toolchain.components.cargo);
let mut cmd = if let Some(p_cores) = (*P_CORES).as_ref() {
// Processors with P-cores and E-cores require special handling.
let mut cmd = Command::new("taskset");
cmd.env("CARGO_BUILD_JOBS", p_cores.len.to_string());
cmd.arg("--cpu-list");
cmd.arg(&p_cores.list);
cmd.arg(cargo_path);
cmd
} else {
Command::new(Path::new(&self.toolchain.components.cargo))
Command::new(cargo_path)
};
cmd
// Not all cargo invocations (e.g. `cargo clean`) need all of these
Expand Down Expand Up @@ -604,6 +681,11 @@ fn process_stat_output(
let stdout = String::from_utf8(output.stdout.clone()).expect("utf8 output");
let mut stats = Stats::new();

// ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and
// `armv8_pmuv3_1/branche-misses/`.
#[cfg(all(target_os = "linux", target_arch = "aarch64"))]
let arm_p_core_events_re = regex::Regex::new(r"armv[0-9]_pmuv[0-9]_[0-9]/([^/]*)/").unwrap();

let mut self_profile_dir: Option<PathBuf> = None;
let mut self_profile_crate: Option<String> = None;
for line in stdout.lines() {
Expand Down Expand Up @@ -654,24 +736,43 @@ fn process_stat_output(
}
};
}

let mut parts = line.split(';').map(|s| s.trim());
let cnt = get!(parts.next());
if cnt == "<not supported>" || cnt == "<not counted>" || cnt.is_empty() {
continue;
}

let _unit = get!(parts.next());
let mut name = get!(parts.next());
// Map P-core events to normal events
if name == "cpu_core/instructions:u/" {
name = "instructions:u";

#[allow(unused_mut)]
let mut name = get!(parts.next()).to_string();
// Map P-core event name to normal event names.
cfg_if::cfg_if! {
if #[cfg(all(target_os = "linux", target_arch = "x86_64"))] {
if name == "cpu_core/instructions:u/" {
name = "instructions:u".to_string();
}
} else if #[cfg(all(target_os = "linux", target_arch = "aarch64"))] {
// ARM P-core events have names like `armv8_pmuv3_0/instructions:u/` and
// `armv8_pmuv3_1/branche-misses/`.
if let Some(event) = arm_p_core_events_re.captures(&name) {
name = event[1].to_string();
}
}
}

let _time = get!(parts.next());

let pct = get!(parts.next());
if cnt == "<not supported>" || cnt == "<not counted>" || cnt.is_empty() {
continue;
}
if !pct.starts_with("100.") {
// If this fails, it's probably because the CPU has a hybrid architecture and the
// metric is split across P-cores and E-cores. See `PCores`.
panic!("measurement of `{name}` only active for {pct}% of the time");
}

stats.insert(
name.to_owned(),
name,
cnt.parse()
.map_err(|e| DeserializeStatError::ParseError(cnt.to_string(), e))?,
);
Expand Down
4 changes: 0 additions & 4 deletions site/frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading