From 0f7c98f2a3eeb19e028b1496b1738b789dd2c941 Mon Sep 17 00:00:00 2001 From: xmakro Date: Tue, 2 Jun 2026 08:35:20 -0700 Subject: [PATCH] Build the dep-graph reverse index lazily, per DepKind Replace the eager per-DepKind fingerprint-to-index map built at decode with a counting sort into per-kind ranges plus a lazily-built map per kind. --- .../rustc_incremental/src/persist/load.rs | 2 +- .../rustc_middle/src/dep_graph/serialized.rs | 153 +++++++++++++++--- 2 files changed, 128 insertions(+), 27 deletions(-) diff --git a/compiler/rustc_incremental/src/persist/load.rs b/compiler/rustc_incremental/src/persist/load.rs index 0e2cda68c6cc4..6229c66afa75b 100644 --- a/compiler/rustc_incremental/src/persist/load.rs +++ b/compiler/rustc_incremental/src/persist/load.rs @@ -112,7 +112,7 @@ fn load_dep_graph(sess: &Session) -> LoadResult { return LoadResult::DataOutOfDate; } - let prev_graph = SerializedDepGraph::decode(&mut decoder); + let prev_graph = SerializedDepGraph::decode(&mut decoder, &sess.prof); LoadResult::Ok { prev_graph, prev_work_products } } diff --git a/compiler/rustc_middle/src/dep_graph/serialized.rs b/compiler/rustc_middle/src/dep_graph/serialized.rs index ef5e3d9268ad7..759c37c9ff587 100644 --- a/compiler/rustc_middle/src/dep_graph/serialized.rs +++ b/compiler/rustc_middle/src/dep_graph/serialized.rs @@ -41,8 +41,8 @@ use std::cell::RefCell; use std::cmp::max; -use std::sync::Arc; use std::sync::atomic::Ordering; +use std::sync::{Arc, OnceLock}; use std::{iter, mem}; use rustc_data_structures::fingerprint::{Fingerprint, PackedFingerprint}; @@ -51,7 +51,7 @@ use rustc_data_structures::outline; use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::sync::{AtomicU64, Lock, WorkerLocal, broadcast}; use rustc_data_structures::unhash::UnhashMap; -use rustc_index::IndexVec; +use rustc_index::{IndexSlice, IndexVec}; use rustc_serialize::opaque::mem_encoder::MemEncoder; use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder}; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; @@ -95,7 +95,7 @@ const DEP_NODE_WIDTH_BITS: usize = DEP_NODE_SIZE / 2; /// /// There may be unused indices with DepKind::Null in this graph due to batch allocation of /// indices to threads. -#[derive(Debug, Default)] +#[derive(Default)] pub struct SerializedDepGraph { /// The set of all DepNodes in the graph nodes: IndexVec, @@ -113,12 +113,95 @@ pub struct SerializedDepGraph { /// A flattened list of all edge targets in the graph, stored in the same /// varint encoding that we use on disk. Edge sources are implicit in edge_list_indices. edge_list_data: Vec, - /// For each dep kind, stores a map from key fingerprints back to the index - /// of the corresponding node. This is the inverse of `nodes`. - index: Vec>, + /// The lazily-built inverse of `nodes`: maps a [`DepNode`] back to its + /// [`SerializedDepNodeIndex`] via the node's key fingerprint. See + /// [`LazyNodeIndex`]. + reverse_index: LazyNodeIndex, /// The number of previous compilation sessions. This is used to generate /// unique anon dep nodes per session. session_count: u64, + /// Used to time the lazy per-`DepKind` reverse-index build. `None` only for + /// the empty default graph, which is never looked up. + profiler: Option, +} + +// `SelfProfilerRef` is not `Debug`, so we can't derive this. +impl std::fmt::Debug for SerializedDepGraph { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SerializedDepGraph") + .field("nodes", &self.nodes) + .field("value_fingerprints", &self.value_fingerprints) + .field("edge_list_indices", &self.edge_list_indices) + .field("edge_list_data", &self.edge_list_data) + .field("reverse_index", &self.reverse_index) + .field("session_count", &self.session_count) + .finish_non_exhaustive() + } +} + +/// The inverse of [`SerializedDepGraph::nodes`], built lazily per [`DepKind`]. +/// +/// Only few nodes are ever looked up here, and those cluster into a handful of +/// `DepKind`s. Building a map for every kind up front would be wasted work. +#[derive(Debug, Default)] +struct LazyNodeIndex { + /// All (non-`Null`) node indices, grouped into contiguous per-`DepKind` + /// ranges described by `kinds`. For any non-`Null` `DepKind` `k`, all values in + /// `nodes_by_kind[kinds[k].start..][..kinds[k].len]` + /// must be `Some` and have kind `k`. + nodes_by_kind: Vec>, + /// For each `DepKind`, the range of `nodes_by_kind` holding its node indices + /// and the lazily-built fingerprint map over that range. + kinds: Vec, +} + +#[derive(Debug, Default)] +struct LazyKindIndex { + /// Offset into `LazyNodeIndex::nodes_by_kind` of this kind's first node. + start: u32, + /// Number of nodes of this kind. + len: u32, + /// `key_fingerprint -> node index`, built from this kind's range on first + /// lookup. Empty kinds (and kinds never looked up) never build a map. + map: OnceLock>, +} + +impl LazyKindIndex { + /// Returns this kind's `key_fingerprint -> node index` map. + fn fingerprint_map( + &self, + kind: DepKind, + nodes: &IndexSlice, + nodes_by_kind: &[Option], + profiler: &Option, + ) -> &UnhashMap { + self.map.get_or_init(|| { + let _prof_timer = profiler + .as_ref() + .map(|p| p.generic_activity("incr_comp_load_dep_graph_reverse_index")); + let range = (self.start as usize)..(self.start as usize + self.len as usize); + let mut map = + UnhashMap::with_capacity_and_hasher(self.len as usize, Default::default()); + for &idx in &nodes_by_kind[range] { + let idx = idx.expect("counting sort fills every slot of a kind's range"); + let node = nodes[idx]; + debug_assert_eq!(node.kind, kind); + if map.insert(node.key_fingerprint, idx).is_some() + // Side effect nodes can legitimately share a fingerprint. + && node.kind != DepKind::SideEffect + { + panic!( + "Error: A dep graph node ({kind:?}) does not have an unique index. \ + Running a clean build on a nightly compiler with \ + `-Z incremental-verify-ich` can help narrow down the issue for reporting. \ + A clean build may also work around the issue.\n + DepNode: {node:?}" + ) + } + } + map + }) + } } impl SerializedDepGraph { @@ -151,7 +234,14 @@ impl SerializedDepGraph { #[inline] pub fn node_to_index_opt(&self, dep_node: &DepNode) -> Option { - self.index.get(dep_node.kind.as_usize())?.get(&dep_node.key_fingerprint).copied() + let kind = self.reverse_index.kinds.get(dep_node.kind.as_usize())?; + let map = kind.fingerprint_map( + dep_node.kind, + &self.nodes, + &self.reverse_index.nodes_by_kind, + &self.profiler, + ); + map.get(&dep_node.key_fingerprint).copied() } #[inline] @@ -206,8 +296,8 @@ fn mask(bits: usize) -> usize { } impl SerializedDepGraph { - #[instrument(level = "debug", skip(d))] - pub fn decode(d: &mut MemDecoder<'_>) -> Arc { + #[instrument(level = "debug", skip(d, profiler))] + pub fn decode(d: &mut MemDecoder<'_>, profiler: &SelfProfilerRef) -> Arc { // The last 16 bytes are the node count and edge count. debug!("position: {:?}", d.position()); @@ -286,36 +376,47 @@ impl SerializedDepGraph { // end of the array. This padding ensure it doesn't. edge_list_data.extend(&[0u8; DEP_NODE_PAD]); - // Read the number of each dep kind and use it to create an hash map with a suitable size. - let mut index: Vec<_> = (0..(DepKind::MAX + 1)) - .map(|_| UnhashMap::with_capacity_and_hasher(d.read_u32() as usize, Default::default())) - .collect(); + // Read the number of nodes of each dep kind, and perform + // counting sort for `LazyNodeIndex`. + let mut kinds = Vec::with_capacity(DepKind::MAX as usize + 1); + let mut offset = 0u32; + for _ in 0..(DepKind::MAX + 1) { + let len = d.read_u32(); + kinds.push(LazyKindIndex { start: offset, len, map: OnceLock::new() }); + offset += len; + } + debug_assert_eq!(offset as usize, node_count); let session_count = d.read_u64(); + // Counting sort: place each node index into its kind's range. `fill[k]` + // points at the next free slot in kind `k`'s range, so a kind's nodes end + // up contiguous. Slots start as `None` and are each filled exactly once + // (the counts sum to the number of non-`Null` nodes). + let mut nodes_by_kind = vec![None; node_count]; + let mut fill: Vec = kinds.iter().map(|k| k.start).collect(); for (idx, node) in nodes.iter_enumerated() { - if index[node.kind.as_usize()].insert(node.key_fingerprint, idx).is_some() { - // Empty nodes and side effect nodes can have duplicates - if node.kind != DepKind::Null && node.kind != DepKind::SideEffect { - let kind = node.kind; - panic!( - "Error: A dep graph node ({kind:?}) does not have an unique index. \ - Running a clean build on a nightly compiler with \ - `-Z incremental-verify-ich` can help narrow down the issue for reporting. \ - A clean build may also work around the issue.\n - DepNode: {node:?}" - ) - } + // Unused indices from batch allocation stay `Null`; they carry no + // encoded node and are never looked up by fingerprint, so skip them. + if node.kind == DepKind::Null { + continue; } + let k = node.kind.as_usize(); + nodes_by_kind[fill[k] as usize] = Some(idx); + fill[k] += 1; } + // Each kind's range was filled exactly to its end. + debug_assert!(kinds.iter().zip(&fill).all(|(k, &f)| f == k.start + k.len)); + let reverse_index = LazyNodeIndex { nodes_by_kind, kinds }; Arc::new(SerializedDepGraph { nodes, value_fingerprints, edge_list_indices, edge_list_data, - index, + reverse_index, session_count, + profiler: Some(profiler.clone()), }) } }