Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_incremental/src/persist/load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ fn load_dep_graph(sess: &Session) -> LoadResult {
return LoadResult::DataOutOfDate;
}

let prev_graph = SerializedDepGraph::decode(&mut decoder);
let prev_graph = SerializedDepGraph::decode(&mut decoder, &sess.prof);

LoadResult::Ok { prev_graph, prev_work_products }
}
Expand Down
153 changes: 127 additions & 26 deletions compiler/rustc_middle/src/dep_graph/serialized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@

use std::cell::RefCell;
use std::cmp::max;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use std::sync::{Arc, OnceLock};
use std::{iter, mem};

use rustc_data_structures::fingerprint::{Fingerprint, PackedFingerprint};
Expand All @@ -51,7 +51,7 @@ use rustc_data_structures::outline;
use rustc_data_structures::profiling::SelfProfilerRef;
use rustc_data_structures::sync::{AtomicU64, Lock, WorkerLocal, broadcast};
use rustc_data_structures::unhash::UnhashMap;
use rustc_index::IndexVec;
use rustc_index::{IndexSlice, IndexVec};
use rustc_serialize::opaque::mem_encoder::MemEncoder;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
Expand Down Expand Up @@ -95,7 +95,7 @@ const DEP_NODE_WIDTH_BITS: usize = DEP_NODE_SIZE / 2;
///
/// There may be unused indices with DepKind::Null in this graph due to batch allocation of
/// indices to threads.
#[derive(Debug, Default)]
#[derive(Default)]
pub struct SerializedDepGraph {
/// The set of all DepNodes in the graph
nodes: IndexVec<SerializedDepNodeIndex, DepNode>,
Expand All @@ -113,12 +113,95 @@ pub struct SerializedDepGraph {
/// A flattened list of all edge targets in the graph, stored in the same
/// varint encoding that we use on disk. Edge sources are implicit in edge_list_indices.
edge_list_data: Vec<u8>,
/// For each dep kind, stores a map from key fingerprints back to the index
/// of the corresponding node. This is the inverse of `nodes`.
index: Vec<UnhashMap<PackedFingerprint, SerializedDepNodeIndex>>,
/// The lazily-built inverse of `nodes`: maps a [`DepNode`] back to its
/// [`SerializedDepNodeIndex`] via the node's key fingerprint. See
/// [`LazyNodeIndex`].
reverse_index: LazyNodeIndex,
/// The number of previous compilation sessions. This is used to generate
/// unique anon dep nodes per session.
session_count: u64,
/// Used to time the lazy per-`DepKind` reverse-index build. `None` only for
/// the empty default graph, which is never looked up.
profiler: Option<SelfProfilerRef>,
}

// `Debug` cannot be derived for `SerializedDepGraph` because `SelfProfilerRef`
// is not `Debug`; this hand-written impl prints every field it names and
// leaves the `profiler` field out.
impl std::fmt::Debug for SerializedDepGraph {
    /// Writes the graph as a `SerializedDepGraph { .. }` struct. The output is
    /// marked non-exhaustive since not every field is printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("SerializedDepGraph");
        repr.field("nodes", &self.nodes);
        repr.field("value_fingerprints", &self.value_fingerprints);
        repr.field("edge_list_indices", &self.edge_list_indices);
        repr.field("edge_list_data", &self.edge_list_data);
        repr.field("reverse_index", &self.reverse_index);
        repr.field("session_count", &self.session_count);
        // `finish_non_exhaustive` appends `..` to signal the omitted field(s).
        repr.finish_non_exhaustive()
    }
}

/// The inverse of [`SerializedDepGraph::nodes`], built lazily per [`DepKind`].
///
/// Only a few nodes are ever looked up here, and those cluster into a handful of
/// `DepKind`s. Building a map for every kind up front would be wasted work, so
/// node indices are only grouped by kind eagerly; each kind's fingerprint map
/// is built the first time that kind is looked up.
#[derive(Debug, Default)]
struct LazyNodeIndex {
/// All (non-`Null`) node indices, grouped into contiguous per-`DepKind`
/// ranges described by `kinds`. For any non-`Null` `DepKind` `k`, all values in
/// `nodes_by_kind[kinds[k].start..][..kinds[k].len]`
/// must be `Some` and have kind `k`.
///
/// Slots are `Option`s because the vector starts out all-`None` and is then
/// filled in by a counting sort over the decoded nodes.
nodes_by_kind: Vec<Option<SerializedDepNodeIndex>>,
/// For each `DepKind`, the range of `nodes_by_kind` holding its node indices
/// and the lazily-built fingerprint map over that range. Indexed by
/// `DepKind::as_usize()`.
kinds: Vec<LazyKindIndex>,
}

/// The range of `LazyNodeIndex::nodes_by_kind` that belongs to a single
/// `DepKind`, together with the fingerprint map built over it on demand.
#[derive(Debug, Default)]
struct LazyKindIndex {
/// Offset into `LazyNodeIndex::nodes_by_kind` of this kind's first node.
start: u32,
/// Number of nodes of this kind, i.e. the length of this kind's range
/// `start..start + len` in `nodes_by_kind`.
len: u32,
/// `key_fingerprint -> node index`, built from this kind's range on first
/// lookup. Kinds that are never looked up never build a map; looking up an
/// empty kind initializes an empty map.
map: OnceLock<UnhashMap<PackedFingerprint, SerializedDepNodeIndex>>,
}

impl LazyKindIndex {
/// Returns this kind's `key_fingerprint -> node index` map.
fn fingerprint_map(
&self,
kind: DepKind,
nodes: &IndexSlice<SerializedDepNodeIndex, DepNode>,
nodes_by_kind: &[Option<SerializedDepNodeIndex>],
profiler: &Option<SelfProfilerRef>,
) -> &UnhashMap<PackedFingerprint, SerializedDepNodeIndex> {
self.map.get_or_init(|| {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.map.get_or_init(|| {
self.map.get_or_init(|| {
let _prof_timer = self.profiler.generic_activity("incr_comp_load_dep_graph_reverse_index");

Let's add a timer for detailed perf output. This will make it easier to understand why regressions happen in some queries.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the timer in `fingerprint_map`, but I put the profiler on `SerializedDepGraph` rather than on `LazyKindIndex`: there is one `LazyKindIndex` per `DepKind`, so storing it there would mean a lot of clones. The annoying part is that this now needs a manual `Debug` impl. Please let me know if you see a better way.

let _prof_timer = profiler
.as_ref()
.map(|p| p.generic_activity("incr_comp_load_dep_graph_reverse_index"));
let range = (self.start as usize)..(self.start as usize + self.len as usize);
let mut map =
UnhashMap::with_capacity_and_hasher(self.len as usize, Default::default());
for &idx in &nodes_by_kind[range] {
let idx = idx.expect("counting sort fills every slot of a kind's range");
let node = nodes[idx];
debug_assert_eq!(node.kind, kind);
if map.insert(node.key_fingerprint, idx).is_some()
// Side effect nodes can legitimately share a fingerprint.
&& node.kind != DepKind::SideEffect
{
panic!(
"Error: A dep graph node ({kind:?}) does not have an unique index. \
Running a clean build on a nightly compiler with \
`-Z incremental-verify-ich` can help narrow down the issue for reporting. \
A clean build may also work around the issue.\n
DepNode: {node:?}"
)
}
}
map
})
}
}

impl SerializedDepGraph {
Expand Down Expand Up @@ -151,7 +234,14 @@ impl SerializedDepGraph {

#[inline]
pub fn node_to_index_opt(&self, dep_node: &DepNode) -> Option<SerializedDepNodeIndex> {
self.index.get(dep_node.kind.as_usize())?.get(&dep_node.key_fingerprint).copied()
let kind = self.reverse_index.kinds.get(dep_node.kind.as_usize())?;
let map = kind.fingerprint_map(
dep_node.kind,
&self.nodes,
&self.reverse_index.nodes_by_kind,
&self.profiler,
);
map.get(&dep_node.key_fingerprint).copied()
}

#[inline]
Expand Down Expand Up @@ -206,8 +296,8 @@ fn mask(bits: usize) -> usize {
}

impl SerializedDepGraph {
#[instrument(level = "debug", skip(d))]
pub fn decode(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> {
#[instrument(level = "debug", skip(d, profiler))]
pub fn decode(d: &mut MemDecoder<'_>, profiler: &SelfProfilerRef) -> Arc<SerializedDepGraph> {
// The last 16 bytes are the node count and edge count.
debug!("position: {:?}", d.position());

Expand Down Expand Up @@ -286,36 +376,47 @@ impl SerializedDepGraph {
// end of the array. This padding ensures it doesn't.
edge_list_data.extend(&[0u8; DEP_NODE_PAD]);

// Read the number of each dep kind and use it to create an hash map with a suitable size.
let mut index: Vec<_> = (0..(DepKind::MAX + 1))
.map(|_| UnhashMap::with_capacity_and_hasher(d.read_u32() as usize, Default::default()))
.collect();
// Read the number of nodes of each dep kind, and perform
// counting sort for `LazyNodeIndex`.
let mut kinds = Vec::with_capacity(DepKind::MAX as usize + 1);
let mut offset = 0u32;
for _ in 0..(DepKind::MAX + 1) {
let len = d.read_u32();
kinds.push(LazyKindIndex { start: offset, len, map: OnceLock::new() });
offset += len;
}
debug_assert_eq!(offset as usize, node_count);

let session_count = d.read_u64();

// Counting sort: place each node index into its kind's range. `fill[k]`
// points at the next free slot in kind `k`'s range, so a kind's nodes end
// up contiguous. Slots start as `None` and are each filled exactly once
// (the counts sum to the number of non-`Null` nodes).
let mut nodes_by_kind = vec![None; node_count];
let mut fill: Vec<u32> = kinds.iter().map(|k| k.start).collect();
for (idx, node) in nodes.iter_enumerated() {
if index[node.kind.as_usize()].insert(node.key_fingerprint, idx).is_some() {
// Empty nodes and side effect nodes can have duplicates
if node.kind != DepKind::Null && node.kind != DepKind::SideEffect {
let kind = node.kind;
panic!(
"Error: A dep graph node ({kind:?}) does not have an unique index. \
Running a clean build on a nightly compiler with \
`-Z incremental-verify-ich` can help narrow down the issue for reporting. \
A clean build may also work around the issue.\n
DepNode: {node:?}"
)
}
// Unused indices from batch allocation stay `Null`; they carry no
// encoded node and are never looked up by fingerprint, so skip them.
if node.kind == DepKind::Null {
Comment thread
xmakro marked this conversation as resolved.
continue;
}
let k = node.kind.as_usize();
nodes_by_kind[fill[k] as usize] = Some(idx);
fill[k] += 1;
}
Comment thread
xmakro marked this conversation as resolved.
// Each kind's range was filled exactly to its end.
debug_assert!(kinds.iter().zip(&fill).all(|(k, &f)| f == k.start + k.len));
let reverse_index = LazyNodeIndex { nodes_by_kind, kinds };

Arc::new(SerializedDepGraph {
nodes,
value_fingerprints,
edge_list_indices,
edge_list_data,
index,
reverse_index,
session_count,
profiler: Some(profiler.clone()),
})
}
}
Expand Down
Loading