From c8f61433ba4fedc5987f5854d39ead5cb54b0682 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Mon, 30 Mar 2026 17:10:42 +0100 Subject: [PATCH 1/8] Add incremental consistency test CLI Binary that validates delete/re-add cycles produce identical results to fresh builds. For each cycle: index all files, resolve, delete N random files, resolve, re-add them, resolve, then compare declaration counts against a fresh baseline. Usage: incremental_test [--seed 42] [--cycles 5] --- rust/rubydex/Cargo.toml | 4 + rust/rubydex/src/bin/incremental_test.rs | 745 +++++++++++++++++++++++ 2 files changed, 749 insertions(+) create mode 100644 rust/rubydex/src/bin/incremental_test.rs diff --git a/rust/rubydex/Cargo.toml b/rust/rubydex/Cargo.toml index fcc6f2ea..51f6c1db 100644 --- a/rust/rubydex/Cargo.toml +++ b/rust/rubydex/Cargo.toml @@ -9,6 +9,10 @@ license = "MIT" name = "rubydex_cli" path = "src/main.rs" +[[bin]] +name = "incremental_test" +path = "src/bin/incremental_test.rs" + [lib] crate-type = ["rlib"] diff --git a/rust/rubydex/src/bin/incremental_test.rs b/rust/rubydex/src/bin/incremental_test.rs new file mode 100644 index 00000000..d8cdfc66 --- /dev/null +++ b/rust/rubydex/src/bin/incremental_test.rs @@ -0,0 +1,745 @@ +use std::collections::HashSet; +use std::io::Write as _; +use std::panic; +use std::time::Instant; + +use clap::Parser; + +use rubydex::{ + indexing::{self, LanguageId}, + integrity, listing, + model::graph::Graph, + resolution::Resolver, +}; + +#[derive(Parser, Debug)] +#[command( + name = "incremental_test", + about = "Test that incremental resolution matches fresh resolution" +)] +struct Args { + /// Path to a Ruby project or directory of gems + #[arg(value_name = "PATH")] + path: String, + + /// RNG seed for deterministic file selection + #[arg(long, default_value = "42")] + seed: u64, + + /// Number of delete/re-add cycles + #[arg(long, default_value = "5")] + cycles: usize, + + /// Bisect a failing round to find the minimum reproduction (deleted + indexed files) + #[arg(long)] + bisect: bool, + + /// File listing URIs to skip during indexing (one per line, grown by --bisect) + #[arg(long, value_name = "FILE")] + skip_file: Option, + + /// Number of retries per bisect step (for non-deterministic failures) + #[arg(long, default_value = "1")] + retries: usize, +} + +/// Delete percentage (hardcoded for now) +const DELETE_PCT: usize = 10; +/// Maximum files to delete per cycle +const MAX_DELETE: usize = 30; + +struct GraphCounts { + declarations: usize, + definitions: usize, + names: usize, + documents: usize, + constant_references: usize, + method_references: usize, +} + +impl GraphCounts { + fn from_graph(graph: &Graph) -> Self { + Self { + declarations: graph.declarations().len(), + definitions: graph.definitions().len(), + names: graph.names().len(), + documents: graph.documents().len(), + constant_references: graph.constant_references().len(), + method_references: graph.method_references().len(), + } + } + + fn compare(&self, other: &GraphCounts) -> Vec { + let checks: &[(&str, usize, usize)] = &[ + ("declarations", self.declarations, other.declarations), + ("definitions", self.definitions, other.definitions), + ("names", self.names, other.names), + ("documents", self.documents, other.documents), + ( + "constant_references", + self.constant_references, + other.constant_references, + ), + ("method_references", self.method_references, other.method_references), + ]; + + let mut mismatches = Vec::new(); + for &(name, incremental, fresh) in checks { + if incremental != fresh { + let 
diff = incremental.cast_signed() - fresh.cast_signed(); + mismatches.push(format!( + "{name}: incremental={incremental}, fresh={fresh} (diff={diff:+})", + )); + } + } + mismatches + } +} + +impl std::fmt::Display for GraphCounts { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{} declarations, {} definitions, {} names, {} documents, {} constant_refs, {} method_refs", + self.declarations, + self.definitions, + self.names, + self.documents, + self.constant_references, + self.method_references, + ) + } +} + +fn resolve(graph: &mut Graph) { + Resolver::new(graph).resolve(); +} + +/// Deterministic file selection using xxhash. Each file is hashed with the seed, +/// then files are sorted by hash and the first N are selected. +fn select_files_to_delete(file_count: usize, seed: u64) -> Vec { + let n = ((file_count * DELETE_PCT) / 100).clamp(1, MAX_DELETE); + let mut scored: Vec<(u64, usize)> = (0..file_count) + .map(|i| { + let hash = xxhash_rust::xxh3::xxh3_64_with_seed(&i.to_le_bytes(), seed); + (hash, i) + }) + .collect(); + scored.sort_by_key(|(hash, _)| *hash); + let mut indices: Vec = scored.into_iter().take(n).map(|(_, i)| i).collect(); + indices.sort_unstable(); + indices +} + +/// Check for dangling definition IDs — definitions referenced by declarations but not in the graph. +fn check_dangling_definitions(graph: &Graph) -> Vec { + let mut dangles = Vec::new(); + for decl in graph.declarations().values() { + for def_id in decl.definitions() { + if !graph.definitions().contains_key(def_id) { + dangles.push(format!( + "Declaration `{}` (kind={}) references missing def {def_id:?}", + decl.name(), + decl.kind(), + )); + } + } + } + dangles +} + +/// Check which definitions in a document don't map to a declaration. +fn check_unmapped_definitions(graph: &Graph, uri: &str) -> Vec { + let uri_id = rubydex::model::ids::UriId::from(uri); + let Some(document) = graph.documents().get(&uri_id) else { + return vec![format!("Document {uri} not found")]; + }; + + let mut unmapped = Vec::new(); + for def_id in document.definitions() { + let decl_id = graph.definition_id_to_declaration_id(*def_id); + if decl_id.is_none() { + let def = graph.definitions().get(def_id).unwrap(); + unmapped.push(format!(" {def_id:?} ({}) → no declaration", def.kind())); + } + } + unmapped +} + +/// Result of comparing incremental vs fresh graphs. +struct RoundResult { + counts: GraphCounts, + integrity_errors: Vec, + extras: Vec, + missing: Vec, +} + +/// Run a single round: index → resolve → delete → resolve → re-add → resolve. +/// When `indexed_indices` is Some, only those files are indexed (both incremental and fresh). +/// When None, all files are indexed. +fn run_round( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + indexed_indices: Option<&[usize]>, + verbose: bool, +) -> RoundResult { + let all_indices: Vec = (0..files.len()).collect(); + let active_indices = indexed_indices.unwrap_or(&all_indices); + + let mut incremental = Graph::new(); + for &i in active_indices { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut incremental, uri, source, lang); + } + resolve(&mut incremental); + + if verbose { + for &i in delete_indices { + let unmapped = check_unmapped_definitions(&incremental, &files[i].0); + if !unmapped.is_empty() { + println!(" Unmapped definitions in {} ({}):", files[i].0, unmapped.len()); + for u in unmapped.iter().take(5) { + println!(" {u}"); + } + if unmapped.len() > 5 { + println!(" ... 
and {} more", unmapped.len() - 5); + } + } + } + } + + for &i in delete_indices { + incremental.delete_document(&files[i].0); + } + + if verbose { + let dangles = check_dangling_definitions(&incremental); + if !dangles.is_empty() { + println!(" Dangling definitions after delete ({}):", dangles.len()); + for d in dangles.iter().take(10) { + println!(" {d}"); + } + } + } + + if verbose { + println!(" Resolving after delete..."); + } + resolve(&mut incremental); + + for &i in delete_indices { + indexing::index_source(&mut incremental, &files[i].0, &files[i].1, &files[i].2); + } + + if verbose { + println!(" Resolving after re-index..."); + } + resolve(&mut incremental); + + let counts = GraphCounts::from_graph(&incremental); + let integrity_errors = integrity::check_integrity(&incremental); + + let mut fresh = Graph::new(); + for &i in active_indices { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut fresh, uri, source, lang); + } + resolve(&mut fresh); + + let mut missing: Vec = Vec::new(); + for (decl_id, decl) in fresh.declarations() { + if !incremental.declarations().contains_key(decl_id) { + let owner_name = fresh.declarations().get(decl.owner_id()).map(|d| d.name().to_string()); + let owner_exists_in_inc = incremental.declarations().contains_key(decl.owner_id()); + let owner_has_singleton_in_inc = if owner_exists_in_inc { + incremental + .declarations() + .get(decl.owner_id()) + .and_then(|d| d.as_namespace()) + .and_then(|ns| ns.singleton_class()) + .is_some() + } else { + false + }; + missing.push(format!( + "{} ({}) owner={} in_inc={owner_exists_in_inc} has_singleton={owner_has_singleton_in_inc}", + decl.name(), + decl.kind(), + owner_name.as_deref().unwrap_or("?"), + )); + } + } + missing.sort(); + + let mut extras: Vec = Vec::new(); + for (decl_id, decl) in incremental.declarations() { + if !fresh.declarations().contains_key(decl_id) { + extras.push(format!("{} ({})", decl.name(), decl.kind())); + } + } + extras.sort(); + + RoundResult { + counts, + integrity_errors, + extras, + missing, + } +} + +/// Returns true if the given configuration causes a mismatch or panic. +/// Runs up to `retries` times — returns true if ANY attempt fails. +fn round_fails( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + indexed_indices: Option<&[usize]>, + retries: usize, +) -> bool { + for _ in 0..retries { + let result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + run_round(files, delete_indices, indexed_indices, false) + })); + let failed = match result { + Ok(r) => !r.extras.is_empty() || !r.missing.is_empty() || !r.integrity_errors.is_empty(), + Err(_) => true, + }; + if failed { + return true; + } + } + false +} + +// --- Skip file I/O --- + +fn read_skip_file(path: &str) -> HashSet { + let Ok(content) = std::fs::read_to_string(path) else { + return HashSet::new(); + }; + content.lines().filter(|l| !l.is_empty()).map(String::from).collect() +} + +fn write_skip_file(path: &str, skipped_uris: &HashSet) { + let mut sorted: Vec<&str> = skipped_uris.iter().map(String::as_str).collect(); + sorted.sort_unstable(); + let mut file = std::fs::File::create(path).expect("failed to write skip file"); + for uri in sorted { + writeln!(file, "{uri}").expect("failed to write skip file"); + } +} + +/// Compute indices of files NOT in the skip set. 
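/// The skip set holds document URIs (`files[i].0`); the returned vec holds
/// indices into `files` for everything that should still be indexed.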
+fn compute_indexed_indices(files: &[(String, String, LanguageId)], skipped: &HashSet) -> Vec { + (0..files.len()).filter(|&i| !skipped.contains(&files[i].0)).collect() +} + +// --- Delta debugging --- + +/// Delta-debugging: find the minimum subset of `initial` that still triggers `fails`. +/// Elements in `protected` are never removed. Uses chunked removal (halves → quarters → ...) +/// before falling back to one-at-a-time for efficiency with large sets. +/// Calls `on_progress` after each successful removal for persistence. +fn minimize_set( + initial: &[usize], + protected: &HashSet, + label: &str, + files: &[(String, String, LanguageId)], + mut fails: F, + mut on_progress: P, +) -> Vec +where + F: FnMut(&[usize]) -> bool, + P: FnMut(&[usize]), +{ + let mut current = initial.to_vec(); + let removable_count = current.iter().filter(|i| !protected.contains(i)).count(); + + println!( + "\nMinimizing {removable_count} {label} ({} protected)...\n", + protected.len() + ); + + if removable_count == 0 { + return current; + } + + // Chunked removal: try removing halves, then quarters, etc. + let mut chunk_size = removable_count / 2; + while chunk_size >= 1 { + let mut changed_this_pass = true; + while changed_this_pass { + changed_this_pass = false; + let removable: Vec = current.iter().copied().filter(|i| !protected.contains(i)).collect(); + + let mut offset = 0; + while offset < removable.len() { + let chunk_end = (offset + chunk_size).min(removable.len()); + let to_remove: HashSet = removable[offset..chunk_end].iter().copied().collect(); + let candidate: Vec = current.iter().copied().filter(|i| !to_remove.contains(i)).collect(); + + let remaining = candidate.iter().filter(|i| !protected.contains(i)).count(); + print!( + " Removing chunk of {} {label} ({remaining} remain)... ", + to_remove.len(), + ); + + if fails(&candidate) { + println!("still fails"); + current = candidate; + changed_this_pass = true; + on_progress(¤t); + break; // recompute removable with updated current + } + println!("passes — keeping"); + offset += chunk_size; + } + } + chunk_size /= 2; + } + + // One-at-a-time pass for stragglers + let mut changed = true; + while changed { + changed = false; + let removable: Vec = current.iter().copied().filter(|i| !protected.contains(i)).collect(); + + for &idx in &removable { + let candidate: Vec = current.iter().copied().filter(|&i| i != idx).collect(); + + let remaining = candidate.iter().filter(|i| !protected.contains(i)).count(); + print!(" Without {} ({remaining} remain)... ", files[idx].0,); + + if fails(&candidate) { + println!("still fails — removing"); + current = candidate; + changed = true; + on_progress(¤t); + break; // restart scan since indices shifted + } + println!("passes — keeping"); + } + } + + current +} + +/// Find minimum set of deleted files that still triggers the failure. +fn bisect_deleted_files( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + retries: usize, +) -> Vec { + minimize_set( + delete_indices, + &HashSet::new(), + "deleted files", + files, + |candidate| round_fails(files, candidate, None, retries), + |_| {}, + ) +} + +/// Run both bisect phases and print a complete minimal reproduction. +/// Phase 2 writes to `skip_file` after each successful removal for resumability. 
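/// An interrupted bisect can therefore be resumed by re-running with the same
/// --skip-file: files already removed in earlier runs stay skipped.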
+fn bisect_and_report( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + skip_file: &str, + retries: usize, + fresh_counts: &GraphCounts, +) { + // Phase 1: minimize deleted files + let min_deleted = bisect_deleted_files(files, delete_indices, retries); + println!( + "\nPhase 1 complete: {} → {} deleted files", + delete_indices.len(), + min_deleted.len() + ); + for &i in &min_deleted { + println!(" {}", files[i].0); + } + + // Phase 2: minimize indexed files by growing the skip file + let existing_skipped = read_skip_file(skip_file); + let mut indexed = compute_indexed_indices(files, &existing_skipped); + + // Verify the failure still reproduces with current skip list + if !round_fails(files, &min_deleted, Some(&indexed), retries) { + println!("\nRound passes with current skip list — cannot bisect indexed files."); + println!("Try clearing {skip_file} and re-running."); + return; + } + + let protected: HashSet = min_deleted.iter().copied().collect(); + + indexed = minimize_set( + &indexed, + &protected, + "indexed files", + files, + |candidate| round_fails(files, &min_deleted, Some(candidate), retries), + |current_indexed| { + // Write skip file: all files NOT in current_indexed + let indexed_set: HashSet = current_indexed.iter().copied().collect(); + let skipped: HashSet = (0..files.len()) + .filter(|i| !indexed_set.contains(i)) + .map(|i| files[i].0.clone()) + .collect(); + write_skip_file(skip_file, &skipped); + }, + ); + + let non_deleted_count = indexed.iter().filter(|i| !min_deleted.contains(i)).count(); + println!( + "\nPhase 2 complete: {} → {} indexed files ({non_deleted_count} surviving + {} deleted)", + files.len(), + indexed.len(), + min_deleted.len() + ); + + // Write final skip file + let indexed_set: HashSet = indexed.iter().copied().collect(); + let final_skipped: HashSet = (0..files.len()) + .filter(|i| !indexed_set.contains(i)) + .map(|i| files[i].0.clone()) + .collect(); + write_skip_file(skip_file, &final_skipped); + println!("Skip file written to {skip_file}"); + + // Final verification + println!("\nFinal verification:"); + let final_result = run_round(files, &min_deleted, Some(&indexed), true); + print_round_result(&final_result, fresh_counts); + + // Print the minimal reproduction + let surviving: Vec = indexed.iter().copied().filter(|i| !min_deleted.contains(i)).collect(); + + println!("\n========================================"); + println!("Minimal reproduction ({} files total):", indexed.len()); + println!("========================================\n"); + + println!("Index {} file(s):", surviving.len()); + for &i in &surviving { + println!(" {}", files[i].0); + } + println!("\nDelete and re-add {} file(s):", min_deleted.len()); + for &i in &min_deleted { + println!(" {}", files[i].0); + } + + println!("\nFile contents:"); + for &i in &indexed { + let role = if min_deleted.contains(&i) { + "DELETE+RE-ADD" + } else { + "INDEX" + }; + println!("\n--- [{role}] {} ---", files[i].0); + for line in files[i].1.lines().take(50) { + println!(" {line}"); + } + let total_lines = files[i].1.lines().count(); + if total_lines > 50 { + println!(" ... 
({total_lines} lines total)"); + } + } +} + +fn print_round_result(result: &RoundResult, fresh_counts: &GraphCounts) { + if !result.missing.is_empty() { + println!(" Missing declarations ({}):", result.missing.len()); + for m in result.missing.iter().take(30) { + println!(" {m}"); + } + } + if !result.extras.is_empty() { + println!(" Extra declarations ({}):", result.extras.len()); + for e in result.extras.iter().take(10) { + println!(" {e}"); + } + } + + let mismatches = result.counts.compare(fresh_counts); + if !mismatches.is_empty() { + println!(" Count mismatches:"); + for m in &mismatches { + println!(" {m}"); + } + println!(" Incremental: {}", result.counts); + println!(" Fresh: {fresh_counts}"); + } + if !result.integrity_errors.is_empty() { + println!(" Integrity errors ({}):", result.integrity_errors.len()); + for error in result.integrity_errors.iter().take(20) { + println!(" - {error}"); + } + } +} + +#[allow(clippy::too_many_lines)] +fn main() { + let args = Args::parse(); + + println!("Listing files..."); + let (file_paths, errors) = listing::collect_file_paths(vec![args.path], &HashSet::new()); + + for error in &errors { + eprintln!("{error}"); + } + + if file_paths.is_empty() { + eprintln!("No files found"); + std::process::exit(1); + } + + let mut file_paths = file_paths; + file_paths.sort(); + // Deterministic shuffle: use seed to permute file order so that different seeds + // explore different IdentityHashMap insertion orders (which affect resolution ordering). + let mut scored: Vec<(u64, _)> = file_paths + .into_iter() + .enumerate() + .map(|(i, p)| { + let hash = xxhash_rust::xxh3::xxh3_64_with_seed(&i.to_le_bytes(), args.seed); + (hash, p) + }) + .collect(); + scored.sort_by_key(|(hash, _)| *hash); + let file_paths: Vec<_> = scored.into_iter().map(|(_, p)| p).collect(); + + let files: Vec<(String, String, LanguageId)> = file_paths + .into_iter() + .filter_map(|path| { + let content = std::fs::read_to_string(&path).ok()?; + let uri = url::Url::from_file_path(&path).ok()?.to_string(); + let ext = path.extension().unwrap_or_default(); + let language_id = LanguageId::from(ext); + Some((uri, content, language_id)) + }) + .collect(); + + println!("Found {} files", files.len()); + + // Apply skip file to determine which files are indexed + let skipped = args.skip_file.as_deref().map_or_else(HashSet::new, read_skip_file); + let indexed_indices = if skipped.is_empty() { + None + } else { + let indices = compute_indexed_indices(&files, &skipped); + println!("Skipping {} files ({} indexed)", skipped.len(), indices.len()); + Some(indices) + }; + println!(); + + // Build fresh baseline (respecting skip list) + println!("Building fresh baseline..."); + let fresh_start = Instant::now(); + let mut fresh = Graph::new(); + let baseline_indices: Vec = indexed_indices.clone().unwrap_or_else(|| (0..files.len()).collect()); + for &i in &baseline_indices { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut fresh, uri, source, lang); + } + resolve(&mut fresh); + let fresh_counts = GraphCounts::from_graph(&fresh); + println!( + "Fresh baseline ({:.2}s): {fresh_counts}\n", + fresh_start.elapsed().as_secs_f64() + ); + + let fresh_integrity_errors = integrity::check_integrity(&fresh); + if !fresh_integrity_errors.is_empty() { + eprintln!( + "WARNING: Fresh graph has {} integrity errors", + fresh_integrity_errors.len() + ); + for error in &fresh_integrity_errors { + eprintln!(" - {error}"); + } + } + + drop(fresh); + + // Select files to delete from the INDEXED set 
(not all files) + let active_count = baseline_indices.len(); + + let mut failed_rounds: Vec = Vec::new(); + + for round in 0..args.cycles { + let round_seed = args.seed.wrapping_add(round as u64); + let selected = select_files_to_delete(active_count, round_seed); + // Map back to global indices + let delete_indices: Vec = selected.iter().map(|&i| baseline_indices[i]).collect(); + + println!("Round {}", round + 1); + println!(" Removing {} files:", delete_indices.len()); + for &i in &delete_indices { + println!(" {}", files[i].0); + } + + let round_start = Instant::now(); + + let idx_ref = indexed_indices.as_deref(); + let round_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + run_round(&files, &delete_indices, idx_ref, true) + })); + + let elapsed = round_start.elapsed().as_secs_f64(); + + match round_result { + Ok(result) => { + let mismatches = result.counts.compare(&fresh_counts); + + if mismatches.is_empty() && result.integrity_errors.is_empty() { + println!(" OK ({elapsed:.2}s): {}", result.counts); + } else { + print_round_result(&result, &fresh_counts); + println!(" FAILED ({elapsed:.2}s)"); + failed_rounds.push(round + 1); + + if args.bisect { + let skip_path = args.skip_file.as_deref().unwrap_or("/tmp/incremental_test_skip"); + if args.skip_file.is_none() { + println!("\nNo --skip-file specified, using {skip_path}"); + } + bisect_and_report(&files, &delete_indices, skip_path, args.retries, &fresh_counts); + break; + } + } + } + Err(panic_info) => { + let msg = if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else if let Some(s) = panic_info.downcast_ref::<&str>() { + (*s).to_string() + } else { + "unknown panic".to_string() + }; + println!(" PANICKED ({elapsed:.2}s): {msg}"); + failed_rounds.push(round + 1); + + if args.bisect { + let skip_path = args.skip_file.as_deref().unwrap_or("/tmp/incremental_test_skip"); + if args.skip_file.is_none() { + println!("\nNo --skip-file specified, using {skip_path}"); + } + bisect_and_report(&files, &delete_indices, skip_path, args.retries, &fresh_counts); + break; + } + } + } + println!(); + } + + if failed_rounds.is_empty() { + println!("All {} rounds passed (seed={}).", args.cycles, args.seed); + } else { + eprintln!( + "FAILED: {} of {} rounds failed (rounds: {failed_rounds:?}, seed={})", + failed_rounds.len(), + args.cycles, + args.seed, + ); + std::process::exit(1); + } +} From dfd1f001eab96903fc706650fe8a84913c32c937 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Tue, 31 Mar 2026 19:48:23 +0100 Subject: [PATCH 2/8] Defer cleanup of declarations emptied during invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During invalidation, declarations can lose all their definitions via two paths that don't immediately remove them: 1. Name cascade (unresolve_dependent_name): a surviving file's definition is detached and re-queued — the declaration may be repopulated after re-resolution. 2. Document data cleanup (remove_document_data): the broad retain sweep detaches definitions that definition_to_declaration_id can't map back (e.g. singleton members like def self.x or @ivar in class body, where the lookup resolves to Foo but the member lives under ). In both cases, immediately removing the declaration would destroy accumulated state (singleton, members) that the resolver can rebuild. Instead, push emptied declarations to pending_declaration_cleanup and run cleanup after resolution — only removing those still empty. 
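In sketch form (simplified; the real changes are in graph.rs and
resolution.rs below):

    // graph.rs, during invalidation — defer instead of removing, since a
    // surviving file may repopulate this declaration on re-resolution:
    self.pending_declaration_cleanup.push(decl_id);

    // resolution.rs, after resolution — remove only what is still empty:
    let candidates = graph.take_pending_declaration_cleanup();
    graph.cleanup_empty_declarations(candidates);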
Add has_no_backing_definitions to Declaration, which combines the synthetic check with the empty-definitions check. Synthetic declarations (SingletonClass, Object, Module, Class) are never cleaned up this way — they're only removed when their owner is removed. --- rust/rubydex/src/model/declaration.rs | 12 +++ rust/rubydex/src/model/graph.rs | 145 ++++++++++++++++++++++---- rust/rubydex/src/resolution.rs | 9 ++ 3 files changed, 145 insertions(+), 21 deletions(-) diff --git a/rust/rubydex/src/model/declaration.rs b/rust/rubydex/src/model/declaration.rs index 64366c99..831d152e 100644 --- a/rust/rubydex/src/model/declaration.rs +++ b/rust/rubydex/src/model/declaration.rs @@ -345,6 +345,18 @@ impl Declaration { all_declarations!(self, it => it.definition_ids.is_empty()) } + /// Returns true if this declaration has no backing definitions and is eligible + /// for removal. Synthetic declarations (singleton classes, bootstrap Object/Module/Class) + /// are never definition-backed — they should only be removed when their owner is removed. + #[must_use] + pub fn has_no_backing_definitions(&self, decl_id: &DeclarationId) -> bool { + let is_synthetic = matches!(self, Declaration::Namespace(Namespace::SingletonClass(_))) + || *decl_id == *super::graph::OBJECT_ID + || *decl_id == *super::graph::MODULE_ID + || *decl_id == *super::graph::CLASS_ID; + !is_synthetic && self.has_no_definitions() + } + pub fn add_definition(&mut self, definition_id: DefinitionId) { all_declarations!(self, it => { debug_assert!( diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index b743d800..1d042dbd 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -87,6 +87,11 @@ pub struct Graph { /// Paths to exclude from file discovery during indexing. excluded_paths: HashSet, + + /// Declarations that became empty during name cascade and may need removal. + /// Deferred until after resolution — if the declaration was repopulated, skip it. + /// Drained by `take_pending_declaration_cleanup()`. + pending_declaration_cleanup: Vec, } impl Graph { @@ -129,6 +134,7 @@ impl Graph { name_dependents: IdentityHashMap::default(), pending_work: Vec::default(), excluded_paths: HashSet::new(), + pending_declaration_cleanup: Vec::default(), } } @@ -628,6 +634,11 @@ impl Graph { std::mem::take(&mut self.pending_work) } + /// Drains declarations that need post-resolution cleanup. + pub(crate) fn take_pending_declaration_cleanup(&mut self) -> Vec { + std::mem::take(&mut self.pending_declaration_cleanup) + } + fn push_work(&mut self, unit: Unit) { self.pending_work.push(unit); } @@ -636,6 +647,28 @@ impl Graph { self.pending_work.extend(units); } + /// Post-resolution cleanup: removes declarations that were emptied during name + /// cascade and never repopulated by re-resolution. Runs `invalidate_graph` on + /// any that are still empty, which cascades to their members and singletons. + /// Post-resolution cleanup: removes declarations that were emptied during + /// invalidation cascade and never repopulated by re-resolution. Cascades + /// to members, singletons, and descendants via `invalidate_graph`. 
+ pub(crate) fn cleanup_empty_declarations(&mut self, candidates: Vec) { + let items: Vec = candidates + .into_iter() + .filter(|decl_id| { + self.declarations + .get(decl_id) + .is_some_and(Declaration::has_no_definitions) + }) + .map(InvalidationItem::Declaration) + .collect(); + + if !items.is_empty() { + self.invalidate_graph(items, IdentityHashMap::default()); + } + } + /// Converts a `Resolved` `NameRef` back to `Unresolved`, preserving the original `Name` data. /// Returns the `DeclarationId` it was previously resolved to, if any. fn unresolve_name(&mut self, name_id: NameId) -> Option { @@ -1038,10 +1071,14 @@ impl Graph { .collect(); if !missed_def_ids.is_empty() { - for declaration in self.declarations.values_mut() { + for (decl_id, declaration) in &mut self.declarations { + let had_definitions = !declaration.definitions().is_empty(); for def_id in &missed_def_ids { declaration.remove_definition(def_id); } + if had_definitions && declaration.has_no_backing_definitions(decl_id) { + self.pending_declaration_cleanup.push(*decl_id); + } } } @@ -1056,8 +1093,11 @@ impl Graph { } } - /// Unified invalidation worklist. Processes declaration and name items in a single loop, + /// Invalidation worklist. Processes declaration, name, and reference items in a loop, /// where processing one item can push new items back onto the queue. + /// + /// `pending_detachments` maps declarations to definitions that need to be detached + /// before deciding whether to remove or update each declaration. fn invalidate_graph( &mut self, items: Vec, @@ -1125,10 +1165,13 @@ impl Graph { let Some(decl) = self.declarations.get(&decl_id) else { return; }; - let should_remove = decl.has_no_definitions() || !self.declarations.contains_key(decl.owner_id()); + let should_remove = + decl.has_no_backing_definitions(&decl_id) || !self.declarations.contains_key(decl.owner_id()); if should_remove { - // Queue members + singleton for removal + // Remove path: definitions came from a deleted/changed file and won't + // re-resolve to this declaration. Removal must be immediate so child + // declarations see a missing owner and cascade correctly. if let Some(ns) = decl.as_namespace() { if let Some(singleton_id) = ns.singleton_class() { queue.push(InvalidationItem::Declaration(*singleton_id)); @@ -1236,12 +1279,18 @@ impl Graph { decl.remove_definition(def_id); } + // Defer cleanup: unlike the remove path in invalidate_declaration + // (where definitions are from a deleted file and won't return), + // this definition is from a surviving file — it was re-queued and + // will likely re-resolve to the same declaration. Removing now + // would destroy accumulated state (singleton, members) that can't + // be rebuilt. Post-resolution cleanup removes it if still empty. 
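+                        // Example: deleting a reopening `class Foo; end` must not drop a
+                        // singleton that a surviving `def self.bar` populated (see the
+                        // `singleton_class_survives_when_reopener_is_deleted` test below).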
if self .declarations .get(&old_decl_id) - .is_some_and(Declaration::has_no_definitions) + .is_some_and(|decl| decl.has_no_backing_definitions(&old_decl_id)) { - queue.push(InvalidationItem::Declaration(old_decl_id)); + self.pending_declaration_cleanup.push(old_decl_id); } } NameDependent::ChildName(_) | NameDependent::NestedName(_) => {} @@ -3670,6 +3719,58 @@ mod incremental_resolution_tests { assert_no_dangling_definitions(context.graph()); } + #[test] + fn singleton_class_survives_when_reopener_is_deleted() { + let mut context = GraphTest::new(); + context.index_uri("file:///foo.rb", "class Foo; def self.bar; end; end"); + context.index_uri("file:///reopener.rb", "class Foo; end"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + + context.delete_uri("file:///reopener.rb"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + } + + #[test] + fn class_inside_singleton_scope_instance_methods_only_survives_cascade() { + let mut context = GraphTest::new(); + + context.index_uri( + "file:///main.rb", + r" + module Outer + class << self + class Inner + def initialize; end + end + + def run + Inner.new + end + end + end + ", + ); + context.index_uri("file:///reopener.rb", "module Outer; end"); + context.resolve(); + + assert_declaration_exists!(context, "Outer"); + assert_declaration_exists!(context, "Outer::::Inner"); + assert_declaration_exists!(context, "Outer::::Inner::"); + + context.delete_uri("file:///reopener.rb"); + context.resolve(); + + assert_declaration_exists!(context, "Outer"); + assert_declaration_exists!(context, "Outer::::Inner"); + assert_declaration_exists!(context, "Outer::::Inner::"); + } + #[test] fn singleton_class_preserved_after_delete_and_reindex() { let mut context = GraphTest::new(); @@ -3692,27 +3793,29 @@ mod incremental_resolution_tests { } #[test] - fn singleton_recreated_when_reference_nested_in_compact_class() { + fn singleton_members_cleaned_up_after_file_deletion() { let mut context = GraphTest::new(); - - context.index_uri("file:///parent.rb", "module Parent; end"); - context.index_uri("file:///target.rb", "class Parent::Target; end"); - context.index_uri("file:///caller.rb", "class Parent::Caller; Parent::Target.new; end"); + context.index_uri( + "file:///a.rb", + " + class Foo + @x = 1 + def self.bar; end + end + ", + ); + context.index_uri("file:///b.rb", "class Foo; end"); context.resolve(); - assert_declaration_exists!(context, "Parent::Target"); - assert_declaration_exists!(context, "Parent::Target::"); + assert_declaration_exists!(context, "Foo::#@x"); + assert_declaration_exists!(context, "Foo::#bar()"); - context.delete_uri("file:///parent.rb"); - context.delete_uri("file:///target.rb"); - context.resolve(); - - context.index_uri("file:///parent.rb", "module Parent; end"); - context.index_uri("file:///target.rb", "class Parent::Target; end"); + context.delete_uri("file:///a.rb"); context.resolve(); - assert_declaration_exists!(context, "Parent::Target"); - assert_declaration_exists!(context, "Parent::Target::"); + assert_declaration_exists!(context, "Foo"); + assert_declaration_does_not_exist!(context, "Foo::#@x"); + assert_declaration_does_not_exist!(context, "Foo::#bar()"); } #[test] diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs index 7ba9f498..d750be0b 100644 --- a/rust/rubydex/src/resolution.rs +++ b/rust/rubydex/src/resolution.rs @@ -124,6 +124,15 @@ impl<'a> Resolver<'a> { 
self.graph.extend_work(std::mem::take(&mut self.unit_queue)); self.handle_remaining_definitions(other_ids); + + // Post-resolution cleanup: declarations emptied during name cascade + // (unresolve_dependent_name) and never repopulated should be removed. + let mut cleanup_candidates = self.graph.take_pending_declaration_cleanup(); + if !cleanup_candidates.is_empty() { + cleanup_candidates.sort_unstable(); + cleanup_candidates.dedup(); + self.graph.cleanup_empty_declarations(cleanup_candidates); + } } /// Resolves a single constant against the graph. This method is not meant to be used by the resolution phase, but by From 5da9af2497b29a5127bb643213a9e7cbccecb072 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Thu, 2 Apr 2026 12:54:18 +0100 Subject: [PATCH 3/8] Clean up orphan Todo declarations after incremental resolution During delete-phase resolution, create_todo_for_parent can create placeholder declarations for temporarily unresolvable parents. After re-add, the real declarations resolve under their correct FQN but the orphan Todos persist with the wrong FQN. Reuse the existing pending_declaration_cleanup mechanism: Todos created by create_todo_for_parent are pushed to pending_declaration_cleanup. At the start of each resolve(), the vec is snapshotted and drained. At the end, cleanup_empty_declarations removes candidates that are still empty. Todos from the current cycle survive (they may be legitimate placeholders); Todos from previous cycles are cleaned up. --- rust/rubydex/src/model/graph.rs | 85 +++++++++++++++++++++++++++++---- rust/rubydex/src/resolution.rs | 17 +++---- 2 files changed, 86 insertions(+), 16 deletions(-) diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index 1d042dbd..c3ab95e6 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -647,13 +647,20 @@ impl Graph { self.pending_work.extend(units); } - /// Post-resolution cleanup: removes declarations that were emptied during name - /// cascade and never repopulated by re-resolution. Runs `invalidate_graph` on - /// any that are still empty, which cascades to their members and singletons. - /// Post-resolution cleanup: removes declarations that were emptied during - /// invalidation cascade and never repopulated by re-resolution. Cascades - /// to members, singletons, and descendants via `invalidate_graph`. - pub(crate) fn cleanup_empty_declarations(&mut self, candidates: Vec) { + pub(crate) fn mark_declaration_for_cleanup(&mut self, decl_id: DeclarationId) { + self.pending_declaration_cleanup.push(decl_id); + } + + /// Post-resolution cleanup: removes declarations that are still empty + /// (no definitions). Cascades to members, singletons, and descendants + /// via `invalidate_graph`. 
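+    /// Candidates are sorted and deduplicated here, so callers may safely push
+    /// the same declaration id more than once across a cycle.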
+ pub(crate) fn cleanup_empty_declarations(&mut self, mut candidates: Vec) { + if candidates.is_empty() { + return; + } + candidates.sort_unstable(); + candidates.dedup(); + let items: Vec = candidates .into_iter() .filter(|decl_id| { @@ -2390,7 +2397,8 @@ mod incremental_resolution_tests { use crate::{ assert_alias_targets_contain, assert_ancestors_eq, assert_constant_reference_to, assert_constant_reference_unresolved, assert_declaration_does_not_exist, assert_declaration_exists, - assert_declaration_references_count_eq, assert_members_eq, assert_no_constant_alias_target, + assert_declaration_kind_eq, assert_declaration_references_count_eq, assert_members_eq, + assert_no_constant_alias_target, }; const NO_ANCESTORS: [&str; 0] = []; @@ -3841,4 +3849,65 @@ mod incremental_resolution_tests { assert_declaration_exists!(context, "Foo::#run()"); assert_declaration_exists!(context, "Foo#run()"); } + + /// Regression test derived from Rails via `--bisect --skip-file`. + /// Deleting an unrelated file triggers re-resolution that creates an orphan + /// `` declaration at the wrong scope. The bug depends on resolution + /// ordering (hash-based), so the specific definitions and `class << self` + /// block are required to trigger it. + #[test] + fn no_orphan_todo_for_compact_nested_class_after_unrelated_delete() { + let mut incremental = GraphTest::new(); + incremental.index_uri( + "file:///a.rb", + "module A; module B; class C; end; def self.x; C.x; end; end; end", + ); + incremental.index_uri( + "file:///b.rb", + "module A; module B; class C; def y; D.y; end; end; end; end", + ); + incremental.index_uri( + "file:///c.rb", + "module A; module B; class C; class D; class << self; def y; end; end; end; end; end; end", + ); + incremental.index_uri("file:///d.rb", "module A; end"); + incremental.resolve(); + + assert_declaration_exists!(incremental, "A::B::C::D"); + + incremental.delete_uri("file:///d.rb"); + incremental.resolve(); + + incremental.index_uri("file:///d.rb", "module A; end"); + incremental.resolve(); + + let mut fresh = GraphTest::new(); + fresh.index_uri( + "file:///a.rb", + "module A; module B; class C; end; def self.x; C.x; end; end; end", + ); + fresh.index_uri( + "file:///b.rb", + "module A; module B; class C; def y; D.y; end; end; end; end", + ); + fresh.index_uri( + "file:///c.rb", + "module A; module B; class C; class D; class << self; def y; end; end; end; end; end; end", + ); + fresh.index_uri("file:///d.rb", "module A; end"); + fresh.resolve(); + + let extras: Vec<_> = incremental + .graph() + .declarations() + .iter() + .filter(|(id, _)| !fresh.graph().declarations().contains_key(id)) + .map(|(_, d)| format!("{} ({})", d.name(), d.kind())) + .collect(); + + assert!( + extras.is_empty(), + "Orphan declarations after unrelated file delete: {extras:?}" + ); + } } // mod incremental_resolution_tests diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs index d750be0b..cb464194 100644 --- a/rust/rubydex/src/resolution.rs +++ b/rust/rubydex/src/resolution.rs @@ -83,6 +83,12 @@ impl<'a> Resolver<'a> { /// /// Can panic if there's inconsistent data in the graph pub fn resolve(&mut self) { + // Snapshot cleanup candidates from previous cycles. This includes both: + // - Declarations emptied during invalidation cascade (from unresolve_dependent_name / remove_document_data) + // - Orphan Todos from a previous resolve (from create_todo_for_parent) + // Items added during THIS resolve survive in the vec for the next cycle. 
+ let cleanup_candidates = self.graph.take_pending_declaration_cleanup(); + let other_ids = self.prepare_units(); loop { @@ -125,14 +131,8 @@ impl<'a> Resolver<'a> { self.handle_remaining_definitions(other_ids); - // Post-resolution cleanup: declarations emptied during name cascade - // (unresolve_dependent_name) and never repopulated should be removed. - let mut cleanup_candidates = self.graph.take_pending_declaration_cleanup(); - if !cleanup_candidates.is_empty() { - cleanup_candidates.sort_unstable(); - cleanup_candidates.dedup(); - self.graph.cleanup_empty_declarations(cleanup_candidates); - } + // Post-resolution cleanup: remove declarations that are still empty. + self.graph.cleanup_empty_declarations(cleanup_candidates); } /// Resolves a single constant against the graph. This method is not meant to be used by the resolution phase, but by @@ -1162,6 +1162,7 @@ impl<'a> Resolver<'a> { parent_owner_id, ))))); self.graph.add_member(&parent_owner_id, declaration_id, parent_str_id); + self.graph.mark_declaration_for_cleanup(declaration_id); } declaration_id From 7997c35f08b0016939dfe659119186257772add4 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Mon, 6 Apr 2026 22:13:23 +0100 Subject: [PATCH 4/8] Preserve populated synthetic declarations during post-resolution cleanup cleanup_empty_declarations removes declarations with no definitions after resolution. This correctly handles orphan TODOs and singletons that were never populated, but incorrectly removes those that have members. Two issues fixed: 1. TODO declarations created by create_todo_for_parent during a previous resolve cycle get cleaned up even after being re-populated with members during re-resolution. This caused compact-notation ChildName chains (e.g., Google::Cloud::V2::Type) to lose their intermediate namespaces after delete+re-add of a shared ancestor. 2. Singleton classes created by get_or_create_singleton_class during incremental re-resolution were never marked for cleanup, so empty singletons (created speculatively during broken ancestor chains) survived indefinitely. Now singletons are marked for cleanup at creation; empty ones are removed post-resolution. Fix: skip TODOs and singleton classes that have members in the cleanup filter, and mark newly created singletons for cleanup. --- rust/rubydex/src/model/graph.rs | 96 +++++++++++++++++++++++++++++++-- rust/rubydex/src/resolution.rs | 6 +++ 2 files changed, 99 insertions(+), 3 deletions(-) diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index c3ab95e6..7f431554 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -664,9 +664,18 @@ impl Graph { let items: Vec = candidates .into_iter() .filter(|decl_id| { - self.declarations - .get(decl_id) - .is_some_and(Declaration::has_no_definitions) + self.declarations.get(decl_id).is_some_and(|decl| { + // Skip synthetic declarations that acquired members during + // resolution — they're actively serving as namespace parents. + // TODOs from create_todo_for_parent and singletons from + // get_or_create_singleton_class are both definition-less but + // may hold child declarations or methods. 
+ let is_synthetic_with_members = matches!( + decl, + Declaration::Namespace(Namespace::Todo(_) | Namespace::SingletonClass(_)) + ) && decl.as_namespace().is_some_and(|ns| !ns.members().is_empty()); + decl.has_no_definitions() && !is_synthetic_with_members + }) }) .map(InvalidationItem::Declaration) .collect(); @@ -3910,4 +3919,85 @@ mod incremental_resolution_tests { "Orphan declarations after unrelated file delete: {extras:?}" ); } + + /// Regression test from Core via `--bisect`: compact-notation class with deep + /// nesting loses declarations after deleting and re-adding a file that defines + /// a shared ancestor. The TODO placeholders created by `create_todo_for_parent` + /// were being cleaned up by `cleanup_empty_declarations` from a previous cycle + /// even though they now hold members. + #[test] + fn compact_class_declarations_restored_after_shared_ancestor_delete_readd() { + let a = "module Google; end; class Google::Protobuf; end"; + let b = "class Google::Cloud::V2::Type < Google::Protobuf; end"; + + let mut incremental = GraphTest::new(); + incremental.index_uri("file:///a.rb", a); + incremental.index_uri("file:///b.rb", b); + incremental.resolve(); + + assert_declaration_exists!(incremental, "Google::Cloud::V2::Type"); + + incremental.delete_uri("file:///a.rb"); + incremental.resolve(); + + incremental.index_uri("file:///a.rb", a); + incremental.resolve(); + + assert_declaration_exists!(incremental, "Google"); + assert_declaration_exists!(incremental, "Google::Protobuf"); + assert_declaration_exists!(incremental, "Google::Cloud::V2::Type"); + } + + /// Regression test from Core via `--bisect`: compact-notation class with + /// `class << self` creates a singleton on a TODO placeholder. Double + /// resolve must not leak extra singleton declarations. 
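+    /// (The leak shows up as an extra singleton declaration in the incremental
+    /// graph that a fresh build never creates.)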
+ #[test] + fn no_leaked_singleton_on_todo_after_double_resolve() { + // Matches bisect: ActiveRecord::SessionStore::Session with class << self + // + module ActiveRecord from errors.rb + activesupport rbi + let session_rbi = r#" + class ActiveRecord::SessionStore::Session + class << self + def new(attributes = nil); end + end + end + "#; + let errors_rb = r#" + module ActiveRecord + class ActiveRecordError < StandardError; end + end + "#; + + let mut incremental = GraphTest::new(); + incremental.index_uri("file:///session.rbi", session_rbi); + incremental.index_uri("file:///errors.rb", errors_rb); + incremental.resolve(); + incremental.resolve(); + + let mut fresh = GraphTest::new(); + fresh.index_uri("file:///session.rbi", session_rbi); + fresh.index_uri("file:///errors.rb", errors_rb); + fresh.resolve(); + + let extras: Vec<_> = incremental + .graph() + .declarations() + .iter() + .filter(|(id, _)| !fresh.graph().declarations().contains_key(id)) + .map(|(_, d)| format!("{} ({})", d.name(), d.kind())) + .collect(); + + let missing: Vec<_> = fresh + .graph() + .declarations() + .iter() + .filter(|(id, _)| !incremental.graph().declarations().contains_key(id)) + .map(|(_, d)| format!("{} ({})", d.name(), d.kind())) + .collect(); + + assert!( + extras.is_empty() && missing.is_empty(), + "Declaration mismatch after double resolve:\n Extra: {extras:?}\n Missing: {missing:?}" + ); + } } // mod incremental_resolution_tests diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs index cb464194..bfcaf9ae 100644 --- a/rust/rubydex/src/resolution.rs +++ b/rust/rubydex/src/resolution.rs @@ -682,6 +682,12 @@ impl<'a> Resolver<'a> { )))), ); + // Mark for post-resolution cleanup: if the singleton stays empty + // (no members added during resolution), it was created speculatively + // and should be removed. This handles singletons created during + // incremental re-resolution when ancestor chains are temporarily broken. + self.graph.mark_declaration_for_cleanup(decl_id); + // Queue ancestor linearization so the singleton's ancestor chain is // built — this cascades upward via singleton_parent_id, creating // parent singletons as needed. From 569d2ef8374c3c9ae479b94fb980d2d975305427 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Tue, 7 Apr 2026 11:19:43 +0100 Subject: [PATCH 5/8] Add --trace-declaration flag for debugging incremental resolution Adds a TraceEvent callback system to Graph that traces declaration creation and removal events. The incremental test CLI exposes this via --trace-declaration , which logs matching events across all 4 resolve phases (initial, post-delete, post-re-add, fresh). This enables debugging cases where bisect can't minimize further because the divergence depends on global resolution ordering. 
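Roughly how the hook is wired (using only names introduced in this patch;
the CLI's set_trace_for_phase does the same, tagging each event with the
current resolve phase):

    let mut graph = Graph::new();
    graph.set_trace(|event| {
        let name = match &event {
            TraceEvent::Created { name, .. } | TraceEvent::Removed { name, .. } => name,
        };
        if name.contains("Foo") {
            // Display prints e.g. "CREATED Foo (Class) by add_declaration"
            eprintln!("[TRACE] {event}");
        }
    });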
--- rust/rubydex/src/bin/incremental_test.rs | 39 +++++++++- rust/rubydex/src/model/graph.rs | 98 +++++++++++++++++++++++- rust/rubydex/src/resolution.rs | 14 +++- 3 files changed, 143 insertions(+), 8 deletions(-) diff --git a/rust/rubydex/src/bin/incremental_test.rs b/rust/rubydex/src/bin/incremental_test.rs index d8cdfc66..1b730d95 100644 --- a/rust/rubydex/src/bin/incremental_test.rs +++ b/rust/rubydex/src/bin/incremental_test.rs @@ -8,7 +8,7 @@ use clap::Parser; use rubydex::{ indexing::{self, LanguageId}, integrity, listing, - model::graph::Graph, + model::graph::{Graph, TraceEvent}, resolution::Resolver, }; @@ -41,6 +41,10 @@ struct Args { /// Number of retries per bisect step (for non-deterministic failures) #[arg(long, default_value = "1")] retries: usize, + + /// Trace creation/removal of declarations whose name contains PATTERN + #[arg(long, value_name = "PATTERN")] + trace_declaration: Option, } /// Delete percentage (hardcoded for now) @@ -166,6 +170,19 @@ fn check_unmapped_definitions(graph: &Graph, uri: &str) -> Vec { unmapped } +fn set_trace_for_phase(graph: &mut Graph, pattern: &str, phase: &str) { + let pat = pattern.to_string(); + let phase = phase.to_string(); + graph.set_trace(move |event| { + let name = match &event { + TraceEvent::Created { name, .. } | TraceEvent::Removed { name, .. } => name, + }; + if name.contains(pat.as_str()) { + eprintln!("[TRACE {phase}] {event}"); + } + }); +} + /// Result of comparing incremental vs fresh graphs. struct RoundResult { counts: GraphCounts, @@ -182,6 +199,7 @@ fn run_round( delete_indices: &[usize], indexed_indices: Option<&[usize]>, verbose: bool, + trace_pattern: Option<&str>, ) -> RoundResult { let all_indices: Vec = (0..files.len()).collect(); let active_indices = indexed_indices.unwrap_or(&all_indices); @@ -191,6 +209,9 @@ fn run_round( let (ref uri, ref source, ref lang) = files[i]; indexing::index_source(&mut incremental, uri, source, lang); } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut incremental, pat, "initial"); + } resolve(&mut incremental); if verbose { @@ -225,6 +246,9 @@ fn run_round( if verbose { println!(" Resolving after delete..."); } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut incremental, pat, "post-delete"); + } resolve(&mut incremental); for &i in delete_indices { @@ -234,6 +258,9 @@ fn run_round( if verbose { println!(" Resolving after re-index..."); } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut incremental, pat, "post-re-add"); + } resolve(&mut incremental); let counts = GraphCounts::from_graph(&incremental); @@ -244,6 +271,9 @@ fn run_round( let (ref uri, ref source, ref lang) = files[i]; indexing::index_source(&mut fresh, uri, source, lang); } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut fresh, pat, "fresh"); + } resolve(&mut fresh); let mut missing: Vec = Vec::new(); @@ -297,7 +327,7 @@ fn round_fails( ) -> bool { for _ in 0..retries { let result = panic::catch_unwind(panic::AssertUnwindSafe(|| { - run_round(files, delete_indices, indexed_indices, false) + run_round(files, delete_indices, indexed_indices, false, None) })); let failed = match result { Ok(r) => !r.extras.is_empty() || !r.missing.is_empty() || !r.integrity_errors.is_empty(), @@ -508,7 +538,7 @@ fn bisect_and_report( // Final verification println!("\nFinal verification:"); - let final_result = run_round(files, &min_deleted, Some(&indexed), true); + let final_result = run_round(files, &min_deleted, Some(&indexed), true, None); print_round_result(&final_result, 
fresh_counts); // Print the minimal reproduction @@ -680,8 +710,9 @@ fn main() { let round_start = Instant::now(); let idx_ref = indexed_indices.as_deref(); + let trace_pat = args.trace_declaration.as_deref(); let round_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { - run_round(&files, &delete_indices, idx_ref, true) + run_round(&files, &delete_indices, idx_ref, true, trace_pat) })); let elapsed = round_start.elapsed().as_secs_f64(); diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index 7f431554..2cce4760 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; use std::collections::hash_map::Entry; +use std::fmt; use std::path::PathBuf; use std::sync::LazyLock; @@ -16,6 +17,34 @@ use crate::model::references::{ConstantReference, MethodRef}; use crate::model::string_ref::StringRef; use crate::stats; +/// Events emitted by the graph when declarations are created or removed. +/// Used for debugging and tracing declaration lifecycle. +pub enum TraceEvent<'a> { + Created { + name: &'a str, + kind: &'static str, + caller: &'static str, + }, + Removed { + name: &'a str, + kind: &'static str, + caller: &'static str, + }, +} + +impl fmt::Display for TraceEvent<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TraceEvent::Created { name, kind, caller } => { + write!(f, "CREATED {name} ({kind}) by {caller}") + } + TraceEvent::Removed { name, kind, caller } => { + write!(f, "REMOVED {name} ({kind}) by {caller}") + } + } + } +} + /// An entity whose validity depends on a particular `NameId`. /// Used as the value type in the `name_dependents` reverse index. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -56,7 +85,7 @@ pub enum Unit { // The `Graph` is the global representation of the entire Ruby codebase. It contains all declarations and their // relationships -#[derive(Default, Debug)] +#[derive(Default)] pub struct Graph { // Map of declaration nodes declarations: IdentityHashMap, @@ -92,6 +121,10 @@ pub struct Graph { /// Deferred until after resolution — if the declaration was repopulated, skip it. /// Drained by `take_pending_declaration_cleanup()`. pending_declaration_cleanup: Vec, + + /// Optional callback for tracing declaration lifecycle events (creation/removal). 
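+    /// Installed via `set_trace`; `emit_trace` invokes it synchronously.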
+ #[allow(clippy::type_complexity)] + trace_fn: Option)>>, } impl Graph { @@ -122,7 +155,7 @@ impl Graph { )))), ); - Self { + let graph = Self { declarations, definitions: IdentityHashMap::default(), documents: IdentityHashMap::default(), @@ -135,6 +168,35 @@ impl Graph { pending_work: Vec::default(), excluded_paths: HashSet::new(), pending_declaration_cleanup: Vec::default(), + trace_fn: None, + }; + + graph.emit_trace(TraceEvent::Created { + name: "Object", + kind: "Class", + caller: "bootstrap", + }); + graph.emit_trace(TraceEvent::Created { + name: "Module", + kind: "Class", + caller: "bootstrap", + }); + graph.emit_trace(TraceEvent::Created { + name: "Class", + kind: "Class", + caller: "bootstrap", + }); + + graph + } + + pub fn set_trace(&mut self, f: impl Fn(TraceEvent<'_>) + 'static) { + self.trace_fn = Some(Box::new(f)); + } + + pub(crate) fn emit_trace(&self, event: TraceEvent<'_>) { + if let Some(ref f) = self.trace_fn { + f(event); } } @@ -211,7 +273,14 @@ impl Graph { Entry::Vacant(vacant_entry) => { let mut declaration = constructor(fully_qualified_name); declaration.add_definition(definition_id); + let trace_name = declaration.name().to_string(); + let trace_kind = declaration.kind(); vacant_entry.insert(declaration); + self.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: trace_kind, + caller: "add_declaration", + }); } } @@ -248,7 +317,14 @@ impl Graph { let mut new_decl = constructor(name, owner_id); new_decl.as_namespace_mut().unwrap().extend(old_decl); + let trace_name = new_decl.name().to_string(); + let trace_kind = new_decl.kind(); self.declarations.insert(declaration_id, new_decl); + self.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: trace_kind, + caller: "promote", + }); } #[must_use] @@ -674,7 +750,15 @@ impl Graph { decl, Declaration::Namespace(Namespace::Todo(_) | Namespace::SingletonClass(_)) ) && decl.as_namespace().is_some_and(|ns| !ns.members().is_empty()); - decl.has_no_definitions() && !is_synthetic_with_members + let should_remove = decl.has_no_definitions() && !is_synthetic_with_members; + if should_remove { + self.emit_trace(TraceEvent::Removed { + name: decl.name(), + kind: decl.kind(), + caller: "cleanup", + }); + } + should_remove }) }) .map(InvalidationItem::Declaration) @@ -1185,6 +1269,14 @@ impl Graph { decl.has_no_backing_definitions(&decl_id) || !self.declarations.contains_key(decl.owner_id()); if should_remove { + let trace_name = decl.name().to_string(); + let trace_kind = decl.kind(); + self.emit_trace(TraceEvent::Removed { + name: &trace_name, + kind: trace_kind, + caller: "invalidate", + }); + // Remove path: definitions came from a deleted/changed file and won't // re-resolve to this declaration. Removal must be immediate so child // declarations see a missing owner and cascade correctly. 
diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs index bfcaf9ae..18eb22cc 100644 --- a/rust/rubydex/src/resolution.rs +++ b/rust/rubydex/src/resolution.rs @@ -10,7 +10,7 @@ use crate::model::{ Namespace, SingletonClassDeclaration, TodoDeclaration, }, definitions::{Definition, Mixin, Receiver}, - graph::{CLASS_ID, Graph, MODULE_ID, OBJECT_ID, Unit}, + graph::{CLASS_ID, Graph, MODULE_ID, OBJECT_ID, TraceEvent, Unit}, identity_maps::{IdentityHashBuilder, IdentityHashMap, IdentityHashSet}, ids::{ConstantReferenceId, DeclarationId, DefinitionId, NameId, StringId}, name::{Name, NameRef, ParentScope}, @@ -674,6 +674,7 @@ impl<'a> Resolver<'a> { let decl_id = DeclarationId::from(&fully_qualified_name); namespace_decl.set_singleton_class_id(decl_id); + let trace_name = fully_qualified_name.clone(); self.graph.declarations_mut().insert( decl_id, Declaration::Namespace(Namespace::SingletonClass(Box::new(SingletonClassDeclaration::new( @@ -681,6 +682,11 @@ impl<'a> Resolver<'a> { attached_id, )))), ); + self.graph.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: "SingletonClass", + caller: "get_or_create_singleton", + }); // Mark for post-resolution cleanup: if the singleton stays empty // (no members added during resolution), it was created speculatively @@ -1163,12 +1169,18 @@ impl<'a> Resolver<'a> { let declaration_id = DeclarationId::from(&fully_qualified_name); if let Entry::Vacant(e) = self.graph.declarations_mut().entry(declaration_id) { + let trace_name = fully_qualified_name.clone(); e.insert(Declaration::Namespace(Namespace::Todo(Box::new(TodoDeclaration::new( fully_qualified_name, parent_owner_id, ))))); self.graph.add_member(&parent_owner_id, declaration_id, parent_str_id); self.graph.mark_declaration_for_cleanup(declaration_id); + self.graph.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: "Todo", + caller: "create_todo", + }); } declaration_id From 600b4a6b83d2fef219b67f5b61aa6b1d63a648cf Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Wed, 8 Apr 2026 15:50:51 +0100 Subject: [PATCH 6/8] Clean up orphan TODOs within same resolve cycle When compact-notation classes (e.g. class A::B::C) create TODO placeholders for intermediate qualifiers, and the real namespace resolves from another file in the same cycle, the TODO becomes orphaned. Previously, these orphan TODOs were only cleaned up in the NEXT resolve cycle, causing single-resolve and double-resolve to produce different graphs. Fix: after the main cleanup pass, run a second pass that targets orphan TODOs from the current cycle. If the cleanup cascades (removing singletons attached to the TODO, unresolving names), run another resolve_inner() pass to process the resulting work and recreate declarations at their correct FQN. --- rust/rubydex/src/model/graph.rs | 49 +++++++++++++++++++++++++-------- rust/rubydex/src/resolution.rs | 35 +++++++++++++++++++++++ 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index 2cce4760..613e6b9e 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -710,6 +710,12 @@ impl Graph { std::mem::take(&mut self.pending_work) } + /// Returns true if there are unprocessed work items from previous cycles. + #[must_use] + pub fn has_pending_work(&self) -> bool { + !self.pending_work.is_empty() + } + /// Drains declarations that need post-resolution cleanup. 
     pub(crate) fn take_pending_declaration_cleanup(&mut self) -> Vec<DeclarationId> {
         std::mem::take(&mut self.pending_declaration_cleanup)
     }
@@ -4040,35 +4046,56 @@ mod incremental_resolution_tests {
         assert_declaration_exists!(incremental, "Google::Cloud::V2::Type");
     }
 
-    /// Regression test from Core via `--bisect`: compact-notation class with
-    /// `class << self` creates a singleton on a TODO placeholder. Double
-    /// resolve must not leak extra singleton declarations.
-    #[test]
-    fn no_leaked_singleton_on_todo_after_double_resolve() {
-        // Matches bisect: ActiveRecord::SessionStore::Session with class << self
-        // + module ActiveRecord from errors.rb + activesupport rbi
-        let session_rbi = r#"
+    /// Bisect found 3 files needed: session.rbi (compact class with class << self),
+    /// errors.rb (module ActiveRecord with include), and activesupport.rbi (defines
+    /// the included module). The third file changes resolution ordering so the TODO
+    /// promotion path diverges between single and double resolve.
+    fn todo_promotion_fixtures() -> (&'static str, &'static str, &'static str) {
+        (
+            // session.rbi: compact-notation class with class << self
+            r"
         class ActiveRecord::SessionStore::Session
           class << self
             def new(attributes = nil); end
           end
         end
-        "#;
-        let errors_rb = r#"
+        ",
+            // errors.rb: real module with include (changes resolution order)
+            r"
         module ActiveRecord
+          include ActiveSupport::Deprecation::DeprecatedConstantAccessor
           class ActiveRecordError < StandardError; end
         end
-        "#;
+        ",
+            // activesupport.rbi: defines the included module chain
+            r"
+        module ActiveSupport
+          module Deprecation
+            module DeprecatedConstantAccessor; end
+          end
+        end
+        ",
+        )
+    }
+
+    /// Regression test from Core via `--bisect`: compact-notation class with
+    /// `class << self` creates a singleton on a TODO placeholder. Double
+    /// resolve must not leak extra singleton declarations.
+    #[test]
+    fn no_leaked_singleton_on_todo_after_double_resolve() {
+        let (session_rbi, errors_rb, activesupport_rbi) = todo_promotion_fixtures();
 
         let mut incremental = GraphTest::new();
         incremental.index_uri("file:///session.rbi", session_rbi);
         incremental.index_uri("file:///errors.rb", errors_rb);
+        incremental.index_uri("file:///activesupport.rbi", activesupport_rbi);
         incremental.resolve();
         incremental.resolve();
 
         let mut fresh = GraphTest::new();
         fresh.index_uri("file:///session.rbi", session_rbi);
         fresh.index_uri("file:///errors.rb", errors_rb);
+        fresh.index_uri("file:///activesupport.rbi", activesupport_rbi);
         fresh.resolve();
 
         let extras: Vec<_> = incremental
diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs
index 18eb22cc..2c3d3297 100644
--- a/rust/rubydex/src/resolution.rs
+++ b/rust/rubydex/src/resolution.rs
@@ -83,6 +83,41 @@ impl<'a> Resolver<'a> {
     ///
     /// Can panic if there's inconsistent data in the graph
     pub fn resolve(&mut self) {
+        self.resolve_inner();
+
+        // After resolution + cleanup, orphan TODOs from create_todo_for_parent may
+        // remain. These are TODOs created during this resolve cycle for compact-
+        // notation qualifiers that never acquired definitions (the real namespace
+        // resolved elsewhere). Clean them up separately — they can't be included
+        // in the main cleanup pass because singletons created in the same cycle
+        // might not have acquired members yet. Non-TODO candidates (singletons)
+        // are intentionally discarded: empty singletons are caught by the
+        // is_empty_singleton check in invalidate_declaration during subsequent
+        // invalidation passes.
+ let current_cycle = self.graph.take_pending_declaration_cleanup(); + let todo_only: Vec<_> = current_cycle + .into_iter() + .filter(|id| { + matches!( + self.graph.declarations().get(id), + Some(Declaration::Namespace(Namespace::Todo(_))) + ) + }) + .collect(); + if !todo_only.is_empty() { + self.graph.cleanup_empty_declarations(todo_only); + } + + // Both the main cleanup (inside resolve_inner) and the TODO cleanup above + // may have cascaded — removing empty singletons, unresolving names, and + // re-queuing dependent references. Run another resolve pass to process + // the resulting pending_work and recreate declarations at their correct FQN. + if self.graph.has_pending_work() { + self.resolve_inner(); + } + } + + fn resolve_inner(&mut self) { // Snapshot cleanup candidates from previous cycles. This includes both: // - Declarations emptied during invalidation cascade (from unresolve_dependent_name / remove_document_data) // - Orphan Todos from a previous resolve (from create_todo_for_parent) From 50aad55f12855f327ce7a83f279544f05f25e8bf Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Wed, 8 Apr 2026 15:51:09 +0100 Subject: [PATCH 7/8] Remove empty singletons during invalidation and fix Attached resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes for singleton consistency between fresh and incremental: 1. invalidate_declaration: empty singletons (no definitions AND no members) now take the remove path instead of the update path. Previously, has_no_backing_definitions() treated ALL singletons as synthetic, keeping zombie singletons alive. The remove path also clears singleton_class_id on the owner so get_or_create_singleton_class can recreate if needed. 2. resolve_constant_internal: when a NameRef::Resolved name has ParentScope::Attached but resolves to a non-singleton (stale resolution from a TODO), create the singleton and update the resolution. This ensures the Resolved path produces the same side effects as the Unresolved→Attached path. Also adds clear_singleton_class_id and transfers singleton_class_id during TODO promotion in extend(). --- rust/rubydex/src/model/declaration.rs | 22 ++++++++ rust/rubydex/src/model/graph.rs | 73 +++++++++++++++++++++++++-- rust/rubydex/src/resolution.rs | 24 ++++++++- 3 files changed, 115 insertions(+), 4 deletions(-) diff --git a/rust/rubydex/src/model/declaration.rs b/rust/rubydex/src/model/declaration.rs index 831d152e..5748495c 100644 --- a/rust/rubydex/src/model/declaration.rs +++ b/rust/rubydex/src/model/declaration.rs @@ -138,6 +138,16 @@ macro_rules! namespace_declaration { Declaration::Namespace(namespace) => { self.members.extend(namespace.members()); self.references.extend(namespace.references()); + // Transfer singleton_class_id if the old declaration had one + // and we don't already have one. This happens during TODO + // promotion: the TODO may have acquired a singleton via + // get_or_create_singleton_class, and the new declaration + // should inherit it. + if self.singleton_class_id.is_none() { + if let Some(&id) = namespace.singleton_class() { + self.singleton_class_id = Some(id); + } + } } Declaration::Constant(constant) => { self.references.extend(constant.references()); @@ -162,6 +172,14 @@ macro_rules! namespace_declaration { self.singleton_class_id = Some(declaration_id); } + /// Clears `singleton_class_id` only if it matches `expected`. + /// Prevents accidentally clearing a pointer to a re-created singleton. 
+ pub fn clear_singleton_class_id(&mut self, expected: &DeclarationId) { + if self.singleton_class_id.as_ref() == Some(expected) { + self.singleton_class_id = None; + } + } + pub fn singleton_class_id(&self) -> Option<&DeclarationId> { self.singleton_class_id.as_ref() } @@ -590,6 +608,10 @@ impl Namespace { all_namespaces!(self, it => it.set_singleton_class_id(declaration_id)); } + pub fn clear_singleton_class_id(&mut self, expected: &DeclarationId) { + all_namespaces!(self, it => it.clear_singleton_class_id(expected)); + } + #[must_use] pub fn owner_id(&self) -> &DeclarationId { all_namespaces!(self, it => &it.owner_id) diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index 613e6b9e..0db67238 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -1271,8 +1271,15 @@ impl Graph { let Some(decl) = self.declarations.get(&decl_id) else { return; }; - let should_remove = - decl.has_no_backing_definitions(&decl_id) || !self.declarations.contains_key(decl.owner_id()); + // Empty singletons (no definitions AND no members) are speculatively created + // by get_or_create_singleton_class during ancestor linearization. They should + // be removed even though has_no_backing_definitions() skips them (synthetic). + let is_empty_singleton = matches!(decl, Declaration::Namespace(Namespace::SingletonClass(_))) + && decl.has_no_definitions() + && decl.as_namespace().is_some_and(|ns| ns.members().is_empty()); + let should_remove = is_empty_singleton + || decl.has_no_backing_definitions(&decl_id) + || !self.declarations.contains_key(decl.owner_id()); if should_remove { let trace_name = decl.name().to_string(); @@ -1328,6 +1335,11 @@ impl Graph { && let Some(ns) = owner.as_namespace_mut() { ns.remove_member(&unqualified_str_id); + // For singletons being removed, clear the owner's singleton_class_id + // pointer so get_or_create_singleton_class can recreate it if needed. + if is_empty_singleton { + ns.clear_singleton_class_id(&decl_id); + } } } @@ -2504,7 +2516,7 @@ mod incremental_resolution_tests { use crate::{ assert_alias_targets_contain, assert_ancestors_eq, assert_constant_reference_to, assert_constant_reference_unresolved, assert_declaration_does_not_exist, assert_declaration_exists, - assert_declaration_kind_eq, assert_declaration_references_count_eq, assert_members_eq, + assert_declaration_references_count_eq, assert_members_eq, assert_no_constant_alias_target, }; @@ -4119,4 +4131,59 @@ mod incremental_resolution_tests { "Declaration mismatch after double resolve:\n Extra: {extras:?}\n Missing: {missing:?}" ); } + + #[test] + fn empty_singleton_removed_by_cleanup_after_member_deletion() { + // Regression: empty singletons (no definitions, no members) must be + // removed by cleanup. Previously, invalidate_declaration treated all + // SingletonClass as synthetic (has_no_backing_definitions = false), + // keeping them alive via the update path even when empty. + let mut context = GraphTest::new(); + + context.index_uri("file:///foo.rb", "class Foo; def self.bar; end; end"); + context.index_uri("file:///reopener.rb", "class Foo; end"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + + // Delete the file with the singleton method — Foo:: loses its + // member #bar(). The singleton becomes empty. + context.delete_uri("file:///foo.rb"); + context.resolve(); + + // Foo survives (reopener has a definition). The empty singleton + // should be cleaned up. 
+        assert_declaration_exists!(context, "Foo");
+        assert_declaration_does_not_exist!(context, "Foo::");
+
+        // Re-add the file — Foo:: should be recreated with its member
+        context.index_uri("file:///foo.rb", "class Foo; def self.bar; end; end");
+        context.resolve();
+
+        assert_declaration_exists!(context, "Foo");
+        assert_declaration_exists!(context, "Foo::");
+    }
+
+    #[test]
+    fn resolved_attached_name_creates_singleton_for_promoted_todo() {
+        // Regression: when a NameRef::Resolved name has ParentScope::Attached
+        // but resolves to a non-singleton declaration (stale resolution from
+        // when the target was a TODO), the Resolved path must create the
+        // singleton. Verifies that after double-resolve, the singleton exists
+        // on the fully-qualified class and not on an orphaned TODO.
+        //
+        // Uses the same 3-file scenario as no_leaked_singleton_on_todo above.
+        let (session_rbi, errors_rb, activesupport_rbi) = todo_promotion_fixtures();
+
+        let mut context = GraphTest::new();
+        context.index_uri("file:///session.rbi", session_rbi);
+        context.index_uri("file:///errors.rb", errors_rb);
+        context.index_uri("file:///activesupport.rbi", activesupport_rbi);
+        context.resolve();
+        context.resolve();
+
+        // Singleton should be on the fully-qualified class, not an orphaned TODO
+        assert_declaration_exists!(context, "ActiveRecord::SessionStore::Session::");
+        assert_declaration_does_not_exist!(context, "Session::");
+    }
 } // mod incremental_resolution_tests
diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs
index 2c3d3297..4eb87d8b 100644
--- a/rust/rubydex/src/resolution.rs
+++ b/rust/rubydex/src/resolution.rs
@@ -1361,7 +1361,29 @@ impl<'a> Resolver<'a> {
                     }
                 }
             }
-            NameRef::Resolved(resolved) => Outcome::Resolved(*resolved.declaration_id(), None),
+            NameRef::Resolved(resolved) => {
+                // For Attached names that were previously resolved to a non-singleton
+                // (stale from a previous cycle where the target was a TODO), ensure the
+                // singleton exists. Without this, the Unresolved→Attached path creates
+                // singletons during incremental re-resolution that this Resolved path
+                // skips in fresh, causing incremental/fresh divergence.
+                if matches!(resolved.name().parent_scope(), ParentScope::Attached(_)) {
+                    let decl_id = *resolved.declaration_id();
+                    let is_singleton = matches!(
+                        self.graph.declarations().get(&decl_id),
+                        Some(Declaration::Namespace(Namespace::SingletonClass(_)))
+                    );
+                    if !is_singleton {
+                        // The name was resolved to the target class, not its singleton.
+                        // Create the singleton — the reference handler will update the
+                        // resolution target via the returned Outcome.
+                        if let Some(singleton_id) = self.get_or_create_singleton_class(decl_id) {
+                            return Outcome::Resolved(singleton_id, Some(singleton_id));
+                        }
+                    }
+                }
+                Outcome::Resolved(*resolved.declaration_id(), None)
+            }
         }
     }

From 231bc8a874a3f73ab98ecd53f5ac78c1e2c1f234 Mon Sep 17 00:00:00 2001
From: Stan Lo
Date: Wed, 8 Apr 2026 15:51:19 +0100
Subject: [PATCH 8/8] Fix bisect final report to use minimized fresh baseline

The final verification in bisect_and_report was comparing the minimized
round result against the ORIGINAL fresh baseline (all files), producing
misleading count diffs when Phase 2 reduced the indexed set. For example,
if Phase 2 trims the indexed set from thousands of files to a few dozen,
every counter in the final report shows a large spurious negative diff
even when the minimized incremental and fresh graphs agree exactly. Now
builds a fresh baseline from the minimized file set for accurate final
reporting.
---
 rust/rubydex/src/bin/incremental_test.rs | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/rust/rubydex/src/bin/incremental_test.rs b/rust/rubydex/src/bin/incremental_test.rs
index 1b730d95..2662d3ef 100644
--- a/rust/rubydex/src/bin/incremental_test.rs
+++ b/rust/rubydex/src/bin/incremental_test.rs
@@ -476,7 +476,6 @@ fn bisect_and_report(
     delete_indices: &[usize],
     skip_file: &str,
     retries: usize,
-    fresh_counts: &GraphCounts,
 ) {
     // Phase 1: minimize deleted files
     let min_deleted = bisect_deleted_files(files, delete_indices, retries);
@@ -536,10 +535,19 @@ fn bisect_and_report(
     write_skip_file(skip_file, &final_skipped);
     println!("Skip file written to {skip_file}");
 
+    // Build fresh baseline from the minimized file set (not the original full baseline)
+    let mut fresh_minimized = Graph::new();
+    for &i in &indexed {
+        let (ref uri, ref source, ref lang) = files[i];
+        indexing::index_source(&mut fresh_minimized, uri, source, lang);
+    }
+    resolve(&mut fresh_minimized);
+    let minimized_fresh_counts = GraphCounts::from_graph(&fresh_minimized);
+
     // Final verification
     println!("\nFinal verification:");
     let final_result = run_round(files, &min_deleted, Some(&indexed), true, None);
-    print_round_result(&final_result, fresh_counts);
+    print_round_result(&final_result, &minimized_fresh_counts);
 
     // Print the minimal reproduction
     let surviving: Vec<usize> = indexed.iter().copied().filter(|i| !min_deleted.contains(i)).collect();
@@ -733,7 +741,7 @@ fn main() {
                 if args.skip_file.is_none() {
                     println!("\nNo --skip-file specified, using {skip_path}");
                 }
-                bisect_and_report(&files, &delete_indices, skip_path, args.retries, &fresh_counts);
+                bisect_and_report(&files, &delete_indices, skip_path, args.retries);
                 break;
             }
         }
@@ -754,7 +762,7 @@ fn main() {
             if args.skip_file.is_none() {
                 println!("\nNo --skip-file specified, using {skip_path}");
             }
-            bisect_and_report(&files, &delete_indices, skip_path, args.retries, &fresh_counts);
+            bisect_and_report(&files, &delete_indices, skip_path, args.retries);
             break;
         }
     }
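
Taken together, the series enforces a single invariant, which the sketch
below states directly. It is illustrative only: it reuses GraphCounts,
resolve, and index_source from this binary, and assumes GraphCounts::compare
returns one formatted string per mismatched counter.

    /// Hypothetical helper: the incremental graph must match a graph
    /// built from scratch over the same file set.
    fn assert_matches_fresh(files: &[(String, String, LanguageId)], incremental: &Graph) {
        // Build a from-scratch graph over the same file set...
        let mut fresh = Graph::new();
        for (uri, source, lang) in files {
            indexing::index_source(&mut fresh, uri, source, lang);
        }
        resolve(&mut fresh);

        // ...and require the incremental graph to match it exactly.
        let mismatches = GraphCounts::from_graph(incremental).compare(&GraphCounts::from_graph(&fresh));
        assert!(
            mismatches.is_empty(),
            "incremental/fresh divergence:\n{}",
            mismatches.join("\n")
        );
    }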