diff --git a/rust/rubydex/Cargo.toml b/rust/rubydex/Cargo.toml index fcc6f2ea..51f6c1db 100644 --- a/rust/rubydex/Cargo.toml +++ b/rust/rubydex/Cargo.toml @@ -9,6 +9,10 @@ license = "MIT" name = "rubydex_cli" path = "src/main.rs" +[[bin]] +name = "incremental_test" +path = "src/bin/incremental_test.rs" + [lib] crate-type = ["rlib"] diff --git a/rust/rubydex/src/bin/incremental_test.rs b/rust/rubydex/src/bin/incremental_test.rs new file mode 100644 index 00000000..2662d3ef --- /dev/null +++ b/rust/rubydex/src/bin/incremental_test.rs @@ -0,0 +1,784 @@ +use std::collections::HashSet; +use std::io::Write as _; +use std::panic; +use std::time::Instant; + +use clap::Parser; + +use rubydex::{ + indexing::{self, LanguageId}, + integrity, listing, + model::graph::{Graph, TraceEvent}, + resolution::Resolver, +}; + +#[derive(Parser, Debug)] +#[command( + name = "incremental_test", + about = "Test that incremental resolution matches fresh resolution" +)] +struct Args { + /// Path to a Ruby project or directory of gems + #[arg(value_name = "PATH")] + path: String, + + /// RNG seed for deterministic file selection + #[arg(long, default_value = "42")] + seed: u64, + + /// Number of delete/re-add cycles + #[arg(long, default_value = "5")] + cycles: usize, + + /// Bisect a failing round to find the minimum reproduction (deleted + indexed files) + #[arg(long)] + bisect: bool, + + /// File listing URIs to skip during indexing (one per line, grown by --bisect) + #[arg(long, value_name = "FILE")] + skip_file: Option, + + /// Number of retries per bisect step (for non-deterministic failures) + #[arg(long, default_value = "1")] + retries: usize, + + /// Trace creation/removal of declarations whose name contains PATTERN + #[arg(long, value_name = "PATTERN")] + trace_declaration: Option, +} + +/// Delete percentage (hardcoded for now) +const DELETE_PCT: usize = 10; +/// Maximum files to delete per cycle +const MAX_DELETE: usize = 30; + +struct GraphCounts { + declarations: usize, + definitions: usize, + names: usize, + documents: usize, + constant_references: usize, + method_references: usize, +} + +impl GraphCounts { + fn from_graph(graph: &Graph) -> Self { + Self { + declarations: graph.declarations().len(), + definitions: graph.definitions().len(), + names: graph.names().len(), + documents: graph.documents().len(), + constant_references: graph.constant_references().len(), + method_references: graph.method_references().len(), + } + } + + fn compare(&self, other: &GraphCounts) -> Vec { + let checks: &[(&str, usize, usize)] = &[ + ("declarations", self.declarations, other.declarations), + ("definitions", self.definitions, other.definitions), + ("names", self.names, other.names), + ("documents", self.documents, other.documents), + ( + "constant_references", + self.constant_references, + other.constant_references, + ), + ("method_references", self.method_references, other.method_references), + ]; + + let mut mismatches = Vec::new(); + for &(name, incremental, fresh) in checks { + if incremental != fresh { + let diff = incremental.cast_signed() - fresh.cast_signed(); + mismatches.push(format!( + "{name}: incremental={incremental}, fresh={fresh} (diff={diff:+})", + )); + } + } + mismatches + } +} + +impl std::fmt::Display for GraphCounts { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{} declarations, {} definitions, {} names, {} documents, {} constant_refs, {} method_refs", + self.declarations, + self.definitions, + self.names, + self.documents, + self.constant_references, + self.method_references, + ) + } +} + +fn resolve(graph: &mut Graph) { + Resolver::new(graph).resolve(); +} + +/// Deterministic file selection using xxhash. Each file is hashed with the seed, +/// then files are sorted by hash and the first N are selected. +fn select_files_to_delete(file_count: usize, seed: u64) -> Vec { + let n = ((file_count * DELETE_PCT) / 100).clamp(1, MAX_DELETE); + let mut scored: Vec<(u64, usize)> = (0..file_count) + .map(|i| { + let hash = xxhash_rust::xxh3::xxh3_64_with_seed(&i.to_le_bytes(), seed); + (hash, i) + }) + .collect(); + scored.sort_by_key(|(hash, _)| *hash); + let mut indices: Vec = scored.into_iter().take(n).map(|(_, i)| i).collect(); + indices.sort_unstable(); + indices +} + +/// Check for dangling definition IDs — definitions referenced by declarations but not in the graph. +fn check_dangling_definitions(graph: &Graph) -> Vec { + let mut dangles = Vec::new(); + for decl in graph.declarations().values() { + for def_id in decl.definitions() { + if !graph.definitions().contains_key(def_id) { + dangles.push(format!( + "Declaration `{}` (kind={}) references missing def {def_id:?}", + decl.name(), + decl.kind(), + )); + } + } + } + dangles +} + +/// Check which definitions in a document don't map to a declaration. +fn check_unmapped_definitions(graph: &Graph, uri: &str) -> Vec { + let uri_id = rubydex::model::ids::UriId::from(uri); + let Some(document) = graph.documents().get(&uri_id) else { + return vec![format!("Document {uri} not found")]; + }; + + let mut unmapped = Vec::new(); + for def_id in document.definitions() { + let decl_id = graph.definition_id_to_declaration_id(*def_id); + if decl_id.is_none() { + let def = graph.definitions().get(def_id).unwrap(); + unmapped.push(format!(" {def_id:?} ({}) → no declaration", def.kind())); + } + } + unmapped +} + +fn set_trace_for_phase(graph: &mut Graph, pattern: &str, phase: &str) { + let pat = pattern.to_string(); + let phase = phase.to_string(); + graph.set_trace(move |event| { + let name = match &event { + TraceEvent::Created { name, .. } | TraceEvent::Removed { name, .. } => name, + }; + if name.contains(pat.as_str()) { + eprintln!("[TRACE {phase}] {event}"); + } + }); +} + +/// Result of comparing incremental vs fresh graphs. +struct RoundResult { + counts: GraphCounts, + integrity_errors: Vec, + extras: Vec, + missing: Vec, +} + +/// Run a single round: index → resolve → delete → resolve → re-add → resolve. +/// When `indexed_indices` is Some, only those files are indexed (both incremental and fresh). +/// When None, all files are indexed. +fn run_round( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + indexed_indices: Option<&[usize]>, + verbose: bool, + trace_pattern: Option<&str>, +) -> RoundResult { + let all_indices: Vec = (0..files.len()).collect(); + let active_indices = indexed_indices.unwrap_or(&all_indices); + + let mut incremental = Graph::new(); + for &i in active_indices { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut incremental, uri, source, lang); + } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut incremental, pat, "initial"); + } + resolve(&mut incremental); + + if verbose { + for &i in delete_indices { + let unmapped = check_unmapped_definitions(&incremental, &files[i].0); + if !unmapped.is_empty() { + println!(" Unmapped definitions in {} ({}):", files[i].0, unmapped.len()); + for u in unmapped.iter().take(5) { + println!(" {u}"); + } + if unmapped.len() > 5 { + println!(" ... and {} more", unmapped.len() - 5); + } + } + } + } + + for &i in delete_indices { + incremental.delete_document(&files[i].0); + } + + if verbose { + let dangles = check_dangling_definitions(&incremental); + if !dangles.is_empty() { + println!(" Dangling definitions after delete ({}):", dangles.len()); + for d in dangles.iter().take(10) { + println!(" {d}"); + } + } + } + + if verbose { + println!(" Resolving after delete..."); + } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut incremental, pat, "post-delete"); + } + resolve(&mut incremental); + + for &i in delete_indices { + indexing::index_source(&mut incremental, &files[i].0, &files[i].1, &files[i].2); + } + + if verbose { + println!(" Resolving after re-index..."); + } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut incremental, pat, "post-re-add"); + } + resolve(&mut incremental); + + let counts = GraphCounts::from_graph(&incremental); + let integrity_errors = integrity::check_integrity(&incremental); + + let mut fresh = Graph::new(); + for &i in active_indices { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut fresh, uri, source, lang); + } + if let Some(pat) = trace_pattern { + set_trace_for_phase(&mut fresh, pat, "fresh"); + } + resolve(&mut fresh); + + let mut missing: Vec = Vec::new(); + for (decl_id, decl) in fresh.declarations() { + if !incremental.declarations().contains_key(decl_id) { + let owner_name = fresh.declarations().get(decl.owner_id()).map(|d| d.name().to_string()); + let owner_exists_in_inc = incremental.declarations().contains_key(decl.owner_id()); + let owner_has_singleton_in_inc = if owner_exists_in_inc { + incremental + .declarations() + .get(decl.owner_id()) + .and_then(|d| d.as_namespace()) + .and_then(|ns| ns.singleton_class()) + .is_some() + } else { + false + }; + missing.push(format!( + "{} ({}) owner={} in_inc={owner_exists_in_inc} has_singleton={owner_has_singleton_in_inc}", + decl.name(), + decl.kind(), + owner_name.as_deref().unwrap_or("?"), + )); + } + } + missing.sort(); + + let mut extras: Vec = Vec::new(); + for (decl_id, decl) in incremental.declarations() { + if !fresh.declarations().contains_key(decl_id) { + extras.push(format!("{} ({})", decl.name(), decl.kind())); + } + } + extras.sort(); + + RoundResult { + counts, + integrity_errors, + extras, + missing, + } +} + +/// Returns true if the given configuration causes a mismatch or panic. +/// Runs up to `retries` times — returns true if ANY attempt fails. +fn round_fails( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + indexed_indices: Option<&[usize]>, + retries: usize, +) -> bool { + for _ in 0..retries { + let result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + run_round(files, delete_indices, indexed_indices, false, None) + })); + let failed = match result { + Ok(r) => !r.extras.is_empty() || !r.missing.is_empty() || !r.integrity_errors.is_empty(), + Err(_) => true, + }; + if failed { + return true; + } + } + false +} + +// --- Skip file I/O --- + +fn read_skip_file(path: &str) -> HashSet { + let Ok(content) = std::fs::read_to_string(path) else { + return HashSet::new(); + }; + content.lines().filter(|l| !l.is_empty()).map(String::from).collect() +} + +fn write_skip_file(path: &str, skipped_uris: &HashSet) { + let mut sorted: Vec<&str> = skipped_uris.iter().map(String::as_str).collect(); + sorted.sort_unstable(); + let mut file = std::fs::File::create(path).expect("failed to write skip file"); + for uri in sorted { + writeln!(file, "{uri}").expect("failed to write skip file"); + } +} + +/// Compute indices of files NOT in the skip set. +fn compute_indexed_indices(files: &[(String, String, LanguageId)], skipped: &HashSet) -> Vec { + (0..files.len()).filter(|&i| !skipped.contains(&files[i].0)).collect() +} + +// --- Delta debugging --- + +/// Delta-debugging: find the minimum subset of `initial` that still triggers `fails`. +/// Elements in `protected` are never removed. Uses chunked removal (halves → quarters → ...) +/// before falling back to one-at-a-time for efficiency with large sets. +/// Calls `on_progress` after each successful removal for persistence. +fn minimize_set( + initial: &[usize], + protected: &HashSet, + label: &str, + files: &[(String, String, LanguageId)], + mut fails: F, + mut on_progress: P, +) -> Vec +where + F: FnMut(&[usize]) -> bool, + P: FnMut(&[usize]), +{ + let mut current = initial.to_vec(); + let removable_count = current.iter().filter(|i| !protected.contains(i)).count(); + + println!( + "\nMinimizing {removable_count} {label} ({} protected)...\n", + protected.len() + ); + + if removable_count == 0 { + return current; + } + + // Chunked removal: try removing halves, then quarters, etc. + let mut chunk_size = removable_count / 2; + while chunk_size >= 1 { + let mut changed_this_pass = true; + while changed_this_pass { + changed_this_pass = false; + let removable: Vec = current.iter().copied().filter(|i| !protected.contains(i)).collect(); + + let mut offset = 0; + while offset < removable.len() { + let chunk_end = (offset + chunk_size).min(removable.len()); + let to_remove: HashSet = removable[offset..chunk_end].iter().copied().collect(); + let candidate: Vec = current.iter().copied().filter(|i| !to_remove.contains(i)).collect(); + + let remaining = candidate.iter().filter(|i| !protected.contains(i)).count(); + print!( + " Removing chunk of {} {label} ({remaining} remain)... ", + to_remove.len(), + ); + + if fails(&candidate) { + println!("still fails"); + current = candidate; + changed_this_pass = true; + on_progress(¤t); + break; // recompute removable with updated current + } + println!("passes — keeping"); + offset += chunk_size; + } + } + chunk_size /= 2; + } + + // One-at-a-time pass for stragglers + let mut changed = true; + while changed { + changed = false; + let removable: Vec = current.iter().copied().filter(|i| !protected.contains(i)).collect(); + + for &idx in &removable { + let candidate: Vec = current.iter().copied().filter(|&i| i != idx).collect(); + + let remaining = candidate.iter().filter(|i| !protected.contains(i)).count(); + print!(" Without {} ({remaining} remain)... ", files[idx].0,); + + if fails(&candidate) { + println!("still fails — removing"); + current = candidate; + changed = true; + on_progress(¤t); + break; // restart scan since indices shifted + } + println!("passes — keeping"); + } + } + + current +} + +/// Find minimum set of deleted files that still triggers the failure. +fn bisect_deleted_files( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + retries: usize, +) -> Vec { + minimize_set( + delete_indices, + &HashSet::new(), + "deleted files", + files, + |candidate| round_fails(files, candidate, None, retries), + |_| {}, + ) +} + +/// Run both bisect phases and print a complete minimal reproduction. +/// Phase 2 writes to `skip_file` after each successful removal for resumability. +fn bisect_and_report( + files: &[(String, String, LanguageId)], + delete_indices: &[usize], + skip_file: &str, + retries: usize, +) { + // Phase 1: minimize deleted files + let min_deleted = bisect_deleted_files(files, delete_indices, retries); + println!( + "\nPhase 1 complete: {} → {} deleted files", + delete_indices.len(), + min_deleted.len() + ); + for &i in &min_deleted { + println!(" {}", files[i].0); + } + + // Phase 2: minimize indexed files by growing the skip file + let existing_skipped = read_skip_file(skip_file); + let mut indexed = compute_indexed_indices(files, &existing_skipped); + + // Verify the failure still reproduces with current skip list + if !round_fails(files, &min_deleted, Some(&indexed), retries) { + println!("\nRound passes with current skip list — cannot bisect indexed files."); + println!("Try clearing {skip_file} and re-running."); + return; + } + + let protected: HashSet = min_deleted.iter().copied().collect(); + + indexed = minimize_set( + &indexed, + &protected, + "indexed files", + files, + |candidate| round_fails(files, &min_deleted, Some(candidate), retries), + |current_indexed| { + // Write skip file: all files NOT in current_indexed + let indexed_set: HashSet = current_indexed.iter().copied().collect(); + let skipped: HashSet = (0..files.len()) + .filter(|i| !indexed_set.contains(i)) + .map(|i| files[i].0.clone()) + .collect(); + write_skip_file(skip_file, &skipped); + }, + ); + + let non_deleted_count = indexed.iter().filter(|i| !min_deleted.contains(i)).count(); + println!( + "\nPhase 2 complete: {} → {} indexed files ({non_deleted_count} surviving + {} deleted)", + files.len(), + indexed.len(), + min_deleted.len() + ); + + // Write final skip file + let indexed_set: HashSet = indexed.iter().copied().collect(); + let final_skipped: HashSet = (0..files.len()) + .filter(|i| !indexed_set.contains(i)) + .map(|i| files[i].0.clone()) + .collect(); + write_skip_file(skip_file, &final_skipped); + println!("Skip file written to {skip_file}"); + + // Build fresh baseline from the minimized file set (not the original full baseline) + let mut fresh_minimized = Graph::new(); + for &i in &indexed { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut fresh_minimized, uri, source, lang); + } + resolve(&mut fresh_minimized); + let minimized_fresh_counts = GraphCounts::from_graph(&fresh_minimized); + + // Final verification + println!("\nFinal verification:"); + let final_result = run_round(files, &min_deleted, Some(&indexed), true, None); + print_round_result(&final_result, &minimized_fresh_counts); + + // Print the minimal reproduction + let surviving: Vec = indexed.iter().copied().filter(|i| !min_deleted.contains(i)).collect(); + + println!("\n========================================"); + println!("Minimal reproduction ({} files total):", indexed.len()); + println!("========================================\n"); + + println!("Index {} file(s):", surviving.len()); + for &i in &surviving { + println!(" {}", files[i].0); + } + println!("\nDelete and re-add {} file(s):", min_deleted.len()); + for &i in &min_deleted { + println!(" {}", files[i].0); + } + + println!("\nFile contents:"); + for &i in &indexed { + let role = if min_deleted.contains(&i) { + "DELETE+RE-ADD" + } else { + "INDEX" + }; + println!("\n--- [{role}] {} ---", files[i].0); + for line in files[i].1.lines().take(50) { + println!(" {line}"); + } + let total_lines = files[i].1.lines().count(); + if total_lines > 50 { + println!(" ... ({total_lines} lines total)"); + } + } +} + +fn print_round_result(result: &RoundResult, fresh_counts: &GraphCounts) { + if !result.missing.is_empty() { + println!(" Missing declarations ({}):", result.missing.len()); + for m in result.missing.iter().take(30) { + println!(" {m}"); + } + } + if !result.extras.is_empty() { + println!(" Extra declarations ({}):", result.extras.len()); + for e in result.extras.iter().take(10) { + println!(" {e}"); + } + } + + let mismatches = result.counts.compare(fresh_counts); + if !mismatches.is_empty() { + println!(" Count mismatches:"); + for m in &mismatches { + println!(" {m}"); + } + println!(" Incremental: {}", result.counts); + println!(" Fresh: {fresh_counts}"); + } + if !result.integrity_errors.is_empty() { + println!(" Integrity errors ({}):", result.integrity_errors.len()); + for error in result.integrity_errors.iter().take(20) { + println!(" - {error}"); + } + } +} + +#[allow(clippy::too_many_lines)] +fn main() { + let args = Args::parse(); + + println!("Listing files..."); + let (file_paths, errors) = listing::collect_file_paths(vec![args.path], &HashSet::new()); + + for error in &errors { + eprintln!("{error}"); + } + + if file_paths.is_empty() { + eprintln!("No files found"); + std::process::exit(1); + } + + let mut file_paths = file_paths; + file_paths.sort(); + // Deterministic shuffle: use seed to permute file order so that different seeds + // explore different IdentityHashMap insertion orders (which affect resolution ordering). + let mut scored: Vec<(u64, _)> = file_paths + .into_iter() + .enumerate() + .map(|(i, p)| { + let hash = xxhash_rust::xxh3::xxh3_64_with_seed(&i.to_le_bytes(), args.seed); + (hash, p) + }) + .collect(); + scored.sort_by_key(|(hash, _)| *hash); + let file_paths: Vec<_> = scored.into_iter().map(|(_, p)| p).collect(); + + let files: Vec<(String, String, LanguageId)> = file_paths + .into_iter() + .filter_map(|path| { + let content = std::fs::read_to_string(&path).ok()?; + let uri = url::Url::from_file_path(&path).ok()?.to_string(); + let ext = path.extension().unwrap_or_default(); + let language_id = LanguageId::from(ext); + Some((uri, content, language_id)) + }) + .collect(); + + println!("Found {} files", files.len()); + + // Apply skip file to determine which files are indexed + let skipped = args.skip_file.as_deref().map_or_else(HashSet::new, read_skip_file); + let indexed_indices = if skipped.is_empty() { + None + } else { + let indices = compute_indexed_indices(&files, &skipped); + println!("Skipping {} files ({} indexed)", skipped.len(), indices.len()); + Some(indices) + }; + println!(); + + // Build fresh baseline (respecting skip list) + println!("Building fresh baseline..."); + let fresh_start = Instant::now(); + let mut fresh = Graph::new(); + let baseline_indices: Vec = indexed_indices.clone().unwrap_or_else(|| (0..files.len()).collect()); + for &i in &baseline_indices { + let (ref uri, ref source, ref lang) = files[i]; + indexing::index_source(&mut fresh, uri, source, lang); + } + resolve(&mut fresh); + let fresh_counts = GraphCounts::from_graph(&fresh); + println!( + "Fresh baseline ({:.2}s): {fresh_counts}\n", + fresh_start.elapsed().as_secs_f64() + ); + + let fresh_integrity_errors = integrity::check_integrity(&fresh); + if !fresh_integrity_errors.is_empty() { + eprintln!( + "WARNING: Fresh graph has {} integrity errors", + fresh_integrity_errors.len() + ); + for error in &fresh_integrity_errors { + eprintln!(" - {error}"); + } + } + + drop(fresh); + + // Select files to delete from the INDEXED set (not all files) + let active_count = baseline_indices.len(); + + let mut failed_rounds: Vec = Vec::new(); + + for round in 0..args.cycles { + let round_seed = args.seed.wrapping_add(round as u64); + let selected = select_files_to_delete(active_count, round_seed); + // Map back to global indices + let delete_indices: Vec = selected.iter().map(|&i| baseline_indices[i]).collect(); + + println!("Round {}", round + 1); + println!(" Removing {} files:", delete_indices.len()); + for &i in &delete_indices { + println!(" {}", files[i].0); + } + + let round_start = Instant::now(); + + let idx_ref = indexed_indices.as_deref(); + let trace_pat = args.trace_declaration.as_deref(); + let round_result = panic::catch_unwind(panic::AssertUnwindSafe(|| { + run_round(&files, &delete_indices, idx_ref, true, trace_pat) + })); + + let elapsed = round_start.elapsed().as_secs_f64(); + + match round_result { + Ok(result) => { + let mismatches = result.counts.compare(&fresh_counts); + + if mismatches.is_empty() && result.integrity_errors.is_empty() { + println!(" OK ({elapsed:.2}s): {}", result.counts); + } else { + print_round_result(&result, &fresh_counts); + println!(" FAILED ({elapsed:.2}s)"); + failed_rounds.push(round + 1); + + if args.bisect { + let skip_path = args.skip_file.as_deref().unwrap_or("/tmp/incremental_test_skip"); + if args.skip_file.is_none() { + println!("\nNo --skip-file specified, using {skip_path}"); + } + bisect_and_report(&files, &delete_indices, skip_path, args.retries); + break; + } + } + } + Err(panic_info) => { + let msg = if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else if let Some(s) = panic_info.downcast_ref::<&str>() { + (*s).to_string() + } else { + "unknown panic".to_string() + }; + println!(" PANICKED ({elapsed:.2}s): {msg}"); + failed_rounds.push(round + 1); + + if args.bisect { + let skip_path = args.skip_file.as_deref().unwrap_or("/tmp/incremental_test_skip"); + if args.skip_file.is_none() { + println!("\nNo --skip-file specified, using {skip_path}"); + } + bisect_and_report(&files, &delete_indices, skip_path, args.retries); + break; + } + } + } + println!(); + } + + if failed_rounds.is_empty() { + println!("All {} rounds passed (seed={}).", args.cycles, args.seed); + } else { + eprintln!( + "FAILED: {} of {} rounds failed (rounds: {failed_rounds:?}, seed={})", + failed_rounds.len(), + args.cycles, + args.seed, + ); + std::process::exit(1); + } +} diff --git a/rust/rubydex/src/model/declaration.rs b/rust/rubydex/src/model/declaration.rs index 64366c99..5748495c 100644 --- a/rust/rubydex/src/model/declaration.rs +++ b/rust/rubydex/src/model/declaration.rs @@ -138,6 +138,16 @@ macro_rules! namespace_declaration { Declaration::Namespace(namespace) => { self.members.extend(namespace.members()); self.references.extend(namespace.references()); + // Transfer singleton_class_id if the old declaration had one + // and we don't already have one. This happens during TODO + // promotion: the TODO may have acquired a singleton via + // get_or_create_singleton_class, and the new declaration + // should inherit it. + if self.singleton_class_id.is_none() { + if let Some(&id) = namespace.singleton_class() { + self.singleton_class_id = Some(id); + } + } } Declaration::Constant(constant) => { self.references.extend(constant.references()); @@ -162,6 +172,14 @@ macro_rules! namespace_declaration { self.singleton_class_id = Some(declaration_id); } + /// Clears `singleton_class_id` only if it matches `expected`. + /// Prevents accidentally clearing a pointer to a re-created singleton. + pub fn clear_singleton_class_id(&mut self, expected: &DeclarationId) { + if self.singleton_class_id.as_ref() == Some(expected) { + self.singleton_class_id = None; + } + } + pub fn singleton_class_id(&self) -> Option<&DeclarationId> { self.singleton_class_id.as_ref() } @@ -345,6 +363,18 @@ impl Declaration { all_declarations!(self, it => it.definition_ids.is_empty()) } + /// Returns true if this declaration has no backing definitions and is eligible + /// for removal. Synthetic declarations (singleton classes, bootstrap Object/Module/Class) + /// are never definition-backed — they should only be removed when their owner is removed. + #[must_use] + pub fn has_no_backing_definitions(&self, decl_id: &DeclarationId) -> bool { + let is_synthetic = matches!(self, Declaration::Namespace(Namespace::SingletonClass(_))) + || *decl_id == *super::graph::OBJECT_ID + || *decl_id == *super::graph::MODULE_ID + || *decl_id == *super::graph::CLASS_ID; + !is_synthetic && self.has_no_definitions() + } + pub fn add_definition(&mut self, definition_id: DefinitionId) { all_declarations!(self, it => { debug_assert!( @@ -578,6 +608,10 @@ impl Namespace { all_namespaces!(self, it => it.set_singleton_class_id(declaration_id)); } + pub fn clear_singleton_class_id(&mut self, expected: &DeclarationId) { + all_namespaces!(self, it => it.clear_singleton_class_id(expected)); + } + #[must_use] pub fn owner_id(&self) -> &DeclarationId { all_namespaces!(self, it => &it.owner_id) diff --git a/rust/rubydex/src/model/graph.rs b/rust/rubydex/src/model/graph.rs index b743d800..0db67238 100644 --- a/rust/rubydex/src/model/graph.rs +++ b/rust/rubydex/src/model/graph.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; use std::collections::hash_map::Entry; +use std::fmt; use std::path::PathBuf; use std::sync::LazyLock; @@ -16,6 +17,34 @@ use crate::model::references::{ConstantReference, MethodRef}; use crate::model::string_ref::StringRef; use crate::stats; +/// Events emitted by the graph when declarations are created or removed. +/// Used for debugging and tracing declaration lifecycle. +pub enum TraceEvent<'a> { + Created { + name: &'a str, + kind: &'static str, + caller: &'static str, + }, + Removed { + name: &'a str, + kind: &'static str, + caller: &'static str, + }, +} + +impl fmt::Display for TraceEvent<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TraceEvent::Created { name, kind, caller } => { + write!(f, "CREATED {name} ({kind}) by {caller}") + } + TraceEvent::Removed { name, kind, caller } => { + write!(f, "REMOVED {name} ({kind}) by {caller}") + } + } + } +} + /// An entity whose validity depends on a particular `NameId`. /// Used as the value type in the `name_dependents` reverse index. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -56,7 +85,7 @@ pub enum Unit { // The `Graph` is the global representation of the entire Ruby codebase. It contains all declarations and their // relationships -#[derive(Default, Debug)] +#[derive(Default)] pub struct Graph { // Map of declaration nodes declarations: IdentityHashMap, @@ -87,6 +116,15 @@ pub struct Graph { /// Paths to exclude from file discovery during indexing. excluded_paths: HashSet, + + /// Declarations that became empty during name cascade and may need removal. + /// Deferred until after resolution — if the declaration was repopulated, skip it. + /// Drained by `take_pending_declaration_cleanup()`. + pending_declaration_cleanup: Vec, + + /// Optional callback for tracing declaration lifecycle events (creation/removal). + #[allow(clippy::type_complexity)] + trace_fn: Option)>>, } impl Graph { @@ -117,7 +155,7 @@ impl Graph { )))), ); - Self { + let graph = Self { declarations, definitions: IdentityHashMap::default(), documents: IdentityHashMap::default(), @@ -129,6 +167,36 @@ impl Graph { name_dependents: IdentityHashMap::default(), pending_work: Vec::default(), excluded_paths: HashSet::new(), + pending_declaration_cleanup: Vec::default(), + trace_fn: None, + }; + + graph.emit_trace(TraceEvent::Created { + name: "Object", + kind: "Class", + caller: "bootstrap", + }); + graph.emit_trace(TraceEvent::Created { + name: "Module", + kind: "Class", + caller: "bootstrap", + }); + graph.emit_trace(TraceEvent::Created { + name: "Class", + kind: "Class", + caller: "bootstrap", + }); + + graph + } + + pub fn set_trace(&mut self, f: impl Fn(TraceEvent<'_>) + 'static) { + self.trace_fn = Some(Box::new(f)); + } + + pub(crate) fn emit_trace(&self, event: TraceEvent<'_>) { + if let Some(ref f) = self.trace_fn { + f(event); } } @@ -205,7 +273,14 @@ impl Graph { Entry::Vacant(vacant_entry) => { let mut declaration = constructor(fully_qualified_name); declaration.add_definition(definition_id); + let trace_name = declaration.name().to_string(); + let trace_kind = declaration.kind(); vacant_entry.insert(declaration); + self.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: trace_kind, + caller: "add_declaration", + }); } } @@ -242,7 +317,14 @@ impl Graph { let mut new_decl = constructor(name, owner_id); new_decl.as_namespace_mut().unwrap().extend(old_decl); + let trace_name = new_decl.name().to_string(); + let trace_kind = new_decl.kind(); self.declarations.insert(declaration_id, new_decl); + self.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: trace_kind, + caller: "promote", + }); } #[must_use] @@ -628,6 +710,17 @@ impl Graph { std::mem::take(&mut self.pending_work) } + /// Returns true if there are unprocessed work items from previous cycles. + #[must_use] + pub fn has_pending_work(&self) -> bool { + !self.pending_work.is_empty() + } + + /// Drains declarations that need post-resolution cleanup. + pub(crate) fn take_pending_declaration_cleanup(&mut self) -> Vec { + std::mem::take(&mut self.pending_declaration_cleanup) + } + fn push_work(&mut self, unit: Unit) { self.pending_work.push(unit); } @@ -636,6 +729,52 @@ impl Graph { self.pending_work.extend(units); } + pub(crate) fn mark_declaration_for_cleanup(&mut self, decl_id: DeclarationId) { + self.pending_declaration_cleanup.push(decl_id); + } + + /// Post-resolution cleanup: removes declarations that are still empty + /// (no definitions). Cascades to members, singletons, and descendants + /// via `invalidate_graph`. + pub(crate) fn cleanup_empty_declarations(&mut self, mut candidates: Vec) { + if candidates.is_empty() { + return; + } + candidates.sort_unstable(); + candidates.dedup(); + + let items: Vec = candidates + .into_iter() + .filter(|decl_id| { + self.declarations.get(decl_id).is_some_and(|decl| { + // Skip synthetic declarations that acquired members during + // resolution — they're actively serving as namespace parents. + // TODOs from create_todo_for_parent and singletons from + // get_or_create_singleton_class are both definition-less but + // may hold child declarations or methods. + let is_synthetic_with_members = matches!( + decl, + Declaration::Namespace(Namespace::Todo(_) | Namespace::SingletonClass(_)) + ) && decl.as_namespace().is_some_and(|ns| !ns.members().is_empty()); + let should_remove = decl.has_no_definitions() && !is_synthetic_with_members; + if should_remove { + self.emit_trace(TraceEvent::Removed { + name: decl.name(), + kind: decl.kind(), + caller: "cleanup", + }); + } + should_remove + }) + }) + .map(InvalidationItem::Declaration) + .collect(); + + if !items.is_empty() { + self.invalidate_graph(items, IdentityHashMap::default()); + } + } + /// Converts a `Resolved` `NameRef` back to `Unresolved`, preserving the original `Name` data. /// Returns the `DeclarationId` it was previously resolved to, if any. fn unresolve_name(&mut self, name_id: NameId) -> Option { @@ -1038,10 +1177,14 @@ impl Graph { .collect(); if !missed_def_ids.is_empty() { - for declaration in self.declarations.values_mut() { + for (decl_id, declaration) in &mut self.declarations { + let had_definitions = !declaration.definitions().is_empty(); for def_id in &missed_def_ids { declaration.remove_definition(def_id); } + if had_definitions && declaration.has_no_backing_definitions(decl_id) { + self.pending_declaration_cleanup.push(*decl_id); + } } } @@ -1056,8 +1199,11 @@ impl Graph { } } - /// Unified invalidation worklist. Processes declaration and name items in a single loop, + /// Invalidation worklist. Processes declaration, name, and reference items in a loop, /// where processing one item can push new items back onto the queue. + /// + /// `pending_detachments` maps declarations to definitions that need to be detached + /// before deciding whether to remove or update each declaration. fn invalidate_graph( &mut self, items: Vec, @@ -1125,10 +1271,28 @@ impl Graph { let Some(decl) = self.declarations.get(&decl_id) else { return; }; - let should_remove = decl.has_no_definitions() || !self.declarations.contains_key(decl.owner_id()); + // Empty singletons (no definitions AND no members) are speculatively created + // by get_or_create_singleton_class during ancestor linearization. They should + // be removed even though has_no_backing_definitions() skips them (synthetic). + let is_empty_singleton = matches!(decl, Declaration::Namespace(Namespace::SingletonClass(_))) + && decl.has_no_definitions() + && decl.as_namespace().is_some_and(|ns| ns.members().is_empty()); + let should_remove = is_empty_singleton + || decl.has_no_backing_definitions(&decl_id) + || !self.declarations.contains_key(decl.owner_id()); if should_remove { - // Queue members + singleton for removal + let trace_name = decl.name().to_string(); + let trace_kind = decl.kind(); + self.emit_trace(TraceEvent::Removed { + name: &trace_name, + kind: trace_kind, + caller: "invalidate", + }); + + // Remove path: definitions came from a deleted/changed file and won't + // re-resolve to this declaration. Removal must be immediate so child + // declarations see a missing owner and cascade correctly. if let Some(ns) = decl.as_namespace() { if let Some(singleton_id) = ns.singleton_class() { queue.push(InvalidationItem::Declaration(*singleton_id)); @@ -1171,6 +1335,11 @@ impl Graph { && let Some(ns) = owner.as_namespace_mut() { ns.remove_member(&unqualified_str_id); + // For singletons being removed, clear the owner's singleton_class_id + // pointer so get_or_create_singleton_class can recreate it if needed. + if is_empty_singleton { + ns.clear_singleton_class_id(&decl_id); + } } } @@ -1236,12 +1405,18 @@ impl Graph { decl.remove_definition(def_id); } + // Defer cleanup: unlike the remove path in invalidate_declaration + // (where definitions are from a deleted file and won't return), + // this definition is from a surviving file — it was re-queued and + // will likely re-resolve to the same declaration. Removing now + // would destroy accumulated state (singleton, members) that can't + // be rebuilt. Post-resolution cleanup removes it if still empty. if self .declarations .get(&old_decl_id) - .is_some_and(Declaration::has_no_definitions) + .is_some_and(|decl| decl.has_no_backing_definitions(&old_decl_id)) { - queue.push(InvalidationItem::Declaration(old_decl_id)); + self.pending_declaration_cleanup.push(old_decl_id); } } NameDependent::ChildName(_) | NameDependent::NestedName(_) => {} @@ -2341,7 +2516,8 @@ mod incremental_resolution_tests { use crate::{ assert_alias_targets_contain, assert_ancestors_eq, assert_constant_reference_to, assert_constant_reference_unresolved, assert_declaration_does_not_exist, assert_declaration_exists, - assert_declaration_references_count_eq, assert_members_eq, assert_no_constant_alias_target, + assert_declaration_references_count_eq, assert_members_eq, + assert_no_constant_alias_target, }; const NO_ANCESTORS: [&str; 0] = []; @@ -3670,6 +3846,58 @@ mod incremental_resolution_tests { assert_no_dangling_definitions(context.graph()); } + #[test] + fn singleton_class_survives_when_reopener_is_deleted() { + let mut context = GraphTest::new(); + context.index_uri("file:///foo.rb", "class Foo; def self.bar; end; end"); + context.index_uri("file:///reopener.rb", "class Foo; end"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + + context.delete_uri("file:///reopener.rb"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + } + + #[test] + fn class_inside_singleton_scope_instance_methods_only_survives_cascade() { + let mut context = GraphTest::new(); + + context.index_uri( + "file:///main.rb", + r" + module Outer + class << self + class Inner + def initialize; end + end + + def run + Inner.new + end + end + end + ", + ); + context.index_uri("file:///reopener.rb", "module Outer; end"); + context.resolve(); + + assert_declaration_exists!(context, "Outer"); + assert_declaration_exists!(context, "Outer::::Inner"); + assert_declaration_exists!(context, "Outer::::Inner::"); + + context.delete_uri("file:///reopener.rb"); + context.resolve(); + + assert_declaration_exists!(context, "Outer"); + assert_declaration_exists!(context, "Outer::::Inner"); + assert_declaration_exists!(context, "Outer::::Inner::"); + } + #[test] fn singleton_class_preserved_after_delete_and_reindex() { let mut context = GraphTest::new(); @@ -3692,27 +3920,29 @@ mod incremental_resolution_tests { } #[test] - fn singleton_recreated_when_reference_nested_in_compact_class() { + fn singleton_members_cleaned_up_after_file_deletion() { let mut context = GraphTest::new(); - - context.index_uri("file:///parent.rb", "module Parent; end"); - context.index_uri("file:///target.rb", "class Parent::Target; end"); - context.index_uri("file:///caller.rb", "class Parent::Caller; Parent::Target.new; end"); + context.index_uri( + "file:///a.rb", + " + class Foo + @x = 1 + def self.bar; end + end + ", + ); + context.index_uri("file:///b.rb", "class Foo; end"); context.resolve(); - assert_declaration_exists!(context, "Parent::Target"); - assert_declaration_exists!(context, "Parent::Target::"); - - context.delete_uri("file:///parent.rb"); - context.delete_uri("file:///target.rb"); - context.resolve(); + assert_declaration_exists!(context, "Foo::#@x"); + assert_declaration_exists!(context, "Foo::#bar()"); - context.index_uri("file:///parent.rb", "module Parent; end"); - context.index_uri("file:///target.rb", "class Parent::Target; end"); + context.delete_uri("file:///a.rb"); context.resolve(); - assert_declaration_exists!(context, "Parent::Target"); - assert_declaration_exists!(context, "Parent::Target::"); + assert_declaration_exists!(context, "Foo"); + assert_declaration_does_not_exist!(context, "Foo::#@x"); + assert_declaration_does_not_exist!(context, "Foo::#bar()"); } #[test] @@ -3738,4 +3968,222 @@ mod incremental_resolution_tests { assert_declaration_exists!(context, "Foo::#run()"); assert_declaration_exists!(context, "Foo#run()"); } + + /// Regression test derived from Rails via `--bisect --skip-file`. + /// Deleting an unrelated file triggers re-resolution that creates an orphan + /// `` declaration at the wrong scope. The bug depends on resolution + /// ordering (hash-based), so the specific definitions and `class << self` + /// block are required to trigger it. + #[test] + fn no_orphan_todo_for_compact_nested_class_after_unrelated_delete() { + let mut incremental = GraphTest::new(); + incremental.index_uri( + "file:///a.rb", + "module A; module B; class C; end; def self.x; C.x; end; end; end", + ); + incremental.index_uri( + "file:///b.rb", + "module A; module B; class C; def y; D.y; end; end; end; end", + ); + incremental.index_uri( + "file:///c.rb", + "module A; module B; class C; class D; class << self; def y; end; end; end; end; end; end", + ); + incremental.index_uri("file:///d.rb", "module A; end"); + incremental.resolve(); + + assert_declaration_exists!(incremental, "A::B::C::D"); + + incremental.delete_uri("file:///d.rb"); + incremental.resolve(); + + incremental.index_uri("file:///d.rb", "module A; end"); + incremental.resolve(); + + let mut fresh = GraphTest::new(); + fresh.index_uri( + "file:///a.rb", + "module A; module B; class C; end; def self.x; C.x; end; end; end", + ); + fresh.index_uri( + "file:///b.rb", + "module A; module B; class C; def y; D.y; end; end; end; end", + ); + fresh.index_uri( + "file:///c.rb", + "module A; module B; class C; class D; class << self; def y; end; end; end; end; end; end", + ); + fresh.index_uri("file:///d.rb", "module A; end"); + fresh.resolve(); + + let extras: Vec<_> = incremental + .graph() + .declarations() + .iter() + .filter(|(id, _)| !fresh.graph().declarations().contains_key(id)) + .map(|(_, d)| format!("{} ({})", d.name(), d.kind())) + .collect(); + + assert!( + extras.is_empty(), + "Orphan declarations after unrelated file delete: {extras:?}" + ); + } + + /// Regression test from Core via `--bisect`: compact-notation class with deep + /// nesting loses declarations after deleting and re-adding a file that defines + /// a shared ancestor. The TODO placeholders created by `create_todo_for_parent` + /// were being cleaned up by `cleanup_empty_declarations` from a previous cycle + /// even though they now hold members. + #[test] + fn compact_class_declarations_restored_after_shared_ancestor_delete_readd() { + let a = "module Google; end; class Google::Protobuf; end"; + let b = "class Google::Cloud::V2::Type < Google::Protobuf; end"; + + let mut incremental = GraphTest::new(); + incremental.index_uri("file:///a.rb", a); + incremental.index_uri("file:///b.rb", b); + incremental.resolve(); + + assert_declaration_exists!(incremental, "Google::Cloud::V2::Type"); + + incremental.delete_uri("file:///a.rb"); + incremental.resolve(); + + incremental.index_uri("file:///a.rb", a); + incremental.resolve(); + + assert_declaration_exists!(incremental, "Google"); + assert_declaration_exists!(incremental, "Google::Protobuf"); + assert_declaration_exists!(incremental, "Google::Cloud::V2::Type"); + } + + /// Bisect found 3 files needed: session.rbi (compact class with class << self), + /// errors.rb (module ActiveRecord with include), and activesupport.rbi (defines + /// the included module). The third file changes resolution ordering so the TODO + /// promotion path diverges between single and double resolve. + fn todo_promotion_fixtures() -> (&'static str, &'static str, &'static str) { + ( + // session.rbi: compact-notation class with class << self + r" + class ActiveRecord::SessionStore::Session + class << self + def new(attributes = nil); end + end + end + ", + // errors.rb: real module with include (changes resolution order) + r" + module ActiveRecord + include ActiveSupport::Deprecation::DeprecatedConstantAccessor + class ActiveRecordError < StandardError; end + end + ", + // activesupport.rbi: defines the included module chain + r" + module ActiveSupport + module Deprecation + module DeprecatedConstantAccessor; end + end + end + ", + ) + } + + /// Regression test from Core via `--bisect`: compact-notation class with + /// `class << self` creates a singleton on a TODO placeholder. Double + /// resolve must not leak extra singleton declarations. + #[test] + fn no_leaked_singleton_on_todo_after_double_resolve() { + let (session_rbi, errors_rb, activesupport_rbi) = todo_promotion_fixtures(); + + let mut incremental = GraphTest::new(); + incremental.index_uri("file:///session.rbi", session_rbi); + incremental.index_uri("file:///errors.rb", errors_rb); + incremental.index_uri("file:///activesupport.rbi", activesupport_rbi); + incremental.resolve(); + incremental.resolve(); + + let mut fresh = GraphTest::new(); + fresh.index_uri("file:///session.rbi", session_rbi); + fresh.index_uri("file:///errors.rb", errors_rb); + fresh.index_uri("file:///activesupport.rbi", activesupport_rbi); + fresh.resolve(); + + let extras: Vec<_> = incremental + .graph() + .declarations() + .iter() + .filter(|(id, _)| !fresh.graph().declarations().contains_key(id)) + .map(|(_, d)| format!("{} ({})", d.name(), d.kind())) + .collect(); + + let missing: Vec<_> = fresh + .graph() + .declarations() + .iter() + .filter(|(id, _)| !incremental.graph().declarations().contains_key(id)) + .map(|(_, d)| format!("{} ({})", d.name(), d.kind())) + .collect(); + + assert!( + extras.is_empty() && missing.is_empty(), + "Declaration mismatch after double resolve:\n Extra: {extras:?}\n Missing: {missing:?}" + ); + } + + #[test] + fn empty_singleton_removed_by_cleanup_after_member_deletion() { + // Regression: empty singletons (no definitions, no members) must be + // removed by cleanup. Previously, invalidate_declaration treated all + // SingletonClass as synthetic (has_no_backing_definitions = false), + // keeping them alive via the update path even when empty. + let mut context = GraphTest::new(); + + context.index_uri("file:///foo.rb", "class Foo; def self.bar; end; end"); + context.index_uri("file:///reopener.rb", "class Foo; end"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + + // Delete the file with the singleton method — Foo:: loses its + // member #bar(). The singleton becomes empty. + context.delete_uri("file:///foo.rb"); + context.resolve(); + + // Foo survives (reopener has a definition). The empty singleton + // should be cleaned up. + assert_declaration_exists!(context, "Foo"); + + // Re-add the file — Foo:: should be recreated with its member + context.index_uri("file:///foo.rb", "class Foo; def self.bar; end; end"); + context.resolve(); + + assert_declaration_exists!(context, "Foo"); + assert_declaration_exists!(context, "Foo::"); + } + + #[test] + fn resolved_attached_name_creates_singleton_for_promoted_todo() { + // Regression: when a NameRef::Resolved name has ParentScope::Attached + // but resolves to a non-singleton declaration (stale resolution from + // when the target was a TODO), the Resolved path must create the + // singleton. Verifies that after double-resolve, the singleton exists + // on the fully-qualified class and not on an orphaned TODO. + // + // Uses the same 3-file scenario as no_leaked_singleton_on_todo above. + let (session_rbi, errors_rb, activesupport_rbi) = todo_promotion_fixtures(); + + let mut context = GraphTest::new(); + context.index_uri("file:///session.rbi", session_rbi); + context.index_uri("file:///errors.rb", errors_rb); + context.index_uri("file:///activesupport.rbi", activesupport_rbi); + context.resolve(); + context.resolve(); + + // Singleton should be on the fully-qualified class, not an orphaned TODO + assert_declaration_exists!(context, "ActiveRecord::SessionStore::Session::"); + assert_declaration_does_not_exist!(context, "Session::"); + } } // mod incremental_resolution_tests diff --git a/rust/rubydex/src/resolution.rs b/rust/rubydex/src/resolution.rs index 7ba9f498..4eb87d8b 100644 --- a/rust/rubydex/src/resolution.rs +++ b/rust/rubydex/src/resolution.rs @@ -10,7 +10,7 @@ use crate::model::{ Namespace, SingletonClassDeclaration, TodoDeclaration, }, definitions::{Definition, Mixin, Receiver}, - graph::{CLASS_ID, Graph, MODULE_ID, OBJECT_ID, Unit}, + graph::{CLASS_ID, Graph, MODULE_ID, OBJECT_ID, TraceEvent, Unit}, identity_maps::{IdentityHashBuilder, IdentityHashMap, IdentityHashSet}, ids::{ConstantReferenceId, DeclarationId, DefinitionId, NameId, StringId}, name::{Name, NameRef, ParentScope}, @@ -83,6 +83,47 @@ impl<'a> Resolver<'a> { /// /// Can panic if there's inconsistent data in the graph pub fn resolve(&mut self) { + self.resolve_inner(); + + // After resolution + cleanup, orphan TODOs from create_todo_for_parent may + // remain. These are TODOs created during this resolve cycle for compact- + // notation qualifiers that never acquired definitions (the real namespace + // resolved elsewhere). Clean them up separately — they can't be included + // in the main cleanup pass because singletons created in the same cycle + // might not have acquired members yet. Non-TODO candidates (singletons) + // are intentionally discarded: empty singletons are caught by the + // is_empty_singleton check in invalidate_declaration during subsequent + // invalidation passes. + let current_cycle = self.graph.take_pending_declaration_cleanup(); + let todo_only: Vec<_> = current_cycle + .into_iter() + .filter(|id| { + matches!( + self.graph.declarations().get(id), + Some(Declaration::Namespace(Namespace::Todo(_))) + ) + }) + .collect(); + if !todo_only.is_empty() { + self.graph.cleanup_empty_declarations(todo_only); + } + + // Both the main cleanup (inside resolve_inner) and the TODO cleanup above + // may have cascaded — removing empty singletons, unresolving names, and + // re-queuing dependent references. Run another resolve pass to process + // the resulting pending_work and recreate declarations at their correct FQN. + if self.graph.has_pending_work() { + self.resolve_inner(); + } + } + + fn resolve_inner(&mut self) { + // Snapshot cleanup candidates from previous cycles. This includes both: + // - Declarations emptied during invalidation cascade (from unresolve_dependent_name / remove_document_data) + // - Orphan Todos from a previous resolve (from create_todo_for_parent) + // Items added during THIS resolve survive in the vec for the next cycle. + let cleanup_candidates = self.graph.take_pending_declaration_cleanup(); + let other_ids = self.prepare_units(); loop { @@ -124,6 +165,9 @@ impl<'a> Resolver<'a> { self.graph.extend_work(std::mem::take(&mut self.unit_queue)); self.handle_remaining_definitions(other_ids); + + // Post-resolution cleanup: remove declarations that are still empty. + self.graph.cleanup_empty_declarations(cleanup_candidates); } /// Resolves a single constant against the graph. This method is not meant to be used by the resolution phase, but by @@ -665,6 +709,7 @@ impl<'a> Resolver<'a> { let decl_id = DeclarationId::from(&fully_qualified_name); namespace_decl.set_singleton_class_id(decl_id); + let trace_name = fully_qualified_name.clone(); self.graph.declarations_mut().insert( decl_id, Declaration::Namespace(Namespace::SingletonClass(Box::new(SingletonClassDeclaration::new( @@ -672,6 +717,17 @@ impl<'a> Resolver<'a> { attached_id, )))), ); + self.graph.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: "SingletonClass", + caller: "get_or_create_singleton", + }); + + // Mark for post-resolution cleanup: if the singleton stays empty + // (no members added during resolution), it was created speculatively + // and should be removed. This handles singletons created during + // incremental re-resolution when ancestor chains are temporarily broken. + self.graph.mark_declaration_for_cleanup(decl_id); // Queue ancestor linearization so the singleton's ancestor chain is // built — this cascades upward via singleton_parent_id, creating @@ -1148,11 +1204,18 @@ impl<'a> Resolver<'a> { let declaration_id = DeclarationId::from(&fully_qualified_name); if let Entry::Vacant(e) = self.graph.declarations_mut().entry(declaration_id) { + let trace_name = fully_qualified_name.clone(); e.insert(Declaration::Namespace(Namespace::Todo(Box::new(TodoDeclaration::new( fully_qualified_name, parent_owner_id, ))))); self.graph.add_member(&parent_owner_id, declaration_id, parent_str_id); + self.graph.mark_declaration_for_cleanup(declaration_id); + self.graph.emit_trace(TraceEvent::Created { + name: &trace_name, + kind: "Todo", + caller: "create_todo", + }); } declaration_id @@ -1298,7 +1361,29 @@ impl<'a> Resolver<'a> { } } } - NameRef::Resolved(resolved) => Outcome::Resolved(*resolved.declaration_id(), None), + NameRef::Resolved(resolved) => { + // For Attached names that were previously resolved to a non-singleton + // (stale from a previous cycle where the target was a TODO), ensure the + // singleton exists. Without this, the Unresolved→Attached path creates + // singletons during incremental re-resolution that this Resolved path + // skips in fresh, causing incremental/fresh divergence. + if matches!(resolved.name().parent_scope(), ParentScope::Attached(_)) { + let decl_id = *resolved.declaration_id(); + let is_singleton = matches!( + self.graph.declarations().get(&decl_id), + Some(Declaration::Namespace(Namespace::SingletonClass(_))) + ); + if !is_singleton { + // The name was resolved to the target class, not its singleton. + // Create the singleton — the reference handler will update the + // resolution target via the returned Outcome. + if let Some(singleton_id) = self.get_or_create_singleton_class(decl_id) { + return Outcome::Resolved(singleton_id, Some(singleton_id)); + } + } + } + Outcome::Resolved(*resolved.declaration_id(), None) + } } }