From 586166fa63dd8f1031906e1efe0560af44a262c3 Mon Sep 17 00:00:00 2001 From: sidux Date: Fri, 3 Jul 2026 13:48:33 +0200 Subject: [PATCH 1/4] feat(index): add full workspace reference index --- config-schema.json | 4 +- docs/ARCHITECTURE.md | 8 +- docs/SETUP.md | 20 +- docs/todo/indexing.md | 35 +- src/config.rs | 23 +- src/definition/implementation.rs | 170 +++++++ src/lib.rs | 23 + src/parser/ast_update.rs | 670 +++++++++++++++----------- src/reference_index.rs | 623 ++++++++++++++++++++++++ src/references/mod.rs | 508 ++++++++++++++++--- src/references/tests.rs | 330 +++++++++++++ src/server.rs | 124 ++++- src/util.rs | 1 + tests/integration/classmap_scanner.rs | 4 +- 14 files changed, 2123 insertions(+), 420 deletions(-) create mode 100644 src/reference_index.rs diff --git a/config-schema.json b/config-schema.json index d6f8f936..c67f6e4c 100644 --- a/config-schema.json +++ b/config-schema.json @@ -43,14 +43,14 @@ "properties": { "strategy": { "type": "string", - "description": "The indexing strategy for class discovery. \"composer\" (default): use Composer's classmap, fall back to self-scan. \"self\": scan all PHP files, ignore classmap. \"full\": background-parse all files (not yet implemented). \"none\": no proactive scanning, classmap only.", + "description": "The indexing strategy for class discovery. \"full\" (default): scan PHP files, then background-parse user files to populate symbol and reference indexes. \"composer\": use Composer's classmap, fall back to self-scan. \"self\": scan all PHP files, ignore classmap. \"none\": no proactive scanning, classmap only.", "enum": [ "composer", "self", "full", "none" ], - "default": "composer" + "default": "full" } } }, diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index e80a1384..159a0b6a 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -31,9 +31,9 @@ PHPantom is built in layers. Each layer is independently useful and independentl - **Layer 1: Single file.** Parse the open file, extract classes/functions/symbols. Completion, hover, and go-to-definition work within the file with no cross-file resolution at all. - **Layer 2: On-demand resolution.** When a symbol references a class in another file, resolve it through the `fqn_uri_index` or PSR-4 and parse that file. Only the files actually needed are touched. - **Layer 3: FQN-to-URI index.** A name-to-URI index covering the whole project. Enables class name completion and O(1) cross-file lookup. Built from Composer's classmap or self-generated via a fast byte-level scan. -- **Layer 4: Full index (opt-in).** Background-parse every file in the fqn_uri_index. Enables workspace symbols, fast find-references, and rich completion item detail. +- **Layer 4: Full index (default).** Background-parse every file in the fqn_uri_index. Enables workspace symbols, fast find-references, and rich completion item detail. -Each layer builds on the one below it. A bug in the FQN index doesn't break single-file completion. A slow full index doesn't block on-demand resolution. New features can be developed and tested against the lower layers without waiting for a full project scan. This is also why PHPantom starts fast: Layer 0-2 are ready in milliseconds, Layer 3 takes seconds, and Layer 4 (when enabled) fills in over the following minute. +Each layer builds on the one below it. A bug in the FQN index doesn't break single-file completion. A slow full index doesn't block on-demand resolution. New features can be developed and tested against the lower layers without waiting for a full project scan. This is also why PHPantom starts fast: Layer 0-2 are ready in milliseconds, Layer 3 takes seconds, and Layer 4 fills in afterward. ## Module Layout @@ -687,9 +687,9 @@ Scanning is parallelised using a two-phase approach: directory walks collect fil The indexing strategy is configurable via `[indexing] strategy` in `.phpantom.toml`: -- **`"composer"`** (default) — merged classmap + self-scan. Load Composer's classmap (if it exists) as a skip set, then self-scan all PSR-4 and vendor directories for anything the classmap missed. Whatever the classmap already covers is a free performance win; whatever it's missing, we find ourselves. No completeness heuristic needed. +- **`"full"`** (default) — same discovery as `"self"`, then background-parses user PHP files to populate symbol maps and the reference candidate index. +- **`"composer"`** — merged classmap + self-scan. Load Composer's classmap (if it exists) as a skip set, then self-scan all PSR-4 and vendor directories for anything the classmap missed. Whatever the classmap already covers is a free performance win; whatever it's missing, we find ourselves. No completeness heuristic needed. - **`"self"`** — always self-scan, ignoring Composer's classmap entirely. Equivalent to the merged approach with an empty skip set. -- **`"full"`** — same as `"self"` for now; reserved for future background indexing. - **`"none"`** — no proactive scanning; uses Composer's classmap if present but never self-scans to fill gaps. The merged pipeline works in three steps: (1) load `autoload_classmap.php` into a `HashMap`, (2) collect the classmap's file paths into a `HashSet` skip set, (3) self-scan all PSR-4 and vendor directories, skipping files already in the skip set. The result is a merged index: classmap entries for everything Composer already knew about, plus self-scanned entries for everything it missed. When the classmap is complete (the common case), the self-scanner walks directories but skips every file, finishing almost instantly. When the classmap is empty or absent, it falls back to a full self-scan. When the classmap is partial (e.g. vendor classes only), vendor files are skipped and only user code is scanned. Every state of the classmap helps. diff --git a/docs/SETUP.md b/docs/SETUP.md index 6b95978e..297fedfc 100644 --- a/docs/SETUP.md +++ b/docs/SETUP.md @@ -246,33 +246,35 @@ This creates a `.phpantom.toml` in the current directory. Currently supported se [indexing] # How PHPantom discovers classes across the workspace. -# "composer" (default) - use Composer classmap, self-scan on fallback -# "self" - always self-scan, ignore Composer classmap -# "none" - no proactive scanning, Composer classmap only -# strategy = "composer" +# "full" (default) - scan PHP files and background-parse user files +# "composer" - use Composer classmap, self-scan on fallback +# "self" - always self-scan, ignore Composer classmap +# "none" - no proactive scanning, Composer classmap only +# strategy = "full" ``` The file is optional. When absent, all settings use their defaults. New settings will be added as features land. Unknown keys are silently ignored, so the file is forward-compatible. ### Indexing Strategy -By default, PHPantom trusts Composer's autoloader to determine which classes exist in your project. This is intentional: it means completions, diagnostics, and go-to-definition reflect what your code will actually see at runtime. Classes that aren't autoloadable don't appear, because using them would be an error. +By default, PHPantom builds a full workspace index: it discovers PHP files, then background-parses user files to populate symbol maps and the reference candidate index. This gives complete cross-file references, implementation lookup, and workspace-wide navigation without per-feature scanning. The `strategy` setting controls this behaviour: | Strategy | Behaviour | | --- | --- | -| `"composer"` (default) | Use Composer's classmap when available, self-scan to fill gaps. Results match what `composer dump-autoload` knows about. | +| `"full"` (default) | Scan PHP files, then background-parse user files to populate symbol and reference indexes. | +| `"composer"` | Use Composer's classmap when available, self-scan to fill gaps. Results stay closer to what `composer dump-autoload` knows about. | | `"self"` | Ignore Composer's classmap entirely and scan every PHP file in the workspace. Discovers all classes regardless of autoloading. | | `"none"` | Use only Composer's classmap with no fallback scanning. The most conservative option. | -Most projects should leave this at the default. Change it to `"self"` if your project loads classes outside of Composer (custom autoloaders, `require_once`, legacy inclusion patterns). Be aware that `"self"` will also surface vendor-internal classes and potential duplicates that Composer's autoloader would never load. +Most projects should leave this at the default. Change it to `"composer"` or `"none"` only if you want a lighter or more Composer-constrained index. ### Classes from other files are not found -PHPantom resolves cross-file classes through Composer's autoloading rules (PSR-4 mappings and the generated classmap). If a class exists in your project but PHPantom reports it as unknown, the most common causes are: +PHPantom resolves cross-file classes through the full workspace index by default. If a class exists in your project but PHPantom reports it as unknown, the most common causes are: -1. **The class isn't Composer-autoloadable.** If your project loads classes via `require_once`, `include`, or a custom autoloader alongside Composer, those classes won't be discovered by default. Set `strategy = "self"` in `.phpantom.toml` to scan all files. +1. **The file is excluded from the workspace walk.** Check ignored directories and `.gitignore` rules. If you explicitly set `strategy = "composer"` or `"none"`, classes outside Composer's autoload rules may be skipped. 2. **Composer's classmap is stale.** Run `composer dump-autoload` to regenerate it. PHPantom reads the classmap at startup. diff --git a/docs/todo/indexing.md b/docs/todo/indexing.md index fbac168f..43c4ec3b 100644 --- a/docs/todo/indexing.md +++ b/docs/todo/indexing.md @@ -67,20 +67,26 @@ Four indexing strategies, selectable via `.phpantom.toml`: ```toml [indexing] -# "composer" (default) - merged classmap + self-scan -# "self" - always self-scan, ignore composer classmap -# "full" - background-parse all project files for rich intelligence -# "none" - no proactive scanning -strategy = "composer" +# "full" (default) - background-parse all project files for rich intelligence +# "composer" - merged classmap + self-scan +# "self" - always self-scan, ignore composer classmap +# "none" - no proactive scanning +strategy = "full" ``` -### `"composer"` (default) +### `"full"` (default) + +Background-parse user PHP files for rich intelligence after discovery. +This is the zero-config experience and populates the symbol/reference +indexes used by workspace-wide navigation. + +### `"composer"` Merged classmap + self-scan. Load Composer's classmap (if it exists) as a skip set, then self-scan all PSR-4 and vendor directories for anything the classmap missed. Whatever the classmap already covers is a free performance win; whatever it's missing, we find ourselves. No -completeness heuristic needed. This is the zero-config experience. +completeness heuristic needed. ### `"self"` @@ -246,7 +252,8 @@ scanning, and complete completion item detail. ### Trigger -When `strategy = "full"` is set in `.phpantom.toml`. +By default. Users can opt out with `strategy = "composer"`, `"self"`, or +`"none"` in `.phpantom.toml`. ### Design: self + second pass @@ -299,9 +306,9 @@ Each stage improves on the last without blocking the previous one. Currently we store `ClassInfo`, `FunctionInfo`, and `SymbolMap` structs that are not as lean as they could be. For a 21K-file -codebase, full indexing will use meaningful RAM. This is acceptable -because it's an opt-in mode, but we should profile and trim struct -sizes over time. The aim is to stay under 512 MB for a full project. +codebase, full indexing will use meaningful RAM. Since full indexing is +the default, we should profile and trim struct sizes over time. The aim +is to stay under 512 MB for a full project. The performance prerequisites above (P1 `Arc`, `Arc`, `Arc`) directly reduce memory usage by @@ -315,11 +322,9 @@ With the full index populated, `workspace/symbol` becomes a simple filter over the uri_classes_index and global_functions maps. No additional infrastructure needed. -In other modes, workspace symbols still works but only returns results +When full indexing is disabled, workspace symbols still works but only returns results from already-parsed files (opened files, on-demand resolutions, stubs). -When the user invokes workspace symbols outside of full mode, show a -one-time hint suggesting they enable `strategy = "full"` in -`.phpantom.toml` for complete coverage. +Complete coverage requires the default `strategy = "full"`. --- diff --git a/src/config.rs b/src/config.rs index ef794244..7ee8fad1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -301,13 +301,14 @@ impl PhpcsConfig { pub struct IndexingConfig { /// The indexing strategy. /// - /// - `"composer"` (default) — use Composer's classmap when available, + /// - `"full"` (default) — same discovery as `"self"`, then + /// background-parse every user PHP file to populate symbol and + /// reference indexes. + /// - `"composer"` — use Composer's classmap when available, /// fall back to self-scan when it is missing or incomplete. /// - `"self"` — scan every PHP file under the workspace root, /// ignoring Composer's generated classmap and PSR-4 mappings. /// Vendor packages are still scanned via `installed.json`. - /// - `"full"` — background-parse every PHP file for rich intelligence - /// (not yet implemented, treated as `"self"` for now). /// - `"none"` — no proactive scanning. Still uses Composer's classmap /// if present, still resolves on demand, but never falls back to /// self-scan. @@ -323,20 +324,20 @@ impl IndexingConfig { /// The indexing strategy that controls class discovery behaviour. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum IndexingStrategy { + /// Background-parse every PHP file for rich intelligence. + #[default] + Full, /// Merged classmap + self-scan. Load Composer's classmap (if it /// exists) as a skip set, then self-scan all PSR-4 and vendor /// directories for anything the classmap missed. Whatever the /// classmap already covers is a free performance win; whatever it's /// missing, we find ourselves. No completeness heuristic needed. - #[default] Composer, /// Scan every PHP file under the workspace root, ignoring /// Composer's generated classmap and PSR-4 mappings entirely. /// The vendor directory is scanned separately (via /// `installed.json`) since it is typically gitignored. SelfScan, - /// Background-parse every PHP file for rich intelligence. - Full, /// No proactive scanning. Uses Composer's classmap if present but /// never self-scans to fill gaps. None, @@ -546,7 +547,7 @@ mod tests { assert!(!config.diagnostics.unresolved_member_access_enabled()); assert!(!config.diagnostics.extra_arguments_enabled()); assert!(!config.diagnostics.report_magic_properties_enabled()); - assert_eq!(config.indexing.strategy(), IndexingStrategy::Composer); + assert_eq!(config.indexing.strategy(), IndexingStrategy::Full); assert!(config.formatting.php_cs_fixer.is_none()); assert!(config.formatting.phpcbf.is_none()); assert!(config.formatting.timeout.is_none()); @@ -575,7 +576,7 @@ mod tests { assert!(!config.diagnostics.unresolved_member_access_enabled()); assert!(!config.diagnostics.extra_arguments_enabled()); assert!(!config.diagnostics.report_magic_properties_enabled()); - assert_eq!(config.indexing.strategy(), IndexingStrategy::Composer); + assert_eq!(config.indexing.strategy(), IndexingStrategy::Full); assert!(config.formatting.php_cs_fixer.is_none()); assert!(config.formatting.phpcbf.is_none()); assert!(config.phpstan.command.is_none()); @@ -593,7 +594,7 @@ mod tests { assert!(!config.diagnostics.unresolved_member_access_enabled()); assert!(!config.diagnostics.extra_arguments_enabled()); assert!(!config.diagnostics.report_magic_properties_enabled()); - assert_eq!(config.indexing.strategy(), IndexingStrategy::Composer); + assert_eq!(config.indexing.strategy(), IndexingStrategy::Full); assert!(config.formatting.php_cs_fixer.is_none()); assert!(config.formatting.phpcbf.is_none()); assert!(config.phpstan.command.is_none()); @@ -865,12 +866,12 @@ analyze-timeout = 45000 } #[test] - fn indexing_strategy_defaults_to_composer() { + fn indexing_strategy_defaults_to_full() { let dir = tempfile::tempdir().unwrap(); let path = dir.path().join(CONFIG_FILE_NAME); std::fs::write(&path, "[indexing]\n").unwrap(); let config = load_config(dir.path()).unwrap(); - assert_eq!(config.indexing.strategy(), IndexingStrategy::Composer); + assert_eq!(config.indexing.strategy(), IndexingStrategy::Full); } #[test] diff --git a/src/definition/implementation.rs b/src/definition/implementation.rs index 1ccfc7d8..de4b345d 100644 --- a/src/definition/implementation.rs +++ b/src/definition/implementation.rs @@ -31,6 +31,7 @@ use std::path::PathBuf; /// walk the class's interfaces and parent abstract classes to find the /// prototype method declaration and return its location. use std::sync::Arc; +use std::sync::atomic::Ordering; use tower_lsp::lsp_types::*; @@ -38,6 +39,7 @@ use super::member::MemberKind; use super::point_location; use crate::Backend; use crate::completion::resolver::ResolutionCtx; +use crate::config::IndexingStrategy; use crate::symbol_map::{SelfStaticParentKind, SymbolKind}; use crate::types::{ClassInfo, ClassLikeKind, FileContext, MAX_INHERITANCE_DEPTH, ResolvedType}; use crate::util::{collect_php_files, find_class_at_offset, position_to_offset, short_name}; @@ -626,6 +628,14 @@ impl Backend { let mut result: Vec = Vec::new(); // Track by FQN to avoid short-name collisions across namespaces. let mut seen_fqns: HashSet = HashSet::new(); + let workspace_index_ready = if self.config().indexing.strategy() == IndexingStrategy::Full { + if !self.workspace_indexed.load(Ordering::Acquire) { + self.ensure_workspace_indexed(); + } + self.workspace_indexed.load(Ordering::Acquire) + } else { + self.workspace_indexed.load(Ordering::Acquire) + }; // ── Phase 1: GTI index lookup ─────────────────────────────────── // Use the reverse inheritance index for O(1) lookup of classes @@ -673,6 +683,10 @@ impl Backend { } } + if workspace_index_ready { + return result; + } + // ── Phase 2: scan fqn_uri_index for classes not yet in uri_classes_index ──── let index_entries: Vec<(String, String)> = { let idx = self.fqn_uri_index.read(); @@ -1128,3 +1142,159 @@ impl Backend { Some(point_location(parsed_uri, position)) } } + +#[cfg(test)] +mod tests { + use std::fs; + + use tower_lsp::lsp_types::{Position, Url}; + + use super::*; + use crate::config::{Config, IndexingStrategy}; + + #[test] + fn full_indexed_implementation_uses_gti_without_vendor_fallback() { + let dir = tempfile::tempdir().expect("temp dir"); + let src = dir.path().join("src"); + let vendor = dir.path().join("vendor"); + fs::create_dir_all(src.join("Contracts")).expect("src contracts dir"); + fs::create_dir_all(src.join("Impl")).expect("src impl dir"); + fs::create_dir_all(vendor.join("Pkg")).expect("vendor pkg dir"); + + let interface_php = concat!( + ">>>, + /// Cross-file candidate index for find-references. + /// + /// Maintained from each file's [`symbol_maps`] entry during parsing. + /// It is deliberately coarse: reference scanners use it only to narrow + /// candidate files, then run their existing semantic checks for aliases, + /// inheritance, Laravel declarations, and `self/static/parent`. + pub(crate) reference_index: reference_index::ReferenceIndex, /// Per-file parse errors from the Mago parser. /// /// Each entry is `(message, start_byte_offset, end_byte_offset)`. @@ -666,6 +674,12 @@ pub struct Backend { /// files, but the flag lets us log the difference between initial and /// refresh scans. pub(crate) workspace_indexed: Arc, + /// Serializes whole-workspace indexing so a foreground request does not + /// duplicate the background full-index parse. + pub(crate) workspace_index_lock: Arc>, + /// Prevents duplicate background full-index tasks when initialization and + /// a request both race to parse the whole workspace. + pub(crate) full_index_in_progress: Arc, } /// Request-coalescing state for expensive whole-file requests (semantic @@ -750,6 +764,7 @@ impl Backend { open_files: Arc::new(RwLock::new(HashMap::new())), uri_classes_index: Arc::new(RwLock::new(HashMap::new())), symbol_maps: Arc::new(RwLock::new(HashMap::new())), + reference_index: reference_index::new_reference_index(), parse_errors: Arc::new(RwLock::new(HashMap::new())), did_change_parse_locks: Arc::new(Mutex::new(HashMap::new())), whole_file_coalesce: Arc::new(WholeFileCoalesce::default()), @@ -814,6 +829,8 @@ impl Backend { blade_source_maps: Arc::new(RwLock::new(HashMap::new())), blade_uris: Arc::new(RwLock::new(std::collections::HashSet::new())), workspace_indexed: Arc::new(std::sync::atomic::AtomicBool::new(false)), + workspace_index_lock: Arc::new(Mutex::new(())), + full_index_in_progress: Arc::new(std::sync::atomic::AtomicBool::new(false)), sync_ast_updates: false, } } @@ -832,6 +849,7 @@ impl Backend { open_files: Arc::new(RwLock::new(HashMap::new())), uri_classes_index: Arc::new(RwLock::new(HashMap::new())), symbol_maps: Arc::new(RwLock::new(HashMap::new())), + reference_index: reference_index::new_reference_index(), parse_errors: Arc::new(RwLock::new(HashMap::new())), did_change_parse_locks: Arc::new(Mutex::new(HashMap::new())), whole_file_coalesce: Arc::new(WholeFileCoalesce::default()), @@ -895,6 +913,8 @@ impl Backend { blade_source_maps: Arc::new(RwLock::new(HashMap::new())), blade_uris: Arc::new(RwLock::new(std::collections::HashSet::new())), workspace_indexed: Arc::new(std::sync::atomic::AtomicBool::new(false)), + workspace_index_lock: Arc::new(Mutex::new(())), + full_index_in_progress: Arc::new(std::sync::atomic::AtomicBool::new(false)), sync_ast_updates: true, } } @@ -1327,6 +1347,7 @@ impl Backend { open_files: Arc::clone(&self.open_files), uri_classes_index: Arc::clone(&self.uri_classes_index), symbol_maps: Arc::clone(&self.symbol_maps), + reference_index: Arc::clone(&self.reference_index), parse_errors: Arc::clone(&self.parse_errors), did_change_parse_locks: Arc::clone(&self.did_change_parse_locks), whole_file_coalesce: Arc::clone(&self.whole_file_coalesce), @@ -1393,6 +1414,8 @@ impl Backend { blade_source_maps: Arc::clone(&self.blade_source_maps), blade_uris: Arc::clone(&self.blade_uris), workspace_indexed: Arc::clone(&self.workspace_indexed), + workspace_index_lock: Arc::clone(&self.workspace_index_lock), + full_index_in_progress: Arc::clone(&self.full_index_in_progress), sync_ast_updates: self.sync_ast_updates, } } diff --git a/src/parser/ast_update.rs b/src/parser/ast_update.rs index c827fe92..c51cbc5f 100644 --- a/src/parser/ast_update.rs +++ b/src/parser/ast_update.rs @@ -10,10 +10,12 @@ use std::cell::RefCell; use std::collections::HashMap; use std::sync::Arc; +use crate::ParseErrorEntry; use crate::atom::{Atom, atom, bytes_to_str}; +use crate::names::OwnedResolvedNames; use crate::php_type::PhpType; -use crate::symbol_map::extract_symbol_map; -use crate::types::TypeAliasDef; +use crate::symbol_map::{SymbolMap, extract_symbol_map}; +use crate::types::{ClassInfo, DefineInfo, FunctionInfo, NamespaceSpan, TypeAliasDef}; use bumpalo::Bump; @@ -22,7 +24,6 @@ use mago_syntax::ast::*; use mago_syntax::parser::parse_file_content; use crate::Backend; -use crate::types::ClassInfo; use super::DocblockCtx; @@ -55,6 +56,33 @@ fn with_reusable_arena(f: impl FnOnce(&Bump) -> R) -> R { }) } +pub(crate) enum AstIndexParseResult { + Update(AstIndexUpdate), + ParseFailed { + uri: String, + errors: Vec, + }, +} + +pub(crate) struct AstIndexUpdate { + uri: String, + parse_errors: Vec, + classes: Vec, + use_map: HashMap, + resolved_names: Arc, + namespace_spans: Vec, + functions: Vec, + defines: Vec<(String, DefineInfo)>, + symbol_map: Arc, +} + +fn class_info_fqn(class: &ClassInfo) -> String { + match &class.file_namespace { + Some(ns) if !ns.is_empty() => format!("{}\\{}", ns, class.name), + _ => class.name.to_string(), + } +} + impl Backend { /// Update the uri_classes_index, use_map, and namespace_map for a given file URI /// by parsing its content. @@ -110,12 +138,59 @@ impl Backend { } } - /// Inner implementation of [`update_ast`] that performs the actual - /// parsing and map updates. Separated so that [`update_ast`] can - /// wrap the call in [`std::panic::catch_unwind`]. - /// - /// Returns `true` when at least one class signature changed. + /// Inner implementation of [`update_ast`] that performs the actual parse + /// and publishes the resulting single-file update. fn update_ast_inner(&self, uri: &str, content: &str) -> bool { + let update = self.build_ast_index_update(uri, content); + self.apply_ast_index_updates_batch(vec![update]) + } + + pub(crate) fn parse_ast_index_update_for_index( + &self, + uri: &str, + content: &str, + ) -> AstIndexParseResult { + let uri_owned = uri.to_string(); + + match crate::util::catch_panic_unwind_safe("parse", uri, None, || { + self.build_ast_index_update(uri, content) + }) { + Some(update) => AstIndexParseResult::Update(update), + None => AstIndexParseResult::ParseFailed { + uri: uri_owned, + errors: vec![("Parse failed (internal error)".to_string(), 0, 0)], + }, + } + } + + pub(crate) fn apply_ast_index_parse_results_batch( + &self, + results: Vec, + ) -> bool { + if results.is_empty() { + return false; + } + + let mut updates = Vec::new(); + let mut failures = Vec::new(); + for result in results { + match result { + AstIndexParseResult::Update(update) => updates.push(update), + AstIndexParseResult::ParseFailed { uri, errors } => failures.push((uri, errors)), + } + } + + if !failures.is_empty() { + let mut parse_errors = self.parse_errors.write(); + for (uri, errors) in failures { + parse_errors.insert(uri, errors); + } + } + + self.apply_ast_index_updates_batch(updates) + } + + fn build_ast_index_update(&self, uri: &str, content: &str) -> AstIndexUpdate { with_reusable_arena(|arena| { let file_id = mago_database::file::FileId::new(b"input.php"); let program = parse_file_content(arena, file_id, content.as_bytes()); @@ -127,28 +202,20 @@ impl Backend { // the arena drop. let name_resolver = mago_names::resolver::NameResolver::new(arena); let mago_resolved = name_resolver.resolve(program); - let owned_resolved = crate::names::OwnedResolvedNames::from_resolved(&mago_resolved); - - // Cache parse errors for the syntax-error diagnostic collector. - // Extract (message, start_byte, end_byte) tuples from the - // arena-allocated errors before the arena is dropped. - { - use mago_span::HasSpan; + let owned_resolved = OwnedResolvedNames::from_resolved(&mago_resolved); - let errors: Vec<(String, u32, u32)> = program - .errors - .iter() - .map(|e| { - let span = e.span(); - ( - super::error_format::format_parse_error(e), - span.start.offset, - span.end.offset, - ) - }) - .collect(); - self.parse_errors.write().insert(uri.to_string(), errors); - } + let parse_errors: Vec = program + .errors + .iter() + .map(|e| { + let span = e.span(); + ( + super::error_format::format_parse_error(e), + span.start.offset, + span.end.offset, + ) + }) + .collect(); let doc_ctx = DocblockCtx { trivias: program.trivia.as_slice(), @@ -169,7 +236,7 @@ impl Backend { let mut classes_with_ns: Vec<(ClassInfo, Option)> = Vec::new(); let mut use_map = HashMap::new(); let mut namespace: Option = None; - let mut namespace_spans: Vec = Vec::new(); + let mut namespace_spans: Vec = Vec::new(); for statement in program.statements.iter() { match statement { @@ -186,7 +253,7 @@ impl Backend { // Record the byte span of this namespace block. let ns_span = ns.span(); - namespace_spans.push(crate::types::NamespaceSpan { + namespace_spans.push(NamespaceSpan { namespace: block_ns.clone(), start: ns_span.start.offset, end: ns_span.end.offset, @@ -372,38 +439,6 @@ impl Backend { } } } - - let mut fmap = self.global_functions.write(); - for func_info in functions { - let fqn = if let Some(ref ns) = func_info.namespace { - format!("{}\\{}", ns, &func_info.name) - } else { - func_info.name.to_string() - }; - - // Skip polyfill functions when a native stub exists. - // Libraries like Laravel wrap helpers such as - // `str_contains` in `if (! function_exists('…'))` guards - // and mark them `@deprecated`. On the configured PHP - // version the native function exists, so the guard is - // never entered and the polyfill is dead code. Letting - // the stub win ensures the correct signature, return - // type, and deprecation status are used everywhere - // (hover, completion, diagnostics). - if func_info.is_polyfill - && self.stub_function_index.read().contains_key(fqn.as_str()) - { - continue; - } - - // Insert under the FQN only. For namespaced functions - // the FQN is `Namespace\name`; for global functions it - // is just the bare name. `resolve_function_name` already - // builds namespace-qualified candidates, so a short-name - // fallback entry is unnecessary and would cause collisions - // when two namespaces define the same short name. - fmap.insert(fqn, (uri.to_string(), func_info)); - } } // Extract define() constants from the already-parsed AST and @@ -416,17 +451,19 @@ impl Backend { &mut define_entries, content, ); - if !define_entries.is_empty() { - let mut dmap = self.global_defines.write(); - for (name, offset, value) in define_entries { - dmap.entry(name) - .or_insert_with(|| crate::types::DefineInfo { + let defines: Vec<(String, DefineInfo)> = define_entries + .into_iter() + .map(|(name, offset, value)| { + ( + name, + DefineInfo { file_uri: uri.to_string(), name_offset: offset, value, - }); - } - } + }, + ) + }) + .collect(); // Post-process: resolve parent_class short names to fully-qualified // names using the file's use_map and each class's own namespace so @@ -483,260 +520,294 @@ impl Backend { }) .collect(); - let uri_string = uri.to_string(); - - // Collect old ClassInfo values (not just FQNs) before the uri_classes_index - // entry is overwritten. These are compared against the new classes - // using `signature_eq` to decide whether each FQN's cache entry - // actually needs eviction (signature-level cache invalidation). - let old_classes_snapshot: Vec = self - .uri_classes_index - .read() - .get(&uri_string) - .map(|v| { - v.iter() - .map(|c| crate::types::ClassInfo::clone(c)) - .collect() - }) - .unwrap_or_default(); - let old_fqns: Vec = old_classes_snapshot + // Build the precomputed symbol map while the AST is still alive. + // This must happen before the `Program` (and its arena) are dropped. + let symbol_map = Arc::new(extract_symbol_map(program, content)); + + // For files without any explicit namespace blocks, synthesize a + // single span covering the entire file with the detected namespace + // (which will be None for files without namespace declarations). + if namespace_spans.is_empty() { + namespace_spans.push(NamespaceSpan { + namespace: namespace.clone(), + start: 0, + end: content.len() as u32, + }); + } + + AstIndexUpdate { + uri: uri.to_string(), + parse_errors, + classes, + use_map, + resolved_names: Arc::new(owned_resolved), + namespace_spans, + functions, + defines, + symbol_map, + } + }) + } + + pub(crate) fn apply_ast_index_updates_batch(&self, updates: Vec) -> bool { + if updates.is_empty() { + return false; + } + + struct PreparedAstIndexUpdate { + uri: String, + parse_errors: Vec, + old_classes: Vec, + old_fqns: Vec, + new_fqns: Vec, + classes: Vec>, + use_map: HashMap, + resolved_names: Arc, + namespace_spans: Vec, + functions: Vec, + defines: Vec<(String, DefineInfo)>, + symbol_map: Arc, + } + + let old_classes_by_update: Vec> = { + let uri_classes = self.uri_classes_index.read(); + updates .iter() - .filter(|c| !c.name.starts_with("__anonymous@")) - .map(|c| match &c.file_namespace { - Some(ns) if !ns.is_empty() => format!("{}\\{}", ns, c.name), - _ => c.name.to_string(), + .map(|update| { + uri_classes + .get(&update.uri) + .map(|classes| { + classes + .iter() + .map(|class| ClassInfo::clone(class)) + .collect() + }) + .unwrap_or_default() }) + .collect() + }; + + let mut prepared = Vec::with_capacity(updates.len()); + let mut all_old_fqns = Vec::new(); + let mut all_new_fqns = Vec::new(); + let mut all_classes = Vec::new(); + + for (update, old_classes) in updates.into_iter().zip(old_classes_by_update) { + let old_fqns: Vec = old_classes + .iter() + .filter(|class| !class.name.starts_with("__anonymous@")) + .map(class_info_fqn) + .collect(); + let classes: Vec> = update.classes.into_iter().map(Arc::new).collect(); + let new_fqns: Vec = classes + .iter() + .filter(|class| !class.name.starts_with("__anonymous@")) + .map(|class| class.fqn().to_string()) .collect(); - // Populate the fqn_uri_index with FQN → URI mappings for every class - // found in this file. This enables reliable lookup of classes that - // don't follow PSR-4 conventions (e.g. classes defined in Composer - // autoload_files.php entries). - // - // Uses the per-class namespace (not the file-level namespace) so - // that files with multiple namespace blocks produce correct FQNs. - { - let mut idx = self.fqn_uri_index.write(); - let mut fqn_idx = self.fqn_class_index.write(); - // Remove stale entries from previous parses of this file. - // When a file's namespace changes (e.g. while the user is - // typing a namespace declaration), old FQNs linger under - // the previous namespace and pollute completions. - // - // Use targeted removes via old_fqns instead of a full - // retain() scan — O(old_classes) ~ O(1) vs O(fqn_uri_index). - for old_fqn in &old_fqns { - idx.remove(old_fqn); - fqn_idx.remove(old_fqn); - } + all_old_fqns.extend(old_fqns.iter().cloned()); + all_new_fqns.extend(new_fqns.iter().cloned()); + all_classes.extend(classes.iter().cloned()); + + prepared.push(PreparedAstIndexUpdate { + uri: update.uri, + parse_errors: update.parse_errors, + old_classes, + old_fqns, + new_fqns, + classes, + use_map: update.use_map, + resolved_names: update.resolved_names, + namespace_spans: update.namespace_spans, + functions: update.functions, + defines: update.defines, + symbol_map: update.symbol_map, + }); + } + + all_old_fqns.sort(); + all_old_fqns.dedup(); + all_new_fqns.sort(); + all_new_fqns.dedup(); - for (i, (class, class_ns)) in classes_with_ns.iter().enumerate() { - // Anonymous classes (named `__anonymous@`) are - // internal bookkeeping — they should never appear in - // cross-file lookups or completion results. + { + let mut parse_errors = self.parse_errors.write(); + for update in &mut prepared { + parse_errors.insert(update.uri.clone(), std::mem::take(&mut update.parse_errors)); + } + } + + { + let mut idx = self.fqn_uri_index.write(); + let mut fqn_idx = self.fqn_class_index.write(); + + for old_fqn in &all_old_fqns { + idx.remove(old_fqn); + fqn_idx.remove(old_fqn); + } + + for update in &prepared { + for class in &update.classes { if class.name.starts_with("__anonymous@") { continue; } - let fqn = if let Some(ns) = class_ns { - format!("{}\\{}", ns, &class.name) - } else { - class.name.to_string() - }; - idx.insert(fqn.clone(), uri_string.clone()); - // The `classes` vec already has `file_namespace` set, - // so use it for the fqn_index entry. - fqn_idx.insert(fqn, Arc::new(classes[i].clone())); + let fqn = class.fqn().to_string(); + idx.insert(fqn.clone(), update.uri.clone()); + fqn_idx.insert(fqn, Arc::clone(class)); } } + } - // Remove newly-discovered FQNs from the negative-result cache - // so classes that just became available are not suppressed. - { - let nf_cache = self.class_not_found_cache.read(); - if !nf_cache.is_empty() { - drop(nf_cache); - let mut nf_cache = self.class_not_found_cache.write(); - for (class, class_ns) in &classes_with_ns { - if class.name.starts_with("__anonymous@") { - continue; - } - let fqn = match class_ns { - Some(ns) if !ns.is_empty() => format!("{}\\{}", ns, class.name), - _ => class.name.to_string(), - }; - nf_cache.remove(&fqn); - } + { + let nf_cache = self.class_not_found_cache.read(); + if !nf_cache.is_empty() { + drop(nf_cache); + let mut nf_cache = self.class_not_found_cache.write(); + for fqn in &all_new_fqns { + nf_cache.remove(fqn); } } + } - // Build the precomputed symbol map while the AST is still alive. - // This must happen before the `Program` (and its arena) are dropped. - let symbol_map = std::sync::Arc::new(extract_symbol_map(program, content)); + { + let mut fmap = self.global_functions.write(); + for update in &mut prepared { + for func_info in std::mem::take(&mut update.functions) { + let fqn = if let Some(ref ns) = func_info.namespace { + format!("{}\\{}", ns, &func_info.name) + } else { + func_info.name.to_string() + }; - self.uri_classes_index.write().insert( - uri_string.clone(), - classes.into_iter().map(Arc::new).collect(), - ); - self.parsed_uris.write().insert(uri_string.clone()); - - // Populate the global method store for O(1) method lookup. - self.evict_methods_for_fqns(&old_fqns); - self.evict_gti_for_fqns(&old_fqns); - if let Some(arc_classes) = self.uri_classes_index.read().get(&uri_string) { - self.populate_method_store(arc_classes); - self.populate_gti_index(arc_classes); + if func_info.is_polyfill + && self.stub_function_index.read().contains_key(fqn.as_str()) + { + continue; + } + + fmap.insert(fqn, (update.uri.clone(), func_info)); + } } + } - self.symbol_maps - .write() - .insert(uri_string.clone(), symbol_map); - self.file_imports - .write() - .insert(uri_string.clone(), use_map); - self.resolved_names - .write() - .insert(uri_string.clone(), Arc::new(owned_resolved)); - // For files without any explicit namespace blocks, synthesize a - // single span covering the entire file with the detected namespace - // (which will be None for files without namespace declarations). - if namespace_spans.is_empty() { - namespace_spans.push(crate::types::NamespaceSpan { - namespace: namespace.clone(), - start: 0, - end: content.len() as u32, - }); + { + let mut dmap = self.global_defines.write(); + for update in &mut prepared { + for (name, define) in std::mem::take(&mut update.defines) { + dmap.entry(name).or_insert(define); + } } - self.file_namespaces - .write() - .insert(uri_string, namespace_spans); + } - // Selectively invalidate the resolved-class cache with - // signature-level granularity. - // - // Instead of evicting every FQN defined in this file on every - // keystroke, compare the old and new ClassInfo values using - // `signature_eq`. When the signature has not changed (the - // overwhelmingly common case during normal editing inside a - // method body), the cache entry is kept warm. - // - // FQNs that only appear in the old set (renamed/removed classes) - // or only in the new set (newly added classes) are always evicted. - // FQNs present in both sets are evicted only when their signature - // differs. - // - // `evict_fqn` transitively evicts dependents (classes that - // extend/use/implement/mixin the changed class) so that - // cached child classes don't serve stale inherited members. - // - // **First-parse fast path**: when `old_fqns` is empty the file - // has never been parsed by `update_ast` before. There are no - // stale cache entries to evict — any existing cache entries for - // these FQNs were populated by legitimate resolution paths - // (classmap / PSR-4 / stubs) reading the same on-disk content. - // Skipping eviction here eliminates the O(N²) cost of calling - // `evict_fqn` (which does a full cache scan + transitive - // dependent cascade) for every class during bulk operations - // like `analyse`. - let mut any_signature_changed = false; - let mut evicted_fqns: Vec = Vec::new(); - - if !old_fqns.is_empty() { - let mut cache = self.resolved_class_cache.write(); - // Collect new FQNs from the classes we just parsed. - let new_fqns: Vec = classes_with_ns - .iter() - .filter(|(c, _)| !c.name.starts_with("__anonymous@")) - .map(|(c, ns)| match ns { - Some(ns) if !ns.is_empty() => format!("{}\\{}", ns, c.name), - _ => c.name.to_string(), - }) - .collect(); + self.evict_methods_for_fqns(&all_old_fqns); + self.evict_gti_for_fqns(&all_old_fqns); + self.populate_method_store(&all_classes); + self.populate_gti_index(&all_classes); + + // Selectively invalidate the resolved-class cache with + // signature-level granularity. Full indexing usually hits the + // first-parse fast path (`old_fqns` is empty), so this stays cheap + // during background indexing while preserving edit-time semantics. + let mut any_signature_changed = false; + let mut evicted_fqns = Vec::new(); + { + let mut cache = self.resolved_class_cache.write(); + for update in &prepared { + if update.old_fqns.is_empty() { + continue; + } - // Evict old FQNs that no longer exist (renames / removals), - // or whose signature changed. - for (i, fqn) in old_fqns.iter().enumerate() { - let old_cls = &old_classes_snapshot[old_classes_snapshot + for fqn in &update.old_fqns { + let old_cls = update + .old_classes .iter() - .position(|c| { - !c.name.starts_with("__anonymous@") && { - let f = match &c.file_namespace { - Some(ns) if !ns.is_empty() => { - format!("{}\\{}", ns, c.name) - } - _ => c.name.to_string(), - }; - f == *fqn - } - }) - .unwrap_or(i)]; - - // Find the matching new class by FQN. - let new_cls = classes_with_ns.iter().find(|(c, ns)| { - !c.name.starts_with("__anonymous@") && { - let f = match ns { - Some(ns) if !ns.is_empty() => format!("{}\\{}", ns, c.name), - _ => c.name.to_string(), - }; - f == *fqn - } - }); + .find(|class| class_info_fqn(class) == *fqn); + let new_cls = update + .classes + .iter() + .find(|class| class.fqn().as_str() == fqn); - match new_cls { - Some((new, _)) if old_cls.signature_eq(new) => { - // Signature unchanged — keep the cache entry warm. - } + match (old_cls, new_cls) { + (Some(old), Some(new)) if old.signature_eq(new) => {} _ => { - // Signature changed or class was removed — evict. - let evicted = crate::virtual_members::evict_fqn(&mut cache, fqn); - evicted_fqns.extend(evicted); + evicted_fqns.extend(crate::virtual_members::evict_fqn(&mut cache, fqn)); any_signature_changed = true; } } } - // Evict new FQNs that did not exist before (new classes). - for fqn in &new_fqns { - if !old_fqns.contains(fqn) { - let evicted = crate::virtual_members::evict_fqn(&mut cache, fqn); - evicted_fqns.extend(evicted); + for fqn in &update.new_fqns { + if !update.old_fqns.contains(fqn) { + evicted_fqns.extend(crate::virtual_members::evict_fqn(&mut cache, fqn)); any_signature_changed = true; } } } + } + evicted_fqns.sort(); + evicted_fqns.dedup(); + + { + let mut uri_classes = self.uri_classes_index.write(); + let mut parsed_uris = self.parsed_uris.write(); + for update in &mut prepared { + uri_classes.insert(update.uri.clone(), std::mem::take(&mut update.classes)); + parsed_uris.insert(update.uri.clone()); + } + } - // Dedup evicted FQNs before repopulation. - evicted_fqns.sort(); - evicted_fqns.dedup(); - - // ── ER4: Eagerly re-populate evicted classes ───────────────── - if !evicted_fqns.is_empty() { - // Toposort just the evicted subset using their current - // (just-parsed) ClassInfo from uri_classes_index. - let sorted = { - let uri_classes = self.uri_classes_index.read(); - let iter = uri_classes - .values() - .flat_map(|classes| classes.iter()) - .filter(|c| evicted_fqns.contains(&c.fqn().to_string())) - .map(|c| (c.fqn().to_string(), c.as_ref())); - crate::toposort::toposort_classes(iter) - }; - - let class_loader = - |name: &str| -> Option> { self.find_or_load_class(name) }; - crate::virtual_members::populate_from_sorted( - &sorted, - &self.resolved_class_cache, - &class_loader, + { + let mut imports = self.file_imports.write(); + let mut resolved_names = self.resolved_names.write(); + let mut namespaces = self.file_namespaces.write(); + for update in &mut prepared { + imports.insert(update.uri.clone(), std::mem::take(&mut update.use_map)); + resolved_names.insert(update.uri.clone(), Arc::clone(&update.resolved_names)); + namespaces.insert( + update.uri.clone(), + std::mem::take(&mut update.namespace_spans), ); } + } + + if !evicted_fqns.is_empty() { + let sorted = { + let uri_classes = self.uri_classes_index.read(); + let iter = uri_classes + .values() + .flat_map(|classes| classes.iter()) + .filter(|class| evicted_fqns.contains(&class.fqn().to_string())) + .map(|class| (class.fqn().to_string(), class.as_ref())); + crate::toposort::toposort_classes(iter) + }; + + let class_loader = + |name: &str| -> Option> { self.find_or_load_class(name) }; + crate::virtual_members::populate_from_sorted( + &sorted, + &self.resolved_class_cache, + &class_loader, + ); + } - if any_signature_changed { - self.member_completion_cache.lock().clear(); + if any_signature_changed { + self.member_completion_cache.lock().clear(); + } + + let reference_items: Vec<(String, Arc)> = prepared + .iter() + .map(|update| (update.uri.clone(), Arc::clone(&update.symbol_map))) + .collect(); + self.reindex_references_for_symbol_maps_batch(reference_items); + + { + let mut symbol_maps = self.symbol_maps.write(); + for update in prepared { + symbol_maps.insert(update.uri, update.symbol_map); } + } - any_signature_changed - }) + any_signature_changed } /// Resolve `parent_class` short names in a list of `ClassInfo` to @@ -1268,3 +1339,28 @@ impl Backend { } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::Backend; + + #[test] + fn ast_index_parse_result_batch_records_failures_and_empty_noops() { + let backend = Backend::new_test(); + assert!(!backend.apply_ast_index_parse_results_batch(Vec::new())); + + let uri = "file:///project/src/Broken.php"; + let changed = + backend.apply_ast_index_parse_results_batch(vec![AstIndexParseResult::ParseFailed { + uri: uri.to_string(), + errors: vec![("Parse failed (internal error)".to_string(), 10, 20)], + }]); + + assert!(!changed); + assert_eq!( + backend.parse_errors.read().get(uri).cloned(), + Some(vec![("Parse failed (internal error)".to_string(), 10, 20)]) + ); + } +} diff --git a/src/reference_index.rs b/src/reference_index.rs new file mode 100644 index 00000000..00221ee1 --- /dev/null +++ b/src/reference_index.rs @@ -0,0 +1,623 @@ +//! Cross-file reference candidate index. +//! +//! The precise find-references logic still lives in `references`: it resolves +//! aliases, class hierarchies, `self/static/parent`, and Laravel declarations. +//! This index is intentionally a coarse candidate index keyed by symbol name so +//! those scanners can skip files that cannot contain a match once the workspace +//! has been fully parsed. + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::sync::atomic::Ordering; + +use parking_lot::RwLock; + +use crate::Backend; +use crate::symbol_map::{LaravelStringKind, SelfStaticParentKind, SymbolKind, SymbolMap}; +use crate::util::{build_fqn, find_class_at_offset, short_name, strip_fqn_prefix}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum ReferenceIndexKey { + Class(String), + Function(String), + Constant(String), + Member { + name: String, + is_static: bool, + }, + LaravelString { + kind: LaravelStringKind, + key: String, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct ReferenceIndexEntry { + pub(crate) uri: String, + pub(crate) start: u32, + pub(crate) end: u32, + pub(crate) is_declaration: bool, +} + +pub(crate) type ReferenceIndex = Arc>>>; + +pub(crate) fn new_reference_index() -> ReferenceIndex { + Arc::new(RwLock::new(HashMap::new())) +} + +impl Backend { + pub(crate) fn evict_reference_index_uri(&self, uri: &str) { + let mut index = self.reference_index.write(); + evict_reference_index_uri_locked(&mut index, uri); + } + + pub(crate) fn reference_candidate_uris_for_keys( + &self, + keys: &[ReferenceIndexKey], + ) -> Option> { + if !self.workspace_indexed.load(Ordering::Acquire) { + return None; + } + + let index = self.reference_index.read(); + let mut uris = HashSet::new(); + for key in keys { + if let Some(entries) = index.get(key) { + uris.extend(entries.iter().map(|entry| entry.uri.clone())); + } + } + Some(uris) + } + + pub(crate) fn reindex_references_for_symbol_maps_batch( + &self, + items: Vec<(String, Arc)>, + ) { + if items.is_empty() { + return; + } + + let mut rebuilt = Vec::with_capacity(items.len()); + for (uri, symbol_map) in items { + if self.is_reference_indexable_uri(&uri) { + rebuilt.push(( + uri.clone(), + self.reference_entries_for_symbol_map(&uri, &symbol_map), + )); + } else { + rebuilt.push((uri, Vec::new())); + } + } + + let mut keep = vec![true; rebuilt.len()]; + let mut seen_uris = HashSet::new(); + for (idx, (uri, _)) in rebuilt.iter().enumerate().rev() { + if !seen_uris.insert(uri.clone()) { + keep[idx] = false; + } + } + rebuilt = rebuilt + .into_iter() + .enumerate() + .filter_map(|(idx, item)| keep[idx].then_some(item)) + .collect(); + + let batch_uris: HashSet = rebuilt.iter().map(|(uri, _)| uri.clone()).collect(); + let mut index = self.reference_index.write(); + evict_reference_index_uris_locked(&mut index, &batch_uris); + for (_uri, entries) in rebuilt { + for (key, entry) in entries { + index.entry(key).or_default().push(entry); + } + } + } + + fn reference_entries_for_symbol_map( + &self, + uri: &str, + symbol_map: &SymbolMap, + ) -> Vec<(ReferenceIndexKey, ReferenceIndexEntry)> { + if !self.is_reference_indexable_uri(uri) { + return Vec::new(); + } + + let mut entries: Vec<(ReferenceIndexKey, ReferenceIndexEntry)> = Vec::new(); + for span in &symbol_map.spans { + let is_declaration = matches!( + &span.kind, + SymbolKind::ClassDeclaration { .. } + | SymbolKind::FunctionCall { + is_definition: true, + .. + } + | SymbolKind::MemberDeclaration { .. } + ); + + for key in self.reference_keys_for_span(uri, span) { + entries.push(( + key, + ReferenceIndexEntry { + uri: uri.to_string(), + start: span.start, + end: span.end, + is_declaration, + }, + )); + } + } + + if let Some(classes) = self.uri_classes_index.read().get(uri).cloned() { + for class in classes { + for prop in &class.properties { + let Some((start, end)) = member_range(prop.name_offset, &prop.name, true) + else { + continue; + }; + entries.push(( + ReferenceIndexKey::Member { + name: prop + .name + .strip_prefix('$') + .unwrap_or(&prop.name) + .to_string(), + is_static: prop.is_static, + }, + ReferenceIndexEntry { + uri: uri.to_string(), + start, + end, + is_declaration: true, + }, + )); + } + } + } + + entries + } + + fn reference_keys_for_span( + &self, + uri: &str, + span: &crate::symbol_map::SymbolSpan, + ) -> Vec { + match &span.kind { + SymbolKind::ClassReference { name, is_fqn, .. } => { + let resolved = if *is_fqn { + normalize_symbol_name(name) + } else if let Some(fqn) = self.resolved_name_at(uri, span.start) { + fqn + } else { + let ctx = self.file_context_at(uri, span.start); + normalize_symbol_name(Self::resolve_to_fqn(name, &ctx.use_map, &ctx.namespace)) + }; + class_keys(&resolved, name) + } + SymbolKind::ClassDeclaration { name } => { + let namespace = self.namespace_at_offset(uri, span.start); + let fqn = build_fqn(name, namespace.as_deref()); + class_keys(&fqn, name) + } + SymbolKind::SelfStaticParent(kind) if *kind != SelfStaticParentKind::This => { + let ctx = self.file_context_at(uri, span.start); + let Some(current_class) = find_class_at_offset(&ctx.classes, span.start) else { + return Vec::new(); + }; + let fqn = match kind { + SelfStaticParentKind::Parent => { + current_class.parent_class.map(normalize_symbol_name) + } + _ => Some(build_fqn(¤t_class.name, ctx.namespace.as_deref())), + }; + fqn.map(|name| class_keys(&name, short_name(&name))) + .unwrap_or_default() + } + SymbolKind::FunctionCall { + name, + is_definition: _, + } => { + let resolved = if let Some(fqn) = self.resolved_name_at(uri, span.start) { + fqn + } else { + let ctx = self.file_context_at(uri, span.start); + normalize_symbol_name(Self::resolve_to_fqn(name, &ctx.use_map, &ctx.namespace)) + }; + function_keys(&resolved, name) + } + SymbolKind::ConstantReference { name } => { + vec![ReferenceIndexKey::Constant(name.to_string())] + } + SymbolKind::MemberAccess { + member_name, + is_static, + .. + } + | SymbolKind::MemberDeclaration { + name: member_name, + is_static, + } => { + vec![ReferenceIndexKey::Member { + name: member_name.to_string(), + is_static: *is_static, + }] + } + SymbolKind::LaravelStringKey { kind, key } => { + vec![ReferenceIndexKey::LaravelString { + kind: kind.clone(), + key: key.to_string(), + }] + } + _ => Vec::new(), + } + } + + fn is_reference_indexable_uri(&self, uri: &str) -> bool { + if uri.starts_with("phpantom-stub://") || uri.starts_with("phpantom-stub-fn://") { + return false; + } + !self + .vendor_uri_prefixes + .lock() + .iter() + .any(|prefix| uri.starts_with(prefix.as_str())) + } + + fn resolved_name_at(&self, uri: &str, offset: u32) -> Option { + self.resolved_names + .read() + .get(uri) + .and_then(|rn| rn.get(offset).map(normalize_symbol_name)) + } +} + +fn evict_reference_index_uri_locked( + index: &mut HashMap>, + uri: &str, +) { + index.retain(|_, entries| { + entries.retain(|entry| entry.uri != uri); + !entries.is_empty() + }); +} + +fn evict_reference_index_uris_locked( + index: &mut HashMap>, + uris: &HashSet, +) { + if uris.is_empty() { + return; + } + + index.retain(|_, entries| { + entries.retain(|entry| !uris.contains(entry.uri.as_str())); + !entries.is_empty() + }); +} + +fn normalize_symbol_name(name: impl AsRef) -> String { + strip_fqn_prefix(name.as_ref()).to_string() +} + +fn class_keys(resolved: &str, source_name: &str) -> Vec { + symbol_name_keys(resolved, source_name) + .into_iter() + .map(ReferenceIndexKey::Class) + .collect() +} + +fn function_keys(resolved: &str, source_name: &str) -> Vec { + symbol_name_keys(resolved, source_name) + .into_iter() + .map(ReferenceIndexKey::Function) + .collect() +} + +fn symbol_name_keys(resolved: &str, source_name: &str) -> Vec { + let mut keys = vec![ + normalize_symbol_name(resolved), + normalize_symbol_name(source_name), + ]; + keys.push(short_name(resolved).to_string()); + keys.sort(); + keys.dedup(); + keys +} + +fn member_range(name_offset: u32, name: &str, has_dollar_prefix: bool) -> Option<(u32, u32)> { + if name_offset == 0 { + return None; + } + let len = name.len() as u32 + u32::from(has_dollar_prefix); + Some((name_offset, name_offset.saturating_add(len))) +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::sync::atomic::Ordering; + + use super::*; + use crate::Backend; + use crate::symbol_map::{SymbolMap, SymbolSpan}; + + #[test] + fn candidate_lookup_is_disabled_until_workspace_is_indexed() { + let backend = Backend::new_test(); + let uri = "file:///project/src/Foo.php"; + backend.update_ast( + uri, + "save();\nconfig('app.name');\n", + ); + backend.workspace_indexed.store(true, Ordering::Release); + + assert_candidate_contains( + &backend, + ReferenceIndexKey::Class("App\\Foo".to_string()), + uri, + ); + assert_candidate_contains( + &backend, + ReferenceIndexKey::Function("App\\helper".to_string()), + uri, + ); + assert_candidate_contains( + &backend, + ReferenceIndexKey::Member { + name: "save".to_string(), + is_static: false, + }, + uri, + ); + assert_candidate_contains( + &backend, + ReferenceIndexKey::Member { + name: "name".to_string(), + is_static: false, + }, + uri, + ); + assert_candidate_contains( + &backend, + ReferenceIndexKey::LaravelString { + kind: LaravelStringKind::Config, + key: "app.name".to_string(), + }, + uri, + ); + } + + #[test] + fn reference_index_evicts_candidates_when_file_maps_clear() { + let backend = Backend::new_test(); + let uri = "file:///project/src/Foo.php"; + backend.update_ast(uri, " Arc { + Arc::new(SymbolMap { + spans: vec![SymbolSpan { + start: 0, + end: name.len() as u32, + kind: SymbolKind::ClassDeclaration { + name: name.to_string(), + }, + }], + ..SymbolMap::default() + }) + } +} diff --git a/src/references/mod.rs b/src/references/mod.rs index 62bf9c62..d24a3f28 100644 --- a/src/references/mod.rs +++ b/src/references/mod.rs @@ -24,12 +24,14 @@ //! same name are excluded. use std::collections::{HashMap, HashSet}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use tower_lsp::lsp_types::{Location, Position, Range, Url}; use crate::Backend; +use crate::reference_index::ReferenceIndexKey; use crate::symbol_map::{ClassRefContext, SelfStaticParentKind, SymbolKind, SymbolMap, VarDefKind}; use crate::types::ClassInfo; use crate::util::{ @@ -312,7 +314,18 @@ impl Backend { SymbolKind::NamespaceDeclaration { .. } => Vec::new(), SymbolKind::LaravelStringKey { kind, key } => { - let snapshot = self.user_file_symbol_maps(); + let snapshot = if include_declaration + && matches!(kind, crate::symbol_map::LaravelStringKind::Config) + { + self.user_file_symbol_maps() + } else { + self.user_file_symbol_maps_for_reference_keys(&[ + ReferenceIndexKey::LaravelString { + kind: kind.clone(), + key: key.to_string(), + }, + ]) + }; laravel::find_laravel_string_key_references( self, kind, @@ -722,13 +735,29 @@ impl Backend { /// reference scanners use this to restrict results to user code. pub(crate) fn user_file_symbol_maps(&self) -> Vec<(String, Arc)> { self.ensure_workspace_indexed(); + self.user_file_symbol_maps_matching(None) + } + + fn user_file_symbol_maps_for_reference_keys( + &self, + keys: &[ReferenceIndexKey], + ) -> Vec<(String, Arc)> { + self.ensure_workspace_indexed(); + let candidate_uris = self.reference_candidate_uris_for_keys(keys); + self.user_file_symbol_maps_matching(candidate_uris.as_ref()) + } + fn user_file_symbol_maps_matching( + &self, + candidate_uris: Option<&HashSet>, + ) -> Vec<(String, Arc)> { let vendor_prefixes = self.vendor_uri_prefixes.lock().clone(); let maps = self.symbol_maps.read(); maps.iter() .filter(|(uri, _)| { - !uri.starts_with("phpantom-stub://") + candidate_uris.is_none_or(|uris| uris.contains(uri.as_str())) + && !uri.starts_with("phpantom-stub://") && !uri.starts_with("phpantom-stub-fn://") && !vendor_prefixes.iter().any(|p| uri.starts_with(p.as_str())) }) @@ -747,8 +776,8 @@ impl Backend { let target = strip_fqn_prefix(target_fqn); let target_short = crate::util::short_name(target); - // Snapshot user-file symbol maps (excludes vendor and stubs). - let snapshot = self.user_file_symbol_maps(); + let candidate_keys = class_candidate_keys(target, target_short); + let snapshot = self.user_file_symbol_maps_for_reference_keys(&candidate_keys); for (file_uri, symbol_map) in &snapshot { // Prefer mago-names resolved_names for FQN resolution (byte-offset @@ -909,7 +938,21 @@ impl Backend { } let mut locations = Vec::new(); - let snapshot = self.user_file_symbol_maps(); + let mut candidate_keys = Vec::new(); + for fqn in &scoped { + candidate_keys.extend(class_candidate_keys(fqn, crate::util::short_name(fqn))); + } + candidate_keys.extend([ + ReferenceIndexKey::Member { + name: "__construct".to_string(), + is_static: true, + }, + ReferenceIndexKey::Member { + name: "__construct".to_string(), + is_static: false, + }, + ]); + let snapshot = self.user_file_symbol_maps_for_reference_keys(&candidate_keys); for (file_uri, symbol_map) in &snapshot { let resolved_names = self.resolved_names.read().get(file_uri).cloned(); @@ -1102,7 +1145,8 @@ impl Backend { ) -> Vec { let mut locations = Vec::new(); - let snapshot = self.user_file_symbol_maps(); + let candidate_keys = member_candidate_keys(target_member, target_is_static, hierarchy); + let snapshot = self.user_file_symbol_maps_for_reference_keys(&candidate_keys); for (file_uri, symbol_map) in &snapshot { // First pass: name-only check to avoid unnecessary work. @@ -1338,7 +1382,8 @@ impl Backend { // Input boundary: callers may pass FQNs with a leading `\`. let target = strip_fqn_prefix(target_fqn); - let snapshot = self.user_file_symbol_maps(); + let candidate_keys = function_candidate_keys(target, target_short); + let snapshot = self.user_file_symbol_maps_for_reference_keys(&candidate_keys); for (file_uri, symbol_map) in &snapshot { // Prefer mago-names resolved_names; lazy-load use_map only @@ -1454,7 +1499,10 @@ impl Backend { ) -> Vec { let mut locations = Vec::new(); - let snapshot = self.user_file_symbol_maps(); + let snapshot = + self.user_file_symbol_maps_for_reference_keys(&[ReferenceIndexKey::Constant( + target_name.to_string(), + )]); for (file_uri, symbol_map) in &snapshot { // First pass: name-only check. @@ -1923,7 +1971,16 @@ impl Backend { /// via the fqn_uri_index. The vendor directory (read from /// skipped during the filesystem walk. pub(crate) fn ensure_workspace_indexed(&self) { + self.ensure_workspace_indexed_with_progress(None); + } + + pub(crate) fn ensure_workspace_indexed_with_progress( + &self, + progress: Option<&(dyn Fn(u32, String) + Sync)>, + ) { + let _workspace_index_guard = self.workspace_index_lock.lock(); let start = std::time::Instant::now(); + report_workspace_index_progress(progress, 1, "Preparing workspace index"); // Collect URIs that already have symbol maps. let existing_uris: HashSet = self.symbol_maps.read().keys().cloned().collect(); @@ -1945,19 +2002,6 @@ impl Backend { }) .collect(); - if !phase1_uris.is_empty() { - tracing::info!( - "ensure_workspace_indexed: Phase 1 parsing {} files", - phase1_uris.len() - ); - self.parse_files_parallel( - phase1_uris - .iter() - .map(|uri| (uri.as_str(), None::<&str>)) - .collect(), - ); - } - // ── Phase 2: workspace directory scan ─────────────────────────── // // Even after the initial scan, repeat the walk so newly-created PHP @@ -1969,13 +2013,11 @@ impl Backend { .load(std::sync::atomic::Ordering::Relaxed); let workspace_root = self.workspace_root.read().clone(); - - if let Some(root) = workspace_root { + let phase1_uri_set: HashSet<&str> = phase1_uris.iter().map(|uri| uri.as_str()).collect(); + let phase2_work = if let Some(root) = workspace_root.clone() { let vendor_dir_paths = self.vendor_dir_paths.lock().clone(); - // Re-read existing URIs after phase 1 may have added more. - let existing_uris: HashSet = self.symbol_maps.read().keys().cloned().collect(); - + report_workspace_index_progress(progress, 3, "Scanning workspace files"); let walk_start = std::time::Instant::now(); let php_files = collect_php_files_gitignore(&root, &vendor_dir_paths); tracing::info!( @@ -1989,55 +2031,140 @@ impl Backend { walk_start.elapsed() ); - let phase2_work: Vec<(String, PathBuf)> = php_files + php_files .into_iter() .filter_map(|path| { let uri = crate::util::path_to_uri(&path); - if existing_uris.contains(&uri) { + if existing_uris.contains(&uri) || phase1_uri_set.contains(uri.as_str()) { None } else { Some((uri, path)) } }) - .collect(); + .collect() + } else { + Vec::new() + }; + + let total_to_parse = phase1_uris.len() + phase2_work.len(); + let phase1_units: u64 = phase1_uris + .iter() + .map(|uri| self.index_progress_weight_for_uri(uri, None)) + .sum(); + let phase2_units: u64 = phase2_work + .iter() + .map(|(_, path)| index_progress_weight_for_path(path)) + .sum(); + let total_parse_units = phase1_units.saturating_add(phase2_units).max(1); + report_workspace_index_progress( + progress, + 5, + format!("Queued {total_to_parse} PHP files for indexing"), + ); + + if !phase1_uris.is_empty() { + tracing::info!( + "ensure_workspace_indexed: Phase 1 parsing {} files", + phase1_uris.len() + ); + self.parse_files_parallel_with_progress( + phase1_uris + .iter() + .map(|uri| (uri.to_string(), None::)) + .collect(), + Some(&|done_files, _phase_total, done_units, _phase_units| { + report_workspace_index_progress( + progress, + workspace_parse_percentage(done_units, total_parse_units), + format!("Parsing indexed files ({done_files}/{total_to_parse})"), + ); + }), + ); + } + + if workspace_root.is_some() { + report_workspace_index_progress( + progress, + workspace_parse_percentage(phase1_units, total_parse_units), + format!( + "Indexed known files ({}/{total_to_parse})", + phase1_uris.len() + ), + ); if !phase2_work.is_empty() { tracing::info!( "ensure_workspace_indexed: Phase 2 parsing {} files", phase2_work.len() ); - self.parse_paths_parallel(&phase2_work); + let parsed_before_phase2 = phase1_uris.len(); + let units_before_phase2 = phase1_units; + self.parse_paths_parallel_with_progress( + &phase2_work, + Some(&|done_files, _phase_total, done_units, _phase_units| { + let total_done = parsed_before_phase2 + done_files; + let total_units_done = units_before_phase2.saturating_add(done_units); + report_workspace_index_progress( + progress, + workspace_parse_percentage(total_units_done, total_parse_units), + format!("Parsing workspace files ({total_done}/{total_to_parse})"), + ); + }), + ); } + report_workspace_index_progress(progress, 99, "Finalizing workspace index"); self.workspace_indexed .store(true, std::sync::atomic::Ordering::Relaxed); } + report_workspace_index_progress(progress, 100, "Workspace index ready"); tracing::info!("ensure_workspace_indexed: total time {:?}", start.elapsed()); } /// Parse a batch of files in parallel using OS threads. /// /// Each entry is `(uri, optional_content)`. When `content` is `None`, - /// the file is loaded via [`get_file_content`]. The expensive parsing - /// step runs without any locks held; only the brief map insertions at - /// the end of [`update_ast`] acquire write locks. + /// the file is loaded via [`get_file_content`]. Workers parse files into + /// owned index updates, then a single merge publishes the whole batch. /// /// Uses [`std::thread::scope`] for structured concurrency so that all /// spawned threads are guaranteed to finish before this method returns. /// The thread count is capped at the number of available CPU cores. - fn parse_files_parallel(&self, files: Vec<(&str, Option<&str>)>) { + fn parse_files_parallel_with_progress( + &self, + files: Vec<(String, Option)>, + progress: Option<&(dyn Fn(usize, usize, u64, u64) + Sync)>, + ) { if files.is_empty() { return; } + let total = files.len(); + let parsed = AtomicUsize::new(0); + let weights: Vec = files + .iter() + .map(|(uri, content)| self.index_progress_weight_for_uri(uri, content.as_deref())) + .collect(); + let total_units = weights.iter().copied().sum::().max(1); + let parsed_units = AtomicU64::new(0); // For very small batches, avoid thread overhead. if files.len() <= 2 { - for (uri, content) in &files { - if let Some(c) = content { - self.update_ast(uri, c); - } else if let Some(c) = self.get_file_content(uri) { - self.update_ast(uri, &c); + let mut results = Vec::with_capacity(files.len()); + for (idx, (uri, content)) in files.iter().enumerate() { + let content = content.clone().or_else(|| self.get_file_content(uri)); + if let Some(content) = content { + results.push(self.parse_ast_index_update_for_index(uri, &content)); } + report_weighted_parse_progress( + progress, + &parsed, + &parsed_units, + weights[idx], + total, + total_units, + ); } + report_weighted_merge_progress(progress, total, total_units); + self.apply_ast_index_parse_results_batch(results); return; } @@ -2045,11 +2172,8 @@ impl Backend { .map(|n| n.get()) .unwrap_or(4) .min(files.len()); - - let chunks: Vec)>> = { - let chunk_size = files.len().div_ceil(n_threads); - files.chunks(chunk_size).map(|c| c.to_vec()).collect() - }; + let next = AtomicUsize::new(0); + let work_order = largest_first_work_order(&weights); // Use a 16 MB stack per thread. The default 8 MB can overflow // when parsing deeply-nested PHP files (e.g. WordPress @@ -2059,43 +2183,111 @@ impl Backend { // (abort, not panic) so `catch_unwind` cannot save us. const PARSE_STACK_SIZE: usize = 16 * 1024 * 1024; - std::thread::scope(|s| { - for chunk in &chunks { - let handle = std::thread::Builder::new() + let files_ref = &files; + let weights_ref = &weights; + let work_order_ref = &work_order; + let mut results = std::thread::scope(|s| { + let mut handles = Vec::with_capacity(n_threads); + for _ in 0..n_threads { + let parsed = &parsed; + let parsed_units = &parsed_units; + let next = &next; + let files = files_ref; + let weights = weights_ref; + let work_order = work_order_ref; + match std::thread::Builder::new() .stack_size(PARSE_STACK_SIZE) .spawn_scoped(s, move || { - for (uri, content) in chunk { - if let Some(c) = content { - self.update_ast(uri, c); - } else if let Some(c) = self.get_file_content(uri) { - self.update_ast(uri, &c); + let mut local_results = Vec::new(); + loop { + let work_idx = next.fetch_add(1, Ordering::Relaxed); + let Some(&idx) = work_order.get(work_idx) else { + break; + }; + let Some((uri, content)) = files.get(idx) else { + break; + }; + + let content = content.clone().or_else(|| self.get_file_content(uri)); + if let Some(content) = content { + local_results.push(( + idx, + self.parse_ast_index_update_for_index(uri, &content), + )); } + report_weighted_parse_progress( + progress, + parsed, + parsed_units, + weights[idx], + total, + total_units, + ); } - }); - if let Err(e) = handle { - tracing::error!("failed to spawn parse thread: {e}"); + local_results + }) { + Ok(handle) => handles.push(handle), + Err(e) => tracing::error!("failed to spawn parse thread: {e}"), } } + + handles + .into_iter() + .flat_map(|handle| { + handle.join().unwrap_or_else(|_| { + tracing::error!("parse thread panicked during workspace indexing"); + Vec::new() + }) + }) + .collect::>() }); + results.sort_by_key(|(idx, _)| *idx); + report_weighted_merge_progress(progress, total, total_units); + self.apply_ast_index_parse_results_batch( + results.into_iter().map(|(_, result)| result).collect(), + ); } /// Parse a batch of files from disk paths in parallel. /// - /// Each entry is `(uri, path)`. The file is read from disk and - /// parsed in a worker thread. Uses [`std::thread::scope`] for - /// structured concurrency. - pub(crate) fn parse_paths_parallel(&self, files: &[(String, PathBuf)]) { + /// Each entry is `(uri, path)`. The file is read from disk and parsed in + /// a worker thread. Work is pulled from a shared atomic counter so large + /// files cannot leave one fixed chunk as the long tail. + pub(crate) fn parse_paths_parallel_with_progress( + &self, + files: &[(String, PathBuf)], + progress: Option<&(dyn Fn(usize, usize, u64, u64) + Sync)>, + ) { if files.is_empty() { return; } + let total = files.len(); + let parsed = AtomicUsize::new(0); + let weights: Vec = files + .iter() + .map(|(_, path)| index_progress_weight_for_path(path)) + .collect(); + let total_units = weights.iter().copied().sum::().max(1); + let parsed_units = AtomicU64::new(0); // For very small batches, avoid thread overhead. if files.len() <= 2 { - for (uri, path) in files { + let mut results = Vec::with_capacity(files.len()); + for (idx, (uri, path)) in files.iter().enumerate() { if let Ok(content) = std::fs::read_to_string(path) { - self.update_ast(uri, &content); + results.push(self.parse_ast_index_update_for_index(uri, &content)); } + report_weighted_parse_progress( + progress, + &parsed, + &parsed_units, + weights[idx], + total, + total_units, + ); } + report_weighted_merge_progress(progress, total, total_units); + self.apply_ast_index_parse_results_batch(results); return; } @@ -2103,30 +2295,85 @@ impl Backend { .map(|n| n.get()) .unwrap_or(4) .min(files.len()); - - let chunks: Vec<&[(String, PathBuf)]> = { - let chunk_size = files.len().div_ceil(n_threads); - files.chunks(chunk_size).collect() - }; + let next = AtomicUsize::new(0); + let work_order = largest_first_work_order(&weights); const PARSE_STACK_SIZE: usize = 16 * 1024 * 1024; - std::thread::scope(|s| { - for chunk in &chunks { - let handle = std::thread::Builder::new() + let weights_ref = &weights; + let work_order_ref = &work_order; + let mut results = std::thread::scope(|s| { + let mut handles = Vec::with_capacity(n_threads); + for _ in 0..n_threads { + let parsed = &parsed; + let parsed_units = &parsed_units; + let next = &next; + let weights = weights_ref; + let work_order = work_order_ref; + match std::thread::Builder::new() .stack_size(PARSE_STACK_SIZE) .spawn_scoped(s, move || { - for (uri, path) in *chunk { + let mut local_results = Vec::new(); + loop { + let work_idx = next.fetch_add(1, Ordering::Relaxed); + let Some(&idx) = work_order.get(work_idx) else { + break; + }; + let Some((uri, path)) = files.get(idx) else { + break; + }; + if let Ok(content) = std::fs::read_to_string(path) { - self.update_ast(uri, &content); + local_results.push(( + idx, + self.parse_ast_index_update_for_index(uri, &content), + )); } + report_weighted_parse_progress( + progress, + parsed, + parsed_units, + weights[idx], + total, + total_units, + ); } - }); - if let Err(e) = handle { - tracing::error!("failed to spawn parse thread: {e}"); + local_results + }) { + Ok(handle) => handles.push(handle), + Err(e) => tracing::error!("failed to spawn parse thread: {e}"), } } + + handles + .into_iter() + .flat_map(|handle| { + handle.join().unwrap_or_else(|_| { + tracing::error!("parse thread panicked during workspace indexing"); + Vec::new() + }) + }) + .collect::>() }); + results.sort_by_key(|(idx, _)| *idx); + report_weighted_merge_progress(progress, total, total_units); + self.apply_ast_index_parse_results_batch( + results.into_iter().map(|(_, result)| result).collect(), + ); + } + + fn index_progress_weight_for_uri(&self, uri: &str, content: Option<&str>) -> u64 { + if let Some(content) = content { + return (content.len() as u64).max(1); + } + if let Some(content) = self.open_files.read().get(uri) { + return (content.len() as u64).max(1); + } + Url::parse(uri) + .ok() + .and_then(|url| url.to_file_path().ok()) + .map(|path| index_progress_weight_for_path(&path)) + .unwrap_or(1) } } @@ -2171,5 +2418,116 @@ fn class_names_match(resolved: &str, target: &str, target_short: &str) -> bool { false } +fn class_candidate_keys(target: &str, target_short: &str) -> Vec { + symbol_candidate_names(target, target_short) + .into_iter() + .map(ReferenceIndexKey::Class) + .collect() +} + +fn function_candidate_keys(target: &str, target_short: &str) -> Vec { + symbol_candidate_names(target, target_short) + .into_iter() + .map(ReferenceIndexKey::Function) + .collect() +} + +fn symbol_candidate_names(target: &str, target_short: &str) -> Vec { + let mut keys = vec![ + strip_fqn_prefix(target).to_string(), + strip_fqn_prefix(target_short).to_string(), + ]; + keys.sort(); + keys.dedup(); + keys +} + +fn member_candidate_keys( + target_member: &str, + target_is_static: bool, + hierarchy: Option<&HashSet>, +) -> Vec { + let mut keys = vec![ReferenceIndexKey::Member { + name: target_member.to_string(), + is_static: target_is_static, + }]; + if hierarchy.is_some() { + keys.push(ReferenceIndexKey::Member { + name: target_member.to_string(), + is_static: !target_is_static, + }); + } + keys +} + +fn report_workspace_index_progress( + progress: Option<&(dyn Fn(u32, String) + Sync)>, + percentage: u32, + message: impl Into, +) { + if let Some(progress) = progress { + progress(percentage.min(100), message.into()); + } +} + +fn workspace_parse_percentage(done: u64, total: u64) -> u32 { + if total == 0 { + return 95; + } + + 5 + ((done.saturating_mul(90) / total).min(90) as u32) +} + +fn report_weighted_parse_progress( + progress: Option<&(dyn Fn(usize, usize, u64, u64) + Sync)>, + parsed: &AtomicUsize, + parsed_units: &AtomicU64, + weight: u64, + total: usize, + total_units: u64, +) { + let done = parsed.fetch_add(1, Ordering::Relaxed) + 1; + let done_units = parsed_units.fetch_add(weight, Ordering::Relaxed) + weight; + let file_report_every = (total / 100).max(1); + let unit_report_every = (total_units / 100).max(1); + let crossed_unit_boundary = + done_units == total_units || done_units % unit_report_every < weight.min(unit_report_every); + + if done == 1 || done == total || done.is_multiple_of(file_report_every) || crossed_unit_boundary + { + report_weighted_progress(progress, done, total, done_units, total_units); + } +} + +fn report_weighted_merge_progress( + progress: Option<&(dyn Fn(usize, usize, u64, u64) + Sync)>, + total: usize, + total_units: u64, +) { + report_weighted_progress(progress, total, total, total_units, total_units); +} + +fn report_weighted_progress( + progress: Option<&(dyn Fn(usize, usize, u64, u64) + Sync)>, + done: usize, + total: usize, + done_units: u64, + total_units: u64, +) { + if let Some(progress) = progress { + progress(done, total, done_units, total_units); + } +} + +fn largest_first_work_order(weights: &[u64]) -> Vec { + let mut order: Vec = (0..weights.len()).collect(); + order.sort_by_key(|&idx| std::cmp::Reverse(weights[idx])); + order +} + +fn index_progress_weight_for_path(path: &Path) -> u64 { + path.metadata().map(|meta| meta.len()).unwrap_or(1).max(1) +} + #[cfg(test)] mod tests; diff --git a/src/references/tests.rs b/src/references/tests.rs index 531c1f2d..ffb70f15 100644 --- a/src/references/tests.rs +++ b/src/references/tests.rs @@ -426,6 +426,25 @@ async fn test_constant_references() { ); } +#[tokio::test] +async fn test_define_constant_references_use_reference_index_snapshot() { + let backend = Backend::new_test(); + let uri = Url::parse("file:///constants.php").unwrap(); + let text = concat!( + "run();\nhelper();\n", + ) + .expect("use file"); + + let backend = Backend::new_test_with_workspace(dir.path().to_path_buf(), Vec::new()); + backend.ensure_workspace_indexed(); + + assert!( + backend + .workspace_indexed + .load(std::sync::atomic::Ordering::Acquire) + ); + assert_eq!( + backend.symbol_maps.read().len(), + 4, + "all disk files should publish symbol maps through the batch merge" + ); + assert!( + backend + .fqn_class_index + .read() + .contains_key("App\\Contracts\\Service") + ); + assert!(backend.fqn_class_index.read().contains_key("App\\Impl\\A")); + assert!(backend.global_functions.read().contains_key("App\\helper")); + assert!(backend.global_defines.read().contains_key("APP_FLAG")); + + let service_children = backend + .gti_index + .read() + .get("App\\Contracts\\Service") + .cloned() + .unwrap_or_default(); + assert!(service_children.contains(&"App\\Impl\\A".to_string())); + + let use_uri = crate::util::path_to_uri(&src.join("Use.php")); + let class_candidates = backend + .reference_candidate_uris_for_keys(&[ReferenceIndexKey::Class("App\\Impl\\A".to_string())]) + .expect("reference index should be active after workspace indexing"); + assert!(class_candidates.contains(&use_uri)); + + let member_candidates = backend + .reference_candidate_uris_for_keys(&[ReferenceIndexKey::Member { + name: "run".to_string(), + is_static: false, + }]) + .expect("reference index should be active after workspace indexing"); + assert!(member_candidates.contains(&use_uri)); + + let function_snapshot = + backend.user_file_symbol_maps_for_reference_keys(&[ReferenceIndexKey::Function( + "App\\helper".to_string(), + )]); + assert_eq!( + function_snapshot.len(), + 1, + "reference-key snapshots should use the reference index instead of cloning every user file" + ); + assert_eq!(function_snapshot[0].0, use_uri); +} + +#[test] +fn indexing_work_order_processes_largest_files_first() { + assert_eq!( + super::largest_first_work_order(&[10, 1, 50, 3]), + vec![2, 0, 3, 1] + ); +} + +#[test] +fn reference_key_snapshot_falls_back_until_workspace_index_ready() { + use crate::reference_index::ReferenceIndexKey; + + let backend = Backend::new_test(); + let matching_uri = "file:///project/src/Use.php"; + let unrelated_uri = "file:///project/src/Other.php"; + + backend.update_ast( + matching_uri, + " = snapshot.into_iter().map(|(uri, _)| uri).collect(); + + assert!( + !backend + .workspace_indexed + .load(std::sync::atomic::Ordering::Acquire) + ); + assert!(uris.contains(matching_uri)); + assert!( + uris.contains(unrelated_uri), + "before the full-index flag is ready, reference scans must fall back to all user files" + ); +} + +#[test] +fn user_file_symbol_maps_exclude_vendor_and_stubs() { + let dir = tempfile::tempdir().expect("temp dir"); + let vendor = dir.path().join("vendor"); + std::fs::create_dir_all(&vendor).expect("vendor dir"); + + let backend = Backend::new_test(); + backend.add_vendor_dir(&vendor); + + let user_uri = "file:///project/src/User.php"; + let vendor_uri = crate::util::path_to_uri(&vendor.join("Package.php")); + backend.update_ast(user_uri, " = snapshot.into_iter().map(|(uri, _)| uri).collect(); + + assert!(uris.contains(user_uri)); + assert!(!uris.contains(&vendor_uri)); + assert!(!uris.contains("phpantom-stub://core.php")); + assert!(!uris.contains("phpantom-stub-fn://core.php")); +} + +#[test] +fn workspace_index_progress_covers_known_files_and_refresh_walks() { + let dir = tempfile::tempdir().expect("temp dir"); + let src = dir.path().join("src"); + std::fs::create_dir_all(&src).expect("src dir"); + + let known_path = src.join("Known.php"); + let disk_path = src.join("Disk.php"); + std::fs::write(&known_path, " = progress + .lock() + .expect("progress lock") + .iter() + .map(|(_, message)| message.clone()) + .collect(); + assert!( + messages + .iter() + .any(|message| message == "Preparing workspace index") + ); + assert!( + messages + .iter() + .any(|message| message.starts_with("Parsing indexed files")) + ); + assert!( + messages + .iter() + .any(|message| message.starts_with("Parsing workspace files")) + ); + assert_eq!( + progress + .lock() + .expect("progress lock") + .last() + .map(|(pct, _)| *pct), + Some(100) + ); + assert!(backend.fqn_class_index.read().contains_key("App\\Known")); + assert!(backend.fqn_class_index.read().contains_key("App\\Disk")); + + let refresh_path = src.join("Refresh.php"); + std::fs::write(&refresh_path, ") -> Option { } } +fn vendor_uri_prefixes_for_path(vendor_path: &std::path::Path) -> Vec { + let mut prefixes = vec![format!("{}/", crate::util::path_to_uri(vendor_path))]; + if let Ok(canonical) = vendor_path.canonicalize() { + prefixes.push(format!("{}/", crate::util::path_to_uri(&canonical))); + } + prefixes.sort(); + prefixes.dedup(); + prefixes +} + #[cfg(test)] mod tests { use super::*; @@ -1643,6 +1655,86 @@ mod tests { // ─── Self-scan helpers ────────────────────────────────────────────────────── impl Backend { + pub(crate) async fn start_full_background_index(&self) { + if self.config().indexing.strategy() != IndexingStrategy::Full { + return; + } + if self.workspace_root.read().is_none() { + return; + } + if self.full_index_in_progress.swap(true, Ordering::AcqRel) { + return; + } + + let progress_token = self.progress_create("phpantom/full-index").await; + if let Some(ref tok) = progress_token { + self.progress_begin( + tok, + "PHPantom: Full index", + Some("Parsing workspace files".to_string()), + ) + .await; + } + + let parse_backend = self.clone_for_blocking(); + let progress_backend = self.clone_for_blocking(); + tokio::spawn(async move { + let (progress_tx, mut progress_rx) = tokio::sync::mpsc::unbounded_channel(); + let indexed_files = tokio::task::spawn_blocking(move || { + let progress_tx = std::sync::Arc::new(std::sync::Mutex::new(progress_tx)); + let report_progress = |percentage, message| { + if let Ok(tx) = progress_tx.lock() { + let _ = tx.send((percentage, message)); + } + }; + parse_backend.ensure_workspace_indexed_with_progress(Some(&report_progress)); + parse_backend.symbol_maps.read().len() + }); + tokio::pin!(indexed_files); + + let indexed_files = loop { + tokio::select! { + Some((percentage, message)) = progress_rx.recv() => { + if let Some(ref tok) = progress_token { + progress_backend + .progress_report(tok, percentage, Some(message)) + .await; + } + } + result = &mut indexed_files => { + break result.unwrap_or(0); + } + } + }; + + while let Ok((percentage, message)) = progress_rx.try_recv() { + if let Some(ref tok) = progress_token { + progress_backend + .progress_report(tok, percentage, Some(message)) + .await; + } + } + + progress_backend + .full_index_in_progress + .store(false, Ordering::Release); + + if let Some(tok) = progress_token { + progress_backend + .progress_end(&tok, Some(format!("Parsed {} files", indexed_files))) + .await; + } + + if progress_backend + .supports_pull_diagnostics + .load(Ordering::Acquire) + && let Some(ref client) = progress_backend.client + { + let _ = client.workspace_diagnostic_refresh().await; + } + }); + } + /// Fetch the open-file content for `uri`, run `f` inside a panic /// guard, and return the result. /// @@ -2190,16 +2282,18 @@ impl Backend { let mut paths = self.vendor_dir_paths.lock(); paths.push(vendor_path.to_path_buf()); } - // Store the URI prefix for URI-level skip logic (diagnostics, - // find references, rename). - let prefix = if let Ok(canonical) = vendor_path.canonicalize() { - format!("{}/", crate::util::path_to_uri(&canonical)) - } else { - format!("{}/", crate::util::path_to_uri(vendor_path)) - }; + // Store URI prefixes for URI-level skip logic (diagnostics, find + // references, rename). Keep both raw and canonical forms so macOS + // `/tmp` vs `/private/tmp` style aliases do not leak vendor files into + // workspace indexing. + let new_prefixes = vendor_uri_prefixes_for_path(vendor_path); { let mut prefixes = self.vendor_uri_prefixes.lock(); - prefixes.push(prefix); + for prefix in new_prefixes { + if !prefixes.contains(&prefix) { + prefixes.push(prefix); + } + } } } @@ -2315,15 +2409,15 @@ impl Backend { // Rebuild vendor classmap. let vendor_scan = classmap_scanner::scan_vendor_packages(root, &vendor_dir); { - let vendor_uri_prefix = if let Ok(canonical) = vendor_path.canonicalize() { - format!("{}/", crate::util::path_to_uri(&canonical)) - } else { - format!("{}/", crate::util::path_to_uri(&vendor_path)) - }; + let vendor_uri_prefixes = vendor_uri_prefixes_for_path(&vendor_path); // Remove old vendor entries and insert new ones. let mut idx = self.fqn_uri_index.write(); - idx.retain(|_, v| !v.starts_with(&vendor_uri_prefix)); + idx.retain(|_, v| { + !vendor_uri_prefixes + .iter() + .any(|prefix| v.starts_with(prefix.as_str())) + }); for (fqn, path) in vendor_scan.classmap { idx.insert(fqn, crate::util::path_to_uri(&path)); } diff --git a/src/util.rs b/src/util.rs index 81380c06..aa425d7e 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1761,6 +1761,7 @@ impl Backend { // when the uri_classes_index entry is missing. self.uri_classes_index.write().remove(uri); self.symbol_maps.write().remove(uri); + self.evict_reference_index_uri(uri); self.file_imports.write().remove(uri); self.resolved_names.write().remove(uri); self.file_namespaces.write().remove(uri); diff --git a/tests/integration/classmap_scanner.rs b/tests/integration/classmap_scanner.rs index ce3b5acd..fa10ca3e 100644 --- a/tests/integration/classmap_scanner.rs +++ b/tests/integration/classmap_scanner.rs @@ -638,10 +638,10 @@ fn scan_directories_ignores_non_php_files() { // ─── Config strategy tests ───────────────────────────────────────────────── #[test] -fn config_strategy_defaults_to_composer() { +fn config_strategy_defaults_to_full() { use phpantom_lsp::config::{Config, IndexingStrategy}; let config: Config = toml::from_str("").unwrap(); - assert_eq!(config.indexing.strategy(), IndexingStrategy::Composer); + assert_eq!(config.indexing.strategy(), IndexingStrategy::Full); } #[test] From e48ee4dfa4a875dfab496c743c01bc3c3f549bc0 Mon Sep 17 00:00:00 2001 From: sidux Date: Fri, 3 Jul 2026 18:03:26 +0200 Subject: [PATCH 2/4] fix(references): scope member searches by resolved receiver --- src/references/mod.rs | 356 ++++++++++++++++++++++++++++++++++------ src/references/tests.rs | 85 +++++++++- 2 files changed, 387 insertions(+), 54 deletions(-) diff --git a/src/references/mod.rs b/src/references/mod.rs index d24a3f28..38cf1517 100644 --- a/src/references/mod.rs +++ b/src/references/mod.rs @@ -53,13 +53,11 @@ impl Backend { position: Position, include_declaration: bool, ) -> Option> { - self.find_references_with_member_mode(uri, content, position, include_declaration, true) + self.find_references_inner(uri, content, position, include_declaration) } - /// Like [`find_references`], but excludes unresolved member-call matches. - /// - /// Rename uses this stricter mode so same-named methods on unrelated or - /// unknown receiver types are not renamed conservatively. + /// Like [`find_references`], but kept separate for rename-specific call + /// sites that need the same precise member filtering. pub(crate) fn find_references_for_rename( &self, uri: &str, @@ -67,16 +65,15 @@ impl Backend { position: Position, include_declaration: bool, ) -> Option> { - self.find_references_with_member_mode(uri, content, position, include_declaration, false) + self.find_references_inner(uri, content, position, include_declaration) } - fn find_references_with_member_mode( + fn find_references_inner( &self, uri: &str, content: &str, position: Position, include_declaration: bool, - allow_unresolved_member_subjects: bool, ) -> Option> { let start_total = std::time::Instant::now(); tracing::info!( @@ -103,7 +100,6 @@ impl Backend { content, sym.start, include_declaration, - allow_unresolved_member_subjects, ); tracing::info!( "Find References: total time for {:?}: {:?}", @@ -146,7 +142,6 @@ impl Backend { content: &str, span_start: u32, include_declaration: bool, - allow_unresolved_member_subjects: bool, ) -> Vec { match kind { SymbolKind::Variable { name } | SymbolKind::CompactVariable { name } => { @@ -183,7 +178,6 @@ impl Backend { include_declaration, hierarchy.as_ref(), declaration_scope.as_ref(), - allow_unresolved_member_subjects, ); } self.find_variable_references(uri, content, name, span_start, include_declaration) @@ -225,7 +219,7 @@ impl Backend { // seeded with the subject's resolved class(es). if is_constructor_name(member_name) { let seeds = self - .get_file_content(uri) + .reference_file_content(uri) .map(|content| { self.resolve_subject_to_fqns( subject_text, @@ -245,7 +239,6 @@ impl Backend { include_declaration, hierarchy.as_ref(), declaration_scope.as_ref(), - allow_unresolved_member_subjects, ) } SymbolKind::FunctionCall { name, .. } => { @@ -281,7 +274,6 @@ impl Backend { include_declaration, hierarchy.as_ref(), declaration_scope.as_ref(), - allow_unresolved_member_subjects, ) } SymbolKind::SelfStaticParent(ssp_kind) => { @@ -765,6 +757,24 @@ impl Backend { .collect() } + fn reference_file_content(&self, uri: &str) -> Option { + if self.is_blade_file(uri) + && let Some(content) = self.blade_virtual_content.read().get(uri) + { + return Some(content.clone()); + } + self.get_file_content(uri) + } + + fn reference_file_content_arc(&self, uri: &str) -> Option> { + if self.is_blade_file(uri) + && let Some(content) = self.blade_virtual_content.read().get(uri) + { + return Some(Arc::new(content.clone())); + } + self.get_file_content_arc(uri) + } + /// Find all references to a class/interface/trait/enum across all files. /// /// Matches `ClassReference` spans whose resolved FQN equals `target_fqn`, @@ -880,7 +890,7 @@ impl Backend { if matched { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } if let Some(ref content) = file_content { let start = offset_to_position(content, span.start as usize); @@ -1007,7 +1017,7 @@ impl Backend { .. } if is_constructor_name(member_name) => { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } match &file_content { Some(content) => { @@ -1030,7 +1040,7 @@ impl Backend { if matched { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } if let Some(content) = &file_content { let start = offset_to_position(content, span.start as usize); @@ -1051,7 +1061,7 @@ impl Backend { for method in class.methods.iter() { if is_constructor_name(&method.name) && method.name_offset != 0 { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } let Some(content) = &file_content else { break; @@ -1129,7 +1139,9 @@ impl Backend { /// When `hierarchy` is `Some`, only references where the subject /// resolves to a class in the given set of FQNs are returned. When /// the subject cannot be resolved (e.g. a complex expression or an - /// untyped variable), the reference is conservatively included. + /// untyped variable), the reference is skipped; accepting every + /// unresolved `$x->method()` makes common names such as `find` unusably + /// noisy in large projects. /// /// When `hierarchy` is `None`, all references with a matching member /// name and static-ness are returned (the v1 behaviour, kept as a @@ -1141,7 +1153,6 @@ impl Backend { include_declaration: bool, hierarchy: Option<&HashSet>, declaration_scope: Option<&HashSet>, - allow_unresolved_subjects: bool, ) -> Vec { let mut locations = Vec::new(); @@ -1225,7 +1236,7 @@ impl Backend { // Check if the subject belongs to the target hierarchy. if let Some(hier) = hierarchy { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } let Some(ref content) = file_content else { break; @@ -1240,7 +1251,7 @@ impl Backend { content, ); if subject_fqns.is_empty() { - if !allow_unresolved_subjects { + if !unresolved_member_subject_matches_scope(subject_text, hier) { continue; } } else if !subject_fqns.iter().any(|fqn| hier.contains(fqn)) { @@ -1251,7 +1262,7 @@ impl Backend { } if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } let Some(ref content) = file_content else { break; @@ -1300,7 +1311,7 @@ impl Backend { } if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } let Some(ref content) = file_content else { break; @@ -1339,7 +1350,7 @@ impl Backend { && prop.name_offset != 0 { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } let Some(ref content) = file_content else { break; @@ -1465,7 +1476,7 @@ impl Backend { || crate::util::short_name(resolved_normalized) == target_short { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } if let Some(ref content) = file_content { let start = offset_to_position(content, span.start as usize); @@ -1540,7 +1551,7 @@ impl Backend { if matched { if file_content.is_none() { - file_content = self.get_file_content_arc(file_uri); + file_content = self.reference_file_content_arc(file_uri); } if let Some(ref content) = file_content { let start = offset_to_position(content, span.start as usize); @@ -1602,7 +1613,7 @@ impl Backend { member_name: &str, ) -> (Option>, Option>) { let ctx = self.file_context(uri); - let Some(content) = self.get_file_content(uri) else { + let Some(content) = self.reference_file_content(uri) else { return (None, None); }; let fqns = @@ -1610,10 +1621,10 @@ impl Backend { if fqns.is_empty() { return (None, None); } - ( - Some(self.collect_hierarchy_for_fqns(&fqns)), - self.collect_declaring_seed_scope(&fqns, member_name, is_static), - ) + let member_scope = self + .collect_member_receiver_scope(&fqns, member_name, is_static) + .unwrap_or_else(|| self.collect_hierarchy_for_fqns(&fqns)); + (Some(member_scope.clone()), Some(member_scope)) } /// Resolve the class hierarchy for a `MemberDeclaration` at a given offset. @@ -1623,8 +1634,8 @@ impl Backend { &self, uri: &str, offset: u32, - _member_name: &str, - _is_static: bool, + member_name: &str, + is_static: bool, ) -> Option> { let classes: Vec> = self .uri_classes_index @@ -1643,7 +1654,10 @@ impl Backend { .min_by_key(|c| c.start_offset) })?; let fqn = current_class.fqn().to_string(); - Some(self.collect_hierarchy_for_fqns(&[fqn])) + Some( + self.collect_member_receiver_scope(std::slice::from_ref(&fqn), member_name, is_static) + .unwrap_or_else(|| self.collect_hierarchy_for_fqns(&[fqn])), + ) } fn resolve_member_declaration_scope( @@ -1666,7 +1680,7 @@ impl Backend { .filter(|c| c.keyword_offset > 0 && offset < c.start_offset) .min_by_key(|c| c.start_offset) })?; - self.collect_declaring_seed_scope( + self.collect_member_receiver_scope( &[current_class.fqn().to_string()], member_name, is_static, @@ -1724,10 +1738,52 @@ impl Backend { } }) .collect(), - None => Vec::new(), + None => self.resolve_static_laravel_builder_subject_to_fqns( + subject_text, + use_map, + namespace, + &class_loader, + ), } } + fn resolve_static_laravel_builder_subject_to_fqns( + &self, + subject_text: &str, + use_map: &HashMap, + namespace: &Option, + class_loader: &dyn Fn(&str) -> Option>, + ) -> Vec { + let expr = crate::subject_expr::SubjectExpr::parse(subject_text); + let Some((class_name, method_name)) = static_call_root(&expr) else { + return Vec::new(); + }; + if !is_laravel_builder_static_entrypoint(method_name) { + return Vec::new(); + } + + let class_fqn = normalize_fqn(&Self::resolve_to_fqn(class_name, use_map, namespace)); + let Some(class_info) = class_loader(&class_fqn) else { + return Vec::new(); + }; + let Some(laravel) = class_info.laravel() else { + return Vec::new(); + }; + + let mut fqns = vec![class_fqn]; + if let Some(builder_fqn) = laravel + .custom_builder + .as_ref() + .and_then(|builder| builder.base_name()) + .map(normalize_fqn) + { + fqns.push(builder_fqn.to_string()); + } + fqns.sort(); + fqns.dedup(); + fqns + } + /// Collect the full class hierarchy (ancestors and descendants) for /// a set of starting FQNs. /// @@ -1838,25 +1894,142 @@ impl Backend { hierarchy } - fn collect_declaring_seed_scope( + fn collect_member_receiver_scope( &self, seed_fqns: &[String], member_name: &str, is_static: bool, ) -> Option> { let class_loader = |name: &str| -> Option> { self.find_or_load_class(name) }; - let declaring_seeds: Vec = seed_fqns - .iter() - .map(|fqn| normalize_fqn(fqn).to_string()) - .filter(|fqn| self.defines_member(fqn, member_name, is_static, &class_loader)) - .collect(); + let mut roots = HashSet::new(); + let mut seen = HashSet::new(); + + for fqn in seed_fqns { + let normalized = normalize_fqn(fqn).to_string(); + if self.defines_member(&normalized, member_name, is_static, &class_loader) { + roots.insert(normalized); + } else { + self.collect_declaring_member_ancestors( + &normalized, + member_name, + is_static, + &class_loader, + &mut roots, + &mut seen, + ); + } + } - if declaring_seeds.is_empty() { + if roots.is_empty() { return None; } - let mut scope: HashSet = declaring_seeds.iter().cloned().collect(); - let mut queue: std::collections::VecDeque = declaring_seeds.into(); + self.extend_laravel_member_roots(&mut roots); + Some(self.collect_descendants_for_roots(roots)) + } + + fn extend_laravel_member_roots(&self, roots: &mut HashSet) { + let class_loader = |name: &str| -> Option> { self.find_or_load_class(name) }; + let initial_roots: Vec = roots.iter().cloned().collect(); + let mut candidate_roots: HashSet = initial_roots.iter().cloned().collect(); + let mut builder_roots: HashSet = HashSet::new(); + if candidate_roots.contains(crate::virtual_members::laravel::ELOQUENT_BUILDER_FQN) { + builder_roots.insert(crate::virtual_members::laravel::ELOQUENT_BUILDER_FQN.to_string()); + } + + for fqn in &initial_roots { + if let Some(cls) = class_loader(fqn) + && let Some(builder_fqn) = cls + .laravel() + .and_then(|l| l.custom_builder.as_ref()) + .and_then(|b| b.base_name()) + .map(normalize_fqn) + { + let builder = builder_fqn.to_string(); + roots.insert(builder.clone()); + candidate_roots.insert(builder.clone()); + builder_roots.insert(builder); + } + } + + let mut model_roots = Vec::new(); + { + let class_index = self.fqn_class_index.read(); + for (class_fqn, class_info) in class_index.iter() { + if let Some(laravel) = class_info.laravel() { + if let Some(builder_fqn) = laravel + .custom_builder + .as_ref() + .and_then(|b| b.base_name()) + .map(normalize_fqn) + { + if candidate_roots.contains(&builder_fqn) { + model_roots.push(normalize_fqn(class_fqn).to_string()); + builder_roots.insert(builder_fqn); + } + } else if candidate_roots + .contains(crate::virtual_members::laravel::ELOQUENT_BUILDER_FQN) + { + model_roots.push(normalize_fqn(class_fqn).to_string()); + builder_roots.insert( + crate::virtual_members::laravel::ELOQUENT_BUILDER_FQN.to_string(), + ); + } + } + } + } + + roots.extend(model_roots); + for builder in builder_roots { + self.collect_ancestors(&builder, &class_loader, roots); + } + } + + fn collect_declaring_member_ancestors( + &self, + fqn: &str, + member_name: &str, + is_static: bool, + class_loader: &dyn Fn(&str) -> Option>, + roots: &mut HashSet, + seen: &mut HashSet, + ) { + let normalized = normalize_fqn(fqn).to_string(); + if !seen.insert(normalized.clone()) { + return; + } + let Some(cls) = class_loader(&normalized) else { + return; + }; + + let ancestors = cls + .parent_class + .iter() + .chain(cls.interfaces.iter()) + .chain(cls.used_traits.iter()) + .chain(cls.mixins.iter()) + .map(|name| normalize_fqn(name).to_string()) + .collect::>(); + + for ancestor in ancestors { + if self.defines_member(&ancestor, member_name, is_static, class_loader) { + roots.insert(ancestor); + } else { + self.collect_declaring_member_ancestors( + &ancestor, + member_name, + is_static, + class_loader, + roots, + seen, + ); + } + } + } + + fn collect_descendants_for_roots(&self, roots: HashSet) -> HashSet { + let mut scope = roots.clone(); + let mut queue: std::collections::VecDeque = roots.into_iter().collect(); let gti = self.gti_index.read(); while let Some(fqn) = queue.pop_front() { if let Some(descendants) = gti.get(&fqn) { @@ -1868,8 +2041,7 @@ impl Backend { } } } - - Some(scope) + scope } fn defines_member( @@ -1891,6 +2063,14 @@ impl Backend { return true; } + let property_name = name.strip_prefix('$').unwrap_or(name); + if cls.properties.iter().any(|p| { + p.name.as_str().strip_prefix('$').unwrap_or(p.name.as_str()) == property_name + && p.is_static == is_static + }) { + return true; + } + if let Some(laravel) = cls.laravel() { if let Some(builder_cls) = laravel .custom_builder @@ -2382,6 +2562,86 @@ fn normalize_fqn(fqn: &str) -> String { strip_fqn_prefix(fqn).to_string() } +fn static_call_root(expr: &crate::subject_expr::SubjectExpr) -> Option<(&str, &str)> { + match expr { + crate::subject_expr::SubjectExpr::CallExpr { callee, .. } => static_call_root(callee), + crate::subject_expr::SubjectExpr::MethodCall { base, .. } => static_call_root(base), + crate::subject_expr::SubjectExpr::StaticMethodCall { class, method } => { + Some((class.as_str(), method.as_str())) + } + _ => None, + } +} + +fn unresolved_member_subject_matches_scope(subject_text: &str, scope: &HashSet) -> bool { + let Some(subject_name) = unresolved_member_subject_name(subject_text) else { + return false; + }; + let subject_key = normalized_member_subject_key(&subject_name); + if subject_key.is_empty() { + return false; + } + + scope.iter().any(|fqn| { + member_scope_name_keys(crate::util::short_name(fqn)) + .into_iter() + .any(|key| key == subject_key) + }) +} + +fn unresolved_member_subject_name(subject_text: &str) -> Option { + match crate::subject_expr::SubjectExpr::parse(subject_text) { + crate::subject_expr::SubjectExpr::Variable(name) => { + Some(name.trim_start_matches('$').to_string()) + } + crate::subject_expr::SubjectExpr::PropertyChain { property, .. } => Some(property), + _ => None, + } +} + +fn member_scope_name_keys(short_name: &str) -> Vec { + let mut names = vec![short_name.to_string()]; + for suffix in ["Repository", "Gateway"] { + if let Some(stem) = short_name.strip_suffix(suffix) { + names.push(format!("{stem}{suffix}")); + if suffix == "Repository" { + names.push(format!("{stem}Repo")); + } + } + } + + names + .into_iter() + .map(|name| normalized_member_subject_key(&name)) + .filter(|name| !name.is_empty()) + .collect() +} + +fn normalized_member_subject_key(name: &str) -> String { + name.chars() + .filter(|ch| ch.is_ascii_alphanumeric()) + .flat_map(char::to_lowercase) + .collect() +} + +fn is_laravel_builder_static_entrypoint(method_name: &str) -> bool { + matches!( + method_name.to_ascii_lowercase().as_str(), + "query" + | "newquery" + | "where" + | "wherein" + | "wherenull" + | "wherenotnull" + | "orderby" + | "select" + | "with" + | "without" + | "latest" + | "oldest" + ) +} + /// Whether a member name is the PHP constructor (`__construct`). /// /// PHP method names are case-insensitive, so `__CONSTRUCT` matches too. diff --git a/src/references/tests.rs b/src/references/tests.rs index ffb70f15..cc65e4f5 100644 --- a/src/references/tests.rs +++ b/src/references/tests.rs @@ -1334,9 +1334,11 @@ async fn test_self_static_method_references_scoped() { } #[tokio::test] -async fn test_unresolvable_variable_included_conservatively() { - // When a variable's type cannot be resolved, the reference should - // be included conservatively rather than dropped. +async fn test_unresolvable_variable_excluded_when_member_scope_known() { + // Once a member search has a resolved receiver scope, unresolved + // receivers with the same member name should not be included. In large + // projects, common methods such as `find` otherwise match unrelated + // untyped services and repositories. let backend = Backend::new_test(); let uri = Url::parse("file:///test.php").unwrap(); let text = concat!( @@ -1361,10 +1363,81 @@ async fn test_unresolvable_variable_included_conservatively() { "Should find $a->save() on L5; got lines: {:?}", lines ); - // $unknown has no type hint — should be included conservatively. assert!( - lines.contains(&6), - "Should conservatively include $unknown->save() on L6 (unresolvable type); got lines: {:?}", + !lines.contains(&6), + "Should NOT include unresolved $unknown->save() on L6; got lines: {:?}", + lines + ); +} + +#[tokio::test] +async fn test_overridden_find_excludes_base_repository_and_unresolved_calls() { + let backend = Backend::new_test(); + let uri = Url::parse("file:///test.php").unwrap(); + let text = concat!( + "find(1);\n", // L11 + " $base->find(2);\n", // L12 + " $users->find(3);\n", // L13 + " $notificationRepository = $managerRegistry->getManager()->getRepository(NotificationImpl::class);\n", // L14 + " $notificationRepository->find(4);\n", // L15 + " $unknown->find(5);\n", // L16 + "}\n", // L17 + ); + + open_file(&backend, &uri, text).await; + + let locs = find_references(&backend, &uri, 5, 21, true).await; + let lines: Vec = locs.iter().map(|l| l.range.start.line).collect(); + + assert!( + lines.contains(&5), + "Should include NotificationRepository::find declaration on L5; got lines: {:?}", + lines + ); + assert!( + lines.contains(&11), + "Should include $notifications->find() on L11; got lines: {:?}", + lines + ); + assert!( + lines.contains(&15), + "Should include unresolved but clearly named $notificationRepository->find() on L15; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&2), + "Should NOT include base ServiceEntityRepository::find declaration on L2; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&8), + "Should NOT include sibling UserRepository::find declaration on L8; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&12), + "Should NOT include base-typed $base->find() on L12; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&13), + "Should NOT include sibling $users->find() on L13; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&16), + "Should NOT include unresolved $unknown->find() on L16; got lines: {:?}", lines ); } From 19fffd426a9b043404ebb6a1c4111aa852f7703a Mon Sep 17 00:00:00 2001 From: sidux Date: Fri, 3 Jul 2026 19:15:46 +0200 Subject: [PATCH 3/4] fix(references): include interface-declared member usages --- src/references/mod.rs | 43 +++++++++++++++++++++++++++- src/references/tests.rs | 63 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/src/references/mod.rs b/src/references/mod.rs index 38cf1517..63b1a4e2 100644 --- a/src/references/mod.rs +++ b/src/references/mod.rs @@ -1907,7 +1907,15 @@ impl Backend { for fqn in seed_fqns { let normalized = normalize_fqn(fqn).to_string(); if self.defines_member(&normalized, member_name, is_static, &class_loader) { - roots.insert(normalized); + roots.insert(normalized.clone()); + self.collect_declaring_member_interfaces( + &normalized, + member_name, + is_static, + &class_loader, + &mut roots, + &mut seen, + ); } else { self.collect_declaring_member_ancestors( &normalized, @@ -1928,6 +1936,39 @@ impl Backend { Some(self.collect_descendants_for_roots(roots)) } + fn collect_declaring_member_interfaces( + &self, + fqn: &str, + member_name: &str, + is_static: bool, + class_loader: &dyn Fn(&str) -> Option>, + roots: &mut HashSet, + seen: &mut HashSet, + ) { + let normalized = normalize_fqn(fqn).to_string(); + if !seen.insert(normalized.clone()) { + return; + } + let Some(cls) = class_loader(&normalized) else { + return; + }; + + for iface in &cls.interfaces { + let iface_fqn = normalize_fqn(iface).to_string(); + if self.defines_member(&iface_fqn, member_name, is_static, class_loader) { + roots.insert(iface_fqn.clone()); + } + self.collect_declaring_member_interfaces( + &iface_fqn, + member_name, + is_static, + class_loader, + roots, + seen, + ); + } + } + fn extend_laravel_member_roots(&self, roots: &mut HashSet) { let class_loader = |name: &str| -> Option> { self.find_or_load_class(name) }; let initial_roots: Vec = roots.iter().cloned().collect(); diff --git a/src/references/tests.rs b/src/references/tests.rs index cc65e4f5..23726e62 100644 --- a/src/references/tests.rs +++ b/src/references/tests.rs @@ -1442,6 +1442,69 @@ async fn test_overridden_find_excludes_base_repository_and_unresolved_calls() { ); } +#[tokio::test] +async fn test_concrete_method_references_include_interface_typed_calls() { + let backend = Backend::new_test(); + let uri = Url::parse("file:///test.php").unwrap(); + let text = concat!( + "notificationGateway->insert($notification);\n", // L15 + " }\n", // L16 + "}\n", // L17 + "class AddUser {\n", // L18 + " public function __construct(private readonly UserGateway $userGateway) {}\n", // L19 + " public function execute(Notification $notification): void {\n", // L20 + " $this->userGateway->insert($notification);\n", // L21 + " }\n", // L22 + "}\n", // L23 + ); + + open_file(&backend, &uri, text).await; + + let locs = find_references(&backend, &uri, 10, 21, true).await; + let lines: Vec = locs.iter().map(|l| l.range.start.line).collect(); + + assert!( + lines.contains(&4), + "Should include NotificationGateway::insert declaration on L4; got lines: {:?}", + lines + ); + assert!( + lines.contains(&10), + "Should include NotificationRepository::insert declaration on L10; got lines: {:?}", + lines + ); + assert!( + lines.contains(&15), + "Should include interface-typed $notificationGateway->insert() on L15; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&7), + "Should NOT include unrelated UserGateway::insert declaration on L7; got lines: {:?}", + lines + ); + assert!( + !lines.contains(&21), + "Should NOT include unrelated $userGateway->insert() on L21; got lines: {:?}", + lines + ); +} + #[tokio::test] async fn test_this_method_references_excludes_unrelated() { // $this->method() inside one class should not match $this->method() From 224611d3d2b4ab18d3a822e1067e5ac98fb107cc Mon Sep 17 00:00:00 2001 From: sidux Date: Fri, 3 Jul 2026 19:27:37 +0200 Subject: [PATCH 4/4] fix(rename): keep concrete member renames scoped --- src/references/mod.rs | 86 +++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 20 deletions(-) diff --git a/src/references/mod.rs b/src/references/mod.rs index 63b1a4e2..48fa1ffa 100644 --- a/src/references/mod.rs +++ b/src/references/mod.rs @@ -40,6 +40,18 @@ use crate::util::{ }; use crate::virtual_members::laravel; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ReferenceSearchMode { + References, + Rename, +} + +impl ReferenceSearchMode { + fn include_declaring_interfaces(self) -> bool { + matches!(self, ReferenceSearchMode::References) + } +} + impl Backend { /// Entry point for `textDocument/references`. /// @@ -53,7 +65,13 @@ impl Backend { position: Position, include_declaration: bool, ) -> Option> { - self.find_references_inner(uri, content, position, include_declaration) + self.find_references_inner( + uri, + content, + position, + include_declaration, + ReferenceSearchMode::References, + ) } /// Like [`find_references`], but kept separate for rename-specific call @@ -65,7 +83,13 @@ impl Backend { position: Position, include_declaration: bool, ) -> Option> { - self.find_references_inner(uri, content, position, include_declaration) + self.find_references_inner( + uri, + content, + position, + include_declaration, + ReferenceSearchMode::Rename, + ) } fn find_references_inner( @@ -74,6 +98,7 @@ impl Backend { content: &str, position: Position, include_declaration: bool, + mode: ReferenceSearchMode, ) -> Option> { let start_total = std::time::Instant::now(); tracing::info!( @@ -100,6 +125,7 @@ impl Backend { content, sym.start, include_declaration, + mode, ); tracing::info!( "Find References: total time for {:?}: {:?}", @@ -142,6 +168,7 @@ impl Backend { content: &str, span_start: u32, include_declaration: bool, + mode: ReferenceSearchMode, ) -> Vec { match kind { SymbolKind::Variable { name } | SymbolKind::CompactVariable { name } => { @@ -168,10 +195,11 @@ impl Backend { }); // Resolve the enclosing class to scope the search. - let hierarchy = - self.resolve_member_declaration_hierarchy(uri, span_start, name, is_static); - let declaration_scope = - self.resolve_member_declaration_scope(uri, span_start, name, is_static); + let hierarchy = self.resolve_member_declaration_hierarchy( + uri, span_start, name, is_static, mode, + ); + let declaration_scope = self + .resolve_member_declaration_scope(uri, span_start, name, is_static, mode); return self.find_member_references( name, is_static, @@ -210,6 +238,7 @@ impl Backend { *is_static, span_start, member_name, + mode, ); // Constructors are not invoked through member accesses @@ -264,10 +293,10 @@ impl Backend { } // Resolve the enclosing class to scope the search. - let hierarchy = - self.resolve_member_declaration_hierarchy(uri, span_start, name, *is_static); + let hierarchy = self + .resolve_member_declaration_hierarchy(uri, span_start, name, *is_static, mode); let declaration_scope = - self.resolve_member_declaration_scope(uri, span_start, name, *is_static); + self.resolve_member_declaration_scope(uri, span_start, name, *is_static, mode); self.find_member_references( name, *is_static, @@ -1611,6 +1640,7 @@ impl Backend { is_static: bool, span_start: u32, member_name: &str, + mode: ReferenceSearchMode, ) -> (Option>, Option>) { let ctx = self.file_context(uri); let Some(content) = self.reference_file_content(uri) else { @@ -1622,7 +1652,12 @@ impl Backend { return (None, None); } let member_scope = self - .collect_member_receiver_scope(&fqns, member_name, is_static) + .collect_member_receiver_scope( + &fqns, + member_name, + is_static, + mode.include_declaring_interfaces(), + ) .unwrap_or_else(|| self.collect_hierarchy_for_fqns(&fqns)); (Some(member_scope.clone()), Some(member_scope)) } @@ -1636,6 +1671,7 @@ impl Backend { offset: u32, member_name: &str, is_static: bool, + mode: ReferenceSearchMode, ) -> Option> { let classes: Vec> = self .uri_classes_index @@ -1655,8 +1691,13 @@ impl Backend { })?; let fqn = current_class.fqn().to_string(); Some( - self.collect_member_receiver_scope(std::slice::from_ref(&fqn), member_name, is_static) - .unwrap_or_else(|| self.collect_hierarchy_for_fqns(&[fqn])), + self.collect_member_receiver_scope( + std::slice::from_ref(&fqn), + member_name, + is_static, + mode.include_declaring_interfaces(), + ) + .unwrap_or_else(|| self.collect_hierarchy_for_fqns(&[fqn])), ) } @@ -1666,6 +1707,7 @@ impl Backend { offset: u32, member_name: &str, is_static: bool, + mode: ReferenceSearchMode, ) -> Option> { let classes: Vec> = self .uri_classes_index @@ -1684,6 +1726,7 @@ impl Backend { &[current_class.fqn().to_string()], member_name, is_static, + mode.include_declaring_interfaces(), ) } @@ -1899,6 +1942,7 @@ impl Backend { seed_fqns: &[String], member_name: &str, is_static: bool, + include_declaring_interfaces: bool, ) -> Option> { let class_loader = |name: &str| -> Option> { self.find_or_load_class(name) }; let mut roots = HashSet::new(); @@ -1908,14 +1952,16 @@ impl Backend { let normalized = normalize_fqn(fqn).to_string(); if self.defines_member(&normalized, member_name, is_static, &class_loader) { roots.insert(normalized.clone()); - self.collect_declaring_member_interfaces( - &normalized, - member_name, - is_static, - &class_loader, - &mut roots, - &mut seen, - ); + if include_declaring_interfaces { + self.collect_declaring_member_interfaces( + &normalized, + member_name, + is_static, + &class_loader, + &mut roots, + &mut seen, + ); + } } else { self.collect_declaring_member_ancestors( &normalized,