diff --git a/README.md b/README.md index cb30457e..f49b7e7b 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,7 @@ require('fff').setup({ }, git = { status_text_color = false, -- true to color filenames by git status + support_submodules = true, -- walk into submodules and report their git status; set false to skip them }, grep = { max_file_size = 10 * 1024 * 1024, diff --git a/crates/fff-c/include/fff.h b/crates/fff-c/include/fff.h index 8a51e996..4ed36ad6 100644 --- a/crates/fff-c/include/fff.h +++ b/crates/fff-c/include/fff.h @@ -379,6 +379,10 @@ struct FffResult *fff_create_instance(const char *base_path, * * `frecency_db_path` – frecency LMDB database path (NULL/empty to skip) * * `history_db_path` – query history LMDB database path (NULL/empty to skip) * * `use_unsafe_no_lock` – **deprecated, ignored.** Previously enabled + * `MDB_NOLOCK|MDB_NOSYNC|MDB_NOMETASYNC` for LMDB; benchmarks showed no + * measurable win under realistic contention, so the flag is now a no-op. + * The parameter remains in the signature for ABI compatibility and will be + * removed in a future release. * * `enable_mmap_cache` – pre-populate mmap caches after the initial scan * * `enable_content_indexing` – build content index after the initial scan * * `watch` – start a background file-system watcher for live updates @@ -413,6 +417,32 @@ struct FffResult *fff_create_instance2(const char *base_path, uint64_t cache_budget_max_bytes, uint64_t cache_budget_max_file_size); +/** + * Create a new file finder instance (v3, with submodule support toggle). + * + * Identical to [`fff_create_instance2`] except for the trailing + * `support_submodules` flag. When `true` (recommended default), submodule + * directories are walked and reported in git status. When `false`, submodule + * paths are skipped during traversal and excluded from git status. + * + * ## Safety + * String parameters must be valid null-terminated UTF-8 or NULL. + */ +struct FffResult *fff_create_instance3(const char *base_path, + const char *frecency_db_path, + const char *history_db_path, + bool _use_unsafe_no_lock, + bool enable_mmap_cache, + bool enable_content_indexing, + bool watch, + bool ai_mode, + const char *log_file_path, + const char *log_level, + uint64_t cache_budget_max_files, + uint64_t cache_budget_max_bytes, + uint64_t cache_budget_max_file_size, + bool support_submodules); + /** * Destroy a file finder instance and free all its resources. * diff --git a/crates/fff-c/src/lib.rs b/crates/fff-c/src/lib.rs index d25180c2..7d6d704c 100644 --- a/crates/fff-c/src/lib.rs +++ b/crates/fff-c/src/lib.rs @@ -270,6 +270,121 @@ pub unsafe extern "C" fn fff_create_instance2( watch, mode, cache_budget, + support_submodules: true, + }, + ) { + return FffResult::err(&format!("Failed to init file picker: {}", e)); + } + + let instance = Box::new(FffInstance { + picker: shared_picker, + frecency: shared_frecency, + query_tracker, + }); + + let fff_handle = Box::into_raw(instance) as *mut c_void; + FffResult::ok_handle(fff_handle) +} + +/// Create a new file finder instance (v3, with submodule support toggle). +/// +/// Identical to [`fff_create_instance2`] except for the trailing +/// `support_submodules` flag. When `true` (recommended default), submodule +/// directories are walked and reported in git status. When `false`, submodule +/// paths are skipped during traversal and excluded from git status. +/// +/// ## Safety +/// String parameters must be valid null-terminated UTF-8 or NULL. +#[unsafe(no_mangle)] +#[allow(clippy::too_many_arguments)] +pub unsafe extern "C" fn fff_create_instance3( + base_path: *const c_char, + frecency_db_path: *const c_char, + history_db_path: *const c_char, + _use_unsafe_no_lock: bool, + enable_mmap_cache: bool, + enable_content_indexing: bool, + watch: bool, + ai_mode: bool, + log_file_path: *const c_char, + log_level: *const c_char, + cache_budget_max_files: u64, + cache_budget_max_bytes: u64, + cache_budget_max_file_size: u64, + support_submodules: bool, +) -> *mut FffResult { + let base_path_str = match unsafe { cstr_to_str(base_path) } { + Some(s) if !s.is_empty() => s.to_string(), + _ => return FffResult::err("base_path is null or empty"), + }; + + if let Some(log_path) = unsafe { optional_cstr(log_file_path) } { + let level = unsafe { optional_cstr(log_level) }; + if let Err(e) = fff::log::init_tracing(log_path, level) { + return FffResult::err(&format!("Failed to init tracing: {}", e)); + } + } + + let frecency_path = unsafe { optional_cstr(frecency_db_path) }.map(|s| s.to_string()); + let history_path = unsafe { optional_cstr(history_db_path) }.map(|s| s.to_string()); + + let shared_picker = SharedFilePicker::default(); + let shared_frecency = SharedFrecency::default(); + let query_tracker = SharedQueryTracker::default(); + + if let Some(ref frecency_path) = frecency_path { + if let Some(parent) = PathBuf::from(frecency_path).parent() { + let _ = std::fs::create_dir_all(parent); + } + + match FrecencyTracker::open(frecency_path) { + Ok(tracker) => { + if let Err(e) = shared_frecency.init(tracker) { + return FffResult::err(&format!("Failed to acquire frecency lock: {}", e)); + } + } + Err(e) => return FffResult::err(&format!("Failed to init frecency db: {}", e)), + } + } + + if let Some(ref history_path) = history_path { + if let Some(parent) = PathBuf::from(history_path).parent() { + let _ = std::fs::create_dir_all(parent); + } + + match QueryTracker::open(history_path) { + Ok(tracker) => { + if let Err(e) = query_tracker.init(tracker) { + return FffResult::err(&format!("Failed to acquire query tracker lock: {}", e)); + } + } + Err(e) => return FffResult::err(&format!("Failed to init query tracker db: {}", e)), + } + } + + let mode = if ai_mode { + FFFMode::Ai + } else { + FFFMode::Neovim + }; + + let cache_budget = fff::ContentCacheBudget::from_overrides( + cache_budget_max_files as usize, + cache_budget_max_bytes, + cache_budget_max_file_size, + ); + + if let Err(e) = FilePicker::new_with_shared_state( + shared_picker.clone(), + shared_frecency.clone(), + fff::FilePickerOptions { + base_path: base_path_str, + enable_mmap_cache, + enable_content_indexing, + watch, + mode, + cache_budget, + support_submodules, }, ) { return FffResult::err(&format!("Failed to init file picker: {}", e)); @@ -901,19 +1016,27 @@ pub unsafe extern "C" fn fff_restart_index( Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)), }; - let (warmup_caches, content_indexing, watch, mode) = if let Some(mut picker) = guard.take() { - let warmup = picker.has_mmap_cache(); - let enable_content_indexing = picker.has_content_indexing(); - let watch = picker.has_watcher(); - let mode = picker.mode(); - - picker.stop_background_monitor(); - - (warmup, enable_content_indexing, watch, mode) - } else { - // this is error state anyway - (false, true, true, FFFMode::default()) - }; + let (warmup_caches, content_indexing, watch, mode, support_submodules) = + if let Some(mut picker) = guard.take() { + let warmup = picker.has_mmap_cache(); + let enable_content_indexing = picker.has_content_indexing(); + let watch = picker.has_watcher(); + let mode = picker.mode(); + let support_submodules = picker.has_submodule_support(); + + picker.stop_background_monitor(); + + ( + warmup, + enable_content_indexing, + watch, + mode, + support_submodules, + ) + } else { + // this is error state anyway + (false, true, true, FFFMode::default(), true) + }; drop(guard); @@ -927,6 +1050,7 @@ pub unsafe extern "C" fn fff_restart_index( watch, mode, cache_budget: None, + support_submodules, }, ) { Ok(()) => FffResult::ok_empty(), diff --git a/crates/fff-core/src/background_watcher.rs b/crates/fff-core/src/background_watcher.rs index 33811200..0a2a9cae 100644 --- a/crates/fff-core/src/background_watcher.rs +++ b/crates/fff-core/src/background_watcher.rs @@ -41,6 +41,7 @@ impl BackgroundWatcher { shared_picker: SharedFilePicker, shared_frecency: SharedFrecency, mode: FFFMode, + support_submodules: bool, ) -> Result { info!( "Initializing background watcher for path: {}, mode: {:?}", @@ -93,6 +94,7 @@ impl BackgroundWatcher { mode, use_recursive, watch_tx_for_debouncer, + support_submodules, )?; info!("Background file watcher initialized successfully"); @@ -145,6 +147,7 @@ impl BackgroundWatcher { &strong_picker, &owner_frecency, &owner_git_workdir, + support_submodules, ); // Transient strong ref drops here, back @@ -162,6 +165,7 @@ impl BackgroundWatcher { }) } + #[allow(clippy::too_many_arguments)] fn create_debouncer( base_path: PathBuf, git_workdir: Option, @@ -170,6 +174,7 @@ impl BackgroundWatcher { mode: FFFMode, use_recursive: bool, watch_tx: mpsc::Sender, + support_submodules: bool, ) -> Result { let config = Config::default() // do not follow symlinks as then notifiers spawns a bunch of events for symlinked @@ -212,6 +217,7 @@ impl BackgroundWatcher { &strong_picker, &shared_frecency, mode, + support_submodules, ); // every new directory creates had to be reflected in the picker state @@ -399,6 +405,7 @@ fn handle_debounced_events( shared_picker: &SharedFilePicker, shared_frecency: &SharedFrecency, mode: FFFMode, + support_submodules: bool, ) -> Vec { // this will be called very often, we have to minimiy the lock time for file picker let repo = git_workdir.as_ref().and_then(|p| Repository::open(p).ok()); @@ -666,7 +673,11 @@ fn handle_debounced_events( files_to_update_git_status.len() ); - let status = match GitStatusCache::git_status_for_paths(repo, &files_to_update_git_status) { + let status = match GitStatusCache::git_status_for_paths( + repo, + &files_to_update_git_status, + support_submodules, + ) { Ok(status) => status, Err(e) => { tracing::error!(?e, "Failed to query git status"); @@ -697,6 +708,7 @@ fn track_files_from_new_directories( shared_picker: &SharedFilePicker, shared_frecency: &SharedFrecency, git_workdir: &Option, + support_submodules: bool, ) { let Ok(entries) = std::fs::read_dir(dir) else { return; @@ -733,13 +745,14 @@ fn track_files_from_new_directories( } if let Some(repo) = repo.as_ref() { - let status = match GitStatusCache::git_status_for_paths(repo, &files_to_add) { - Ok(status) => status, - Err(e) => { - tracing::error!(?e, "inject_existing_files: git status query failed"); - return; - } - }; + let status = + match GitStatusCache::git_status_for_paths(repo, &files_to_add, support_submodules) { + Ok(status) => status, + Err(e) => { + tracing::error!(?e, "inject_existing_files: git status query failed"); + return; + } + }; if let Ok(mut guard) = shared_picker.write() && let Some(ref mut picker) = *guard diff --git a/crates/fff-core/src/file_picker.rs b/crates/fff-core/src/file_picker.rs index fa4283a4..ce9ea528 100644 --- a/crates/fff-core/src/file_picker.rs +++ b/crates/fff-core/src/file_picker.rs @@ -449,6 +449,10 @@ pub struct FilePickerOptions { pub cache_budget: Option, /// When `false`, `new_with_shared_state` skips the background file watcher. pub watch: bool, + /// When `true` (default), submodule directories are walked and their + /// statuses are reported by libgit2. When `false`, submodule paths are + /// skipped during traversal and excluded from git status. + pub support_submodules: bool, } impl Default for FilePickerOptions { @@ -460,6 +464,7 @@ impl Default for FilePickerOptions { mode: FFFMode::default(), cache_budget: None, watch: true, + support_submodules: true, } } } @@ -476,6 +481,7 @@ pub struct FilePicker { enable_mmap_cache: bool, enable_content_indexing: bool, watch: bool, + support_submodules: bool, } impl std::fmt::Debug for FilePicker { @@ -529,6 +535,10 @@ impl FilePicker { self.watch } + pub fn has_submodule_support(&self) -> bool { + self.support_submodules + } + pub fn mode(&self) -> FFFMode { self.mode } @@ -722,6 +732,7 @@ impl FilePicker { enable_mmap_cache: options.enable_mmap_cache, enable_content_indexing: options.enable_content_indexing, watch: options.watch, + support_submodules: options.support_submodules, }) } @@ -745,6 +756,7 @@ impl FilePicker { let warmup = picker.enable_mmap_cache; let content_indexing = picker.enable_content_indexing; let watch = picker.watch; + let support_submodules = picker.support_submodules; let mode = picker.mode; let signals = picker.scan_signals(); @@ -773,6 +785,7 @@ impl FilePicker { watch, auto_cache_budget: true, install_watcher: true, + support_submodules, }, ) .spawn(); @@ -793,7 +806,10 @@ impl FilePicker { self.scanned_files_count.store(0, Ordering::Relaxed); let git_workdir = FileSync::discover_git_workdir(&self.base_path); - let git_handle = git_workdir.clone().map(FileSync::spawn_git_status); + let support_submodules = self.support_submodules; + let git_handle = git_workdir + .clone() + .map(|wd| FileSync::spawn_git_status(wd, support_submodules)); let empty_frecency = SharedFrecency::default(); let sync = FileSync::walk_filesystem( @@ -802,6 +818,7 @@ impl FilePicker { &self.scanned_files_count, &empty_frecency, self.mode, + self.support_submodules, )?; self.sync_data = sync; @@ -847,6 +864,7 @@ impl FilePicker { shared_picker.clone(), shared_frecency.clone(), self.mode, + self.support_submodules, )?; self.background_watcher = Some(watcher); self.signals.watcher_ready.store(true, Ordering::Release); @@ -1783,11 +1801,14 @@ impl FileSync { git_workdir } - pub(crate) fn spawn_git_status(git_workdir: PathBuf) -> JoinHandle> { + pub(crate) fn spawn_git_status( + git_workdir: PathBuf, + support_submodules: bool, + ) -> JoinHandle> { std::thread::spawn(move || { GitStatusCache::read_git_status( Some(git_workdir.as_path()), - &mut crate::git::default_status_options(), + &mut crate::git::default_status_options(support_submodules), ) }) } @@ -1801,6 +1822,7 @@ impl FileSync { synced_files_count: &Arc, shared_frecency: &SharedFrecency, mode: FFFMode, + support_submodules: bool, ) -> Result { use ignore::WalkBuilder; @@ -1826,6 +1848,20 @@ impl FileSync { walk_builder.overrides(overrides); } + // When submodule support is disabled, collect submodule absolute paths + // up-front and prune them from the walk via filter_entry. We resolve + // each submodule path relative to its containing repository workdir + // (which may itself be a submodule) so nested setups work. + if !support_submodules && let Some(workdir) = git_workdir.as_ref() { + let submodule_paths = collect_submodule_paths(workdir); + if !submodule_paths.is_empty() { + debug!(count = submodule_paths.len(), "Skipping submodule paths"); + walk_builder.filter_entry(move |entry| { + !submodule_paths.iter().any(|p| entry.path() == p.as_path()) + }); + } + } + let walker = walk_builder.build_parallel(); let walker_start = std::time::Instant::now(); debug!("SCAN: Starting file walker"); @@ -1982,6 +2018,34 @@ impl FileSync { } } +/// Resolve every submodule registered under `workdir` to its absolute on-disk +/// path. Includes nested submodules by descending into each submodule's own +/// repository when it is initialized. Returns normalized paths suitable for +/// equality comparison against `WalkBuilder` entries. +fn collect_submodule_paths(workdir: &Path) -> Vec { + let Ok(repo) = Repository::open(workdir) else { + return Vec::new(); + }; + + let mut out = Vec::new(); + let mut stack: Vec<(Repository, PathBuf)> = vec![(repo, workdir.to_path_buf())]; + while let Some((repo, root)) = stack.pop() { + let Ok(submodules) = repo.submodules() else { + continue; + }; + for sm in submodules { + let abs = root.join(sm.path()); + let abs = crate::path_utils::normalize(abs); + // Recurse into initialized submodules so nested ones are found too. + if let Ok(sub_repo) = sm.open() { + stack.push((sub_repo, abs.clone())); + } + out.push(abs); + } + } + out +} + /// This does both thing (yes sorry all the OOP morons) /// in one go: populates files chunked storage and creates new directories fn populates_dirs_files_chunked_storage<'a>( diff --git a/crates/fff-core/src/git.rs b/crates/fff-core/src/git.rs index cb6be46a..9a9fec85 100644 --- a/crates/fff-core/src/git.rs +++ b/crates/fff-core/src/git.rs @@ -7,12 +7,12 @@ use std::{ }; use tracing::debug; -pub(crate) fn default_status_options() -> StatusOptions { +pub(crate) fn default_status_options(support_submodules: bool) -> StatusOptions { let mut opts = StatusOptions::new(); opts.include_untracked(true) .recurse_untracked_dirs(true) .include_unmodified(true) - .exclude_submodules(true); + .exclude_submodules(!support_submodules); opts } @@ -83,6 +83,7 @@ impl GitStatusCache { pub fn git_status_for_paths + Debug>( repo: &Repository, paths: &[TPath], + support_submodules: bool, ) -> Result { if paths.is_empty() { return Ok(Self(AHashMap::new())); @@ -105,7 +106,7 @@ impl GitStatusCache { return Ok(Self(map)); } - let mut status_options = default_status_options(); + let mut status_options = default_status_options(support_submodules); for path in paths { status_options.pathspec(path.as_ref().strip_prefix(&workdir)?); } @@ -226,7 +227,7 @@ mod tests { let repo = Repository::open(&base).unwrap(); let paths: Vec = names.iter().map(|n| base.join(n)).collect(); - let cache = GitStatusCache::git_status_for_paths(&repo, &paths).unwrap(); + let cache = GitStatusCache::git_status_for_paths(&repo, &paths, true).unwrap(); for (n, abs) in names.iter().zip(paths.iter()) { let status = cache.lookup_status(abs); diff --git a/crates/fff-core/src/scan.rs b/crates/fff-core/src/scan.rs index b222eddd..3a1e5657 100644 --- a/crates/fff-core/src/scan.rs +++ b/crates/fff-core/src/scan.rs @@ -57,13 +57,27 @@ pub(crate) struct ScanSignals { } /// Which optional phases a scan should run. -#[derive(Clone, Copy, Default)] +#[derive(Clone, Copy)] pub(crate) struct ScanConfig { pub(crate) warmup: bool, pub(crate) content_indexing: bool, pub(crate) watch: bool, pub(crate) auto_cache_budget: bool, pub(crate) install_watcher: bool, + pub(crate) support_submodules: bool, +} + +impl Default for ScanConfig { + fn default() -> Self { + Self { + warmup: false, + content_indexing: false, + watch: false, + auto_cache_budget: false, + install_watcher: false, + support_submodules: true, + } + } } /// A fully-configured scan job ready to run on a background thread. @@ -112,6 +126,7 @@ impl ScanJob { watch: picker.has_watcher(), auto_cache_budget: !picker.has_explicit_cache_budget(), install_watcher: false, // the watcher is independent of rescan, it is not restarting EVER + support_submodules: picker.has_submodule_support(), }; drop(guard); // just a sanity check @@ -175,13 +190,16 @@ impl ScanJob { // 1. Start git discovery and walk filesystem off-lock. let git_workdir = FileSync::discover_git_workdir(&base_path); - let status_handle = git_workdir.clone().map(FileSync::spawn_git_status); + let status_handle = git_workdir + .clone() + .map(|wd| FileSync::spawn_git_status(wd, config.support_submodules)); let sync = match FileSync::walk_filesystem( &base_path, git_workdir.clone(), &scanned_files_counter, &shared_frecency, mode, + config.support_submodules, ) { Ok(sync) => sync, Err(e) => { @@ -235,6 +253,7 @@ impl ScanJob { shared_picker.clone(), shared_frecency.clone(), mode, + config.support_submodules, ) { Ok(watcher) => { if let Ok(mut guard) = shared_picker.write() diff --git a/crates/fff-core/src/shared.rs b/crates/fff-core/src/shared.rs index d5da4691..dc03e422 100644 --- a/crates/fff-core/src/shared.rs +++ b/crates/fff-core/src/shared.rs @@ -224,6 +224,7 @@ impl SharedFilePicker { }; let git_root = picker.git_root().map(|p| p.to_path_buf()); + let support_submodules = picker.has_submodule_support(); drop(guard); // updating git status could take very long time, there is not risky as we // do not allow any mutations and deletions of files from the sync @@ -235,7 +236,7 @@ impl SharedFilePicker { GitStatusCache::read_git_status( git_root.as_deref(), - &mut crate::git::default_status_options(), + &mut crate::git::default_status_options(support_submodules), ) }; diff --git a/crates/fff-core/tests/sparse_checkout_test.rs b/crates/fff-core/tests/sparse_checkout_test.rs new file mode 100644 index 00000000..635b27e9 --- /dev/null +++ b/crates/fff-core/tests/sparse_checkout_test.rs @@ -0,0 +1,287 @@ +//! Regression test: indexing a sparse-checked-out git repository must not +//! crash and must only surface paths that are actually materialized on +//! disk. libgit2 happily reports statuses for paths that were excluded by +//! `core.sparseCheckout`; `update_git_statuses` already silently skips +//! those (see `file_picker.rs::1346`), this test pins that contract. + +use std::fs; +use std::io::Write; +use std::path::Path; +use std::process::Command; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use fff_search::file_picker::FilePicker; +use fff_search::{ + FilePickerOptions, FuzzySearchOptions, PaginationArgs, QueryParser, SharedFilePicker, + SharedFrecency, +}; +use git2::Repository; +use tempfile::TempDir; +use tracing_subscriber::fmt::MakeWriter; + +fn git(dir: &Path, args: &[&str]) { + let out = Command::new("git") + .args(args) + .current_dir(dir) + .env("GIT_AUTHOR_NAME", "t") + .env("GIT_AUTHOR_EMAIL", "t@t") + .env("GIT_COMMITTER_NAME", "t") + .env("GIT_COMMITTER_EMAIL", "t@t") + .env("GIT_CONFIG_GLOBAL", "/dev/null") + .env("GIT_CONFIG_SYSTEM", "/dev/null") + .output() + .expect("git binary must be installed to run this test"); + assert!( + out.status.success(), + "git {args:?} failed:\nstdout={}\nstderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); +} + +#[derive(Clone, Default)] +struct CapturedLogs(Arc>>); + +impl CapturedLogs { + fn lines(&self) -> String { + let buf = self.0.lock().unwrap(); + String::from_utf8_lossy(&buf).to_string() + } +} + +impl<'a> MakeWriter<'a> for CapturedLogs { + type Writer = CapturedLogsWriter; + fn make_writer(&'a self) -> Self::Writer { + CapturedLogsWriter(Arc::clone(&self.0)) + } +} + +struct CapturedLogsWriter(Arc>>); + +impl Write for CapturedLogsWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.0.lock().unwrap().extend_from_slice(buf); + Ok(buf.len()) + } + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +fn fuzzy_search_paths(picker: &FilePicker, query: &str) -> Vec { + let parser = QueryParser::default(); + let parsed = parser.parse(query); + let result = picker.fuzzy_search( + &parsed, + None, + FuzzySearchOptions { + max_threads: 1, + pagination: PaginationArgs { + offset: 0, + limit: 200, + }, + ..Default::default() + }, + ); + result + .items + .iter() + .map(|f| f.relative_path(picker)) + .collect() +} + +/// End-to-end: build a "remote" repo with files in two top-level dirs, +/// clone it with a cone-mode sparse-checkout that materializes only one +/// dir, then index the worktree both via the synchronous `collect_files` +/// path and the async `new_with_shared_state` path. The picker must: +/// * see only the materialized files, +/// * NOT panic when libgit2 hands it statuses for non-materialized paths, +/// * surface no `ERROR`/`WARN` log lines from fff core, +/// * yield correct fuzzy-search results, +/// * skip non-materialized paths via the documented debug message in +/// `update_git_statuses`. +#[test] +fn sparse_checkout_indexes_only_materialized_files() { + let logs = CapturedLogs::default(); + let _guard = tracing::subscriber::set_default( + tracing_subscriber::fmt() + .with_max_level(tracing::Level::DEBUG) + .with_ansi(false) + .with_writer(logs.clone()) + .finish(), + ); + + let upstream_tmp = TempDir::new().unwrap(); + let upstream = upstream_tmp.path(); + + git(upstream, &["init", "-b", "main"]); + fs::create_dir_all(upstream.join("included/sub")).unwrap(); + fs::create_dir_all(upstream.join("excluded/deep")).unwrap(); + fs::write(upstream.join("README.md"), "root readme\n").unwrap(); + fs::write(upstream.join("included/in_a.rs"), "// a\n").unwrap(); + fs::write(upstream.join("included/sub/in_b.rs"), "// b\n").unwrap(); + fs::write(upstream.join("excluded/ex_a.rs"), "// ea\n").unwrap(); + fs::write(upstream.join("excluded/deep/ex_b.rs"), "// eb\n").unwrap(); + git(upstream, &["add", "-A"]); + git(upstream, &["commit", "-m", "seed", "--no-gpg-sign"]); + + let work_tmp = TempDir::new().unwrap(); + let work = work_tmp.path(); + git( + work, + &[ + "clone", + "--no-local", + "--no-checkout", + upstream.to_str().unwrap(), + ".", + ], + ); + git(work, &["sparse-checkout", "init", "--cone"]); + git(work, &["sparse-checkout", "set", "included"]); + git(work, &["checkout", "main"]); + + // Sanity: only the included dir + top-level files materialized. + assert!(work.join("README.md").is_file()); + assert!(work.join("included/in_a.rs").is_file()); + assert!(work.join("included/sub/in_b.rs").is_file()); + assert!(!work.join("excluded").exists()); + + // Sanity: libgit2's status output DOES contain the sparse-excluded + // paths in the index — proving the "path not in index" branch in + // `update_git_statuses` actually has work to do. + { + let repo = Repository::open(work).unwrap(); + let mut opts = git2::StatusOptions::new(); + opts.include_untracked(true) + .include_unmodified(true) + .recurse_untracked_dirs(true) + .exclude_submodules(false); + let statuses = repo.statuses(Some(&mut opts)).unwrap(); + let entries: Vec = statuses + .iter() + .filter_map(|e| e.path().map(|p| p.to_owned())) + .collect(); + assert!( + entries.iter().any(|p| p == "excluded/ex_a.rs"), + "libgit2 must surface sparse-excluded paths so we can prove the \ + picker handles them; got {:?}", + entries + ); + } + + // ---- Path 1: synchronous `collect_files`. ----------------------- + let mut picker = FilePicker::new(FilePickerOptions { + base_path: work.to_string_lossy().to_string(), + enable_mmap_cache: false, + enable_content_indexing: false, + watch: false, + ..Default::default() + }) + .expect("failed to create FilePicker"); + picker + .collect_files() + .expect("collect_files must not panic on a sparse-checked repo"); + + let indexed: Vec = picker + .get_files() + .iter() + .map(|f| f.relative_path(&picker)) + .collect(); + + assert!( + indexed.iter().any(|p| p.ends_with("in_a.rs")), + "expected to find included/in_a.rs in index, got {:?}", + indexed + ); + assert!( + indexed.iter().any(|p| p.ends_with("in_b.rs")), + "expected to find included/sub/in_b.rs in index, got {:?}", + indexed + ); + assert!( + indexed + .iter() + .all(|p| !p.contains("ex_a.rs") && !p.contains("ex_b.rs")), + "sparse-excluded files leaked into the index: {:?}", + indexed + ); + + let hits = fuzzy_search_paths(&picker, "in_b"); + assert!( + hits.iter().any(|p| p.ends_with("in_b.rs")), + "fuzzy search must surface materialized files, got {:?}", + hits + ); + + drop(picker); + + // ---- Path 2: async `new_with_shared_state` + refresh_git_status. --- + // This goes through the orchestrated scan + git-apply pipeline + // (`scan.rs::apply_git_status_and_frecency` followed by an explicit + // `refresh_git_status` that hits the `update_git_statuses` branch + // which is the one that has to silently drop sparse-excluded paths). + let shared_picker = SharedFilePicker::default(); + let shared_frecency = SharedFrecency::default(); + FilePicker::new_with_shared_state( + shared_picker.clone(), + shared_frecency.clone(), + FilePickerOptions { + base_path: work.to_string_lossy().to_string(), + enable_mmap_cache: false, + enable_content_indexing: false, + watch: false, + ..Default::default() + }, + ) + .expect("new_with_shared_state must succeed on a sparse-checked repo"); + + assert!( + shared_picker.wait_for_scan(Duration::from_secs(15)), + "initial scan timed out" + ); + + let applied = shared_picker + .refresh_git_status(&shared_frecency) + .expect("refresh_git_status must not error on a sparse-checked repo"); + assert!( + applied >= 3, + "expected statuses for materialized files, got {applied}" + ); + + { + let guard = shared_picker.read().unwrap(); + let picker = guard.as_ref().unwrap(); + for f in picker.get_files() { + let rel = f.relative_path(picker); + assert!( + !rel.contains("ex_a.rs") && !rel.contains("ex_b.rs"), + "sparse-excluded file leaked through async path: {rel}" + ); + } + } + + // ---- Log assertions. -------------------------------------------- + let captured = logs.lines(); + + assert!( + captured.contains("Git status for path not in index, skipping"), + "expected the documented debug skip-message for sparse-excluded \ + paths to be emitted at least once, full log was:\n{captured}" + ); + + let unexpected: Vec<&str> = captured + .lines() + .filter(|l| l.contains(" ERROR ") || l.contains(" WARN ")) + // The walker emits a warning when given a non-git base path; we + // are *inside* a git repo so it should not trip, but keep the + // filter narrow regardless. + .filter(|l| !l.contains("No git repository found")) + .collect(); + assert!( + unexpected.is_empty(), + "no ERROR/WARN lines expected on a sparse-checked repo, got:\n{}", + unexpected.join("\n") + ); +} diff --git a/crates/fff-mcp/src/main.rs b/crates/fff-mcp/src/main.rs index 94984cb7..53475066 100644 --- a/crates/fff-mcp/src/main.rs +++ b/crates/fff-mcp/src/main.rs @@ -149,6 +149,11 @@ pub(crate) struct Args { #[arg(long = "no-watch")] no_watch: bool, + /// Skip git submodule directories during traversal and exclude them + /// from git status. Default: submodules are walked and reported. + #[arg(long = "no-submodules")] + no_submodules: bool, + /// Maximum number of files whose content is kept persistently in memory. /// Files beyond this limit are still searchable via temporary mmaps that /// are released after each grep. Defaults to 30 000. @@ -269,6 +274,7 @@ async fn main() -> Result<(), Box> { cache_budget: args .max_cached_files .map(fff::ContentCacheBudget::new_for_repo), + support_submodules: !args.no_submodules, }, ) .map_err(|e| format!("Failed to init file picker: {}", e))?; diff --git a/crates/fff-nvim/src/lib.rs b/crates/fff-nvim/src/lib.rs index 4992a269..4b219af5 100644 --- a/crates/fff-nvim/src/lib.rs +++ b/crates/fff-nvim/src/lib.rs @@ -57,7 +57,10 @@ pub fn destroy_query_db(_: &Lua, _: ()) -> LuaResult { Ok(QUERY_TRACKER.destroy().into_lua_result()?.is_some()) } -pub fn init_file_picker(_: &Lua, base_path: String) -> LuaResult { +pub fn init_file_picker( + _: &Lua, + (base_path, support_submodules): (String, Option), +) -> LuaResult { { let guard = FILE_PICKER.read().into_lua_result()?; if guard.is_some() { @@ -73,6 +76,7 @@ pub fn init_file_picker(_: &Lua, base_path: String) -> LuaResult { enable_mmap_cache: true, enable_content_indexing: true, mode: FFFMode::Neovim, + support_submodules: support_submodules.unwrap_or(true), ..Default::default() }, ) @@ -87,18 +91,21 @@ fn reinit_file_picker_internal(path: &Path) -> Result<(), Error> { // to exit on its next tick without joining), so it's safe under // the lock. In-flight watcher handlers finish naturally once we // release the guard. - { + let prev_support_submodules = { let mut guard = FILE_PICKER.write()?; + // Don't take() the picker here — leave the old one in place so + // searches still work until new_with_shared_state replaces it. if let Some(ref mut picker) = *guard { // Signal cancellation BEFORE stopping the watcher so any // orphaned scan/post-scan threads discard their results // instead of racing with the new picker. picker.cancel(); picker.stop_background_monitor(); + picker.has_submodule_support() + } else { + true } - // Don't take() the picker here — leave the old one in place so - // searches still work until new_with_shared_state replaces it. - } + }; // Create new picker — this atomically replaces the old one via write lock FilePicker::new_with_shared_state( @@ -109,6 +116,7 @@ fn reinit_file_picker_internal(path: &Path) -> Result<(), Error> { enable_mmap_cache: true, enable_content_indexing: true, mode: FFFMode::Neovim, + support_submodules: prev_support_submodules, ..Default::default() }, )?; diff --git a/doc/fff.nvim.txt b/doc/fff.nvim.txt index b0107b4f..eb4a66c0 100644 --- a/doc/fff.nvim.txt +++ b/doc/fff.nvim.txt @@ -280,6 +280,7 @@ Defaults are sensible. Override only what you care about. }, git = { status_text_color = false, -- true to color filenames by git status + support_submodules = true, -- walk into submodules and report their git status; set false to skip them }, grep = { max_file_size = 10 * 1024 * 1024, diff --git a/lua/fff/conf.lua b/lua/fff/conf.lua index 4a964dae..c0af81a7 100644 --- a/lua/fff/conf.lua +++ b/lua/fff/conf.lua @@ -317,6 +317,10 @@ local function init() -- Git integration git = { status_text_color = false, -- Apply git status colors to filename text (default: false, only sign column) + -- When true (default), git submodules are walked during indexing and their + -- statuses are reported by libgit2. When false, submodule directories are + -- skipped from traversal and excluded from the git status cache. + support_submodules = true, }, debug = { enabled = false, -- Show file info panel in preview diff --git a/lua/fff/core.lua b/lua/fff/core.lua index 7a2f6887..5bccb22b 100644 --- a/lua/fff/core.lua +++ b/lua/fff/core.lua @@ -118,7 +118,7 @@ M.ensure_initialized = function() local ok, result = pcall(fuzzy.init_db, frecency_db_path, history_db_path, true) if not ok then vim.notify('Failed to databases: ' .. tostring(result), vim.log.levels.WARN) end - ok, result = pcall(fuzzy.init_file_picker, config.base_path) + ok, result = pcall(fuzzy.init_file_picker, config.base_path, config.git.support_submodules) if not ok then vim.notify('Failed to initialize file picker: ' .. tostring(result), vim.log.levels.ERROR) return fuzzy