Skip to content

Commit 00702ee

Browse files
committed
perf(pet-fs): memoize norm_case to avoid repeated GetLongPathNameW syscalls (Fixes #455)
1 parent ab6d1d2 commit 00702ee

3 files changed

Lines changed: 139 additions & 2 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/pet-fs/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ windows-sys = { version = "0.59", features = ["Win32_Storage_FileSystem", "Win32
1010

1111
[dependencies]
1212
glob = "0.3.1"
13+
lazy_static = "1.4.0"
1314
log = "0.4.21"

crates/pet-fs/src/path.rs

Lines changed: 137 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,29 @@ use std::{
99
#[cfg(unix)]
1010
use std::path::MAIN_SEPARATOR;
1111

12+
#[cfg(windows)]
13+
use std::{collections::HashMap, sync::RwLock};
14+
15+
#[cfg(windows)]
16+
use lazy_static::lazy_static;
17+
18+
#[cfg(windows)]
19+
lazy_static! {
20+
/// Per-process memoization cache for `norm_case` results on Windows.
21+
///
22+
/// Each `norm_case` call invokes `GetLongPathNameW`, which is a real
23+
/// filesystem syscall (Defender-intercepted on Windows). Many hot paths
24+
/// — the user home directory, common parents, every `PATH` entry, every
25+
/// registry `env_path`/`executable` — get normalized repeatedly per
26+
/// refresh. Memoizing eliminates the repeat syscalls.
27+
///
28+
/// We cache only successful normalizations: a failure (`GetLongPathNameW`
29+
/// returning 0, typically because the path doesn't exist yet) is *not*
30+
/// cached so a path that gets created later can still be normalized.
31+
static ref NORM_CASE_CACHE: RwLock<HashMap<PathBuf, PathBuf>> =
32+
RwLock::new(HashMap::new());
33+
}
34+
1235
/// Strips trailing path separators from a path, preserving root paths.
1336
///
1437
/// This function removes trailing `/` or `\` from paths while ensuring that root paths
@@ -91,6 +114,11 @@ pub fn strip_trailing_separator<P: AsRef<Path>>(path: P) -> PathBuf {
91114
/// - On Windows, this function uses `GetLongPathNameW` which **preserves junction paths**
92115
/// unlike `fs::canonicalize` which would resolve them to their target.
93116
/// - For symlink resolution, use `resolve_symlink()` instead.
117+
/// - On Windows, results are memoized in a per-process cache keyed on the
118+
/// computed *absolute* path (so relative inputs remain correct across
119+
/// CWD changes). Only successful normalizations are cached, so paths
120+
/// that do not yet exist on disk can still be normalized later. The
121+
/// cache is never invalidated for the lifetime of the process.
94122
///
95123
/// # Related
96124
/// - `strip_trailing_separator()` - Just removes trailing separators
@@ -109,7 +137,10 @@ pub fn norm_case<P: AsRef<Path>>(path: P) -> PathBuf {
109137

110138
#[cfg(windows)]
111139
{
112-
// First, convert to absolute path if relative, without resolving symlinks/junctions
140+
// First, convert to absolute path if relative, without resolving symlinks/junctions.
141+
// We key the cache on this *absolute* path rather than the caller's input so
142+
// a relative input like "./foo" stays correct if the process CWD changes
143+
// between calls.
113144
let absolute_path = if path.as_ref().is_absolute() {
114145
path.as_ref().to_path_buf()
115146
} else if let Ok(abs) = std::env::current_dir() {
@@ -118,9 +149,32 @@ pub fn norm_case<P: AsRef<Path>>(path: P) -> PathBuf {
118149
path.as_ref().to_path_buf()
119150
};
120151

152+
// Fast path: cache hit. Read lock is held only while we copy the
153+
// already-normalized PathBuf out. We treat lock poisoning as a
154+
// soft-miss (fall through to recompute) because `norm_case` is
155+
// hot and side-effect-free; a poisoned mutex from one bad caller
156+
// shouldn't permanently break path normalization for the process.
157+
if let Ok(cache) = NORM_CASE_CACHE.read() {
158+
if let Some(cached) = cache.get(&absolute_path) {
159+
return cached.clone();
160+
}
161+
}
162+
121163
// Use GetLongPathNameW to normalize case without resolving junctions.
122164
// If normalization fails, fall back to the computed absolute path to keep behavior consistent.
123-
normalize_case_windows(&absolute_path).unwrap_or(absolute_path)
165+
match normalize_case_windows(&absolute_path) {
166+
Some(normalized) => {
167+
// Only cache successful normalizations: a failure usually
168+
// means the path does not exist yet, and we don't want to
169+
// poison the cache with a non-normalized fallback for paths
170+
// that may be created later in the process.
171+
if let Ok(mut cache) = NORM_CASE_CACHE.write() {
172+
cache.insert(absolute_path, normalized.clone());
173+
}
174+
normalized
175+
}
176+
None => absolute_path,
177+
}
124178
}
125179
}
126180

@@ -619,6 +673,87 @@ mod tests {
619673
);
620674
}
621675

676+
#[test]
677+
#[cfg(windows)]
678+
fn test_norm_case_windows_memoizes_existing_path() {
679+
// Successful normalizations should be cached so subsequent calls
680+
// for the same input do not re-issue GetLongPathNameW.
681+
// The cache is keyed on the computed absolute path, so we pass an
682+
// absolute path here to make the assertion deterministic.
683+
let path = PathBuf::from("C:\\Windows\\System32");
684+
let first = norm_case(&path);
685+
// Cache must contain the exact (absolute) input we passed in.
686+
let cached = NORM_CASE_CACHE
687+
.read()
688+
.expect("norm_case cache poisoned")
689+
.get(&path)
690+
.cloned();
691+
assert_eq!(
692+
cached.as_ref(),
693+
Some(&first),
694+
"expected norm_case to insert the result into the cache"
695+
);
696+
// Second call returns the same result (and goes through the cache).
697+
let second = norm_case(&path);
698+
assert_eq!(first, second);
699+
}
700+
701+
#[test]
702+
#[cfg(windows)]
703+
fn test_norm_case_windows_does_not_cache_failures() {
704+
// GetLongPathNameW fails for non-existent paths. We must NOT cache
705+
// those, otherwise a path that gets created later in the process
706+
// would forever return its non-normalized fallback form.
707+
let nonexistent = PathBuf::from("C:\\pet_test_norm_case_no_cache_xyz_zzz_zzz");
708+
// Make sure we start from a clean slate for this key.
709+
NORM_CASE_CACHE
710+
.write()
711+
.expect("norm_case cache poisoned")
712+
.remove(&nonexistent);
713+
714+
let _ = norm_case(&nonexistent);
715+
716+
let has_entry = NORM_CASE_CACHE
717+
.read()
718+
.expect("norm_case cache poisoned")
719+
.contains_key(&nonexistent);
720+
assert!(
721+
!has_entry,
722+
"failed normalizations must not be inserted into the cache"
723+
);
724+
}
725+
726+
#[test]
727+
#[cfg(windows)]
728+
fn test_norm_case_windows_cache_key_is_absolute_path() {
729+
// The cache is keyed on the computed absolute path, not the
730+
// caller-supplied input. Calling norm_case on a relative path
731+
// must NOT insert an entry under that relative key; the entry
732+
// must instead live under the resolved absolute path.
733+
//
734+
// We avoid mutating the process CWD here because cargo runs tests
735+
// in parallel and a global chdir would race with other tests.
736+
let relative = PathBuf::from(".");
737+
let result = norm_case(&relative);
738+
739+
assert!(result.is_absolute(), "result must be absolute");
740+
let cache = NORM_CASE_CACHE.read().expect("norm_case cache poisoned");
741+
assert!(
742+
!cache.contains_key(&relative),
743+
"cache must not be keyed on the caller-supplied relative path"
744+
);
745+
// The absolute form must be present (assuming the CWD exists,
746+
// which it always does for a running test process).
747+
let abs = std::env::current_dir()
748+
.expect("cwd must exist")
749+
.join(&relative);
750+
assert_eq!(
751+
cache.get(&abs).cloned(),
752+
Some(result),
753+
"cache must be keyed on the computed absolute path"
754+
);
755+
}
756+
622757
// ==================== resolve_any_symlink tests ====================
623758

624759
#[test]

0 commit comments

Comments
 (0)