Skip to content

Commit c114864

Browse files
committed
fix(#469 review): guard workspace_entry against stale parses across concurrent configure()
Address Copilot review on PR #469. workspace_entry() previously parsed TOMLs outside the lock and re-acquired only to insert via or_insert_with. If configure() landed between the cache miss and the re-acquire, the stale parse would be written back into the just-cleared cache, silently undoing the invalidation. Add a generation counter to HatchState that configure() bumps on every call. workspace_entry() snapshots the generation before parsing and discards its result if the generation changed by the time it re-acquires the lock — looping to re-read the current on-disk state instead. Tests: - configure_bumps_generation_so_workspace_entry_can_detect_invalidation pins the generation-bump invariant. - workspace_entry_concurrent_configure_does_not_leak_stale_parse drives the race with multiple threads and verifies the cache reflects the final on-disk state after the concurrent configures stop.
1 parent 29d84ff commit c114864

1 file changed

Lines changed: 159 additions & 24 deletions

File tree

crates/pet-hatch/src/lib.rs

Lines changed: 159 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,22 @@ struct WorkspaceEntry {
8484
///
8585
/// `parsed` is keyed by workspace path; entries are wrapped in `Arc` so
8686
/// snapshots can be handed out cheaply.
87+
///
88+
/// `generation` is bumped on every `configure()` so that
89+
/// `workspace_entry()` can detect a reconfigure that landed while it was
90+
/// parsing TOMLs outside the lock. Without this guard, a stale parse
91+
/// could be inserted after the cache was invalidated and silently undo
92+
/// the invalidation.
8793
struct HatchState {
94+
generation: u64,
8895
workspaces: Vec<PathBuf>,
8996
parsed: HashMap<PathBuf, Arc<WorkspaceEntry>>,
9097
}
9198

9299
impl HatchState {
93100
fn new() -> Self {
94101
Self {
102+
generation: 0,
95103
workspaces: Vec::new(),
96104
parsed: HashMap::new(),
97105
}
@@ -132,32 +140,52 @@ impl Hatch {
132140
/// `pyproject.toml` / `hatch.toml` on first access. The TOML read is
133141
/// performed outside the state mutex so concurrent `try_from()` calls
134142
/// for *other* workspaces are not blocked by a slow filesystem.
143+
///
144+
/// Race handling: `configure()` may run between our cache-miss check
145+
/// and our re-acquire to insert, invalidating the cache while we were
146+
/// parsing. We snapshot the generation before the parse and only
147+
/// insert if the generation has not changed; otherwise we discard the
148+
/// stale parse and retry. The loop is bounded in practice because
149+
/// `configure()` is a client-driven, infrequent operation.
135150
fn workspace_entry(&self, workspace: &Path) -> Arc<WorkspaceEntry> {
136-
if let Some(entry) = self
137-
.state
138-
.lock()
139-
.expect("hatch state mutex poisoned")
140-
.parsed
141-
.get(workspace)
142-
{
143-
return entry.clone();
151+
loop {
152+
// Fast path: cache hit. Also snapshot the generation so we can
153+
// detect a reconfigure that lands while we're parsing below.
154+
let generation = {
155+
let state = self.state.lock().expect("hatch state mutex poisoned");
156+
if let Some(entry) = state.parsed.get(workspace) {
157+
return entry.clone();
158+
}
159+
state.generation
160+
};
161+
162+
// Slow path: parse outside the lock so other workspaces are
163+
// not blocked on this workspace's filesystem.
164+
let (virtual_dirs, env_names) = resolve_workspace_hatch_config(workspace);
165+
let parsed = Arc::new(WorkspaceEntry {
166+
virtual_dirs,
167+
matcher: EnvNameMatcher::from_names(env_names),
168+
});
169+
170+
let mut state = self.state.lock().expect("hatch state mutex poisoned");
171+
if state.generation != generation {
172+
// configure() ran while we were parsing. Our result may
173+
// reflect a now-stale view of the workspace's TOMLs (or
174+
// belong to a workspace that has since been removed).
175+
// Drop it and retry against the current generation.
176+
continue;
177+
}
178+
// A concurrent caller for the same workspace may have already
179+
// inserted while we were parsing; prefer the existing entry
180+
// so every caller observes the same `Arc`. `or_insert_with`
181+
// runs the closure only on miss, avoiding a redundant clone
182+
// on hit.
183+
return state
184+
.parsed
185+
.entry(workspace.to_path_buf())
186+
.or_insert_with(|| parsed.clone())
187+
.clone();
144188
}
145-
let (virtual_dirs, env_names) = resolve_workspace_hatch_config(workspace);
146-
let parsed = Arc::new(WorkspaceEntry {
147-
virtual_dirs,
148-
matcher: EnvNameMatcher::from_names(env_names),
149-
});
150-
// Re-acquire the lock and install. A concurrent caller may have
151-
// already inserted while we were parsing; prefer the existing entry
152-
// so every caller observes the same `Arc`. `or_insert_with` runs
153-
// the closure only on miss, avoiding a redundant clone on hit.
154-
self.state
155-
.lock()
156-
.expect("hatch state mutex poisoned")
157-
.parsed
158-
.entry(workspace.to_path_buf())
159-
.or_insert_with(|| parsed.clone())
160-
.clone()
161189
}
162190
}
163191

@@ -195,6 +223,10 @@ impl Locator for Hatch {
195223
.cloned()
196224
.unwrap_or_default();
197225
let mut state = self.state.lock().expect("hatch state mutex poisoned");
226+
// Bump the generation so any in-flight `workspace_entry()` parse
227+
// detects the invalidation on re-acquire and discards its stale
228+
// result instead of writing it back into the cleared cache.
229+
state.generation = state.generation.wrapping_add(1);
198230
state.workspaces = new_workspaces;
199231
state.parsed.clear();
200232
}
@@ -1313,6 +1345,109 @@ mod tests {
13131345
);
13141346
}
13151347

1348+
#[test]
1349+
fn configure_bumps_generation_so_workspace_entry_can_detect_invalidation() {
1350+
// The lazy workspace_entry() path snapshots the generation before
1351+
// parsing TOMLs outside the lock and refuses to write its result
1352+
// back if the generation has moved. That guard is what prevents a
1353+
// mid-parse worker from silently undoing configure()'s
1354+
// invalidation. This test pins the generation-bump invariant so a
1355+
// future refactor cannot remove it without breaking a test.
1356+
let locator = make_locator(None);
1357+
let g0 = locator.state.lock().unwrap().generation;
1358+
1359+
let config = Configuration {
1360+
workspace_directories: Some(vec![PathBuf::from("/tmp/example")]),
1361+
..Configuration::default()
1362+
};
1363+
locator.configure(&config);
1364+
let g1 = locator.state.lock().unwrap().generation;
1365+
assert_ne!(g0, g1, "configure() must bump generation");
1366+
1367+
locator.configure(&config);
1368+
let g2 = locator.state.lock().unwrap().generation;
1369+
assert_ne!(g1, g2, "repeat configure() must also bump generation");
1370+
}
1371+
1372+
#[test]
1373+
fn workspace_entry_concurrent_configure_does_not_leak_stale_parse() {
1374+
// Race scenario the generation guard is designed to prevent:
1375+
// T1: workspace_entry(W) misses cache, snapshots generation, drops lock
1376+
// T2: configure() bumps generation and clears `parsed`
1377+
// T1: finishes parse, re-acquires lock, MUST NOT insert stale data
1378+
//
1379+
// This test drives the race with many threads to make a stale
1380+
// insert observable. Without the generation guard the loop body
1381+
// would occasionally see virtual_dirs reflecting an older TOML
1382+
// version that had been overwritten on disk before a configure().
1383+
use std::sync::atomic::{AtomicBool, Ordering};
1384+
use std::thread;
1385+
1386+
let temp = TempDir::new().unwrap();
1387+
let project = temp.path().join("project");
1388+
fs::create_dir_all(&project).unwrap();
1389+
let pyproject = project.join("pyproject.toml");
1390+
fs::write(
1391+
&pyproject,
1392+
b"[tool.hatch.dirs.env]\nvirtual = \".hatch-v1\"\n",
1393+
)
1394+
.unwrap();
1395+
1396+
let locator = Arc::new(make_locator(None));
1397+
let config = Configuration {
1398+
workspace_directories: Some(vec![project.clone()]),
1399+
..Configuration::default()
1400+
};
1401+
locator.configure(&config);
1402+
1403+
let stop = Arc::new(AtomicBool::new(false));
1404+
let mut readers = Vec::new();
1405+
for _ in 0..4 {
1406+
let locator = locator.clone();
1407+
let project = project.clone();
1408+
let stop = stop.clone();
1409+
readers.push(thread::spawn(move || {
1410+
while !stop.load(Ordering::Relaxed) {
1411+
let _ = locator.workspace_entry(&project);
1412+
}
1413+
}));
1414+
}
1415+
1416+
// Flip the TOML between two known states, calling configure()
1417+
// after each write so the lazy readers race the invalidation.
1418+
for i in 0..50 {
1419+
let payload = if i % 2 == 0 {
1420+
b"[tool.hatch.dirs.env]\nvirtual = \".hatch-v1\"\n" as &[u8]
1421+
} else {
1422+
b"[tool.hatch.dirs.env]\nvirtual = \".hatch-v2\"\n"
1423+
};
1424+
fs::write(&pyproject, payload).unwrap();
1425+
locator.configure(&config);
1426+
}
1427+
stop.store(true, Ordering::Relaxed);
1428+
for r in readers {
1429+
r.join().unwrap();
1430+
}
1431+
1432+
// After the loop, ensure one final configure has cleared the
1433+
// cache, write a distinct final state, and verify the next
1434+
// workspace_entry observes it. If the generation guard were
1435+
// missing, an in-flight stale parse from an earlier iteration
1436+
// could still be cached here.
1437+
fs::write(
1438+
&pyproject,
1439+
b"[tool.hatch.dirs.env]\nvirtual = \".hatch-final\"\n",
1440+
)
1441+
.unwrap();
1442+
locator.configure(&config);
1443+
let entry = locator.workspace_entry(&project);
1444+
assert_eq!(
1445+
entry.virtual_dirs,
1446+
vec![norm_case(&project.join(".hatch-final"))],
1447+
"post-configure workspace_entry must reflect on-disk state, not a leaked stale parse"
1448+
);
1449+
}
1450+
13161451
#[cfg(target_os = "linux")]
13171452
#[test]
13181453
fn data_dir_uses_xdg_data_home_when_set() {

0 commit comments

Comments
 (0)