From d2e2c8e83206a2b73b142dcef0bf8f5729a31467 Mon Sep 17 00:00:00 2001 From: Tobias Brox Date: Fri, 2 Jan 2026 00:50:12 +0100 Subject: [PATCH 1/3] fix: wrap datastore worker in catch_unwind to prevent poisoned locks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use std::panic::catch_unwind() around the worker loop as a safety net. If any panic occurs, the worker will exit gracefully instead of poisoning the Mutex lock and leaving the server in an unusable state. This ensures: - Panics are caught and logged with their message - The worker thread exits cleanly - The channel closes properly - Future requests get clean "channel closed" errors - No poisoned locks requiring server restart Related to: https://github.com/ActivityWatch/aw-server-rust/issues/405 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude Opus 4.5 --- aw-datastore/src/worker.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/aw-datastore/src/worker.rs b/aw-datastore/src/worker.rs index 18eaf665..05d23a2c 100644 --- a/aw-datastore/src/worker.rs +++ b/aw-datastore/src/worker.rs @@ -314,7 +314,29 @@ impl Datastore { mpsc_requests::channel::>(); let _thread = thread::spawn(move || { let mut di = DatastoreWorker::new(responder, legacy_import); - di.work_loop(method); + // Wrap work_loop in catch_unwind to handle any unexpected panics gracefully. + // This prevents panics from poisoning locks and leaving the server in an + // unusable state. Instead, the worker exits cleanly and the channel closes, + // allowing clients to receive proper errors instead of "poisoned lock" errors. + // See: https://github.com/ActivityWatch/aw-server-rust/issues/405 + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + di.work_loop(method); + })); + if let Err(panic_info) = result { + // Extract panic message if possible + let panic_msg = if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else { + "Unknown panic".to_string() + }; + error!( + "Datastore worker panicked: {}. Worker shutting down gracefully.", + panic_msg + ); + } + // Worker exits, channel closes, future requests get clean errors }); Datastore { requester } } From 20344c564d44d50bda4edcab2cc62595b815b596 Mon Sep 17 00:00:00 2001 From: Tobias Brox Date: Fri, 2 Jan 2026 16:59:35 +0100 Subject: [PATCH 2/3] test: add unit tests for catch_unwind panic handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tests to verify that the catch_unwind wrapper properly handles worker panics and prevents poisoned lock errors. Tests are gated behind a `test-panic` feature flag. Run with: cargo test --package aw-datastore --features test-panic 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- aw-datastore/Cargo.toml | 2 + aw-datastore/src/worker.rs | 30 +++++++++ aw-datastore/tests/datastore.rs | 111 ++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+) diff --git a/aw-datastore/Cargo.toml b/aw-datastore/Cargo.toml index 73d768b5..51f0a003 100644 --- a/aw-datastore/Cargo.toml +++ b/aw-datastore/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" [features] default = [] # no features by default legacy_import_tests = [] +# Test-only feature to enable panic triggering for testing catch_unwind behavior +test-panic = [] [dependencies] dirs = "6" diff --git a/aw-datastore/src/worker.rs b/aw-datastore/src/worker.rs index 05d23a2c..f31323a7 100644 --- a/aw-datastore/src/worker.rs +++ b/aw-datastore/src/worker.rs @@ -78,6 +78,10 @@ pub enum Command { SetKeyValue(String, String), DeleteKeyValue(String), Close(), + /// Test-only command to trigger a panic in the worker thread. + /// Only available with the `test-panic` feature. + #[cfg(feature = "test-panic")] + TriggerPanic(String), } fn _unwrap_response( @@ -294,6 +298,10 @@ impl DatastoreWorker { self.quit = true; Ok(Response::Empty()) } + #[cfg(feature = "test-panic")] + Command::TriggerPanic(msg) => { + panic!("{}", msg); + } } } } @@ -549,4 +557,26 @@ impl Datastore { Err(e) => panic!("Error closing database: {:?}", e), } } + + /// Test-only method to trigger a panic in the worker thread. + /// Used to verify that catch_unwind properly handles panics. + /// Only available with the `test-panic` feature. + #[cfg(feature = "test-panic")] + pub fn trigger_panic(&self, msg: &str) -> Result<(), DatastoreError> { + let cmd = Command::TriggerPanic(msg.to_string()); + match self.requester.request(cmd) { + Ok(receiver) => match receiver.collect() { + Ok(result) => match result { + Ok(_) => Ok(()), + Err(e) => Err(e), + }, + Err(_) => Err(DatastoreError::InternalError( + "Channel closed (worker panicked)".to_string(), + )), + }, + Err(_) => Err(DatastoreError::InternalError( + "Failed to send command (channel closed)".to_string(), + )), + } + } } diff --git a/aw-datastore/tests/datastore.rs b/aw-datastore/tests/datastore.rs index 739e8ade..9560daa2 100644 --- a/aw-datastore/tests/datastore.rs +++ b/aw-datastore/tests/datastore.rs @@ -531,4 +531,115 @@ mod datastore_tests { ); } } + + /// Test that when the worker thread panics, it shuts down gracefully + /// and subsequent requests receive clean errors instead of "poisoned lock" errors. + /// This tests the catch_unwind wrapper around work_loop(). + #[cfg(feature = "test-panic")] + #[test] + fn test_worker_panic_graceful_shutdown() { + // Setup datastore + let ds = Datastore::new_in_memory(false); + let bucket = create_test_bucket(&ds); + + // Verify datastore is working normally + let fetched_bucket = ds.get_bucket(&bucket.id).unwrap(); + assert_eq!(fetched_bucket.id, bucket.id); + + // Trigger a panic in the worker thread + let panic_result = ds.trigger_panic("Test panic for graceful shutdown"); + + // The panic should cause the worker to exit and close the channel. + // We may or may not get an error from the trigger_panic call itself + // depending on timing (whether the response was sent before the panic). + info!("trigger_panic result: {:?}", panic_result); + + // Give the worker thread a moment to fully shut down + std::thread::sleep(std::time::Duration::from_millis(100)); + + // Subsequent requests will fail. The current API uses .unwrap() which panics + // on closed channels, so we need to catch the panic and verify it's NOT + // a "poisoned lock" error (which would indicate catch_unwind didn't work). + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + ds.get_buckets() + })); + + // We expect either: + // 1. A panic with "SendError" (channel closed) - this is expected + // 2. An Ok(Err(...)) with a clean error - this is also fine + // What we must NOT see is a "poisoned" error which would mean catch_unwind failed + match result { + Ok(inner_result) => { + // If we got through without panic, check the error doesn't mention poison + if let Err(e) = inner_result { + let err_msg = format!("{:?}", e); + assert!( + !err_msg.to_lowercase().contains("poison"), + "Error should not be a poisoned lock error, got: {}", + err_msg + ); + info!("Got expected clean error: {}", err_msg); + } + } + Err(panic_info) => { + // We got a panic - extract the message and verify it's not about poisoned locks + let panic_msg = if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else { + format!("{:?}", panic_info) + }; + assert!( + !panic_msg.to_lowercase().contains("poison"), + "Panic should not be about poisoned lock, got: {}", + panic_msg + ); + info!( + "Got expected panic (channel closed, not poisoned lock): {}", + panic_msg + ); + } + } + } + + /// Test that panic handling correctly extracts both &str and String panic messages. + #[cfg(feature = "test-panic")] + #[test] + fn test_worker_panic_with_string_message() { + let ds = Datastore::new_in_memory(false); + + // The trigger_panic method accepts a &str which gets converted to String, + // so this tests the String panic message extraction path. + let _ = ds.trigger_panic("String panic message test"); + + std::thread::sleep(std::time::Duration::from_millis(100)); + + // Verify the datastore is no longer usable (channel closed) + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + ds.get_buckets() + })); + + // Should fail in some way (panic or error), just not with "poisoned" + match result { + Ok(inner_result) => { + assert!(inner_result.is_err(), "Expected error after worker panic"); + } + Err(panic_info) => { + // Got a panic (expected behavior with current API that uses unwrap) + let panic_msg = if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else { + format!("{:?}", panic_info) + }; + assert!( + !panic_msg.to_lowercase().contains("poison"), + "Panic should not be about poisoned lock, got: {}", + panic_msg + ); + } + } + } } From 6d234a3a7e53bd6610d36a92872e17fa352e22b4 Mon Sep 17 00:00:00 2001 From: Tobias Brox Date: Fri, 20 Feb 2026 01:51:30 +0100 Subject: [PATCH 3/3] test: improve catch_unwind coverage with unit tests for panic message extraction Extract the panic payload message logic into a standalone `extract_panic_message` helper and add unit tests covering all three branches: &str literal panics, String (formatted) panics, and unknown payload types. These tests run unconditionally (no feature flag required), so they are picked up by tarpaulin in CI and close the coverage gap reported by Codecov on the previous commit. Co-Authored-By: Claude Sonnet 4.6 --- aw-datastore/src/worker.rs | 51 ++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/aw-datastore/src/worker.rs b/aw-datastore/src/worker.rs index f31323a7..81eac165 100644 --- a/aw-datastore/src/worker.rs +++ b/aw-datastore/src/worker.rs @@ -306,6 +306,18 @@ impl DatastoreWorker { } } +/// Extracts a human-readable message from a panic payload. +/// Handles the two common payload types (&str and String), with a fallback for other types. +fn extract_panic_message(panic_info: &Box) -> String { + if let Some(s) = panic_info.downcast_ref::<&str>() { + s.to_string() + } else if let Some(s) = panic_info.downcast_ref::() { + s.clone() + } else { + "Unknown panic".to_string() + } +} + impl Datastore { pub fn new(dbpath: String, legacy_import: bool) -> Self { let method = DatastoreMethod::File(dbpath); @@ -331,14 +343,7 @@ impl Datastore { di.work_loop(method); })); if let Err(panic_info) = result { - // Extract panic message if possible - let panic_msg = if let Some(s) = panic_info.downcast_ref::<&str>() { - s.to_string() - } else if let Some(s) = panic_info.downcast_ref::() { - s.clone() - } else { - "Unknown panic".to_string() - }; + let panic_msg = extract_panic_message(&panic_info); error!( "Datastore worker panicked: {}. Worker shutting down gracefully.", panic_msg @@ -580,3 +585,33 @@ impl Datastore { } } } + +#[cfg(test)] +mod tests { + use super::extract_panic_message; + + /// &str panic payload (produced by panic!("literal")) + #[test] + fn test_extract_panic_message_str_literal() { + let panic_info = std::panic::catch_unwind(|| panic!("static str panic")).unwrap_err(); + assert_eq!(extract_panic_message(&panic_info), "static str panic"); + } + + /// String panic payload (produced by panic!("{}", expr)) + #[test] + fn test_extract_panic_message_string() { + let msg = format!("formatted panic {}", 42); + let panic_info = + std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| panic!("{}", msg))) + .unwrap_err(); + assert_eq!(extract_panic_message(&panic_info), "formatted panic 42"); + } + + /// Unknown payload type falls back to "Unknown panic" + #[test] + fn test_extract_panic_message_unknown() { + let panic_info = + std::panic::catch_unwind(|| std::panic::panic_any(42u32)).unwrap_err(); + assert_eq!(extract_panic_message(&panic_info), "Unknown panic"); + } +}