From 5c551adf40a1e227e64808411c5d3af01c2c3af2 Mon Sep 17 00:00:00 2001 From: Rajat Arya Date: Tue, 2 Dec 2025 15:14:43 -0800 Subject: [PATCH 1/4] Initial impl from Cursor/Composer - attempts to call winapi directly as unsafe block to allow deregistering ctrl+c handler --- hf_xet/Cargo.lock | 2 + hf_xet/Cargo.toml | 2 + hf_xet/src/runtime.rs | 188 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 175 insertions(+), 17 deletions(-) diff --git a/hf_xet/Cargo.lock b/hf_xet/Cargo.lock index 7089f4ffc..42517d1b2 100644 --- a/hf_xet/Cargo.lock +++ b/hf_xet/Cargo.lock @@ -1422,8 +1422,10 @@ dependencies = [ "serde", "serde_json", "signal-hook", + "signal-hook-registry", "tracing", "utils", + "winapi", "xet_config", "xet_logging", "xet_runtime", diff --git a/hf_xet/Cargo.toml b/hf_xet/Cargo.toml index dd8b5a45a..5ee25d919 100644 --- a/hf_xet/Cargo.toml +++ b/hf_xet/Cargo.toml @@ -42,10 +42,12 @@ tracing = "0.1" # Unix-specific dependencies [target.'cfg(unix)'.dependencies] signal-hook = "0.3" +signal-hook-registry = "1.4" # Windows-specific dependencies [target.'cfg(windows)'.dependencies] ctrlc = "3.4" +winapi = { version = "0.3", features = ["consoleapi", "wincon", "errhandlingapi"] } [features] default = ["no-default-cache"] # By default, hf_xet disables the disk cache. diff --git a/hf_xet/src/runtime.rs b/hf_xet/src/runtime.rs index a47a9baca..8e511af87 100644 --- a/hf_xet/src/runtime.rs +++ b/hf_xet/src/runtime.rs @@ -17,30 +17,85 @@ lazy_static! { } #[cfg(unix)] -fn install_sigint_handler() -> Result<(), MultithreadedRuntimeError> { +lazy_static! { + static ref SIGINT_HANDLER_REGISTRATION: RwLock> = RwLock::new(None); +} + +#[cfg(windows)] +lazy_static! { + static ref SIGINT_HANDLER_INSTALLED: AtomicBool = AtomicBool::new(false); +} + +#[cfg(unix)] +fn install_sigint_handler() -> Result { use signal_hook::consts::SIGINT; - use signal_hook::flag; // Register the SIGINT handler to set our atomic flag. - // Using `signal_hook::flag::register` allows us to set the atomic flag when SIGINT is received. - flag::register(SIGINT, SIGINT_DETECTED.clone()).map_err(|e| { + // Using signal_hook_registry directly to get a SigId that we can unregister later. + let flag = SIGINT_DETECTED.clone(); + let signal_id = unsafe { + signal_hook_registry::register(SIGINT, move || { + flag.store(true, Ordering::SeqCst); + }) + } + .map_err(|e| { MultithreadedRuntimeError::Other(format!("Initialization Error: Unable to register SIGINT handler {e:?}")) })?; - Ok(()) + Ok(signal_id) +} + +#[cfg(windows)] +extern "system" fn console_ctrl_handler( + ctrl_type: winapi::shared::minwindef::DWORD, +) -> winapi::shared::minwindef::BOOL { + use winapi::um::wincon; + + // Only handle CTRL_C_EVENT + if ctrl_type == wincon::CTRL_C_EVENT { + // Check if we have active operations + let has_active_ops = { + let guard = MULTITHREADED_RUNTIME.read().unwrap(); + if let Some((runtime_pid, ref runtime)) = *guard { + runtime_pid == std::process::id() && runtime.external_executor_count() > 0 + } else { + false + } + }; + + if has_active_ops { + // We have active operations, handle it ourselves + SIGINT_DETECTED.store(true, Ordering::SeqCst); + winapi::shared::minwindef::TRUE + } else { + // No active operations, let Python's handler (or default) handle it + // Return FALSE to continue handler chain + winapi::shared::minwindef::FALSE + } + } else { + // For other control events, let default handler process them + winapi::shared::minwindef::FALSE + } } #[cfg(windows)] fn install_sigint_handler() -> Result<(), MultithreadedRuntimeError> { - // On Windows, use ctrlc crate. - // This sets a callback to run on Ctrl-C: - let sigint_detected_flag = SIGINT_DETECTED.clone(); - ctrlc::set_handler(move || { - sigint_detected_flag.store(true, Ordering::SeqCst); - }) - .map_err(|e| { - MultithreadedRuntimeError::Other(format!("Initialization Error: Unable to register SIGINT handler {e:?}")) - })?; + use winapi::um::consoleapi::SetConsoleCtrlHandler; + use winapi::um::wincon::CTRL_C_EVENT; + + // Install our handler using Windows API directly (instead of ctrlc) + // Our handler checks if operations are active: + // - If active: handles Ctrl+C and returns TRUE (stops propagation) + // - If not active: returns FALSE (allows Python's handler or default to handle it) + // This way we don't need to save/restore Python's handler - we just delegate to it + unsafe { + if SetConsoleCtrlHandler(Some(console_ctrl_handler), winapi::shared::minwindef::TRUE) == 0 { + let error = winapi::um::errhandlingapi::GetLastError(); + return Err(MultithreadedRuntimeError::Other(format!( + "Initialization Error: Unable to register SIGINT handler. Windows error: {error}" + ))); + } + } Ok(()) } @@ -54,7 +109,20 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { let pid = std::process::id(); if stored_pid == pid { - return Ok(()); + // Check if handler is already registered + #[cfg(unix)] + { + let reg = SIGINT_HANDLER_REGISTRATION.read().unwrap(); + if reg.is_some() { + return Ok(()); + } + } + #[cfg(windows)] + { + if SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) { + return Ok(()); + } + } } // Need to install it; acquire a lock to do so. @@ -63,10 +131,33 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { // If another thread beat us to it while we're waiting for the lock. let stored_pid = SIGINT_HANDLER_INSTALL_PID.0.load(Ordering::SeqCst); if stored_pid == pid { - return Ok(()); + #[cfg(unix)] + { + let reg = SIGINT_HANDLER_REGISTRATION.read().unwrap(); + if reg.is_some() { + return Ok(()); + } + } + #[cfg(windows)] + { + if SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) { + return Ok(()); + } + } + } + + #[cfg(unix)] + { + let signal_id = install_sigint_handler()?; + let mut reg = SIGINT_HANDLER_REGISTRATION.write().unwrap(); + *reg = Some(signal_id); } - install_sigint_handler()?; + #[cfg(windows)] + { + install_sigint_handler()?; + SIGINT_HANDLER_INSTALLED.store(true, Ordering::SeqCst); + } // Finally, store that we have installed it successfully. SIGINT_HANDLER_INSTALL_PID.0.store(pid, Ordering::SeqCst); @@ -94,6 +185,65 @@ fn in_sigint_shutdown() -> bool { SIGINT_DETECTED.load(Ordering::Relaxed) } +#[cfg(unix)] +fn restore_sigint_handler() -> Result<(), MultithreadedRuntimeError> { + use signal_hook_registry; + + let mut reg = SIGINT_HANDLER_REGISTRATION.write().unwrap(); + if let Some(signal_id) = reg.take() { + signal_hook_registry::unregister(signal_id); + } + Ok(()) +} + +#[cfg(windows)] +fn restore_sigint_handler() -> Result<(), MultithreadedRuntimeError> { + use winapi::um::consoleapi::SetConsoleCtrlHandler; + + // Remove our handler by unregistering it + // This allows Python's handler (if any) or the default handler to take over + unsafe { + if SetConsoleCtrlHandler(Some(console_ctrl_handler), winapi::shared::minwindef::FALSE) == 0 { + let error = winapi::um::errhandlingapi::GetLastError(); + // Log but don't fail - handler removal is best effort + if cfg!(debug_assertions) { + eprintln!("[debug] Warning: Failed to unregister console control handler. Windows error: {error}"); + } + } + } + SIGINT_HANDLER_INSTALLED.store(false, Ordering::SeqCst); + Ok(()) +} + +fn maybe_restore_sigint_handler() -> Result<(), MultithreadedRuntimeError> { + // Check if runtime exists and has no active operations + let guard = MULTITHREADED_RUNTIME.read().unwrap(); + if let Some((runtime_pid, ref runtime)) = *guard + && runtime_pid == std::process::id() + && runtime.external_executor_count() == 0 + { + // Check if handler is installed + let should_restore = { + #[cfg(unix)] + { + let reg = SIGINT_HANDLER_REGISTRATION.read().unwrap(); + reg.is_some() + } + #[cfg(windows)] + { + SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) + } + }; + + if should_restore { + // No active operations, restore handler + drop(guard); + restore_sigint_handler()?; + } + } + Ok(()) +} + fn signal_check_background_loop() { const SIGNAL_CHECK_INTERVAL: Duration = Duration::from_millis(250); @@ -223,6 +373,10 @@ where return Err(PyKeyboardInterrupt::new_err(())); } + // After operation completes, check if we should restore the signal handler + // This allows Python's original handler to be restored when no operations are active + let _ = maybe_restore_sigint_handler(); + // Now return the result. result } From c88c73fbcc31211f3b06aca6551e19897a9177f4 Mon Sep 17 00:00:00 2001 From: Rajat Arya Date: Wed, 3 Dec 2025 12:34:24 -0800 Subject: [PATCH 2/4] Remove ctrlc crate, using winapi directly now --- hf_xet/Cargo.lock | 25 +------------------------ hf_xet/Cargo.toml | 1 - 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/hf_xet/Cargo.lock b/hf_xet/Cargo.lock index 42517d1b2..ccf2a9549 100644 --- a/hf_xet/Cargo.lock +++ b/hf_xet/Cargo.lock @@ -753,16 +753,6 @@ version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2" -[[package]] -name = "ctrlc" -version = "3.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f93780a459b7d656ef7f071fe699c4d3d2cb201c4b24d085b6ddc505276e73" -dependencies = [ - "nix 0.30.1", - "windows-sys 0.59.0", -] - [[package]] name = "data" version = "0.14.5" @@ -1410,7 +1400,6 @@ dependencies = [ "bipbuffer", "cas_client", "chrono", - "ctrlc", "data", "error_printer", "itertools 0.14.0", @@ -2271,18 +2260,6 @@ dependencies = [ "libc", ] -[[package]] -name = "nix" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" -dependencies = [ - "bitflags 2.9.2", - "cfg-if 1.0.1", - "cfg_aliases", - "libc", -] - [[package]] name = "nom" version = "7.1.3" @@ -2640,7 +2617,7 @@ dependencies = [ "inferno", "libc", "log", - "nix 0.26.4", + "nix", "once_cell", "prost 0.12.6", "protobuf 2.28.0", diff --git a/hf_xet/Cargo.toml b/hf_xet/Cargo.toml index 5ee25d919..e0b7b240b 100644 --- a/hf_xet/Cargo.toml +++ b/hf_xet/Cargo.toml @@ -46,7 +46,6 @@ signal-hook-registry = "1.4" # Windows-specific dependencies [target.'cfg(windows)'.dependencies] -ctrlc = "3.4" winapi = { version = "0.3", features = ["consoleapi", "wincon", "errhandlingapi"] } [features] From 010c9d346959a257d2a873d55748aff7d1e1057a Mon Sep 17 00:00:00 2001 From: Rajat Arya Date: Wed, 10 Dec 2025 08:19:38 -0800 Subject: [PATCH 3/4] Remove Unix SIGINT changes - simply keep Windows CTRL_C SIGINT handling --- hf_xet/src/runtime.rs | 117 ++++++------------------------------------ 1 file changed, 17 insertions(+), 100 deletions(-) diff --git a/hf_xet/src/runtime.rs b/hf_xet/src/runtime.rs index 8e511af87..dfb7a9d74 100644 --- a/hf_xet/src/runtime.rs +++ b/hf_xet/src/runtime.rs @@ -16,33 +16,23 @@ lazy_static! { static ref MULTITHREADED_RUNTIME: RwLock)>> = RwLock::new(None); } -#[cfg(unix)] -lazy_static! { - static ref SIGINT_HANDLER_REGISTRATION: RwLock> = RwLock::new(None); -} - #[cfg(windows)] lazy_static! { static ref SIGINT_HANDLER_INSTALLED: AtomicBool = AtomicBool::new(false); } #[cfg(unix)] -fn install_sigint_handler() -> Result { +fn install_sigint_handler() -> Result<(), MultithreadedRuntimeError> { use signal_hook::consts::SIGINT; + use signal_hook::flag; // Register the SIGINT handler to set our atomic flag. - // Using signal_hook_registry directly to get a SigId that we can unregister later. - let flag = SIGINT_DETECTED.clone(); - let signal_id = unsafe { - signal_hook_registry::register(SIGINT, move || { - flag.store(true, Ordering::SeqCst); - }) - } - .map_err(|e| { + // Using `signal_hook::flag::register` allows us to set the atomic flag when SIGINT is received. + flag::register(SIGINT, SIGINT_DETECTED.clone()).map_err(|e| { MultithreadedRuntimeError::Other(format!("Initialization Error: Unable to register SIGINT handler {e:?}")) })?; - Ok(signal_id) + Ok(()) } #[cfg(windows)] @@ -109,20 +99,16 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { let pid = std::process::id(); if stored_pid == pid { - // Check if handler is already registered - #[cfg(unix)] - { - let reg = SIGINT_HANDLER_REGISTRATION.read().unwrap(); - if reg.is_some() { - return Ok(()); - } - } #[cfg(windows)] { if SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) { return Ok(()); } } + #[cfg(not(windows))] + { + return Ok(()); + } } // Need to install it; acquire a lock to do so. @@ -131,26 +117,16 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { // If another thread beat us to it while we're waiting for the lock. let stored_pid = SIGINT_HANDLER_INSTALL_PID.0.load(Ordering::SeqCst); if stored_pid == pid { - #[cfg(unix)] - { - let reg = SIGINT_HANDLER_REGISTRATION.read().unwrap(); - if reg.is_some() { - return Ok(()); - } - } #[cfg(windows)] { if SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) { return Ok(()); } } - } - - #[cfg(unix)] - { - let signal_id = install_sigint_handler()?; - let mut reg = SIGINT_HANDLER_REGISTRATION.write().unwrap(); - *reg = Some(signal_id); + #[cfg(not(windows))] + { + return Ok(()); + } } #[cfg(windows)] @@ -158,6 +134,10 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { install_sigint_handler()?; SIGINT_HANDLER_INSTALLED.store(true, Ordering::SeqCst); } + #[cfg(not(windows))] + { + install_sigint_handler()?; + } // Finally, store that we have installed it successfully. SIGINT_HANDLER_INSTALL_PID.0.store(pid, Ordering::SeqCst); @@ -185,65 +165,6 @@ fn in_sigint_shutdown() -> bool { SIGINT_DETECTED.load(Ordering::Relaxed) } -#[cfg(unix)] -fn restore_sigint_handler() -> Result<(), MultithreadedRuntimeError> { - use signal_hook_registry; - - let mut reg = SIGINT_HANDLER_REGISTRATION.write().unwrap(); - if let Some(signal_id) = reg.take() { - signal_hook_registry::unregister(signal_id); - } - Ok(()) -} - -#[cfg(windows)] -fn restore_sigint_handler() -> Result<(), MultithreadedRuntimeError> { - use winapi::um::consoleapi::SetConsoleCtrlHandler; - - // Remove our handler by unregistering it - // This allows Python's handler (if any) or the default handler to take over - unsafe { - if SetConsoleCtrlHandler(Some(console_ctrl_handler), winapi::shared::minwindef::FALSE) == 0 { - let error = winapi::um::errhandlingapi::GetLastError(); - // Log but don't fail - handler removal is best effort - if cfg!(debug_assertions) { - eprintln!("[debug] Warning: Failed to unregister console control handler. Windows error: {error}"); - } - } - } - SIGINT_HANDLER_INSTALLED.store(false, Ordering::SeqCst); - Ok(()) -} - -fn maybe_restore_sigint_handler() -> Result<(), MultithreadedRuntimeError> { - // Check if runtime exists and has no active operations - let guard = MULTITHREADED_RUNTIME.read().unwrap(); - if let Some((runtime_pid, ref runtime)) = *guard - && runtime_pid == std::process::id() - && runtime.external_executor_count() == 0 - { - // Check if handler is installed - let should_restore = { - #[cfg(unix)] - { - let reg = SIGINT_HANDLER_REGISTRATION.read().unwrap(); - reg.is_some() - } - #[cfg(windows)] - { - SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) - } - }; - - if should_restore { - // No active operations, restore handler - drop(guard); - restore_sigint_handler()?; - } - } - Ok(()) -} - fn signal_check_background_loop() { const SIGNAL_CHECK_INTERVAL: Duration = Duration::from_millis(250); @@ -373,10 +294,6 @@ where return Err(PyKeyboardInterrupt::new_err(())); } - // After operation completes, check if we should restore the signal handler - // This allows Python's original handler to be restored when no operations are active - let _ = maybe_restore_sigint_handler(); - // Now return the result. result } From dc351bd78cff2315ab0356abc3a9582f960de2bb Mon Sep 17 00:00:00 2001 From: Rajat Arya Date: Wed, 10 Dec 2025 09:00:30 -0800 Subject: [PATCH 4/4] Remove SIGINT_HANDLER_INSTALLED, redundant --- hf_xet/src/runtime.rs | 37 +++---------------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/hf_xet/src/runtime.rs b/hf_xet/src/runtime.rs index dfb7a9d74..4fbe3ec27 100644 --- a/hf_xet/src/runtime.rs +++ b/hf_xet/src/runtime.rs @@ -16,11 +16,6 @@ lazy_static! { static ref MULTITHREADED_RUNTIME: RwLock)>> = RwLock::new(None); } -#[cfg(windows)] -lazy_static! { - static ref SIGINT_HANDLER_INSTALLED: AtomicBool = AtomicBool::new(false); -} - #[cfg(unix)] fn install_sigint_handler() -> Result<(), MultithreadedRuntimeError> { use signal_hook::consts::SIGINT; @@ -99,16 +94,7 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { let pid = std::process::id(); if stored_pid == pid { - #[cfg(windows)] - { - if SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) { - return Ok(()); - } - } - #[cfg(not(windows))] - { - return Ok(()); - } + return Ok(()); } // Need to install it; acquire a lock to do so. @@ -117,27 +103,10 @@ fn check_sigint_handler() -> Result<(), MultithreadedRuntimeError> { // If another thread beat us to it while we're waiting for the lock. let stored_pid = SIGINT_HANDLER_INSTALL_PID.0.load(Ordering::SeqCst); if stored_pid == pid { - #[cfg(windows)] - { - if SIGINT_HANDLER_INSTALLED.load(Ordering::SeqCst) { - return Ok(()); - } - } - #[cfg(not(windows))] - { - return Ok(()); - } + return Ok(()); } - #[cfg(windows)] - { - install_sigint_handler()?; - SIGINT_HANDLER_INSTALLED.store(true, Ordering::SeqCst); - } - #[cfg(not(windows))] - { - install_sigint_handler()?; - } + install_sigint_handler()?; // Finally, store that we have installed it successfully. SIGINT_HANDLER_INSTALL_PID.0.store(pid, Ordering::SeqCst);