From 7774e545e8f71317488db08948e0284fb7a4fa2e Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Apr 2026 10:56:27 +0800 Subject: [PATCH 1/6] {"schema":"delivery/1","type":"feat","scope":"overlay","summary":"add macOS OCR action for frozen captures","intent":"let frozen captures recognize text locally on macOS and copy the result without adding backend settings yet","impact":"Frozen toolbar gains a Recognize Text action backed by Apple Vision OCR, app logging handles text-copy exits, and the README/spec document the new macOS-only behavior","breaking":false,"risk":"low","authority":"linear","delivery_mode":"status-only","refs":[]} --- Cargo.toml | 1 + README.md | 2 + apps/rsnap/src/app/capture.rs | 6 + docs/spec/v0.md | 2 + packages/rsnap-overlay/Cargo.toml | 2 +- packages/rsnap-overlay/src/lib.rs | 2 + packages/rsnap-overlay/src/ocr_macos.rs | 57 +++++++ packages/rsnap-overlay/src/overlay.rs | 193 ++++++++++++++++++++++-- packages/rsnap-overlay/src/worker.rs | 62 ++++++++ 9 files changed, 315 insertions(+), 12 deletions(-) create mode 100644 packages/rsnap-overlay/src/ocr_macos.rs diff --git a/Cargo.toml b/Cargo.toml index 373b4ac8..495d6e84 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ objc2-core-media = { version = "0.3" } objc2-core-video = { version = "0.3" } objc2-foundation = { version = "0.3" } objc2-screen-capture-kit = { version = "0.3" } +objc2-vision = { version = "0.3" } pollster = { version = "0.4" } serde = { version = "1.0", features = ["derive"] } thiserror = { version = "2.0" } diff --git a/README.md b/README.md index c46fe53c..3e4b6970 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Pure-Rust menubar screenshot prototype (macOS-first). - In Frozen mode, a dragged-region capture can be dragged from inside the bright selection area to reposition it without resizing. - In Frozen mode, `Space` copies the current frozen PNG to the clipboard and exits. - In Frozen mode, Cmd+S (macOS) / Ctrl+S saves the current PNG to disk and exits. +- On macOS, Frozen mode can recognize text from the current capture and copy the result to the clipboard from the toolbar. - After a dragged region freeze, press `s` or use the frozen toolbar `Scroll Capture ↓` action to enter scroll capture. - Scroll capture is currently implemented on macOS for dragged-region freezes and uses image-first downward stitching with a live side preview. - Upward scrolling may be observed for rewind/reacquire, but it never appends stitched rows. @@ -93,6 +94,7 @@ cargo run -p rsnap ### Output (save-to-disk) - In Frozen mode, use Cmd+S (macOS) / Ctrl+S to save a PNG to disk and exit. +- On macOS, use the frozen toolbar `Recognize Text` action to copy recognized text from the current frozen capture and exit. - After entering scroll capture from a dragged region on macOS, downward scrolling may append newly proven rows into the side preview. Upward scrolling never appends. Returning to already-stitched content should not grow the export; only newly proven content may be added. The scroll-capture commit path uses discrete region screenshots plus pairwise image registration; clipboard and save must match the committed preview the user sees. diff --git a/apps/rsnap/src/app/capture.rs b/apps/rsnap/src/app/capture.rs index 1a0c1357..e9f69733 100644 --- a/apps/rsnap/src/app/capture.rs +++ b/apps/rsnap/src/app/capture.rs @@ -248,6 +248,12 @@ impl App { OverlayExit::PngBytes(png_bytes) => { tracing::info!(bytes = png_bytes.len(), "Capture copied to clipboard."); }, + OverlayExit::TextCopied(character_count) => { + tracing::info!( + characters = character_count, + "Recognized text copied to clipboard." + ); + }, OverlayExit::Saved(path) => { tracing::info!(path = %path.display(), "Capture saved to file."); }, diff --git a/docs/spec/v0.md b/docs/spec/v0.md index f0535309..2bdd084d 100644 --- a/docs/spec/v0.md +++ b/docs/spec/v0.md @@ -31,6 +31,7 @@ cross-platform architecture. - Hovering over a window in live mode shows a glowing border that tracks the target window. - `Space` copies the frozen PNG of the selected region/window/fullscreen to clipboard. +- On macOS, Frozen mode may recognize text from the current frozen capture and copy the recognized text to the clipboard. - Cmd+S (macOS) / Ctrl+S saves the frozen PNG to disk. - `Esc` cancels capture. - In Frozen mode, a loupe and toolbar are part of the floating HUD set and can still @@ -57,6 +58,7 @@ cross-platform architecture. - Left click (without drag) -> hit-test window under the cursor on the same monitor and freeze that window bounds; fallback to fullscreen of the current monitor if no window is hit - `Space` -> copy the frozen cropped PNG (region/window/fullscreen) to the system clipboard, then exit + - On macOS, the frozen toolbar may expose `Recognize Text`, which runs Apple Vision OCR on the current frozen capture, copies the recognized text to the clipboard, and exits - Cmd+S (macOS) / Ctrl+S -> save the frozen cropped PNG to disk, then exit - Esc -> cancel and exit without copying - After a dragged-region freeze enters Frozen mode, dragging inside the bright region diff --git a/packages/rsnap-overlay/Cargo.toml b/packages/rsnap-overlay/Cargo.toml index 0820ac01..14148e4d 100644 --- a/packages/rsnap-overlay/Cargo.toml +++ b/packages/rsnap-overlay/Cargo.toml @@ -49,7 +49,7 @@ objc2-core-media = { workspace = true } objc2-core-video = { workspace = true } objc2-foundation = { workspace = true } objc2-screen-capture-kit = { workspace = true } -objc2-vision = "0.3.2" +objc2-vision = { workspace = true } raw-window-handle = { workspace = true } [dev-dependencies] diff --git a/packages/rsnap-overlay/src/lib.rs b/packages/rsnap-overlay/src/lib.rs index 175c7dc0..ac359afc 100644 --- a/packages/rsnap-overlay/src/lib.rs +++ b/packages/rsnap-overlay/src/lib.rs @@ -26,6 +26,8 @@ pub mod replay_support { mod backend; #[cfg(target_os = "macos")] mod live_frame_stream_macos; +#[cfg(target_os = "macos")] +mod ocr_macos; mod overlay; mod png; mod scroll_capture; diff --git a/packages/rsnap-overlay/src/ocr_macos.rs b/packages/rsnap-overlay/src/ocr_macos.rs new file mode 100644 index 00000000..d2da7ed3 --- /dev/null +++ b/packages/rsnap-overlay/src/ocr_macos.rs @@ -0,0 +1,57 @@ +use color_eyre::eyre::{Result, WrapErr}; +use image::RgbaImage; +use objc2::rc::{self, Retained}; +use objc2::runtime::AnyObject; +use objc2::{AnyThread, ClassType}; +use objc2_foundation::{NSArray, NSData, NSDictionary}; +use objc2_vision::{ + VNImageOption, VNImageRequestHandler, VNRecognizeTextRequest, VNRequest, + VNRequestTextRecognitionLevel, +}; + +use crate::png; + +pub(crate) fn recognize_text_from_image(image: &RgbaImage) -> Result { + rc::autoreleasepool(|_| { + let image_data = NSData::with_bytes( + &png::rgba_image_to_png_bytes(image).wrap_err("failed to encode OCR source image")?, + ); + let options: Retained> = NSDictionary::new(); + let request_handler = VNImageRequestHandler::initWithData_options( + VNImageRequestHandler::alloc(), + &image_data, + &options, + ); + let request = VNRecognizeTextRequest::new(); + + request.setRecognitionLevel(VNRequestTextRecognitionLevel::Accurate); + request.setUsesLanguageCorrection(true); + request.setAutomaticallyDetectsLanguage(true); + + let requests: Retained> = + NSArray::from_slice(&[request.as_super().as_super()]); + + request_handler + .performRequests_error(&requests) + .wrap_err("Vision text recognition request failed")?; + + let mut lines = Vec::new(); + + if let Some(results) = request.results() { + for index in 0..results.count() { + let observation = results.objectAtIndex(index); + let candidates = observation.topCandidates(1); + let Some(candidate) = candidates.firstObject() else { + continue; + }; + let line = candidate.string().to_string(); + + if !line.trim().is_empty() { + lines.push(line); + } + } + } + + Ok(lines.join("\n")) + }) +} diff --git a/packages/rsnap-overlay/src/overlay.rs b/packages/rsnap-overlay/src/overlay.rs index f1ff244f..4b45494f 100644 --- a/packages/rsnap-overlay/src/overlay.rs +++ b/packages/rsnap-overlay/src/overlay.rs @@ -353,6 +353,8 @@ pub enum OverlayExit { Cancelled, /// The session completed by copying PNG bytes to the caller. PngBytes(Vec), + /// The session completed by copying recognized text to the clipboard. + TextCopied(usize), /// The session completed by saving a file to disk. Saved(PathBuf), /// The session failed with a user-visible error message. @@ -457,6 +459,7 @@ enum FrozenToolbarTool { Redo, AutoCenter, Scroll, + Ocr, Copy, Save, } @@ -471,6 +474,7 @@ impl FrozenToolbarTool { Self::Redo => "Redo", Self::AutoCenter => "Auto-center (C)", Self::Scroll => "Scroll Capture", + Self::Ocr => "Recognize Text", Self::Copy => "Copy", Self::Save => "Save", } @@ -486,6 +490,7 @@ impl FrozenToolbarTool { Self::Redo => regular::ARROW_CLOCKWISE, Self::AutoCenter => regular::TARGET, Self::Scroll => regular::MOUSE_SCROLL, + Self::Ocr => regular::SCAN, Self::Copy => regular::COPY, Self::Save => regular::FLOPPY_DISK, } @@ -496,7 +501,7 @@ impl FrozenToolbarTool { } const fn requires_final_capture(self) -> bool { - matches!(self, Self::Scroll | Self::Copy | Self::Save) + matches!(self, Self::Scroll | Self::Ocr | Self::Copy | Self::Save) } } @@ -719,9 +724,12 @@ pub struct OverlaySession { frozen_window_image: Option, frozen_capture_source: FrozenCaptureSource, capture_windows_hidden: bool, + pending_recognize_text: Option, pending_encode_png: Option, pending_png_action: Option, #[cfg(target_os = "macos")] + ocr_inflight: bool, + #[cfg(target_os = "macos")] png_encode_inflight: bool, #[cfg(target_os = "macos")] pending_self_capture_exception_window_ids_worker_refresh: bool, @@ -782,14 +790,16 @@ impl OverlaySession { Self::with_config(OverlayConfig::default()) } + fn initial_timing() -> (Duration, Duration, Instant) { + (Duration::from_millis(500), LIVE_WINDOW_LIST_REFRESH_INTERVAL, Instant::now()) + } + #[must_use] /// Creates a new overlay session with the provided runtime configuration. pub fn with_config(config: OverlayConfig) -> Self { - let live_bg_request_interval = Duration::from_millis(500); + let (live_bg_request_interval, window_list_refresh_interval, now) = Self::initial_timing(); let loupe_sample_side_px = Self::normalized_loupe_sample_side_px(config.loupe_sample_side_px); - let (window_list_refresh_interval, now) = - (LIVE_WINDOW_LIST_REFRESH_INTERVAL, Instant::now()); #[cfg(not(target_os = "macos"))] let cursor_device = Self::try_create_cursor_device(); let state = Self::overlay_state_with_loupe_patch(loupe_sample_side_px); @@ -823,7 +833,7 @@ impl OverlaySession { gpu: None, last_hud_window_move_at: now, last_loupe_window_move_at: now, - last_present_at: Instant::now(), + last_present_at: now, last_live_cursor_poll_at: now - CURSOR_POLL_INTERVAL_MIN, last_frozen_cursor_poll_at: now - CURSOR_POLL_INTERVAL_MIN, window_list_snapshot: None, @@ -872,9 +882,12 @@ impl OverlaySession { frozen_window_image: None, frozen_capture_source: FrozenCaptureSource::None, capture_windows_hidden: false, + pending_recognize_text: None, pending_encode_png: None, pending_png_action: None, #[cfg(target_os = "macos")] + ocr_inflight: false, + #[cfg(target_os = "macos")] png_encode_inflight: false, #[cfg(target_os = "macos")] pending_self_capture_exception_window_ids_worker_refresh: false, @@ -1104,6 +1117,7 @@ impl OverlaySession { self.inflight_freeze_capture.is_some() || self.pending_click_hit_test_request_id.is_some() || self.window_list_refresh_inflight + || self.ocr_inflight || self.png_encode_inflight } @@ -2186,7 +2200,24 @@ impl OverlaySession { } } - if let Some(image) = self.pending_encode_png.take() { + let mut queued_recognize_text = false; + + #[cfg(target_os = "macos")] + if let Some(image) = self.pending_recognize_text.take() { + queued_recognize_text = true; + + if let Some(worker) = self.worker.as_ref() { + if let Err(image) = worker.request_recognize_text(image) { + self.pending_recognize_text = Some(image); + } else { + self.ocr_inflight = true; + } + } else { + self.pending_recognize_text = Some(image); + } + } + + if !queued_recognize_text && let Some(image) = self.pending_encode_png.take() { if let Some(worker) = self.worker.as_ref() { if let Err(image) = worker.request_encode_png(image) { self.pending_encode_png = Some(image); @@ -2660,6 +2691,12 @@ impl OverlaySession { OverlayControl::Continue }, + #[cfg(target_os = "macos")] + WorkerResponse::RecognizedText { text } => { + self.ocr_inflight = false; + + self.handle_recognized_text_response(text) + }, WorkerResponse::Error { source, message } => { match source { WorkerErrorSource::FreezeCapture => { @@ -2684,6 +2721,10 @@ impl OverlaySession { self.png_encode_inflight = false; } }, + #[cfg(target_os = "macos")] + WorkerErrorSource::RecognizeText => { + self.ocr_inflight = false; + }, WorkerErrorSource::CaptureMonitorRegion => { self.clear_scroll_capture_inflight_request(); self.scroll_capture_set_error(message); @@ -3751,6 +3792,26 @@ impl OverlaySession { } } + #[cfg(target_os = "macos")] + fn handle_recognized_text_response(&mut self, text: String) -> OverlayControl { + if text.trim().is_empty() { + self.state.set_error(String::from("No text recognized.")); + self.request_redraw_all(); + + return OverlayControl::Continue; + } + + match output::write_text_to_clipboard(&text) { + Ok(()) => self.exit(OverlayExit::TextCopied(text.chars().count())), + Err(err) => { + self.state.set_error(format!("{err:#}")); + self.request_redraw_all(); + + OverlayControl::Continue + }, + } + } + fn maybe_stop_frozen_selection_drag_for_mouse_input( &mut self, state: ElementState, @@ -6172,6 +6233,36 @@ impl OverlaySession { self.request_redraw_all(); } + fn begin_ocr_action(&mut self) { + #[cfg(not(target_os = "macos"))] + { + self.state.set_error(String::from("OCR is only available on macOS.")); + self.request_redraw_all(); + + return; + } + + if !matches!(self.state.mode, OverlayMode::Frozen) { + return; + } + if !self.frozen_final_capture_ready() { + self.state.set_error("Preparing capture..."); + self.request_redraw_all(); + + return; + } + + let Some(export_image) = self.current_export_image() else { + return; + }; + + self.state.set_error("Recognizing text..."); + + self.pending_recognize_text = Some(export_image); + + self.request_redraw_all(); + } + fn handle_redraw_requested(&mut self, window_id: WindowId) -> OverlayControl { let now = Instant::now(); @@ -7308,6 +7399,11 @@ impl OverlaySession { OverlayControl::Continue }, FrozenToolbarTool::Scroll => self.start_scroll_capture(), + FrozenToolbarTool::Ocr => { + self.begin_ocr_action(); + + OverlayControl::Continue + }, _ => OverlayControl::Continue, } } @@ -7332,6 +7428,7 @@ impl OverlaySession { let (exit_kind, png_bytes_len, saved_path, error_message) = match &exit { OverlayExit::Cancelled => ("cancelled", None, None, None), OverlayExit::PngBytes(png_bytes) => ("png_bytes", Some(png_bytes.len()), None, None), + OverlayExit::TextCopied(_) => ("text_copied", None, None, None), OverlayExit::Saved(path) => ("saved", None, Some(path.display().to_string()), None), OverlayExit::Error(message) => ("error", None, None, Some(message.as_str())), }; @@ -7426,10 +7523,7 @@ impl OverlaySession { self.toolbar_pointer_local = None; self.stop_frozen_selection_drag(); - - self.pending_encode_png = None; - self.pending_png_action = None; - self.keyboard_modifiers = ModifiersState::default(); + self.clear_pending_output_actions(); tracing::info!( op = "overlay.exit_end", @@ -7443,6 +7537,18 @@ impl OverlaySession { OverlayControl::Exit(exit) } + fn clear_pending_output_actions(&mut self) { + self.pending_recognize_text = None; + self.pending_encode_png = None; + self.pending_png_action = None; + #[cfg(target_os = "macos")] + { + self.ocr_inflight = false; + self.png_encode_inflight = false; + } + self.keyboard_modifiers = ModifiersState::default(); + } + fn initialize_cursor_state_for_cursor( &mut self, cursor: GlobalPoint, @@ -10624,8 +10730,27 @@ impl WindowRenderer { } fn frozen_toolbar_tools(toolbar_state: &FrozenToolbarState) -> &'static [FrozenToolbarTool] { + #[cfg(target_os = "macos")] + const TOOLS_SCROLL_MODE: [FrozenToolbarTool; 3] = + [FrozenToolbarTool::Ocr, FrozenToolbarTool::Copy, FrozenToolbarTool::Save]; + #[cfg(not(target_os = "macos"))] const TOOLS_SCROLL_MODE: [FrozenToolbarTool; 2] = [FrozenToolbarTool::Copy, FrozenToolbarTool::Save]; + #[cfg(target_os = "macos")] + const TOOLS_WITH_SCROLL_AND_AUTO_CENTER: [FrozenToolbarTool; 11] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::AutoCenter, + FrozenToolbarTool::Scroll, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITH_SCROLL_AND_AUTO_CENTER: [FrozenToolbarTool; 10] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -10638,6 +10763,20 @@ impl WindowRenderer { FrozenToolbarTool::Copy, FrozenToolbarTool::Save, ]; + #[cfg(target_os = "macos")] + const TOOLS_WITH_AUTO_CENTER: [FrozenToolbarTool; 10] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::AutoCenter, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITH_AUTO_CENTER: [FrozenToolbarTool; 9] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -10649,6 +10788,20 @@ impl WindowRenderer { FrozenToolbarTool::Copy, FrozenToolbarTool::Save, ]; + #[cfg(target_os = "macos")] + const TOOLS_WITH_SCROLL: [FrozenToolbarTool; 10] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::Scroll, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITH_SCROLL: [FrozenToolbarTool; 9] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -10660,6 +10813,19 @@ impl WindowRenderer { FrozenToolbarTool::Copy, FrozenToolbarTool::Save, ]; + #[cfg(target_os = "macos")] + const TOOLS_WITHOUT_SCROLL: [FrozenToolbarTool; 9] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITHOUT_SCROLL: [FrozenToolbarTool; 8] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -14964,7 +15130,6 @@ mod tests { Some(session.frozen_toolbar_default_position_for_capture_rect(monitor, capture_rect)) ); } - #[test] fn auto_center_toolbar_tool_only_appears_when_available() { let default_tools = WindowRenderer::frozen_toolbar_tools(&FrozenToolbarState::default()); @@ -14973,7 +15138,9 @@ mod tests { ..FrozenToolbarState::default() }); + assert!(default_tools.contains(&FrozenToolbarTool::Ocr)); assert!(!default_tools.contains(&FrozenToolbarTool::AutoCenter)); + assert!(auto_center_tools.contains(&FrozenToolbarTool::Ocr)); assert!(auto_center_tools.contains(&FrozenToolbarTool::AutoCenter)); } @@ -15408,6 +15575,7 @@ mod tests { let mut session = OverlaySession { window_list_refresh_inflight: true, drop_next_window_list_refresh_snapshot: true, + ocr_inflight: true, png_encode_inflight: true, pending_self_capture_exception_window_ids_worker_refresh: true, authoritative_frozen_capture_ready: true, @@ -15427,6 +15595,7 @@ mod tests { assert!(!session.window_list_refresh_inflight); assert!(!session.drop_next_window_list_refresh_snapshot); + assert!(!session.ocr_inflight); assert!(!session.png_encode_inflight); assert!(!session.pending_self_capture_exception_window_ids_worker_refresh); assert!(!session.authoritative_frozen_capture_ready); @@ -18733,6 +18902,7 @@ mod tests { assert!(!FrozenToolbarTool::Redo.is_mode_tool()); assert!(!FrozenToolbarTool::AutoCenter.is_mode_tool()); assert!(!FrozenToolbarTool::Scroll.is_mode_tool()); + assert!(!FrozenToolbarTool::Ocr.is_mode_tool()); assert!(!FrozenToolbarTool::Copy.is_mode_tool()); assert!(!FrozenToolbarTool::Save.is_mode_tool()); } @@ -18753,6 +18923,7 @@ mod tests { assert!(!FrozenToolbarTool::Redo.requires_final_capture()); assert!(!FrozenToolbarTool::AutoCenter.requires_final_capture()); assert!(FrozenToolbarTool::Scroll.requires_final_capture()); + assert!(FrozenToolbarTool::Ocr.requires_final_capture()); assert!(FrozenToolbarTool::Copy.requires_final_capture()); assert!(FrozenToolbarTool::Save.requires_final_capture()); } diff --git a/packages/rsnap-overlay/src/worker.rs b/packages/rsnap-overlay/src/worker.rs index 6e4650b2..d312335e 100644 --- a/packages/rsnap-overlay/src/worker.rs +++ b/packages/rsnap-overlay/src/worker.rs @@ -11,6 +11,8 @@ use std::time::{Duration, Instant}; use image::RgbaImage; use crate::backend::CaptureBackend; +#[cfg(target_os = "macos")] +use crate::ocr_macos; use crate::png; #[cfg(not(target_os = "macos"))] use crate::state::LiveCursorSample; @@ -44,6 +46,10 @@ pub(crate) enum WorkerRequest { monitor: MonitorRect, target: FreezeCaptureTarget, }, + #[cfg(target_os = "macos")] + RecognizeText { + image: RgbaImage, + }, CaptureMonitorRegion { monitor: MonitorRect, rect_px: RectPoints, @@ -78,6 +84,10 @@ pub(crate) enum WorkerResponse { window_image: Option, captured_window_id: Option, }, + #[cfg(target_os = "macos")] + RecognizedText { + text: String, + }, EncodedPng { png_bytes: Vec, }, @@ -91,6 +101,8 @@ pub(crate) enum WorkerResponse { pub(crate) enum WorkerErrorSource { EncodePng, FreezeCapture, + #[cfg(target_os = "macos")] + RecognizeText, RefreshWindowList, CaptureMonitorRegion, } @@ -238,6 +250,33 @@ impl OverlayWorker { } } + #[cfg(target_os = "macos")] + fn handle_recognize_text_request( + resp_tx: &Sender, + response_waker: Option<&(dyn Fn() + Send + Sync)>, + image: RgbaImage, + ) { + match ocr_macos::recognize_text_from_image(&image) { + Ok(text) => { + Self::send_response( + resp_tx, + response_waker, + WorkerResponse::RecognizedText { text }, + ); + }, + Err(err) => { + Self::send_response( + resp_tx, + response_waker, + WorkerResponse::Error { + source: WorkerErrorSource::RecognizeText, + message: format!("{err:#}"), + }, + ); + }, + } + } + fn handle_refresh_window_list_request( backend: &mut dyn CaptureBackend, resp_tx: &Sender, @@ -448,6 +487,17 @@ impl OverlayWorker { } } + #[cfg(target_os = "macos")] + pub(crate) fn request_recognize_text(&self, image: RgbaImage) -> Result<(), RgbaImage> { + match self.req_tx.try_send(WorkerRequest::RecognizeText { image }) { + Ok(()) => Ok(()), + Err(TrySendError::Full(WorkerRequest::RecognizeText { image })) + | Err(TrySendError::Disconnected(WorkerRequest::RecognizeText { image })) => Err(image), + Err(TrySendError::Full(_)) | Err(TrySendError::Disconnected(_)) => { + unreachable!("request_recognize_text only sends WorkerRequest::RecognizeText") + }, + } + } pub(crate) fn request_capture_monitor_region( &self, monitor: MonitorRect, @@ -486,6 +536,8 @@ struct PendingWorkerRequests { last_sample_cursor: Option<(MonitorRect, GlobalPoint, u64, bool, u32, u32)>, last_refresh_window_list: bool, last_freeze: Option<(MonitorRect, FreezeCaptureTarget)>, + #[cfg(target_os = "macos")] + last_recognize_text: Option, last_capture_region: Option<(MonitorRect, RectPoints, u64)>, last_encode: Option, } @@ -513,6 +565,10 @@ impl PendingWorkerRequests { WorkerRequest::FreezeCapture { monitor, target } => { self.last_freeze = Some((monitor, target)); }, + #[cfg(target_os = "macos")] + WorkerRequest::RecognizeText { image } => { + self.last_recognize_text = Some(image); + }, WorkerRequest::CaptureMonitorRegion { monitor, rect_px, request_id } => { self.last_capture_region = Some((monitor, rect_px, request_id)); }, @@ -534,6 +590,12 @@ impl PendingWorkerRequests { return; } + #[cfg(target_os = "macos")] + if let Some(image) = self.last_recognize_text { + OverlayWorker::handle_recognize_text_request(resp_tx, response_waker, image); + + return; + } if let Some((monitor, target)) = self.last_freeze { OverlayWorker::handle_freeze_request(backend, resp_tx, response_waker, monitor, target); From c54bba51be952a3af14076d1ce160b19e5ebfc0c Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Apr 2026 11:04:20 +0800 Subject: [PATCH 2/6] {"schema":"delivery/1","type":"fix","scope":"overlay","summary":"fix Linux linting for the macOS OCR action","intent":"keep the new macOS OCR feature out of non-macOS dead-code paths so the shared Rust gate passes on Linux CI","impact":"Frozen toolbar OCR wiring now compiles cleanly across platforms by gating the toolbar variant, action handler, and assertions to macOS while preserving the macOS behavior","breaking":false,"risk":"low","authority":"linear","delivery_mode":"status-only","refs":[]} --- packages/rsnap-overlay/src/overlay.rs | 49 ++++++++++++++++++++------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/packages/rsnap-overlay/src/overlay.rs b/packages/rsnap-overlay/src/overlay.rs index 4b45494f..63b2e48c 100644 --- a/packages/rsnap-overlay/src/overlay.rs +++ b/packages/rsnap-overlay/src/overlay.rs @@ -459,6 +459,7 @@ enum FrozenToolbarTool { Redo, AutoCenter, Scroll, + #[cfg(target_os = "macos")] Ocr, Copy, Save, @@ -474,6 +475,7 @@ impl FrozenToolbarTool { Self::Redo => "Redo", Self::AutoCenter => "Auto-center (C)", Self::Scroll => "Scroll Capture", + #[cfg(target_os = "macos")] Self::Ocr => "Recognize Text", Self::Copy => "Copy", Self::Save => "Save", @@ -490,6 +492,7 @@ impl FrozenToolbarTool { Self::Redo => regular::ARROW_CLOCKWISE, Self::AutoCenter => regular::TARGET, Self::Scroll => regular::MOUSE_SCROLL, + #[cfg(target_os = "macos")] Self::Ocr => regular::SCAN, Self::Copy => regular::COPY, Self::Save => regular::FLOPPY_DISK, @@ -501,7 +504,18 @@ impl FrozenToolbarTool { } const fn requires_final_capture(self) -> bool { - matches!(self, Self::Scroll | Self::Ocr | Self::Copy | Self::Save) + match self { + Self::Pointer + | Self::Pen + | Self::Text + | Self::Mosaic + | Self::Undo + | Self::Redo + | Self::AutoCenter => false, + Self::Scroll | Self::Copy | Self::Save => true, + #[cfg(target_os = "macos")] + Self::Ocr => true, + } } } @@ -2200,6 +2214,9 @@ impl OverlaySession { } } + #[cfg(not(target_os = "macos"))] + let queued_recognize_text = false; + #[cfg(target_os = "macos")] let mut queued_recognize_text = false; #[cfg(target_os = "macos")] @@ -6233,15 +6250,8 @@ impl OverlaySession { self.request_redraw_all(); } + #[cfg(target_os = "macos")] fn begin_ocr_action(&mut self) { - #[cfg(not(target_os = "macos"))] - { - self.state.set_error(String::from("OCR is only available on macOS.")); - self.request_redraw_all(); - - return; - } - if !matches!(self.state.mode, OverlayMode::Frozen) { return; } @@ -6263,6 +6273,12 @@ impl OverlaySession { self.request_redraw_all(); } + #[cfg(not(target_os = "macos"))] + fn begin_ocr_action(&mut self) { + self.state.set_error(String::from("OCR is only available on macOS.")); + self.request_redraw_all(); + } + fn handle_redraw_requested(&mut self, window_id: WindowId) -> OverlayControl { let now = Instant::now(); @@ -7399,6 +7415,7 @@ impl OverlaySession { OverlayControl::Continue }, FrozenToolbarTool::Scroll => self.start_scroll_capture(), + #[cfg(target_os = "macos")] FrozenToolbarTool::Ocr => { self.begin_ocr_action(); @@ -15138,10 +15155,14 @@ mod tests { ..FrozenToolbarState::default() }); - assert!(default_tools.contains(&FrozenToolbarTool::Ocr)); assert!(!default_tools.contains(&FrozenToolbarTool::AutoCenter)); - assert!(auto_center_tools.contains(&FrozenToolbarTool::Ocr)); assert!(auto_center_tools.contains(&FrozenToolbarTool::AutoCenter)); + + #[cfg(target_os = "macos")] + { + assert!(default_tools.contains(&FrozenToolbarTool::Ocr)); + assert!(auto_center_tools.contains(&FrozenToolbarTool::Ocr)); + } } #[test] @@ -18902,9 +18923,10 @@ mod tests { assert!(!FrozenToolbarTool::Redo.is_mode_tool()); assert!(!FrozenToolbarTool::AutoCenter.is_mode_tool()); assert!(!FrozenToolbarTool::Scroll.is_mode_tool()); - assert!(!FrozenToolbarTool::Ocr.is_mode_tool()); assert!(!FrozenToolbarTool::Copy.is_mode_tool()); assert!(!FrozenToolbarTool::Save.is_mode_tool()); + #[cfg(target_os = "macos")] + assert!(!FrozenToolbarTool::Ocr.is_mode_tool()); } #[test] @@ -18923,9 +18945,10 @@ mod tests { assert!(!FrozenToolbarTool::Redo.requires_final_capture()); assert!(!FrozenToolbarTool::AutoCenter.requires_final_capture()); assert!(FrozenToolbarTool::Scroll.requires_final_capture()); - assert!(FrozenToolbarTool::Ocr.requires_final_capture()); assert!(FrozenToolbarTool::Copy.requires_final_capture()); assert!(FrozenToolbarTool::Save.requires_final_capture()); + #[cfg(target_os = "macos")] + assert!(FrozenToolbarTool::Ocr.requires_final_capture()); } #[test] From d1ab5a6fb266b0fe7049f763353b4ebb5e3e247d Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Apr 2026 11:14:28 +0800 Subject: [PATCH 3/6] {"schema":"delivery/1","type":"fix","scope":"overlay","summary":"repair OCR queue handling and Linux lint","intent":"keep the Apple OCR action from leaking stale PNG output and ensure queued OCR requests survive mixed worker batches while the PR stays green on Linux CI","impact":"OCR now supersedes prior PNG export intent, stale PNG responses are ignored after OCR takes over, worker batches process both PNG encode and OCR requests, and the non-macOS build no longer carries an unused OCR entry point","breaking":false,"risk":"low","authority":"linear","delivery_mode":"status-only","refs":[]} --- packages/rsnap-overlay/src/overlay.rs | 63 ++++++++++++++++++++++++--- packages/rsnap-overlay/src/worker.rs | 37 +++++++++++++++- 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/packages/rsnap-overlay/src/overlay.rs b/packages/rsnap-overlay/src/overlay.rs index 63b2e48c..148c748c 100644 --- a/packages/rsnap-overlay/src/overlay.rs +++ b/packages/rsnap-overlay/src/overlay.rs @@ -3783,7 +3783,9 @@ impl OverlaySession { } fn handle_encoded_png_response(&mut self, png_bytes: Vec) -> OverlayControl { - let action = self.pending_png_action.take().unwrap_or(PngAction::Copy); + let Some(action) = self.pending_png_action.take() else { + return OverlayControl::Continue; + }; match action { PngAction::Copy => match output::write_png_bytes_to_clipboard(&png_bytes) { @@ -6266,6 +6268,9 @@ impl OverlaySession { return; }; + self.pending_png_action = None; + self.pending_encode_png = None; + self.state.set_error("Recognizing text..."); self.pending_recognize_text = Some(export_image); @@ -6273,12 +6278,6 @@ impl OverlaySession { self.request_redraw_all(); } - #[cfg(not(target_os = "macos"))] - fn begin_ocr_action(&mut self) { - self.state.set_error(String::from("OCR is only available on macOS.")); - self.request_redraw_all(); - } - fn handle_redraw_requested(&mut self, window_id: WindowId) -> OverlayControl { let now = Instant::now(); @@ -15349,6 +15348,56 @@ mod tests { assert_eq!(session.state.error_message.as_deref(), Some("Copying...")); } + #[cfg(target_os = "macos")] + #[test] + fn begin_ocr_action_clears_stale_png_output_intent() { + let monitor = test_monitor(); + let expected_export = test_frozen_image(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, expected_export.clone()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_png_action(PngAction::Copy); + + assert_eq!(session.pending_png_action, Some(PngAction::Copy)); + assert_eq!(session.pending_encode_png.as_ref(), Some(&expected_export)); + + session.begin_ocr_action(); + + assert_eq!(session.pending_png_action, None); + assert!(session.pending_encode_png.is_none()); + assert_eq!(session.pending_recognize_text.as_ref(), Some(&expected_export)); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + + #[cfg(target_os = "macos")] + #[test] + fn stale_png_response_is_ignored_after_ocr_supersedes_export() { + let monitor = test_monitor(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_png_action(PngAction::Copy); + session.begin_ocr_action(); + + let control = session.handle_encoded_png_response(Vec::new()); + + assert!(matches!(control, OverlayControl::Continue)); + assert_eq!(session.pending_png_action, None); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + #[cfg(target_os = "macos")] #[test] fn duplicate_live_frames_schedule_forced_refresh_when_downward_backlog_is_fresh() { diff --git a/packages/rsnap-overlay/src/worker.rs b/packages/rsnap-overlay/src/worker.rs index d312335e..ed189586 100644 --- a/packages/rsnap-overlay/src/worker.rs +++ b/packages/rsnap-overlay/src/worker.rs @@ -585,17 +585,24 @@ impl PendingWorkerRequests { _region_capture_resp_tx: &Sender, response_waker: Option<&(dyn Fn() + Send + Sync)>, ) { + let mut handled_high_priority = false; + if let Some(image) = self.last_encode { OverlayWorker::handle_encode_request(resp_tx, response_waker, image); - return; + handled_high_priority = true; } #[cfg(target_os = "macos")] if let Some(image) = self.last_recognize_text { OverlayWorker::handle_recognize_text_request(resp_tx, response_waker, image); + handled_high_priority = true; + } + + if handled_high_priority { return; } + if let Some((monitor, target)) = self.last_freeze { OverlayWorker::handle_freeze_request(backend, resp_tx, response_waker, monitor, target); @@ -665,7 +672,7 @@ mod tests { }; use crate::worker::{ CapturedMonitorRegionResponse, CapturedMonitorRegionResult, OverlayWorker, - WorkerErrorSource, WorkerResponse, + PendingWorkerRequests, WorkerErrorSource, WorkerRequest, WorkerResponse, }; enum MockScrollCaptureResult { @@ -919,4 +926,30 @@ mod tests { other => panic!("expected worker error, got {other:?}"), } } + + #[cfg(target_os = "macos")] + #[test] + fn dispatch_processes_encode_and_recognize_requests_from_same_batch() { + let (resp_tx, resp_rx) = mpsc::channel::(); + let (region_tx, region_rx) = mpsc::channel::(); + let mut backend = + MockScrollCaptureBackend { scroll_capture_result: MockScrollCaptureResult::NoNewFrame }; + let mut pending = PendingWorkerRequests::default(); + + pending.record(WorkerRequest::EncodePng { image: sample_image() }); + pending.record(WorkerRequest::RecognizeText { image: sample_image() }); + pending.dispatch(&mut backend, &resp_tx, ®ion_tx, None); + + let first = resp_rx.try_recv().expect("png response"); + let second = resp_rx.try_recv().expect("ocr response"); + + assert!(matches!(first, WorkerResponse::EncodedPng { .. })); + assert!(matches!( + second, + WorkerResponse::RecognizedText { .. } + | WorkerResponse::Error { source: WorkerErrorSource::RecognizeText, .. } + )); + assert!(resp_rx.try_recv().is_err()); + assert!(region_rx.try_recv().is_err()); + } } From fa2ee0a3adc767a4ac95bde7b62faab61211d6a5 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Apr 2026 11:18:02 +0800 Subject: [PATCH 4/6] {"schema":"delivery/1","type":"fix","scope":"worker","summary":"fix Linux-only OCR test imports","intent":"remove the last Linux-specific dead import failure from the OCR repair branch so the shared Rust checks match the verified local state","impact":"worker test imports now gate macOS-only OCR helpers behind the same platform condition as the test, avoiding a Linux clippy failure without changing runtime OCR behavior","breaking":false,"risk":"low","authority":"linear","delivery_mode":"status-only","refs":[]} --- packages/rsnap-overlay/src/worker.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/rsnap-overlay/src/worker.rs b/packages/rsnap-overlay/src/worker.rs index ed189586..62bbfef0 100644 --- a/packages/rsnap-overlay/src/worker.rs +++ b/packages/rsnap-overlay/src/worker.rs @@ -672,8 +672,10 @@ mod tests { }; use crate::worker::{ CapturedMonitorRegionResponse, CapturedMonitorRegionResult, OverlayWorker, - PendingWorkerRequests, WorkerErrorSource, WorkerRequest, WorkerResponse, + WorkerErrorSource, WorkerResponse, }; + #[cfg(target_os = "macos")] + use crate::worker::{PendingWorkerRequests, WorkerRequest}; enum MockScrollCaptureResult { Image(RgbaImage), From 609e0dd77b8b502d5b058132ce2be29a8d334328 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Apr 2026 11:48:57 +0800 Subject: [PATCH 5/6] {"schema":"delivery/1","type":"fix","scope":"rsnap-overlay","summary":"repair OCR response supersession handling","intent":"keep stale OCR responses from overriding newer output intent","impact":"serializes OCR requests and ignores stale OCR completions","breaking":false,"risk":"low","authority":"linear","delivery_mode":"status-only","refs":[]} --- packages/rsnap-overlay/src/overlay.rs | 312 +++++++++++++++++++++----- packages/rsnap-overlay/src/worker.rs | 36 ++- 2 files changed, 283 insertions(+), 65 deletions(-) diff --git a/packages/rsnap-overlay/src/overlay.rs b/packages/rsnap-overlay/src/overlay.rs index 148c748c..ba6333ea 100644 --- a/packages/rsnap-overlay/src/overlay.rs +++ b/packages/rsnap-overlay/src/overlay.rs @@ -571,26 +571,6 @@ impl DeviceCursorPointSource { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum SelectionFlowStyle { - Band, - FullBorder, -} - -#[derive(Clone, Copy, Debug)] -enum WindowRendererPath { - Overlay, - LoupeTile, -} -impl WindowRendererPath { - const fn as_str(self) -> &'static str { - match self { - Self::Overlay => "overlay", - Self::LoupeTile => "loupe_tile", - } - } -} - #[derive(Clone, Debug)] /// Runtime configuration applied to a capture overlay session. pub struct OverlayConfig { @@ -738,7 +718,12 @@ pub struct OverlaySession { frozen_window_image: Option, frozen_capture_source: FrozenCaptureSource, capture_windows_hidden: bool, - pending_recognize_text: Option, + #[cfg(target_os = "macos")] + next_ocr_request_id: u64, + #[cfg(target_os = "macos")] + active_ocr_request_id: Option, + #[cfg(target_os = "macos")] + pending_recognize_text: Option, pending_encode_png: Option, pending_png_action: Option, #[cfg(target_os = "macos")] @@ -811,13 +796,23 @@ impl OverlaySession { #[must_use] /// Creates a new overlay session with the provided runtime configuration. pub fn with_config(config: OverlayConfig) -> Self { - let (live_bg_request_interval, window_list_refresh_interval, now) = Self::initial_timing(); - let loupe_sample_side_px = - Self::normalized_loupe_sample_side_px(config.loupe_sample_side_px); + let runtime = Self::initial_session_runtime(&config); #[cfg(not(target_os = "macos"))] let cursor_device = Self::try_create_cursor_device(); - let state = Self::overlay_state_with_loupe_patch(loupe_sample_side_px); + Self::build_with_config( + config, + runtime, + #[cfg(not(target_os = "macos"))] + cursor_device, + ) + } + + fn build_with_config( + config: OverlayConfig, + runtime: InitialSessionRuntime, + #[cfg(not(target_os = "macos"))] cursor_device: Option, + ) -> Self { Self { config, worker: None, @@ -827,7 +822,7 @@ impl OverlaySession { live_sample_stream: None, #[cfg(not(target_os = "macos"))] cursor_device, - state, + state: runtime.state, cursor_monitor: None, windows: HashMap::new(), hud_window: None, @@ -845,16 +840,16 @@ impl OverlaySession { toolbar_outer_pos: None, toolbar_inner_size_points: None, gpu: None, - last_hud_window_move_at: now, - last_loupe_window_move_at: now, - last_present_at: now, - last_live_cursor_poll_at: now - CURSOR_POLL_INTERVAL_MIN, - last_frozen_cursor_poll_at: now - CURSOR_POLL_INTERVAL_MIN, + last_hud_window_move_at: runtime.now, + last_loupe_window_move_at: runtime.now, + last_present_at: runtime.now, + last_live_cursor_poll_at: runtime.now - CURSOR_POLL_INTERVAL_MIN, + last_frozen_cursor_poll_at: runtime.now - CURSOR_POLL_INTERVAL_MIN, window_list_snapshot: None, - last_window_list_refresh_request_at: now - window_list_refresh_interval, - window_list_refresh_interval, - last_live_bg_request_at: Instant::now() - live_bg_request_interval, - live_bg_request_interval, + last_window_list_refresh_request_at: runtime.now - runtime.window_list_refresh_interval, + window_list_refresh_interval: runtime.window_list_refresh_interval, + last_live_bg_request_at: runtime.now - runtime.live_bg_request_interval, + live_bg_request_interval: runtime.live_bg_request_interval, hit_test_send_full_count: 0, hit_test_send_disconnected_count: 0, hit_test_request_id: 0, @@ -879,13 +874,13 @@ impl OverlaySession { keyboard_modifiers: ModifiersState::default(), event_loop_phase: OverlayEventLoopPhase::Idle, event_loop_progress_seq: 0, - event_loop_last_progress_at: now, + event_loop_last_progress_at: runtime.now, event_loop_last_progress_window_id: None, event_loop_last_progress_monitor_id: None, event_loop_last_progress_detail: None, event_loop_last_stall_warn_at: None, - loupe_patch_width_px: loupe_sample_side_px, - loupe_patch_height_px: loupe_sample_side_px, + loupe_patch_width_px: runtime.loupe_sample_side_px, + loupe_patch_height_px: runtime.loupe_sample_side_px, egui_repaint_deadline: Arc::new(Mutex::new(None)), pending_freeze_capture: None, inflight_freeze_capture: None, @@ -896,6 +891,11 @@ impl OverlaySession { frozen_window_image: None, frozen_capture_source: FrozenCaptureSource::None, capture_windows_hidden: false, + #[cfg(target_os = "macos")] + next_ocr_request_id: 0, + #[cfg(target_os = "macos")] + active_ocr_request_id: None, + #[cfg(target_os = "macos")] pending_recognize_text: None, pending_encode_png: None, pending_png_action: None, @@ -939,6 +939,13 @@ impl OverlaySession { state } + fn overlay_state_with_config(config: &OverlayConfig) -> (u32, OverlayState) { + let loupe_sample_side_px = + Self::normalized_loupe_sample_side_px(config.loupe_sample_side_px); + + (loupe_sample_side_px, Self::overlay_state_with_loupe_patch(loupe_sample_side_px)) + } + #[cfg(target_os = "macos")] /// Registers a wake callback for macOS live-stream frame notifications. pub fn set_scroll_frame_waker(&mut self, waker: Arc) { @@ -2217,23 +2224,25 @@ impl OverlaySession { #[cfg(not(target_os = "macos"))] let queued_recognize_text = false; #[cfg(target_os = "macos")] - let mut queued_recognize_text = false; + let queued_recognize_text = self.pending_recognize_text.is_some(); #[cfg(target_os = "macos")] - if let Some(image) = self.pending_recognize_text.take() { - queued_recognize_text = true; - + if !self.ocr_inflight + && let Some(request) = self.pending_recognize_text.take() + { if let Some(worker) = self.worker.as_ref() { - if let Err(image) = worker.request_recognize_text(image) { - self.pending_recognize_text = Some(image); + if let Err((request_id, image)) = + worker.request_recognize_text(request.request_id, request.image) + { + self.pending_recognize_text = + Some(PendingRecognizeTextRequest { request_id, image }); } else { self.ocr_inflight = true; } } else { - self.pending_recognize_text = Some(image); + self.pending_recognize_text = Some(request); } } - if !queued_recognize_text && let Some(image) = self.pending_encode_png.take() { if let Some(worker) = self.worker.as_ref() { if let Err(image) = worker.request_encode_png(image) { @@ -2709,10 +2718,8 @@ impl OverlaySession { OverlayControl::Continue }, #[cfg(target_os = "macos")] - WorkerResponse::RecognizedText { text } => { - self.ocr_inflight = false; - - self.handle_recognized_text_response(text) + WorkerResponse::RecognizedText { request_id, text } => { + self.handle_recognized_text_worker_response(request_id, text) }, WorkerResponse::Error { source, message } => { match source { @@ -2740,7 +2747,9 @@ impl OverlaySession { }, #[cfg(target_os = "macos")] WorkerErrorSource::RecognizeText => { - self.ocr_inflight = false; + if self.handle_recognized_text_worker_error() { + return OverlayControl::Continue; + } }, WorkerErrorSource::CaptureMonitorRegion => { self.clear_scroll_capture_inflight_request(); @@ -2767,6 +2776,7 @@ impl OverlaySession { #[cfg(target_os = "macos")] if matches!(control, OverlayControl::Continue) { + self.maybe_request_redraw_for_pending_output(); self.maybe_apply_pending_self_capture_exception_window_ids_worker_refresh(); } @@ -3046,6 +3056,19 @@ impl OverlaySession { ) } + fn initial_session_runtime(config: &OverlayConfig) -> InitialSessionRuntime { + let (live_bg_request_interval, window_list_refresh_interval, now) = Self::initial_timing(); + let (loupe_sample_side_px, state) = Self::overlay_state_with_config(config); + + InitialSessionRuntime { + live_bg_request_interval, + window_list_refresh_interval, + now, + loupe_sample_side_px, + state, + } + } + fn refresh_frozen_helper_windows_for_transition( &mut self, monitor: MonitorRect, @@ -3831,6 +3854,62 @@ impl OverlaySession { } } + #[cfg(target_os = "macos")] + fn next_ocr_request_id(&mut self) -> u64 { + let request_id = self.next_ocr_request_id; + + self.next_ocr_request_id = self.next_ocr_request_id.wrapping_add(1); + + request_id + } + + #[cfg(target_os = "macos")] + fn cancel_ocr_output_intent(&mut self) { + self.active_ocr_request_id = None; + self.pending_recognize_text = None; + } + + #[cfg(target_os = "macos")] + fn maybe_request_redraw_for_pending_output(&mut self) { + if !self.ocr_inflight + && (self.pending_recognize_text.is_some() || self.pending_encode_png.is_some()) + { + self.request_redraw_all(); + } + } + + #[cfg(target_os = "macos")] + fn handle_recognized_text_worker_response( + &mut self, + request_id: u64, + text: String, + ) -> OverlayControl { + self.ocr_inflight = false; + + if self.active_ocr_request_id != Some(request_id) { + return OverlayControl::Continue; + } + + self.active_ocr_request_id = None; + + self.handle_recognized_text_response(text) + } + + #[cfg(target_os = "macos")] + fn handle_recognized_text_worker_error(&mut self) -> bool { + self.ocr_inflight = false; + + if self.active_ocr_request_id.is_none() || self.pending_recognize_text.is_some() { + self.maybe_request_redraw_for_pending_output(); + + return true; + } + + self.active_ocr_request_id = None; + + false + } + fn maybe_stop_frozen_selection_drag_for_mouse_input( &mut self, state: ElementState, @@ -6240,6 +6319,9 @@ impl OverlaySession { return; }; + #[cfg(target_os = "macos")] + self.cancel_ocr_output_intent(); + self.pending_png_action = Some(action); match action { @@ -6267,13 +6349,16 @@ impl OverlaySession { let Some(export_image) = self.current_export_image() else { return; }; + let request_id = self.next_ocr_request_id(); self.pending_png_action = None; self.pending_encode_png = None; + self.active_ocr_request_id = Some(request_id); self.state.set_error("Recognizing text..."); - self.pending_recognize_text = Some(export_image); + self.pending_recognize_text = + Some(PendingRecognizeTextRequest { request_id, image: export_image }); self.request_redraw_all(); } @@ -7554,7 +7639,11 @@ impl OverlaySession { } fn clear_pending_output_actions(&mut self) { - self.pending_recognize_text = None; + #[cfg(target_os = "macos")] + { + self.active_ocr_request_id = None; + self.pending_recognize_text = None; + } self.pending_encode_png = None; self.pending_png_action = None; #[cfg(target_os = "macos")] @@ -8094,6 +8183,41 @@ impl Default for OverlaySession { } } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum SelectionFlowStyle { + Band, + FullBorder, +} + +#[derive(Clone, Copy, Debug)] +enum WindowRendererPath { + Overlay, + LoupeTile, +} +impl WindowRendererPath { + const fn as_str(self) -> &'static str { + match self { + Self::Overlay => "overlay", + Self::LoupeTile => "loupe_tile", + } + } +} + +#[cfg(target_os = "macos")] +#[derive(Clone, Debug, Eq, PartialEq)] +struct PendingRecognizeTextRequest { + request_id: u64, + image: RgbaImage, +} + +struct InitialSessionRuntime { + live_bg_request_interval: Duration, + window_list_refresh_interval: Duration, + now: Instant, + loupe_sample_side_px: u32, + state: OverlayState, +} + #[derive(Clone, Copy, Debug, Eq, PartialEq)] struct FrozenToolbarButtonStyle { icon_color: Color32, @@ -15371,7 +15495,11 @@ mod tests { assert_eq!(session.pending_png_action, None); assert!(session.pending_encode_png.is_none()); - assert_eq!(session.pending_recognize_text.as_ref(), Some(&expected_export)); + assert_eq!( + session.pending_recognize_text.as_ref().map(|request| &request.image), + Some(&expected_export) + ); + assert_eq!(session.active_ocr_request_id, Some(0)); assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); } @@ -15398,6 +15526,82 @@ mod tests { assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); } + #[cfg(target_os = "macos")] + #[test] + fn stale_ocr_response_is_ignored_after_copy_supersedes_ocr() { + let monitor = test_monitor(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_ocr_action(); + + let request_id = session.active_ocr_request_id.expect("ocr request id"); + + session.pending_recognize_text = None; + session.ocr_inflight = true; + + session.begin_png_action(PngAction::Copy); + + let control = session.maybe_tick_worker_response_limiter(WorkerResponse::RecognizedText { + request_id, + text: String::from("stale text"), + }); + + assert!(matches!(control, OverlayControl::Continue)); + assert_eq!(session.active_ocr_request_id, None); + assert!(!session.ocr_inflight); + assert_eq!(session.pending_png_action, Some(PngAction::Copy)); + assert_eq!(session.state.error_message.as_deref(), Some("Copying...")); + } + + #[cfg(target_os = "macos")] + #[test] + fn stale_ocr_error_is_ignored_while_newer_ocr_request_is_pending() { + let monitor = test_monitor(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_ocr_action(); + + let first_request_id = session.active_ocr_request_id.expect("first ocr request id"); + + session.pending_recognize_text = None; + session.ocr_inflight = true; + + session.begin_ocr_action(); + + let second_request_id = + session.pending_recognize_text.as_ref().expect("newer pending ocr request").request_id; + + assert_ne!(first_request_id, second_request_id); + + let control = session.maybe_tick_worker_response_limiter(WorkerResponse::Error { + source: WorkerErrorSource::RecognizeText, + message: String::from("stale OCR failure"), + }); + + assert!(matches!(control, OverlayControl::Continue)); + assert_eq!(session.active_ocr_request_id, Some(second_request_id)); + assert_eq!( + session.pending_recognize_text.as_ref().map(|request| request.request_id), + Some(second_request_id) + ); + assert!(!session.ocr_inflight); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + #[cfg(target_os = "macos")] #[test] fn duplicate_live_frames_schedule_forced_refresh_when_downward_backlog_is_fresh() { diff --git a/packages/rsnap-overlay/src/worker.rs b/packages/rsnap-overlay/src/worker.rs index 62bbfef0..fed1d9f6 100644 --- a/packages/rsnap-overlay/src/worker.rs +++ b/packages/rsnap-overlay/src/worker.rs @@ -48,6 +48,7 @@ pub(crate) enum WorkerRequest { }, #[cfg(target_os = "macos")] RecognizeText { + request_id: u64, image: RgbaImage, }, CaptureMonitorRegion { @@ -86,6 +87,7 @@ pub(crate) enum WorkerResponse { }, #[cfg(target_os = "macos")] RecognizedText { + request_id: u64, text: String, }, EncodedPng { @@ -254,6 +256,7 @@ impl OverlayWorker { fn handle_recognize_text_request( resp_tx: &Sender, response_waker: Option<&(dyn Fn() + Send + Sync)>, + request_id: u64, image: RgbaImage, ) { match ocr_macos::recognize_text_from_image(&image) { @@ -261,7 +264,7 @@ impl OverlayWorker { Self::send_response( resp_tx, response_waker, - WorkerResponse::RecognizedText { text }, + WorkerResponse::RecognizedText { request_id, text }, ); }, Err(err) => { @@ -488,11 +491,17 @@ impl OverlayWorker { } #[cfg(target_os = "macos")] - pub(crate) fn request_recognize_text(&self, image: RgbaImage) -> Result<(), RgbaImage> { - match self.req_tx.try_send(WorkerRequest::RecognizeText { image }) { + pub(crate) fn request_recognize_text( + &self, + request_id: u64, + image: RgbaImage, + ) -> Result<(), (u64, RgbaImage)> { + match self.req_tx.try_send(WorkerRequest::RecognizeText { request_id, image }) { Ok(()) => Ok(()), - Err(TrySendError::Full(WorkerRequest::RecognizeText { image })) - | Err(TrySendError::Disconnected(WorkerRequest::RecognizeText { image })) => Err(image), + Err(TrySendError::Full(WorkerRequest::RecognizeText { request_id, image })) + | Err(TrySendError::Disconnected(WorkerRequest::RecognizeText { request_id, image })) => { + Err((request_id, image)) + }, Err(TrySendError::Full(_)) | Err(TrySendError::Disconnected(_)) => { unreachable!("request_recognize_text only sends WorkerRequest::RecognizeText") }, @@ -537,7 +546,7 @@ struct PendingWorkerRequests { last_refresh_window_list: bool, last_freeze: Option<(MonitorRect, FreezeCaptureTarget)>, #[cfg(target_os = "macos")] - last_recognize_text: Option, + last_recognize_text: Option<(u64, RgbaImage)>, last_capture_region: Option<(MonitorRect, RectPoints, u64)>, last_encode: Option, } @@ -566,8 +575,8 @@ impl PendingWorkerRequests { self.last_freeze = Some((monitor, target)); }, #[cfg(target_os = "macos")] - WorkerRequest::RecognizeText { image } => { - self.last_recognize_text = Some(image); + WorkerRequest::RecognizeText { request_id, image } => { + self.last_recognize_text = Some((request_id, image)); }, WorkerRequest::CaptureMonitorRegion { monitor, rect_px, request_id } => { self.last_capture_region = Some((monitor, rect_px, request_id)); @@ -593,8 +602,13 @@ impl PendingWorkerRequests { handled_high_priority = true; } #[cfg(target_os = "macos")] - if let Some(image) = self.last_recognize_text { - OverlayWorker::handle_recognize_text_request(resp_tx, response_waker, image); + if let Some((request_id, image)) = self.last_recognize_text { + OverlayWorker::handle_recognize_text_request( + resp_tx, + response_waker, + request_id, + image, + ); handled_high_priority = true; } @@ -939,7 +953,7 @@ mod tests { let mut pending = PendingWorkerRequests::default(); pending.record(WorkerRequest::EncodePng { image: sample_image() }); - pending.record(WorkerRequest::RecognizeText { image: sample_image() }); + pending.record(WorkerRequest::RecognizeText { request_id: 7, image: sample_image() }); pending.dispatch(&mut backend, &resp_tx, ®ion_tx, None); let first = resp_rx.try_recv().expect("png response"); From 191527c62fc62fc555fa9661a364af728c3f1c82 Mon Sep 17 00:00:00 2001 From: Yvette Carlisle Date: Fri, 3 Apr 2026 12:08:50 +0800 Subject: [PATCH 6/6] {"schema":"delivery/1","type":"fix","scope":"rsnap-overlay","summary":"flush active scroll capture before OCR","intent":"keep OCR export aligned with the latest active scroll-capture state","impact":"shares the PNG export scroll-capture flush path with OCR and covers it with a regression test","breaking":false,"risk":"low","authority":"linear","delivery_mode":"status-only","refs":[]} --- packages/rsnap-overlay/src/overlay.rs | 62 ++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/packages/rsnap-overlay/src/overlay.rs b/packages/rsnap-overlay/src/overlay.rs index ba6333ea..82264ed2 100644 --- a/packages/rsnap-overlay/src/overlay.rs +++ b/packages/rsnap-overlay/src/overlay.rs @@ -6259,6 +6259,17 @@ impl OverlaySession { self.request_redraw_scroll_preview_window(); } + fn prepare_active_scroll_capture_output(&mut self) { + if !self.scroll_capture.active { + return; + } + + self.maybe_tick_scroll_capture(); + self.refresh_scroll_preview_committed_image(); + self.refresh_scroll_preview_display_image(); + self.sync_scroll_preview_segments(); + } + fn undo_scroll_capture_append(&mut self) { if !self.scroll_capture.active { return; @@ -6303,12 +6314,8 @@ impl OverlaySession { return; } - if self.scroll_capture.active { - self.maybe_tick_scroll_capture(); - self.refresh_scroll_preview_committed_image(); - self.refresh_scroll_preview_display_image(); - self.sync_scroll_preview_segments(); - } + + self.prepare_active_scroll_capture_output(); let image = if self.scroll_capture.active { self.current_scroll_preview_render_image() @@ -6346,6 +6353,8 @@ impl OverlaySession { return; } + self.prepare_active_scroll_capture_output(); + let Some(export_image) = self.current_export_image() else { return; }; @@ -15503,6 +15512,47 @@ mod tests { assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); } + #[cfg(target_os = "macos")] + #[test] + fn begin_ocr_action_ticks_active_scroll_capture_before_queueing_recognition() { + let monitor = test_monitor(); + let rect = RectPoints::new(100, 120, 512, 640); + let base = make_browser_like_worker_capture_window(512, 640, 0); + let mut session = OverlaySession::new(); + + session.worker = Some(OverlayWorker::new( + Box::new(SequenceScrollCaptureBackend::new([Some( + make_browser_like_worker_capture_window(512, 640, 84), + )])), + None, + )); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(rect); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + session.scroll_capture.active = true; + session.scroll_capture.monitor = Some(monitor); + session.scroll_capture.capture_rect_pixels = Some(rect); + session.scroll_capture.session = Some(ScrollSession::new(base, 320).unwrap()); + + enable_test_worker_scroll_capture_path(&mut session); + set_scroll_capture_input(&mut session, ScrollDirection::Down); + + session.scroll_capture.next_sample_at = Some(Instant::now() - Duration::from_millis(1)); + + session.begin_ocr_action(); + + assert!( + session.scroll_capture.inflight_request_id.is_some(), + "OCR should flush active scroll capture by kicking the same worker sample path as PNG export" + ); + assert!(session.pending_recognize_text.is_some()); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + #[cfg(target_os = "macos")] #[test] fn stale_png_response_is_ignored_after_ocr_supersedes_export() {