diff --git a/Cargo.toml b/Cargo.toml index 373b4ac8..495d6e84 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ objc2-core-media = { version = "0.3" } objc2-core-video = { version = "0.3" } objc2-foundation = { version = "0.3" } objc2-screen-capture-kit = { version = "0.3" } +objc2-vision = { version = "0.3" } pollster = { version = "0.4" } serde = { version = "1.0", features = ["derive"] } thiserror = { version = "2.0" } diff --git a/README.md b/README.md index c46fe53c..3e4b6970 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Pure-Rust menubar screenshot prototype (macOS-first). - In Frozen mode, a dragged-region capture can be dragged from inside the bright selection area to reposition it without resizing. - In Frozen mode, `Space` copies the current frozen PNG to the clipboard and exits. - In Frozen mode, Cmd+S (macOS) / Ctrl+S saves the current PNG to disk and exits. +- On macOS, Frozen mode can recognize text from the current capture and copy the result to the clipboard from the toolbar. - After a dragged region freeze, press `s` or use the frozen toolbar `Scroll Capture ↓` action to enter scroll capture. - Scroll capture is currently implemented on macOS for dragged-region freezes and uses image-first downward stitching with a live side preview. - Upward scrolling may be observed for rewind/reacquire, but it never appends stitched rows. @@ -93,6 +94,7 @@ cargo run -p rsnap ### Output (save-to-disk) - In Frozen mode, use Cmd+S (macOS) / Ctrl+S to save a PNG to disk and exit. +- On macOS, use the frozen toolbar `Recognize Text` action to copy recognized text from the current frozen capture and exit. - After entering scroll capture from a dragged region on macOS, downward scrolling may append newly proven rows into the side preview. Upward scrolling never appends. Returning to already-stitched content should not grow the export; only newly proven content may be added. 
The scroll-capture commit path uses discrete region screenshots plus pairwise image registration; clipboard and save must match the committed preview the user sees. diff --git a/apps/rsnap/src/app/capture.rs b/apps/rsnap/src/app/capture.rs index 1a0c1357..e9f69733 100644 --- a/apps/rsnap/src/app/capture.rs +++ b/apps/rsnap/src/app/capture.rs @@ -248,6 +248,12 @@ impl App { OverlayExit::PngBytes(png_bytes) => { tracing::info!(bytes = png_bytes.len(), "Capture copied to clipboard."); }, + OverlayExit::TextCopied(character_count) => { + tracing::info!( + characters = character_count, + "Recognized text copied to clipboard." + ); + }, OverlayExit::Saved(path) => { tracing::info!(path = %path.display(), "Capture saved to file."); }, diff --git a/docs/spec/v0.md b/docs/spec/v0.md index f0535309..2bdd084d 100644 --- a/docs/spec/v0.md +++ b/docs/spec/v0.md @@ -31,6 +31,7 @@ cross-platform architecture. - Hovering over a window in live mode shows a glowing border that tracks the target window. - `Space` copies the frozen PNG of the selected region/window/fullscreen to clipboard. +- On macOS, Frozen mode may recognize text from the current frozen capture and copy the recognized text to the clipboard. - Cmd+S (macOS) / Ctrl+S saves the frozen PNG to disk. - `Esc` cancels capture. - In Frozen mode, a loupe and toolbar are part of the floating HUD set and can still @@ -57,6 +58,7 @@ cross-platform architecture. 
- Left click (without drag) -> hit-test window under the cursor on the same monitor and freeze that window bounds; fallback to fullscreen of the current monitor if no window is hit - `Space` -> copy the frozen cropped PNG (region/window/fullscreen) to the system clipboard, then exit + - On macOS, the frozen toolbar may expose `Recognize Text`, which runs Apple Vision OCR on the current frozen capture, copies the recognized text to the clipboard, and exits - Cmd+S (macOS) / Ctrl+S -> save the frozen cropped PNG to disk, then exit - Esc -> cancel and exit without copying - After a dragged-region freeze enters Frozen mode, dragging inside the bright region diff --git a/packages/rsnap-overlay/Cargo.toml b/packages/rsnap-overlay/Cargo.toml index 0820ac01..14148e4d 100644 --- a/packages/rsnap-overlay/Cargo.toml +++ b/packages/rsnap-overlay/Cargo.toml @@ -49,7 +49,7 @@ objc2-core-media = { workspace = true } objc2-core-video = { workspace = true } objc2-foundation = { workspace = true } objc2-screen-capture-kit = { workspace = true } -objc2-vision = "0.3.2" +objc2-vision = { workspace = true } raw-window-handle = { workspace = true } [dev-dependencies] diff --git a/packages/rsnap-overlay/src/lib.rs b/packages/rsnap-overlay/src/lib.rs index 175c7dc0..ac359afc 100644 --- a/packages/rsnap-overlay/src/lib.rs +++ b/packages/rsnap-overlay/src/lib.rs @@ -26,6 +26,8 @@ pub mod replay_support { mod backend; #[cfg(target_os = "macos")] mod live_frame_stream_macos; +#[cfg(target_os = "macos")] +mod ocr_macos; mod overlay; mod png; mod scroll_capture; diff --git a/packages/rsnap-overlay/src/ocr_macos.rs b/packages/rsnap-overlay/src/ocr_macos.rs new file mode 100644 index 00000000..d2da7ed3 --- /dev/null +++ b/packages/rsnap-overlay/src/ocr_macos.rs @@ -0,0 +1,57 @@ +use color_eyre::eyre::{Result, WrapErr}; +use image::RgbaImage; +use objc2::rc::{self, Retained}; +use objc2::runtime::AnyObject; +use objc2::{AnyThread, ClassType}; +use objc2_foundation::{NSArray, NSData, NSDictionary}; 
+use objc2_vision::{ + VNImageOption, VNImageRequestHandler, VNRecognizeTextRequest, VNRequest, + VNRequestTextRecognitionLevel, +}; + +use crate::png; + +pub(crate) fn recognize_text_from_image(image: &RgbaImage) -> Result { + rc::autoreleasepool(|_| { + let image_data = NSData::with_bytes( + &png::rgba_image_to_png_bytes(image).wrap_err("failed to encode OCR source image")?, + ); + let options: Retained> = NSDictionary::new(); + let request_handler = VNImageRequestHandler::initWithData_options( + VNImageRequestHandler::alloc(), + &image_data, + &options, + ); + let request = VNRecognizeTextRequest::new(); + + request.setRecognitionLevel(VNRequestTextRecognitionLevel::Accurate); + request.setUsesLanguageCorrection(true); + request.setAutomaticallyDetectsLanguage(true); + + let requests: Retained> = + NSArray::from_slice(&[request.as_super().as_super()]); + + request_handler + .performRequests_error(&requests) + .wrap_err("Vision text recognition request failed")?; + + let mut lines = Vec::new(); + + if let Some(results) = request.results() { + for index in 0..results.count() { + let observation = results.objectAtIndex(index); + let candidates = observation.topCandidates(1); + let Some(candidate) = candidates.firstObject() else { + continue; + }; + let line = candidate.string().to_string(); + + if !line.trim().is_empty() { + lines.push(line); + } + } + } + + Ok(lines.join("\n")) + }) +} diff --git a/packages/rsnap-overlay/src/overlay.rs b/packages/rsnap-overlay/src/overlay.rs index f1ff244f..82264ed2 100644 --- a/packages/rsnap-overlay/src/overlay.rs +++ b/packages/rsnap-overlay/src/overlay.rs @@ -353,6 +353,8 @@ pub enum OverlayExit { Cancelled, /// The session completed by copying PNG bytes to the caller. PngBytes(Vec), + /// The session completed by copying recognized text to the clipboard. + TextCopied(usize), /// The session completed by saving a file to disk. Saved(PathBuf), /// The session failed with a user-visible error message. 
@@ -457,6 +459,8 @@ enum FrozenToolbarTool { Redo, AutoCenter, Scroll, + #[cfg(target_os = "macos")] + Ocr, Copy, Save, } @@ -471,6 +475,8 @@ impl FrozenToolbarTool { Self::Redo => "Redo", Self::AutoCenter => "Auto-center (C)", Self::Scroll => "Scroll Capture", + #[cfg(target_os = "macos")] + Self::Ocr => "Recognize Text", Self::Copy => "Copy", Self::Save => "Save", } @@ -486,6 +492,8 @@ impl FrozenToolbarTool { Self::Redo => regular::ARROW_CLOCKWISE, Self::AutoCenter => regular::TARGET, Self::Scroll => regular::MOUSE_SCROLL, + #[cfg(target_os = "macos")] + Self::Ocr => regular::SCAN, Self::Copy => regular::COPY, Self::Save => regular::FLOPPY_DISK, } @@ -496,7 +504,18 @@ impl FrozenToolbarTool { } const fn requires_final_capture(self) -> bool { - matches!(self, Self::Scroll | Self::Copy | Self::Save) + match self { + Self::Pointer + | Self::Pen + | Self::Text + | Self::Mosaic + | Self::Undo + | Self::Redo + | Self::AutoCenter => false, + Self::Scroll | Self::Copy | Self::Save => true, + #[cfg(target_os = "macos")] + Self::Ocr => true, + } } } @@ -552,26 +571,6 @@ impl DeviceCursorPointSource { } } -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum SelectionFlowStyle { - Band, - FullBorder, -} - -#[derive(Clone, Copy, Debug)] -enum WindowRendererPath { - Overlay, - LoupeTile, -} -impl WindowRendererPath { - const fn as_str(self) -> &'static str { - match self { - Self::Overlay => "overlay", - Self::LoupeTile => "loupe_tile", - } - } -} - #[derive(Clone, Debug)] /// Runtime configuration applied to a capture overlay session. 
pub struct OverlayConfig { @@ -719,9 +718,17 @@ pub struct OverlaySession { frozen_window_image: Option, frozen_capture_source: FrozenCaptureSource, capture_windows_hidden: bool, + #[cfg(target_os = "macos")] + next_ocr_request_id: u64, + #[cfg(target_os = "macos")] + active_ocr_request_id: Option, + #[cfg(target_os = "macos")] + pending_recognize_text: Option, pending_encode_png: Option, pending_png_action: Option, #[cfg(target_os = "macos")] + ocr_inflight: bool, + #[cfg(target_os = "macos")] png_encode_inflight: bool, #[cfg(target_os = "macos")] pending_self_capture_exception_window_ids_worker_refresh: bool, @@ -782,18 +789,30 @@ impl OverlaySession { Self::with_config(OverlayConfig::default()) } + fn initial_timing() -> (Duration, Duration, Instant) { + (Duration::from_millis(500), LIVE_WINDOW_LIST_REFRESH_INTERVAL, Instant::now()) + } + #[must_use] /// Creates a new overlay session with the provided runtime configuration. pub fn with_config(config: OverlayConfig) -> Self { - let live_bg_request_interval = Duration::from_millis(500); - let loupe_sample_side_px = - Self::normalized_loupe_sample_side_px(config.loupe_sample_side_px); - let (window_list_refresh_interval, now) = - (LIVE_WINDOW_LIST_REFRESH_INTERVAL, Instant::now()); + let runtime = Self::initial_session_runtime(&config); #[cfg(not(target_os = "macos"))] let cursor_device = Self::try_create_cursor_device(); - let state = Self::overlay_state_with_loupe_patch(loupe_sample_side_px); + Self::build_with_config( + config, + runtime, + #[cfg(not(target_os = "macos"))] + cursor_device, + ) + } + + fn build_with_config( + config: OverlayConfig, + runtime: InitialSessionRuntime, + #[cfg(not(target_os = "macos"))] cursor_device: Option, + ) -> Self { Self { config, worker: None, @@ -803,7 +822,7 @@ impl OverlaySession { live_sample_stream: None, #[cfg(not(target_os = "macos"))] cursor_device, - state, + state: runtime.state, cursor_monitor: None, windows: HashMap::new(), hud_window: None, @@ -821,16 +840,16 @@ 
impl OverlaySession { toolbar_outer_pos: None, toolbar_inner_size_points: None, gpu: None, - last_hud_window_move_at: now, - last_loupe_window_move_at: now, - last_present_at: Instant::now(), - last_live_cursor_poll_at: now - CURSOR_POLL_INTERVAL_MIN, - last_frozen_cursor_poll_at: now - CURSOR_POLL_INTERVAL_MIN, + last_hud_window_move_at: runtime.now, + last_loupe_window_move_at: runtime.now, + last_present_at: runtime.now, + last_live_cursor_poll_at: runtime.now - CURSOR_POLL_INTERVAL_MIN, + last_frozen_cursor_poll_at: runtime.now - CURSOR_POLL_INTERVAL_MIN, window_list_snapshot: None, - last_window_list_refresh_request_at: now - window_list_refresh_interval, - window_list_refresh_interval, - last_live_bg_request_at: Instant::now() - live_bg_request_interval, - live_bg_request_interval, + last_window_list_refresh_request_at: runtime.now - runtime.window_list_refresh_interval, + window_list_refresh_interval: runtime.window_list_refresh_interval, + last_live_bg_request_at: runtime.now - runtime.live_bg_request_interval, + live_bg_request_interval: runtime.live_bg_request_interval, hit_test_send_full_count: 0, hit_test_send_disconnected_count: 0, hit_test_request_id: 0, @@ -855,13 +874,13 @@ impl OverlaySession { keyboard_modifiers: ModifiersState::default(), event_loop_phase: OverlayEventLoopPhase::Idle, event_loop_progress_seq: 0, - event_loop_last_progress_at: now, + event_loop_last_progress_at: runtime.now, event_loop_last_progress_window_id: None, event_loop_last_progress_monitor_id: None, event_loop_last_progress_detail: None, event_loop_last_stall_warn_at: None, - loupe_patch_width_px: loupe_sample_side_px, - loupe_patch_height_px: loupe_sample_side_px, + loupe_patch_width_px: runtime.loupe_sample_side_px, + loupe_patch_height_px: runtime.loupe_sample_side_px, egui_repaint_deadline: Arc::new(Mutex::new(None)), pending_freeze_capture: None, inflight_freeze_capture: None, @@ -872,9 +891,17 @@ impl OverlaySession { frozen_window_image: None, 
frozen_capture_source: FrozenCaptureSource::None, capture_windows_hidden: false, + #[cfg(target_os = "macos")] + next_ocr_request_id: 0, + #[cfg(target_os = "macos")] + active_ocr_request_id: None, + #[cfg(target_os = "macos")] + pending_recognize_text: None, pending_encode_png: None, pending_png_action: None, #[cfg(target_os = "macos")] + ocr_inflight: false, + #[cfg(target_os = "macos")] png_encode_inflight: false, #[cfg(target_os = "macos")] pending_self_capture_exception_window_ids_worker_refresh: false, @@ -912,6 +939,13 @@ impl OverlaySession { state } + fn overlay_state_with_config(config: &OverlayConfig) -> (u32, OverlayState) { + let loupe_sample_side_px = + Self::normalized_loupe_sample_side_px(config.loupe_sample_side_px); + + (loupe_sample_side_px, Self::overlay_state_with_loupe_patch(loupe_sample_side_px)) + } + #[cfg(target_os = "macos")] /// Registers a wake callback for macOS live-stream frame notifications. pub fn set_scroll_frame_waker(&mut self, waker: Arc) { @@ -1104,6 +1138,7 @@ impl OverlaySession { self.inflight_freeze_capture.is_some() || self.pending_click_hit_test_request_id.is_some() || self.window_list_refresh_inflight + || self.ocr_inflight || self.png_encode_inflight } @@ -2186,7 +2221,29 @@ impl OverlaySession { } } - if let Some(image) = self.pending_encode_png.take() { + #[cfg(not(target_os = "macos"))] + let queued_recognize_text = false; + #[cfg(target_os = "macos")] + let queued_recognize_text = self.pending_recognize_text.is_some(); + + #[cfg(target_os = "macos")] + if !self.ocr_inflight + && let Some(request) = self.pending_recognize_text.take() + { + if let Some(worker) = self.worker.as_ref() { + if let Err((request_id, image)) = + worker.request_recognize_text(request.request_id, request.image) + { + self.pending_recognize_text = + Some(PendingRecognizeTextRequest { request_id, image }); + } else { + self.ocr_inflight = true; + } + } else { + self.pending_recognize_text = Some(request); + } + } + if !queued_recognize_text && 
let Some(image) = self.pending_encode_png.take() { if let Some(worker) = self.worker.as_ref() { if let Err(image) = worker.request_encode_png(image) { self.pending_encode_png = Some(image); @@ -2660,6 +2717,10 @@ impl OverlaySession { OverlayControl::Continue }, + #[cfg(target_os = "macos")] + WorkerResponse::RecognizedText { request_id, text } => { + self.handle_recognized_text_worker_response(request_id, text) + }, WorkerResponse::Error { source, message } => { match source { WorkerErrorSource::FreezeCapture => { @@ -2684,6 +2745,12 @@ impl OverlaySession { self.png_encode_inflight = false; } }, + #[cfg(target_os = "macos")] + WorkerErrorSource::RecognizeText => { + if self.handle_recognized_text_worker_error() { + return OverlayControl::Continue; + } + }, WorkerErrorSource::CaptureMonitorRegion => { self.clear_scroll_capture_inflight_request(); self.scroll_capture_set_error(message); @@ -2709,6 +2776,7 @@ impl OverlaySession { #[cfg(target_os = "macos")] if matches!(control, OverlayControl::Continue) { + self.maybe_request_redraw_for_pending_output(); self.maybe_apply_pending_self_capture_exception_window_ids_worker_refresh(); } @@ -2988,6 +3056,19 @@ impl OverlaySession { ) } + fn initial_session_runtime(config: &OverlayConfig) -> InitialSessionRuntime { + let (live_bg_request_interval, window_list_refresh_interval, now) = Self::initial_timing(); + let (loupe_sample_side_px, state) = Self::overlay_state_with_config(config); + + InitialSessionRuntime { + live_bg_request_interval, + window_list_refresh_interval, + now, + loupe_sample_side_px, + state, + } + } + fn refresh_frozen_helper_windows_for_transition( &mut self, monitor: MonitorRect, @@ -3725,7 +3806,9 @@ impl OverlaySession { } fn handle_encoded_png_response(&mut self, png_bytes: Vec) -> OverlayControl { - let action = self.pending_png_action.take().unwrap_or(PngAction::Copy); + let Some(action) = self.pending_png_action.take() else { + return OverlayControl::Continue; + }; match action { PngAction::Copy 
=> match output::write_png_bytes_to_clipboard(&png_bytes) { @@ -3751,6 +3834,82 @@ impl OverlaySession { } } + #[cfg(target_os = "macos")] + fn handle_recognized_text_response(&mut self, text: String) -> OverlayControl { + if text.trim().is_empty() { + self.state.set_error(String::from("No text recognized.")); + self.request_redraw_all(); + + return OverlayControl::Continue; + } + + match output::write_text_to_clipboard(&text) { + Ok(()) => self.exit(OverlayExit::TextCopied(text.chars().count())), + Err(err) => { + self.state.set_error(format!("{err:#}")); + self.request_redraw_all(); + + OverlayControl::Continue + }, + } + } + + #[cfg(target_os = "macos")] + fn next_ocr_request_id(&mut self) -> u64 { + let request_id = self.next_ocr_request_id; + + self.next_ocr_request_id = self.next_ocr_request_id.wrapping_add(1); + + request_id + } + + #[cfg(target_os = "macos")] + fn cancel_ocr_output_intent(&mut self) { + self.active_ocr_request_id = None; + self.pending_recognize_text = None; + } + + #[cfg(target_os = "macos")] + fn maybe_request_redraw_for_pending_output(&mut self) { + if !self.ocr_inflight + && (self.pending_recognize_text.is_some() || self.pending_encode_png.is_some()) + { + self.request_redraw_all(); + } + } + + #[cfg(target_os = "macos")] + fn handle_recognized_text_worker_response( + &mut self, + request_id: u64, + text: String, + ) -> OverlayControl { + self.ocr_inflight = false; + + if self.active_ocr_request_id != Some(request_id) { + return OverlayControl::Continue; + } + + self.active_ocr_request_id = None; + + self.handle_recognized_text_response(text) + } + + #[cfg(target_os = "macos")] + fn handle_recognized_text_worker_error(&mut self) -> bool { + self.ocr_inflight = false; + + if self.active_ocr_request_id.is_none() || self.pending_recognize_text.is_some() { + self.maybe_request_redraw_for_pending_output(); + + return true; + } + + self.active_ocr_request_id = None; + + false + } + fn maybe_stop_frozen_selection_drag_for_mouse_input( &mut 
self, state: ElementState, @@ -6100,6 +6259,17 @@ impl OverlaySession { self.request_redraw_scroll_preview_window(); } + fn prepare_active_scroll_capture_output(&mut self) { + if !self.scroll_capture.active { + return; + } + + self.maybe_tick_scroll_capture(); + self.refresh_scroll_preview_committed_image(); + self.refresh_scroll_preview_display_image(); + self.sync_scroll_preview_segments(); + } + fn undo_scroll_capture_append(&mut self) { if !self.scroll_capture.active { return; @@ -6144,12 +6314,8 @@ impl OverlaySession { return; } - if self.scroll_capture.active { - self.maybe_tick_scroll_capture(); - self.refresh_scroll_preview_committed_image(); - self.refresh_scroll_preview_display_image(); - self.sync_scroll_preview_segments(); - } + + self.prepare_active_scroll_capture_output(); let image = if self.scroll_capture.active { self.current_scroll_preview_render_image() @@ -6160,6 +6326,9 @@ impl OverlaySession { return; }; + #[cfg(target_os = "macos")] + self.cancel_ocr_output_intent(); + self.pending_png_action = Some(action); match action { @@ -6172,6 +6341,37 @@ impl OverlaySession { self.request_redraw_all(); } + #[cfg(target_os = "macos")] + fn begin_ocr_action(&mut self) { + if !matches!(self.state.mode, OverlayMode::Frozen) { + return; + } + if !self.frozen_final_capture_ready() { + self.state.set_error("Preparing capture..."); + self.request_redraw_all(); + + return; + } + + self.prepare_active_scroll_capture_output(); + + let Some(export_image) = self.current_export_image() else { + return; + }; + let request_id = self.next_ocr_request_id(); + + self.pending_png_action = None; + self.pending_encode_png = None; + self.active_ocr_request_id = Some(request_id); + + self.state.set_error("Recognizing text..."); + + self.pending_recognize_text = + Some(PendingRecognizeTextRequest { request_id, image: export_image }); + + self.request_redraw_all(); + } + fn handle_redraw_requested(&mut self, window_id: WindowId) -> OverlayControl { let now = Instant::now(); 
@@ -7308,6 +7508,12 @@ impl OverlaySession { OverlayControl::Continue }, FrozenToolbarTool::Scroll => self.start_scroll_capture(), + #[cfg(target_os = "macos")] + FrozenToolbarTool::Ocr => { + self.begin_ocr_action(); + + OverlayControl::Continue + }, _ => OverlayControl::Continue, } } @@ -7332,6 +7538,7 @@ impl OverlaySession { let (exit_kind, png_bytes_len, saved_path, error_message) = match &exit { OverlayExit::Cancelled => ("cancelled", None, None, None), OverlayExit::PngBytes(png_bytes) => ("png_bytes", Some(png_bytes.len()), None, None), + OverlayExit::TextCopied(_) => ("text_copied", None, None, None), OverlayExit::Saved(path) => ("saved", None, Some(path.display().to_string()), None), OverlayExit::Error(message) => ("error", None, None, Some(message.as_str())), }; @@ -7426,10 +7633,7 @@ impl OverlaySession { self.toolbar_pointer_local = None; self.stop_frozen_selection_drag(); - - self.pending_encode_png = None; - self.pending_png_action = None; - self.keyboard_modifiers = ModifiersState::default(); + self.clear_pending_output_actions(); tracing::info!( op = "overlay.exit_end", @@ -7443,6 +7647,22 @@ impl OverlaySession { OverlayControl::Exit(exit) } + fn clear_pending_output_actions(&mut self) { + #[cfg(target_os = "macos")] + { + self.active_ocr_request_id = None; + self.pending_recognize_text = None; + } + self.pending_encode_png = None; + self.pending_png_action = None; + #[cfg(target_os = "macos")] + { + self.ocr_inflight = false; + self.png_encode_inflight = false; + } + self.keyboard_modifiers = ModifiersState::default(); + } + fn initialize_cursor_state_for_cursor( &mut self, cursor: GlobalPoint, @@ -7972,6 +8192,41 @@ impl Default for OverlaySession { } } +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum SelectionFlowStyle { + Band, + FullBorder, +} + +#[derive(Clone, Copy, Debug)] +enum WindowRendererPath { + Overlay, + LoupeTile, +} +impl WindowRendererPath { + const fn as_str(self) -> &'static str { + match self { + Self::Overlay => "overlay", 
+ Self::LoupeTile => "loupe_tile", + } + } +} + +#[cfg(target_os = "macos")] +#[derive(Clone, Debug, Eq, PartialEq)] +struct PendingRecognizeTextRequest { + request_id: u64, + image: RgbaImage, +} + +struct InitialSessionRuntime { + live_bg_request_interval: Duration, + window_list_refresh_interval: Duration, + now: Instant, + loupe_sample_side_px: u32, + state: OverlayState, +} + #[derive(Clone, Copy, Debug, Eq, PartialEq)] struct FrozenToolbarButtonStyle { icon_color: Color32, @@ -10624,8 +10879,27 @@ impl WindowRenderer { } fn frozen_toolbar_tools(toolbar_state: &FrozenToolbarState) -> &'static [FrozenToolbarTool] { + #[cfg(target_os = "macos")] + const TOOLS_SCROLL_MODE: [FrozenToolbarTool; 3] = + [FrozenToolbarTool::Ocr, FrozenToolbarTool::Copy, FrozenToolbarTool::Save]; + #[cfg(not(target_os = "macos"))] const TOOLS_SCROLL_MODE: [FrozenToolbarTool; 2] = [FrozenToolbarTool::Copy, FrozenToolbarTool::Save]; + #[cfg(target_os = "macos")] + const TOOLS_WITH_SCROLL_AND_AUTO_CENTER: [FrozenToolbarTool; 11] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::AutoCenter, + FrozenToolbarTool::Scroll, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITH_SCROLL_AND_AUTO_CENTER: [FrozenToolbarTool; 10] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -10638,6 +10912,20 @@ impl WindowRenderer { FrozenToolbarTool::Copy, FrozenToolbarTool::Save, ]; + #[cfg(target_os = "macos")] + const TOOLS_WITH_AUTO_CENTER: [FrozenToolbarTool; 10] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::AutoCenter, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = 
"macos"))] const TOOLS_WITH_AUTO_CENTER: [FrozenToolbarTool; 9] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -10649,6 +10937,20 @@ impl WindowRenderer { FrozenToolbarTool::Copy, FrozenToolbarTool::Save, ]; + #[cfg(target_os = "macos")] + const TOOLS_WITH_SCROLL: [FrozenToolbarTool; 10] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::Scroll, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITH_SCROLL: [FrozenToolbarTool; 9] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -10660,6 +10962,19 @@ impl WindowRenderer { FrozenToolbarTool::Copy, FrozenToolbarTool::Save, ]; + #[cfg(target_os = "macos")] + const TOOLS_WITHOUT_SCROLL: [FrozenToolbarTool; 9] = [ + FrozenToolbarTool::Pointer, + FrozenToolbarTool::Pen, + FrozenToolbarTool::Text, + FrozenToolbarTool::Mosaic, + FrozenToolbarTool::Undo, + FrozenToolbarTool::Redo, + FrozenToolbarTool::Ocr, + FrozenToolbarTool::Copy, + FrozenToolbarTool::Save, + ]; + #[cfg(not(target_os = "macos"))] const TOOLS_WITHOUT_SCROLL: [FrozenToolbarTool; 8] = [ FrozenToolbarTool::Pointer, FrozenToolbarTool::Pen, @@ -14964,7 +15279,6 @@ mod tests { Some(session.frozen_toolbar_default_position_for_capture_rect(monitor, capture_rect)) ); } - #[test] fn auto_center_toolbar_tool_only_appears_when_available() { let default_tools = WindowRenderer::frozen_toolbar_tools(&FrozenToolbarState::default()); @@ -14975,6 +15289,12 @@ mod tests { assert!(!default_tools.contains(&FrozenToolbarTool::AutoCenter)); assert!(auto_center_tools.contains(&FrozenToolbarTool::AutoCenter)); + + #[cfg(target_os = "macos")] + { + assert!(default_tools.contains(&FrozenToolbarTool::Ocr)); + assert!(auto_center_tools.contains(&FrozenToolbarTool::Ocr)); + } } #[test] @@ -15161,6 +15481,177 @@ mod tests { 
assert_eq!(session.state.error_message.as_deref(), Some("Copying...")); } + #[cfg(target_os = "macos")] + #[test] + fn begin_ocr_action_clears_stale_png_output_intent() { + let monitor = test_monitor(); + let expected_export = test_frozen_image(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, expected_export.clone()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_png_action(PngAction::Copy); + + assert_eq!(session.pending_png_action, Some(PngAction::Copy)); + assert_eq!(session.pending_encode_png.as_ref(), Some(&expected_export)); + + session.begin_ocr_action(); + + assert_eq!(session.pending_png_action, None); + assert!(session.pending_encode_png.is_none()); + assert_eq!( + session.pending_recognize_text.as_ref().map(|request| &request.image), + Some(&expected_export) + ); + assert_eq!(session.active_ocr_request_id, Some(0)); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + + #[cfg(target_os = "macos")] + #[test] + fn begin_ocr_action_ticks_active_scroll_capture_before_queueing_recognition() { + let monitor = test_monitor(); + let rect = RectPoints::new(100, 120, 512, 640); + let base = make_browser_like_worker_capture_window(512, 640, 0); + let mut session = OverlaySession::new(); + + session.worker = Some(OverlayWorker::new( + Box::new(SequenceScrollCaptureBackend::new([Some( + make_browser_like_worker_capture_window(512, 640, 84), + )])), + None, + )); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(rect); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + session.scroll_capture.active = true; + 
session.scroll_capture.monitor = Some(monitor); + session.scroll_capture.capture_rect_pixels = Some(rect); + session.scroll_capture.session = Some(ScrollSession::new(base, 320).unwrap()); + + enable_test_worker_scroll_capture_path(&mut session); + set_scroll_capture_input(&mut session, ScrollDirection::Down); + + session.scroll_capture.next_sample_at = Some(Instant::now() - Duration::from_millis(1)); + + session.begin_ocr_action(); + + assert!( + session.scroll_capture.inflight_request_id.is_some(), + "OCR should flush active scroll capture by kicking the same worker sample path as PNG export" + ); + assert!(session.pending_recognize_text.is_some()); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + + /* Begins a PNG copy and then an OCR action on the same frozen drag-region capture, feeds in an encoded-PNG response, and checks it is ignored: control stays Continue, pending_png_action is None, and the status message still reads "Recognizing text...". */ #[cfg(target_os = "macos")] + #[test] + fn stale_png_response_is_ignored_after_ocr_supersedes_export() { + let monitor = test_monitor(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_png_action(PngAction::Copy); + session.begin_ocr_action(); + + let control = session.handle_encoded_png_response(Vec::new()); + + assert!(matches!(control, OverlayControl::Continue)); + assert_eq!(session.pending_png_action, None); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + + /* Begins an OCR action, clears pending_recognize_text and sets ocr_inflight by hand (presumably mimicking a request already handed to the worker; confirm against begin_ocr_action), then begins a PNG copy. A RecognizedText response carrying the old request id must be ignored: control stays Continue, the OCR id and in-flight flag are cleared, and the copy action with its "Copying..." message stays in place. */ #[cfg(target_os = "macos")] + #[test] + fn stale_ocr_response_is_ignored_after_copy_supersedes_ocr() { + let monitor = test_monitor(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = 
FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_ocr_action(); + + let request_id = session.active_ocr_request_id.expect("ocr request id"); + + session.pending_recognize_text = None; + session.ocr_inflight = true; + + session.begin_png_action(PngAction::Copy); + + let control = session.maybe_tick_worker_response_limiter(WorkerResponse::RecognizedText { + request_id, + text: String::from("stale text"), + }); + + assert!(matches!(control, OverlayControl::Continue)); + assert_eq!(session.active_ocr_request_id, None); + assert!(!session.ocr_inflight); + assert_eq!(session.pending_png_action, Some(PngAction::Copy)); + assert_eq!(session.state.error_message.as_deref(), Some("Copying...")); + } + + /* Begins OCR twice, forcing the first request into the in-flight state by hand before the second begin_ocr_action call. A RecognizeText worker error delivered afterwards must leave the second request active and pending, clear ocr_inflight, and keep the "Recognizing text..." message. */ #[cfg(target_os = "macos")] + #[test] + fn stale_ocr_error_is_ignored_while_newer_ocr_request_is_pending() { + let monitor = test_monitor(); + let mut session = OverlaySession::new(); + + session.state.begin_freeze(monitor); + session.state.finish_freeze(monitor, test_frozen_image()); + + session.state.frozen_capture_rect = Some(RectPoints::new(100, 120, 220, 180)); + session.frozen_capture_source = FrozenCaptureSource::DragRegion; + session.authoritative_frozen_capture_ready = true; + + session.begin_ocr_action(); + + let first_request_id = session.active_ocr_request_id.expect("first ocr request id"); + + session.pending_recognize_text = None; + session.ocr_inflight = true; + + session.begin_ocr_action(); + + let second_request_id = + session.pending_recognize_text.as_ref().expect("newer pending ocr request").request_id; + + /* The second begin_ocr_action call must have issued a fresh request id. */ assert_ne!(first_request_id, second_request_id); + + let control = session.maybe_tick_worker_response_limiter(WorkerResponse::Error { + source: WorkerErrorSource::RecognizeText, + message: String::from("stale OCR failure"), + }); + + assert!(matches!(control, OverlayControl::Continue)); + assert_eq!(session.active_ocr_request_id, Some(second_request_id)); + assert_eq!( + 
session.pending_recognize_text.as_ref().map(|request| request.request_id), + Some(second_request_id) + ); + assert!(!session.ocr_inflight); + assert_eq!(session.state.error_message.as_deref(), Some("Recognizing text...")); + } + #[cfg(target_os = "macos")] #[test] fn duplicate_live_frames_schedule_forced_refresh_when_downward_backlog_is_fresh() { @@ -15408,6 +15899,7 @@ mod tests { let mut session = OverlaySession { window_list_refresh_inflight: true, drop_next_window_list_refresh_snapshot: true, + ocr_inflight: true, png_encode_inflight: true, pending_self_capture_exception_window_ids_worker_refresh: true, authoritative_frozen_capture_ready: true, @@ -15427,6 +15919,7 @@ mod tests { assert!(!session.window_list_refresh_inflight); assert!(!session.drop_next_window_list_refresh_snapshot); + assert!(!session.ocr_inflight); assert!(!session.png_encode_inflight); assert!(!session.pending_self_capture_exception_window_ids_worker_refresh); assert!(!session.authoritative_frozen_capture_ready); @@ -18735,6 +19228,8 @@ mod tests { assert!(!FrozenToolbarTool::Scroll.is_mode_tool()); assert!(!FrozenToolbarTool::Copy.is_mode_tool()); assert!(!FrozenToolbarTool::Save.is_mode_tool()); + #[cfg(target_os = "macos")] + assert!(!FrozenToolbarTool::Ocr.is_mode_tool()); } #[test] @@ -18755,6 +19250,8 @@ mod tests { assert!(FrozenToolbarTool::Scroll.requires_final_capture()); assert!(FrozenToolbarTool::Copy.requires_final_capture()); assert!(FrozenToolbarTool::Save.requires_final_capture()); + #[cfg(target_os = "macos")] + assert!(FrozenToolbarTool::Ocr.requires_final_capture()); } #[test] diff --git a/packages/rsnap-overlay/src/worker.rs b/packages/rsnap-overlay/src/worker.rs index 6e4650b2..fed1d9f6 100644 --- a/packages/rsnap-overlay/src/worker.rs +++ b/packages/rsnap-overlay/src/worker.rs @@ -11,6 +11,8 @@ use std::time::{Duration, Instant}; use image::RgbaImage; use crate::backend::CaptureBackend; +#[cfg(target_os = "macos")] +use crate::ocr_macos; use crate::png; 
#[cfg(not(target_os = "macos"))] use crate::state::LiveCursorSample; @@ -44,6 +46,11 @@ pub(crate) enum WorkerRequest { monitor: MonitorRect, target: FreezeCaptureTarget, }, + /* OCR job for the worker; request_id is echoed back in WorkerResponse::RecognizedText. */ #[cfg(target_os = "macos")] + RecognizeText { + request_id: u64, + image: RgbaImage, + }, CaptureMonitorRegion { monitor: MonitorRect, rect_px: RectPoints, @@ -78,6 +85,11 @@ pub(crate) enum WorkerResponse { window_image: Option, captured_window_id: Option, }, + /* Text recognized for the RecognizeText request that carried the same request_id. */ #[cfg(target_os = "macos")] + RecognizedText { + request_id: u64, + text: String, + }, EncodedPng { png_bytes: Vec, }, @@ -91,6 +103,8 @@ pub(crate) enum WorkerResponse { pub(crate) enum WorkerErrorSource { EncodePng, FreezeCapture, + /* Tags errors sent by handle_recognize_text_request. */ #[cfg(target_os = "macos")] + RecognizeText, RefreshWindowList, CaptureMonitorRegion, } @@ -238,6 +252,34 @@ impl OverlayWorker { } } + /* Runs ocr_macos::recognize_text_from_image on `image` and sends the outcome through send_response: RecognizedText { request_id, text } on success, otherwise WorkerResponse::Error with source RecognizeText and the error rendered with {err:#}. The error response does not carry request_id. */ #[cfg(target_os = "macos")] + fn handle_recognize_text_request( + resp_tx: &Sender, + response_waker: Option<&(dyn Fn() + Send + Sync)>, + request_id: u64, + image: RgbaImage, + ) { + match ocr_macos::recognize_text_from_image(&image) { + Ok(text) => { + Self::send_response( + resp_tx, + response_waker, + WorkerResponse::RecognizedText { request_id, text }, + ); + }, + Err(err) => { + Self::send_response( + resp_tx, + response_waker, + WorkerResponse::Error { + source: WorkerErrorSource::RecognizeText, + message: format!("{err:#}"), + }, + ); + }, + } + } + fn handle_refresh_window_list_request( backend: &mut dyn CaptureBackend, resp_tx: &Sender, @@ -448,6 +490,23 @@ impl OverlayWorker { } } + /* Submits a RecognizeText request through req_tx.try_send. Returns Ok(()) when the request was queued; when the channel reports Full or Disconnected, returns Err((request_id, image)) so the caller gets the id and image back. */ #[cfg(target_os = "macos")] + pub(crate) fn request_recognize_text( + &self, + request_id: u64, + image: RgbaImage, + ) -> Result<(), (u64, RgbaImage)> { + match self.req_tx.try_send(WorkerRequest::RecognizeText { request_id, image }) { + Ok(()) => Ok(()), + Err(TrySendError::Full(WorkerRequest::RecognizeText { request_id, image })) + | Err(TrySendError::Disconnected(WorkerRequest::RecognizeText { request_id, image })) => { + Err((request_id, image)) + }, + 
/* Only a RecognizeText request is passed to try_send above, so the returned error is not expected to wrap any other variant. */ Err(TrySendError::Full(_)) | Err(TrySendError::Disconnected(_)) => { + unreachable!("request_recognize_text only sends WorkerRequest::RecognizeText") + }, + } + } pub(crate) fn request_capture_monitor_region( &self, monitor: MonitorRect, @@ -486,6 +545,8 @@ struct PendingWorkerRequests { last_sample_cursor: Option<(MonitorRect, GlobalPoint, u64, bool, u32, u32)>, last_refresh_window_list: bool, last_freeze: Option<(MonitorRect, FreezeCaptureTarget)>, + /* Latest RecognizeText request stored by record; a newer request replaces it. */ #[cfg(target_os = "macos")] + last_recognize_text: Option<(u64, RgbaImage)>, last_capture_region: Option<(MonitorRect, RectPoints, u64)>, last_encode: Option, } @@ -513,6 +574,10 @@ impl PendingWorkerRequests { WorkerRequest::FreezeCapture { monitor, target } => { self.last_freeze = Some((monitor, target)); }, + #[cfg(target_os = "macos")] + WorkerRequest::RecognizeText { request_id, image } => { + self.last_recognize_text = Some((request_id, image)); + }, WorkerRequest::CaptureMonitorRegion { monitor, rect_px, request_id } => { self.last_capture_region = Some((monitor, rect_px, request_id)); }, @@ -529,11 +594,29 @@ impl PendingWorkerRequests { _region_capture_resp_tx: &Sender, response_waker: Option<&(dyn Fn() + Send + Sync)>, ) { + /* Set once an encode or OCR request has been handled; dispatch then returns before the freeze handling further down. */ let mut handled_high_priority = false; + if let Some(image) = self.last_encode { OverlayWorker::handle_encode_request(resp_tx, response_waker, image); + handled_high_priority = true; + } + /* OCR is handled in the same pass as encode, so a batch holding both gets both responses. */ #[cfg(target_os = "macos")] + if let Some((request_id, image)) = self.last_recognize_text { + OverlayWorker::handle_recognize_text_request( + resp_tx, + response_waker, + request_id, + image, + ); + + handled_high_priority = true; + } + + if handled_high_priority { return; } + if let Some((monitor, target)) = self.last_freeze { OverlayWorker::handle_freeze_request(backend, resp_tx, response_waker, monitor, target); @@ -605,6 +688,8 @@ mod tests { CapturedMonitorRegionResponse, CapturedMonitorRegionResult, OverlayWorker, WorkerErrorSource, WorkerResponse, }; + #[cfg(target_os = "macos")] + use 
crate::worker::{PendingWorkerRequests, WorkerRequest}; enum MockScrollCaptureResult { Image(RgbaImage), @@ -857,4 +942,30 @@ mod tests { other => panic!("expected worker error, got {other:?}"), } } + + #[cfg(target_os = "macos")] + #[test] + fn dispatch_processes_encode_and_recognize_requests_from_same_batch() { + let (resp_tx, resp_rx) = mpsc::channel::(); + let (region_tx, region_rx) = mpsc::channel::(); + let mut backend = + MockScrollCaptureBackend { scroll_capture_result: MockScrollCaptureResult::NoNewFrame }; + let mut pending = PendingWorkerRequests::default(); + + pending.record(WorkerRequest::EncodePng { image: sample_image() }); + pending.record(WorkerRequest::RecognizeText { request_id: 7, image: sample_image() }); + pending.dispatch(&mut backend, &resp_tx, ®ion_tx, None); + + let first = resp_rx.try_recv().expect("png response"); + let second = resp_rx.try_recv().expect("ocr response"); + + assert!(matches!(first, WorkerResponse::EncodedPng { .. })); + assert!(matches!( + second, + WorkerResponse::RecognizedText { .. } + | WorkerResponse::Error { source: WorkerErrorSource::RecognizeText, .. } + )); + assert!(resp_rx.try_recv().is_err()); + assert!(region_rx.try_recv().is_err()); + } }