From 4e9fa709ce52a8adc16e87e3486eadcb27131ebe Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Mon, 29 Jun 2026 10:16:36 -0500 Subject: [PATCH 01/10] fix: play start cue after capture is ready --- Sources/Fluid/ContentView.swift | 37 +++++-- Sources/Fluid/Services/ASRService.swift | 96 ++++++++++++++++++- .../Services/TranscriptionSoundPlayer.swift | 36 +++++-- 3 files changed, 145 insertions(+), 24 deletions(-) diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index 1a5d3ec3..ff9cf420 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -2830,15 +2830,13 @@ struct ContentView: View { self.menuBarManager.showRecordingOverlayImmediately() } - if !self.isRecordingForCommand, !self.isRecordingForRewrite { - TranscriptionSoundPlayer.shared.playStartSound() - } - Task { await self.asr.start() if !self.asr.isRunning { self.menuBarManager.hideRecordingOverlayImmediately(reason: "asr_start_failed") + return } + await self.playStartCueWhenCaptureReady() } // Pre-load model in background while recording (avoids 10s freeze on stop) @@ -3077,9 +3075,9 @@ struct ContentView: View { "Starting voice recording for command", source: "ContentView" ) - TranscriptionSoundPlayer.shared.playStartSound() Task { await self.asr.start() + await self.playStartCueWhenCaptureReady() } }, rewriteModeCallback: { @@ -3112,9 +3110,9 @@ struct ContentView: View { // Start recording immediately for the edit instruction DebugLogger.shared.info("Starting voice recording for edit mode", source: "ContentView") - TranscriptionSoundPlayer.shared.playStartSound() Task { await self.asr.start() + await self.playStartCueWhenCaptureReady() } }, isDictateRecordingProvider: { @@ -3430,24 +3428,45 @@ extension ContentView { self.appBench("asr_start_skipped reason=already_running") return } - if SettingsStore.shared.enableTranscriptionSounds { - TranscriptionSoundPlayer.shared.playStartSound() - } Task { let asrStartStartedAt = ProcessInfo.processInfo.systemUptime DebugLogger.shared.benchmark("APP_BENCH", message: "asr_start_call", source: "AppBenchmark") await self.asr.start() if !self.asr.isRunning { self.menuBarManager.hideRecordingOverlayImmediately(reason: "asr_start_failed") + DebugLogger.shared.benchmark( + "APP_BENCH", + message: "asr_start_return elapsedMs=\(Int(((ProcessInfo.processInfo.systemUptime - asrStartStartedAt) * 1000).rounded()))", + source: "AppBenchmark" + ) + return } DebugLogger.shared.benchmark( "APP_BENCH", message: "asr_start_return elapsedMs=\(Int(((ProcessInfo.processInfo.systemUptime - asrStartStartedAt) * 1000).rounded()))", source: "AppBenchmark" ) + await self.playStartCueWhenCaptureReady() } } + private func playStartCueWhenCaptureReady() async { + let cueWaitStartedAt = ProcessInfo.processInfo.systemUptime + let ready = await self.asr.waitForCaptureReadyForStartCue() + DebugLogger.shared.benchmark( + "APP_BENCH", + message: "start_cue_ready ready=\(ready) elapsedMs=\(Int(((ProcessInfo.processInfo.systemUptime - cueWaitStartedAt) * 1000).rounded()))", + source: "AppBenchmark" + ) + + guard ready, self.asr.isRunning else { + DebugLogger.shared.debug("Start cue skipped because capture is no longer active", source: "ContentView") + return + } + + TranscriptionSoundPlayer.shared.playStartSound() + } + private func beginDictationRecording(for selection: SettingsStore.DictationPromptSelection, mode: ActiveRecordingMode) { let settings = SettingsStore.shared settings.setDictationPromptSelection(selection, for: .secondary) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 29d4d7b2..fc126c05 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -576,6 +576,16 @@ final class ASRService: ObservableObject { private var engineConfigurationChangeObserver: NSObjectProtocol? private var audioRouteRecoveryTask: Task? private let audioRouteRecoveryDelayNanoseconds: UInt64 = 1_000_000_000 + private let startupEngineConfigurationRecoveryDelayNanoseconds: UInt64 = 100_000_000 + private let startupEngineConfigurationRecoveryWindowSeconds: TimeInterval = 2.0 + private let startupCaptureReadyStableDelaySeconds: TimeInterval = 0.45 + private let startupCaptureReadyAfterRecoveryDelaySeconds: TimeInterval = 0.10 + private let startupCaptureReadyTimeoutSeconds: TimeInterval = 1.75 + private let startupCaptureReadyPollNanoseconds: UInt64 = 25_000_000 + private let startupCaptureReadyMinimumSamples = 2_048 + private var lastEngineStartCompletedAt: TimeInterval? + private var startupEngineConfigurationRecoveryScheduled = false + private var startupEngineConfigurationRecoveryCompletedAt: TimeInterval? private var isRecoveringAudioRoute = false private let fastPreviewStopGraceNanoseconds: UInt64 = 200_000_000 private let fastPreviewSampleRate = 16_000 @@ -871,6 +881,9 @@ final class ASRService: ObservableObject { self.benchmarkStreamingChunkIndex = 0 self.benchmarkCompletedStreamingChunks = 0 self.benchmarkLastChunkSampleCount = 0 + self.lastEngineStartCompletedAt = nil + self.startupEngineConfigurationRecoveryScheduled = false + self.startupEngineConfigurationRecoveryCompletedAt = nil self.streamingChunkAnalyticsSuccessCount = 0 self.lastStreamingChunkFailureAnalyticsAt = nil (self.transcriptionProvider as? FluidAudioProvider)?.resetStreamingPreviewCache() @@ -963,6 +976,56 @@ final class ASRService: ObservableObject { } } + func waitForCaptureReadyForStartCue() async -> Bool { + let waitStartedAt = Date().timeIntervalSince1970 + + while self.isRunning { + let now = Date().timeIntervalSince1970 + let sampleCount = self.audioBuffer.count + let routeRecoveryIdle = self.isRecoveringAudioRoute == false && self.audioRouteRecoveryTask == nil + + let stableEnough: Bool + if let completedAt = self.startupEngineConfigurationRecoveryCompletedAt { + stableEnough = now - completedAt >= self.startupCaptureReadyAfterRecoveryDelaySeconds + } else if self.startupEngineConfigurationRecoveryScheduled { + stableEnough = false + } else if let engineStartedAt = self.lastEngineStartCompletedAt { + stableEnough = now - engineStartedAt >= self.startupCaptureReadyStableDelaySeconds + } else { + stableEnough = false + } + + if routeRecoveryIdle, + stableEnough, + sampleCount >= self.startupCaptureReadyMinimumSamples + { + DebugLogger.shared.info( + "Capture ready for start cue (samples=\(sampleCount), waitedMs=\(Int(((now - waitStartedAt) * 1000).rounded())))", + source: "ASRService" + ) + return true + } + + if now - waitStartedAt >= self.startupCaptureReadyTimeoutSeconds { + let ready = routeRecoveryIdle && sampleCount > 0 + DebugLogger.shared.warning( + "Timed out waiting for capture-ready start cue (ready=\(ready), samples=\(sampleCount), routeRecoveryIdle=\(routeRecoveryIdle), stableEnough=\(stableEnough))", + source: "ASRService" + ) + return ready + } + + do { + try await Task.sleep(nanoseconds: self.startupCaptureReadyPollNanoseconds) + } catch { + return false + } + } + + DebugLogger.shared.debug("Start cue wait cancelled because ASR is no longer running", source: "ASRService") + return false + } + /// Stops the recording session and returns the transcribed text. /// /// This method performs the complete transcription process: @@ -1746,6 +1809,7 @@ final class ASRService: ObservableObject { ) try self.engine.start() + self.lastEngineStartCompletedAt = Date().timeIntervalSince1970 DebugLogger.shared.info("AVAudioEngine started successfully on attempt \(attempts + 1)", source: "ASRService") return } catch { @@ -1864,12 +1928,16 @@ final class ASRService: ObservableObject { return } - DebugLogger.shared.warning("Audio route changed while recording; scheduling recovery (\(reason))", source: "ASRService") - self.audioCapturePipeline.setRecordingEnabled(false) - self.audioLevelSubject.send(0.0) - self.audioRouteRecoveryTask?.cancel() - let recoveryDelayNanoseconds = self.audioRouteRecoveryDelayNanoseconds + let isStartupEngineConfigurationRecovery = self.isStartupEngineConfigurationRecovery(reason: reason) + if isStartupEngineConfigurationRecovery { + self.startupEngineConfigurationRecoveryScheduled = true + } + let recoveryDelayNanoseconds = self.recoveryDelayNanoseconds(for: reason) + DebugLogger.shared.warning( + "Audio route changed while recording; scheduling recovery (\(reason), delayMs=\(recoveryDelayNanoseconds / 1_000_000))", + source: "ASRService" + ) self.audioRouteRecoveryTask = Task { [weak self] in do { try await Task.sleep(nanoseconds: recoveryDelayNanoseconds) @@ -1880,6 +1948,21 @@ final class ASRService: ObservableObject { } } + private func recoveryDelayNanoseconds(for reason: String) -> UInt64 { + self.isStartupEngineConfigurationRecovery(reason: reason) + ? self.startupEngineConfigurationRecoveryDelayNanoseconds + : self.audioRouteRecoveryDelayNanoseconds + } + + private func isStartupEngineConfigurationRecovery(reason: String) -> Bool { + guard reason == "engine configuration changed", + let lastEngineStartCompletedAt + else { return false } + + let startAge = Date().timeIntervalSince1970 - lastEngineStartCompletedAt + return startAge >= 0 && startAge <= self.startupEngineConfigurationRecoveryWindowSeconds + } + @MainActor private func recoverAudioRoute(reason: String) async { guard self.isRunning else { return } @@ -1915,6 +1998,9 @@ final class ASRService: ObservableObject { } DebugLogger.shared.info("Audio route recovery succeeded", source: "ASRService") + if reason == "engine configuration changed", self.startupEngineConfigurationRecoveryScheduled { + self.startupEngineConfigurationRecoveryCompletedAt = Date().timeIntervalSince1970 + } } catch { DebugLogger.shared.error("Audio route recovery failed: \(error)", source: "ASRService") await self.stopWithoutTranscription() diff --git a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift index 5c7ced21..9b0a3cd7 100644 --- a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift +++ b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift @@ -12,33 +12,45 @@ final class TranscriptionSoundPlayer { private init() {} func playStartSound() { - guard SettingsStore.shared.enableTranscriptionSounds else { return } + guard SettingsStore.shared.enableTranscriptionSounds else { + DebugLogger.shared.debug("Start sound skipped because transcription sounds are disabled", source: "TranscriptionSoundPlayer") + return + } let selected = SettingsStore.shared.transcriptionStartSound - guard let soundName = selected.startSoundFileName else { return } - self.play(soundName: soundName) + guard let soundName = selected.startSoundFileName else { + DebugLogger.shared.debug("Start sound skipped because selected sound is none", source: "TranscriptionSoundPlayer") + return + } + self.play(soundName: soundName, purpose: "start") } func playStopSound() { - guard SettingsStore.shared.enableTranscriptionSounds else { return } + guard SettingsStore.shared.enableTranscriptionSounds else { + DebugLogger.shared.debug("Stop sound skipped because transcription sounds are disabled", source: "TranscriptionSoundPlayer") + return + } let selected = SettingsStore.shared.transcriptionStartSound - guard let soundName = selected.stopSoundFileName else { return } - self.play(soundName: soundName) + guard let soundName = selected.stopSoundFileName else { + DebugLogger.shared.debug("Stop sound skipped because selected sound has no stop sound", source: "TranscriptionSoundPlayer") + return + } + self.play(soundName: soundName, purpose: "stop") } /// Preview a specific sound at the current volume setting (used in Settings UI). func playPreview(sound: SettingsStore.TranscriptionStartSound) { guard let soundName = sound.startSoundFileName else { return } - self.play(soundName: soundName) + self.play(soundName: soundName, purpose: "preview") } /// Preview current sound at a specific volume (used when slider is released). func playPreviewAtVolume(_ volume: Float) { let selected = SettingsStore.shared.transcriptionStartSound guard let soundName = selected.startSoundFileName else { return } - self.play(soundName: soundName, overrideVolume: volume) + self.play(soundName: soundName, overrideVolume: volume, purpose: "preview") } - private func play(soundName: String, overrideVolume: Float? = nil) { + private func play(soundName: String, overrideVolume: Float? = nil, purpose: String) { guard let url = Bundle.main.url(forResource: soundName, withExtension: "m4a") else { DebugLogger.shared.error("Missing sound resource: \(soundName).m4a", source: "TranscriptionSoundPlayer") return @@ -71,7 +83,11 @@ final class TranscriptionSoundPlayer { } else { player.volume = desiredVolume } - player.play() + let started = player.play() + DebugLogger.shared.info( + "Played \(purpose) sound \(soundName).m4a started=\(started) volume=\(player.volume)", + source: "TranscriptionSoundPlayer" + ) // Restore system volume after the sound finishes if settings.transcriptionSoundIndependentVolume, let saved = self.savedSystemVolume { From a8890dcb2a8e6e3c693dc08bbd16c8e47b821341 Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Mon, 29 Jun 2026 20:23:41 -0500 Subject: [PATCH 02/10] perf: reuse stopped audio engine briefly --- Sources/Fluid/Services/ASRService.swift | 96 +++++++++++++++++++++---- 1 file changed, 82 insertions(+), 14 deletions(-) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index fc126c05..7e995e69 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -583,9 +583,12 @@ final class ASRService: ObservableObject { private let startupCaptureReadyTimeoutSeconds: TimeInterval = 1.75 private let startupCaptureReadyPollNanoseconds: UInt64 = 25_000_000 private let startupCaptureReadyMinimumSamples = 2_048 + private let stoppedEngineReuseGraceNanoseconds: UInt64 = 20_000_000_000 private var lastEngineStartCompletedAt: TimeInterval? private var startupEngineConfigurationRecoveryScheduled = false private var startupEngineConfigurationRecoveryCompletedAt: TimeInterval? + private var stoppedEngineRetainedAt: TimeInterval? + private var stoppedEngineReleaseTask: Task? private var isRecoveringAudioRoute = false private let fastPreviewStopGraceNanoseconds: UInt64 = 200_000_000 private let fastPreviewSampleRate = 16_000 @@ -659,6 +662,7 @@ final class ASRService: ObservableObject { } deinit { + self.stoppedEngineReleaseTask?.cancel() if let observer = self.vocabularyChangeObserver { NotificationCenter.default.removeObserver(observer) } @@ -877,6 +881,12 @@ final class ASRService: ObservableObject { self.isProcessingChunk = false self.skipNextChunk = false self.benchmarkSessionID += 1 + let reusedStoppedEngineAgeMs = self.stoppedEngineAgeMilliseconds() + self.cancelStoppedEngineRelease(reason: "start") + self.benchmarkLog( + "engine_reuse_start hit=\(reusedStoppedEngineAgeMs != nil) ageMs=\(reusedStoppedEngineAgeMs.map(String.init) ?? "nil")" + ) + self.stoppedEngineRetainedAt = nil self.benchmarkRecordingStartedAt = Date().timeIntervalSince1970 self.benchmarkStreamingChunkIndex = 0 self.benchmarkCompletedStreamingChunks = 0 @@ -1026,6 +1036,67 @@ final class ASRService: ObservableObject { return false } + private func stoppedEngineAgeMilliseconds() -> Int? { + guard let stoppedEngineRetainedAt else { return nil } + return self.elapsedMilliseconds(since: stoppedEngineRetainedAt) + } + + private func cancelStoppedEngineRelease(reason: String) { + guard let stoppedEngineReleaseTask else { return } + stoppedEngineReleaseTask.cancel() + self.stoppedEngineReleaseTask = nil + self.benchmarkLog("engine_reuse_release_cancelled reason=\(reason)") + } + + private func releaseStoppedEngine(reason: String) { + self.stoppedEngineReleaseTask?.cancel() + self.stoppedEngineReleaseTask = nil + self.stoppedEngineRetainedAt = nil + + let oldEngine = self.engineStorage + self.engineStorage = nil + self.benchmarkLog("engine_reuse_release reason=\(reason) hadEngine=\(oldEngine != nil)") + + if let oldEngine { + DispatchQueue.global(qos: .utility).async { _ = oldEngine } + } + } + + private func retainStoppedEngineForReuse(reason: String) { + guard self.engineStorage != nil else { + self.benchmarkLog("engine_reuse_retained hit=false reason=\(reason)") + return + } + + self.cancelStoppedEngineRelease(reason: "reschedule") + self.stoppedEngineRetainedAt = Date().timeIntervalSince1970 + let reuseGraceNanoseconds = self.stoppedEngineReuseGraceNanoseconds + self.benchmarkLog( + "engine_reuse_retained hit=true reason=\(reason) graceMs=\(reuseGraceNanoseconds / 1_000_000)" + ) + + self.stoppedEngineReleaseTask = Task { [weak self] in + do { + try await Task.sleep(nanoseconds: reuseGraceNanoseconds) + } catch { + return + } + + await MainActor.run { [weak self] in + guard let self else { return } + guard self.isRunning == false, + self.isStarting == false, + self.isRecoveringAudioRoute == false + else { + self.benchmarkLog("engine_reuse_release_deferred reason=busy") + return + } + + self.releaseStoppedEngine(reason: "reuse_grace_expired") + } + } + } + /// Stops the recording session and returns the transcribed text. /// /// This method performs the complete transcription process: @@ -1110,12 +1181,7 @@ final class ASRService: ObservableObject { // (potentially slow) final transcription pass. await MainActor.run { onCaptureStopped?() } - // Recreate the engine instance instead of calling reset() to prevent format corruption - // VoiceInk approach: tearing down and rebuilding ensures fresh, valid audio format on restart - DebugLogger.shared.debug("đŸ—‘ī¸ Deallocating old engine and creating fresh instance...", source: "ASRService") - self.engineStorage = nil // Explicitly release old engine - // New engine will be lazily created on next access via computed property - DebugLogger.shared.debug("✅ Engine instance recreated", source: "ASRService") + self.retainStoppedEngineForReuse(reason: "normal_stop") // CRITICAL FIX: Await completion of streaming task AND any pending transcriptions // This prevents use-after-free crashes (EXC_BAD_ACCESS) when clearing buffer @@ -1429,26 +1495,28 @@ final class ASRService: ObservableObject { private func configureSession() throws { DebugLogger.shared.debug("🔧 configureSession() - ENTERED", source: "ASRService") + let hadExistingEngine = self.engineStorage != nil + let engine = self.engine - if self.engine.isRunning { + if engine.isRunning { DebugLogger.shared.debug("âš ī¸ Engine is running, stopping before configuration", source: "ASRService") - self.engine.stop() + engine.stop() DebugLogger.shared.debug("✅ Engine stopped", source: "ASRService") } - // No need to call engine.reset() here - we created a fresh engine in stop() - // Accessing the engine property will either return the existing fresh engine, - // or create a new one if this is the first start - DebugLogger.shared.debug("â„šī¸ Using fresh engine instance (created lazily)", source: "ASRService") + DebugLogger.shared.debug( + hadExistingEngine ? "â„šī¸ Using retained engine instance" : "â„šī¸ Created fresh engine instance lazily", + source: "ASRService" + ) // Force input node instantiation (ensures the underlying AUHAL AudioUnit exists) DebugLogger.shared.debug("📍 Forcing input node instantiation...", source: "ASRService") - _ = self.engine.inputNode + _ = engine.inputNode DebugLogger.shared.debug("Input node instantiated", source: "ASRService") // Force output node instantiation for output device binding DebugLogger.shared.debug("📍 Forcing output node instantiation...", source: "ASRService") - _ = self.engine.outputNode + _ = engine.outputNode DebugLogger.shared.debug("✅ Output node instantiated", source: "ASRService") // NOTE: Device binding occurs in startEngine() BEFORE engine.prepare() From 33bae459b7970cdc9f5ef6c8f6cb1b4b74b602a7 Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Mon, 29 Jun 2026 20:30:10 -0500 Subject: [PATCH 03/10] fix: address capture readiness review feedback --- Sources/Fluid/Services/ASRService.swift | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 7e995e69..eec1668a 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -587,6 +587,7 @@ final class ASRService: ObservableObject { private var lastEngineStartCompletedAt: TimeInterval? private var startupEngineConfigurationRecoveryScheduled = false private var startupEngineConfigurationRecoveryCompletedAt: TimeInterval? + private var startupEngineConfigurationRecoveryCompletedSampleCount: Int? private var stoppedEngineRetainedAt: TimeInterval? private var stoppedEngineReleaseTask: Task? private var isRecoveringAudioRoute = false @@ -894,6 +895,7 @@ final class ASRService: ObservableObject { self.lastEngineStartCompletedAt = nil self.startupEngineConfigurationRecoveryScheduled = false self.startupEngineConfigurationRecoveryCompletedAt = nil + self.startupEngineConfigurationRecoveryCompletedSampleCount = nil self.streamingChunkAnalyticsSuccessCount = 0 self.lastStreamingChunkFailureAnalyticsAt = nil (self.transcriptionProvider as? FluidAudioProvider)?.resetStreamingPreviewCache() @@ -1004,22 +1006,30 @@ final class ASRService: ObservableObject { } else { stableEnough = false } + let readySampleCount: Int + if self.startupEngineConfigurationRecoveryCompletedAt != nil, + let recoveryCompletedSampleCount = self.startupEngineConfigurationRecoveryCompletedSampleCount + { + readySampleCount = max(0, sampleCount - recoveryCompletedSampleCount) + } else { + readySampleCount = sampleCount + } if routeRecoveryIdle, stableEnough, - sampleCount >= self.startupCaptureReadyMinimumSamples + readySampleCount >= self.startupCaptureReadyMinimumSamples { DebugLogger.shared.info( - "Capture ready for start cue (samples=\(sampleCount), waitedMs=\(Int(((now - waitStartedAt) * 1000).rounded())))", + "Capture ready for start cue (samples=\(sampleCount), readySamples=\(readySampleCount), waitedMs=\(Int(((now - waitStartedAt) * 1000).rounded())))", source: "ASRService" ) return true } if now - waitStartedAt >= self.startupCaptureReadyTimeoutSeconds { - let ready = routeRecoveryIdle && sampleCount > 0 + let ready = routeRecoveryIdle && readySampleCount > 0 DebugLogger.shared.warning( - "Timed out waiting for capture-ready start cue (ready=\(ready), samples=\(sampleCount), routeRecoveryIdle=\(routeRecoveryIdle), stableEnough=\(stableEnough))", + "Timed out waiting for capture-ready start cue (ready=\(ready), samples=\(sampleCount), readySamples=\(readySampleCount), routeRecoveryIdle=\(routeRecoveryIdle), stableEnough=\(stableEnough))", source: "ASRService" ) return ready @@ -2000,6 +2010,9 @@ final class ASRService: ObservableObject { let isStartupEngineConfigurationRecovery = self.isStartupEngineConfigurationRecovery(reason: reason) if isStartupEngineConfigurationRecovery { self.startupEngineConfigurationRecoveryScheduled = true + } else { + self.audioCapturePipeline.setRecordingEnabled(false) + self.audioLevelSubject.send(0.0) } let recoveryDelayNanoseconds = self.recoveryDelayNanoseconds(for: reason) DebugLogger.shared.warning( @@ -2068,6 +2081,7 @@ final class ASRService: ObservableObject { DebugLogger.shared.info("Audio route recovery succeeded", source: "ASRService") if reason == "engine configuration changed", self.startupEngineConfigurationRecoveryScheduled { self.startupEngineConfigurationRecoveryCompletedAt = Date().timeIntervalSince1970 + self.startupEngineConfigurationRecoveryCompletedSampleCount = self.audioBuffer.count } } catch { DebugLogger.shared.error("Audio route recovery failed: \(error)", source: "ASRService") From 73cf1e47df792ec05fe104cd565aa0641fb572d7 Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 00:29:41 -0500 Subject: [PATCH 04/10] fix: recover hotkey event tap asynchronously --- .../Fluid/Services/GlobalHotkeyManager.swift | 66 +++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/Sources/Fluid/Services/GlobalHotkeyManager.swift b/Sources/Fluid/Services/GlobalHotkeyManager.swift index 2e1cef4b..6adc7e86 100644 --- a/Sources/Fluid/Services/GlobalHotkeyManager.swift +++ b/Sources/Fluid/Services/GlobalHotkeyManager.swift @@ -271,9 +271,11 @@ final class GlobalHotkeyManager: NSObject { private var isInitialized = false private var initializationTask: Task? private var healthCheckTask: Task? + private var tapRecoveryTask: Task? private var maxRetryAttempts = 5 private var retryDelay: TimeInterval = 0.5 private var healthCheckInterval: TimeInterval = 30.0 + private let eventTapRecoveryDelayNanoseconds: UInt64 = 250_000_000 init( asrService: ASRService, @@ -993,26 +995,64 @@ final class GlobalHotkeyManager: NSObject { private func handleTapDisableEvent(type: CGEventType, event: CGEvent) -> Unmanaged? { // macOS can temporarily disable event taps (e.g. timeouts, user input protection). - // If we don't immediately re-enable here, hotkeys will silently stop working until our - // periodic health check kicks in, and the OS may handle the key (e.g. system dictation). + // Keep this path fail-open: doing synchronous recovery work inside the event tap callback + // can make keyboard input feel stuck while macOS is already trying to protect user input. guard type == .tapDisabledByTimeout || type == .tapDisabledByUserInput else { return nil } let reason = (type == .tapDisabledByTimeout) ? "timeout" : "user input" - DebugLogger.shared.warning("Event tap disabled by \(reason) — attempting immediate re-enable", source: "GlobalHotkeyManager") - self.resetModifierOnlyShortcutTracking(reason: .tapDisabled) - - if let tap = self.eventTap { - CGEvent.tapEnable(tap: tap, enable: true) + Task { @MainActor [weak self] in + self?.scheduleEventTapRecovery(reason: reason) } - if !self.isEventTapEnabled() { - DebugLogger.shared.warning("Event tap re-enable failed — recreating tap", source: "GlobalHotkeyManager") - self.setupGlobalHotkeyWithRetry() + return Unmanaged.passUnretained(event) + } + + private func scheduleEventTapRecovery(reason: String) { + DebugLogger.shared.warning( + "Event tap disabled by \(reason) — scheduling async recovery and passing input through", + source: "GlobalHotkeyManager" + ) + + guard self.tapRecoveryTask == nil else { + DebugLogger.shared.debug( + "Event tap recovery already scheduled; ignoring duplicate disabled event (\(reason))", + source: "GlobalHotkeyManager" + ) + return } - return Unmanaged.passUnretained(event) + self.isInitialized = false + self.tapRecoveryTask = Task { @MainActor [weak self] in + guard let self else { return } + defer { self.tapRecoveryTask = nil } + + do { + try await Task.sleep(nanoseconds: self.eventTapRecoveryDelayNanoseconds) + } catch { + return + } + + self.resetModifierOnlyShortcutTracking(reason: .tapDisabled) + + guard AXIsProcessTrusted() else { + DebugLogger.shared.warning( + "Event tap async recovery skipped because Accessibility is not trusted", + source: "GlobalHotkeyManager" + ) + return + } + + if self.setupGlobalHotkey() { + self.isInitialized = true + self.startHealthCheckTimer() + DebugLogger.shared.info("Event tap async recovery successful", source: "GlobalHotkeyManager") + } else { + self.isInitialized = false + DebugLogger.shared.error("Event tap async recovery failed", source: "GlobalHotkeyManager") + } + } } private func synchronizedPressedModifierKeyCodes( @@ -1809,6 +1849,8 @@ final class GlobalHotkeyManager: NSObject { self.initializationTask?.cancel() self.healthCheckTask?.cancel() + self.tapRecoveryTask?.cancel() + self.tapRecoveryTask = nil self.resetModifierOnlyShortcutTracking(reason: .reinitialize) self.isInitialized = false self.initializeWithDelay() @@ -1823,6 +1865,7 @@ final class GlobalHotkeyManager: NSObject { guard !Task.isCancelled else { break } await MainActor.run { + guard self.tapRecoveryTask == nil else { return } if !self.validateEventTapHealth() { DebugLogger.shared.warning("Health check failed, attempting to recover", source: "GlobalHotkeyManager") @@ -1842,6 +1885,7 @@ final class GlobalHotkeyManager: NSObject { deinit { initializationTask?.cancel() healthCheckTask?.cancel() + tapRecoveryTask?.cancel() cleanupEventTap() } } From 0c81e63b85406548c5ebd9ebfa82deb571d64aaa Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 00:32:22 -0500 Subject: [PATCH 05/10] fix: tie start cue to recording session --- Sources/Fluid/ContentView.swift | 25 ++++++++++++++++++------- Sources/Fluid/Services/ASRService.swift | 16 ++++++++++++++-- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index ff9cf420..755e01e6 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -2836,7 +2836,8 @@ struct ContentView: View { self.menuBarManager.hideRecordingOverlayImmediately(reason: "asr_start_failed") return } - await self.playStartCueWhenCaptureReady() + guard let sessionID = self.asr.currentRecordingSessionID else { return } + await self.playStartCueWhenCaptureReady(sessionID: sessionID) } // Pre-load model in background while recording (avoids 10s freeze on stop) @@ -3077,7 +3078,10 @@ struct ContentView: View { ) Task { await self.asr.start() - await self.playStartCueWhenCaptureReady() + guard self.asr.isRunning, + let sessionID = self.asr.currentRecordingSessionID + else { return } + await self.playStartCueWhenCaptureReady(sessionID: sessionID) } }, rewriteModeCallback: { @@ -3112,7 +3116,10 @@ struct ContentView: View { DebugLogger.shared.info("Starting voice recording for edit mode", source: "ContentView") Task { await self.asr.start() - await self.playStartCueWhenCaptureReady() + guard self.asr.isRunning, + let sessionID = self.asr.currentRecordingSessionID + else { return } + await self.playStartCueWhenCaptureReady(sessionID: sessionID) } }, isDictateRecordingProvider: { @@ -3446,20 +3453,24 @@ extension ContentView { message: "asr_start_return elapsedMs=\(Int(((ProcessInfo.processInfo.systemUptime - asrStartStartedAt) * 1000).rounded()))", source: "AppBenchmark" ) - await self.playStartCueWhenCaptureReady() + guard let sessionID = self.asr.currentRecordingSessionID else { return } + await self.playStartCueWhenCaptureReady(sessionID: sessionID) } } - private func playStartCueWhenCaptureReady() async { + private func playStartCueWhenCaptureReady(sessionID: UInt64) async { let cueWaitStartedAt = ProcessInfo.processInfo.systemUptime - let ready = await self.asr.waitForCaptureReadyForStartCue() + let ready = await self.asr.waitForCaptureReadyForStartCue(sessionID: sessionID) DebugLogger.shared.benchmark( "APP_BENCH", message: "start_cue_ready ready=\(ready) elapsedMs=\(Int(((ProcessInfo.processInfo.systemUptime - cueWaitStartedAt) * 1000).rounded()))", source: "AppBenchmark" ) - guard ready, self.asr.isRunning else { + guard ready, + self.asr.isRunning, + self.asr.currentRecordingSessionID == sessionID + else { DebugLogger.shared.debug("Start cue skipped because capture is no longer active", source: "ContentView") return } diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index eec1668a..72eb2aa8 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -588,6 +588,8 @@ final class ASRService: ObservableObject { private var startupEngineConfigurationRecoveryScheduled = false private var startupEngineConfigurationRecoveryCompletedAt: TimeInterval? private var startupEngineConfigurationRecoveryCompletedSampleCount: Int? + private var recordingSessionCounter: UInt64 = 0 + private var activeRecordingSessionID: UInt64? private var stoppedEngineRetainedAt: TimeInterval? private var stoppedEngineReleaseTask: Task? private var isRecoveringAudioRoute = false @@ -606,6 +608,10 @@ final class ASRService: ObservableObject { var audioLevelPublisher: AnyPublisher { self.audioLevelSubject.eraseToAnyPublisher() } private var lastAudioLevelSentAt: TimeInterval = 0 + var currentRecordingSessionID: UInt64? { + self.activeRecordingSessionID + } + func consumeLastCompletedAudioSnapshot() -> DictationAudioSnapshot? { let snapshot = self.lastCompletedAudioSnapshot self.lastCompletedAudioSnapshot = nil @@ -868,6 +874,8 @@ final class ASRService: ObservableObject { // Reset media pause state for this session self.didPauseMediaForThisSession = false + self.recordingSessionCounter &+= 1 + self.activeRecordingSessionID = self.recordingSessionCounter self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil self.isRecoveringAudioRoute = false @@ -953,6 +961,7 @@ final class ASRService: ObservableObject { } DebugLogger.shared.info("✅ START() completed successfully", source: "ASRService") } catch { + self.activeRecordingSessionID = nil DebugLogger.shared.error("Failed to start ASR session: \(error)", source: "ASRService") // Resume media if we paused it before the failure @@ -988,10 +997,10 @@ final class ASRService: ObservableObject { } } - func waitForCaptureReadyForStartCue() async -> Bool { + func waitForCaptureReadyForStartCue(sessionID: UInt64) async -> Bool { let waitStartedAt = Date().timeIntervalSince1970 - while self.isRunning { + while self.isRunning, self.activeRecordingSessionID == sessionID { let now = Date().timeIntervalSince1970 let sampleCount = self.audioBuffer.count let routeRecoveryIdle = self.isRecoveringAudioRoute == false && self.audioRouteRecoveryTask == nil @@ -1146,10 +1155,12 @@ final class ASRService: ObservableObject { self.benchmarkLog("stop_start ageMs=\(self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt)) bufferedSamples=\(self.audioBuffer.count)") guard self.isRunning else { + self.activeRecordingSessionID = nil DebugLogger.shared.warning("âš ī¸ STOP() - not running, returning empty string", source: "ASRService") return "" } defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } + self.activeRecordingSessionID = nil self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil @@ -1445,6 +1456,7 @@ final class ASRService: ObservableObject { } func stopWithoutTranscription() async { + self.activeRecordingSessionID = nil guard self.isRunning else { return } defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } From 311a3cf3b494667d9e7a64e5ddffc8395125411c Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 00:44:34 -0500 Subject: [PATCH 06/10] test: cover start cue readiness gating --- Fluid.xcodeproj/project.pbxproj | 4 + Sources/Fluid/Services/ASRService.swift | 184 ++++++++++++++---- .../StartCueCaptureReadinessTests.swift | 135 +++++++++++++ 3 files changed, 282 insertions(+), 41 deletions(-) create mode 100644 Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift diff --git a/Fluid.xcodeproj/project.pbxproj b/Fluid.xcodeproj/project.pbxproj index 1155efef..6eeeabce 100644 --- a/Fluid.xcodeproj/project.pbxproj +++ b/Fluid.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */; }; 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; }; 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; }; + 7CFA10012F54000100C0DEF0 /* StartCueCaptureReadinessTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CFA10022F54000100C0DEF0 /* StartCueCaptureReadinessTests.swift */; }; 86CAA2D4EF18433096185602 /* LLMClientRequestBodyTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 343B29013F4441D6A797D12D /* LLMClientRequestBodyTests.swift */; }; 7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; }; 7CDB0A302F3C4D5600FB7CAD /* XCTest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */; }; @@ -36,6 +37,7 @@ 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HotkeyShortcutTests.swift; sourceTree = ""; }; 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = ""; }; 343B29013F4441D6A797D12D /* LLMClientRequestBodyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LLMClientRequestBodyTests.swift; sourceTree = ""; }; + 7CFA10022F54000100C0DEF0 /* StartCueCaptureReadinessTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = StartCueCaptureReadinessTests.swift; sourceTree = ""; }; 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = ""; }; 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = ""; }; 7CDB0A2C2F3C4D5600FB7CAD /* XCTest.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = XCTest.framework; path = Platforms/MacOSX.platform/Developer/Library/Frameworks/XCTest.framework; sourceTree = DEVELOPER_DIR; }; @@ -107,6 +109,7 @@ 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */, 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */, 343B29013F4441D6A797D12D /* LLMClientRequestBodyTests.swift */, + 7CFA10022F54000100C0DEF0 /* StartCueCaptureReadinessTests.swift */, ); path = FluidDictationIntegrationTests; sourceTree = ""; @@ -262,6 +265,7 @@ 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */, 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */, 86CAA2D4EF18433096185602 /* LLMClientRequestBodyTests.swift in Sources */, + 7CFA10012F54000100C0DEF0 /* StartCueCaptureReadinessTests.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 72eb2aa8..f76e0a1b 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -588,8 +588,7 @@ final class ASRService: ObservableObject { private var startupEngineConfigurationRecoveryScheduled = false private var startupEngineConfigurationRecoveryCompletedAt: TimeInterval? private var startupEngineConfigurationRecoveryCompletedSampleCount: Int? - private var recordingSessionCounter: UInt64 = 0 - private var activeRecordingSessionID: UInt64? + private var recordingSessionTracker = RecordingSessionTracker() private var stoppedEngineRetainedAt: TimeInterval? private var stoppedEngineReleaseTask: Task? private var isRecoveringAudioRoute = false @@ -609,7 +608,7 @@ final class ASRService: ObservableObject { private var lastAudioLevelSentAt: TimeInterval = 0 var currentRecordingSessionID: UInt64? { - self.activeRecordingSessionID + self.recordingSessionTracker.currentID } func consumeLastCompletedAudioSnapshot() -> DictationAudioSnapshot? { @@ -874,8 +873,7 @@ final class ASRService: ObservableObject { // Reset media pause state for this session self.didPauseMediaForThisSession = false - self.recordingSessionCounter &+= 1 - self.activeRecordingSessionID = self.recordingSessionCounter + self.recordingSessionTracker.beginSession() self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil self.isRecoveringAudioRoute = false @@ -961,7 +959,7 @@ final class ASRService: ObservableObject { } DebugLogger.shared.info("✅ START() completed successfully", source: "ASRService") } catch { - self.activeRecordingSessionID = nil + self.recordingSessionTracker.clearSession() DebugLogger.shared.error("Failed to start ASR session: \(error)", source: "ASRService") // Resume media if we paused it before the failure @@ -999,49 +997,55 @@ final class ASRService: ObservableObject { func waitForCaptureReadyForStartCue(sessionID: UInt64) async -> Bool { let waitStartedAt = Date().timeIntervalSince1970 + let configuration = StartCueCaptureReadiness.Configuration( + stableDelaySeconds: self.startupCaptureReadyStableDelaySeconds, + afterRecoveryDelaySeconds: self.startupCaptureReadyAfterRecoveryDelaySeconds, + timeoutSeconds: self.startupCaptureReadyTimeoutSeconds, + minimumSamples: self.startupCaptureReadyMinimumSamples + ) - while self.isRunning, self.activeRecordingSessionID == sessionID { + while true { let now = Date().timeIntervalSince1970 let sampleCount = self.audioBuffer.count let routeRecoveryIdle = self.isRecoveringAudioRoute == false && self.audioRouteRecoveryTask == nil + let evaluation = StartCueCaptureReadiness.evaluate( + StartCueCaptureReadiness.Snapshot( + isRunning: self.isRunning, + activeSessionID: self.recordingSessionTracker.currentID, + requestedSessionID: sessionID, + now: now, + waitStartedAt: waitStartedAt, + sampleCount: sampleCount, + routeRecoveryIdle: routeRecoveryIdle, + startupRecoveryScheduled: self.startupEngineConfigurationRecoveryScheduled, + startupRecoveryCompletedAt: self.startupEngineConfigurationRecoveryCompletedAt, + startupRecoveryCompletedSampleCount: self.startupEngineConfigurationRecoveryCompletedSampleCount, + engineStartedAt: self.lastEngineStartCompletedAt + ), + configuration: configuration + ) - let stableEnough: Bool - if let completedAt = self.startupEngineConfigurationRecoveryCompletedAt { - stableEnough = now - completedAt >= self.startupCaptureReadyAfterRecoveryDelaySeconds - } else if self.startupEngineConfigurationRecoveryScheduled { - stableEnough = false - } else if let engineStartedAt = self.lastEngineStartCompletedAt { - stableEnough = now - engineStartedAt >= self.startupCaptureReadyStableDelaySeconds - } else { - stableEnough = false - } - let readySampleCount: Int - if self.startupEngineConfigurationRecoveryCompletedAt != nil, - let recoveryCompletedSampleCount = self.startupEngineConfigurationRecoveryCompletedSampleCount - { - readySampleCount = max(0, sampleCount - recoveryCompletedSampleCount) - } else { - readySampleCount = sampleCount - } - - if routeRecoveryIdle, - stableEnough, - readySampleCount >= self.startupCaptureReadyMinimumSamples - { + switch evaluation.decision { + case .ready: DebugLogger.shared.info( - "Capture ready for start cue (samples=\(sampleCount), readySamples=\(readySampleCount), waitedMs=\(Int(((now - waitStartedAt) * 1000).rounded())))", + "Capture ready for start cue (samples=\(sampleCount), readySamples=\(evaluation.readySampleCount), waitedMs=\(Int(((now - waitStartedAt) * 1000).rounded())))", source: "ASRService" ) return true - } - if now - waitStartedAt >= self.startupCaptureReadyTimeoutSeconds { - let ready = routeRecoveryIdle && readySampleCount > 0 + case .timedOut(let ready): DebugLogger.shared.warning( - "Timed out waiting for capture-ready start cue (ready=\(ready), samples=\(sampleCount), readySamples=\(readySampleCount), routeRecoveryIdle=\(routeRecoveryIdle), stableEnough=\(stableEnough))", + "Timed out waiting for capture-ready start cue (ready=\(ready), samples=\(sampleCount), readySamples=\(evaluation.readySampleCount), routeRecoveryIdle=\(routeRecoveryIdle), stableEnough=\(evaluation.stableEnough))", source: "ASRService" ) return ready + + case .inactive: + DebugLogger.shared.debug("Start cue wait cancelled because ASR is no longer running", source: "ASRService") + return false + + case .wait: + break } do { @@ -1050,9 +1054,6 @@ final class ASRService: ObservableObject { return false } } - - DebugLogger.shared.debug("Start cue wait cancelled because ASR is no longer running", source: "ASRService") - return false } private func stoppedEngineAgeMilliseconds() -> Int? { @@ -1155,12 +1156,12 @@ final class ASRService: ObservableObject { self.benchmarkLog("stop_start ageMs=\(self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt)) bufferedSamples=\(self.audioBuffer.count)") guard self.isRunning else { - self.activeRecordingSessionID = nil + self.recordingSessionTracker.clearSession() DebugLogger.shared.warning("âš ī¸ STOP() - not running, returning empty string", source: "ASRService") return "" } defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } - self.activeRecordingSessionID = nil + self.recordingSessionTracker.clearSession() self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil @@ -1456,7 +1457,7 @@ final class ASRService: ObservableObject { } func stopWithoutTranscription() async { - self.activeRecordingSessionID = nil + self.recordingSessionTracker.clearSession() guard self.isRunning else { return } defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } @@ -3358,6 +3359,107 @@ private extension ASRService { // MARK: - Audio capture pipeline +struct RecordingSessionTracker { + private var counter: UInt64 = 0 + private(set) var currentID: UInt64? + + @discardableResult + mutating func beginSession() -> UInt64 { + self.counter &+= 1 + self.currentID = self.counter + return self.counter + } + + mutating func clearSession() { + self.currentID = nil + } + + func isActive(_ sessionID: UInt64) -> Bool { + self.currentID == sessionID + } +} + +enum StartCueCaptureReadiness { + struct Configuration { + let stableDelaySeconds: TimeInterval + let afterRecoveryDelaySeconds: TimeInterval + let timeoutSeconds: TimeInterval + let minimumSamples: Int + } + + struct Snapshot { + let isRunning: Bool + let activeSessionID: UInt64? + let requestedSessionID: UInt64 + let now: TimeInterval + let waitStartedAt: TimeInterval + let sampleCount: Int + let routeRecoveryIdle: Bool + let startupRecoveryScheduled: Bool + let startupRecoveryCompletedAt: TimeInterval? + let startupRecoveryCompletedSampleCount: Int? + let engineStartedAt: TimeInterval? + } + + enum Decision: Equatable { + case ready + case wait + case inactive + case timedOut(ready: Bool) + } + + struct Evaluation: Equatable { + let decision: Decision + let stableEnough: Bool + let readySampleCount: Int + } + + static func evaluate(_ snapshot: Snapshot, configuration: Configuration) -> Evaluation { + let stableEnough = self.isStableEnough(snapshot, configuration: configuration) + let readySampleCount = self.readySampleCount(for: snapshot) + + guard snapshot.isRunning, snapshot.activeSessionID == snapshot.requestedSessionID else { + return Evaluation(decision: .inactive, stableEnough: stableEnough, readySampleCount: readySampleCount) + } + + if snapshot.routeRecoveryIdle, + stableEnough, + readySampleCount >= configuration.minimumSamples + { + return Evaluation(decision: .ready, stableEnough: stableEnough, readySampleCount: readySampleCount) + } + + if snapshot.now - snapshot.waitStartedAt >= configuration.timeoutSeconds { + let ready = snapshot.routeRecoveryIdle && readySampleCount > 0 + return Evaluation(decision: .timedOut(ready: ready), stableEnough: stableEnough, readySampleCount: readySampleCount) + } + + return Evaluation(decision: .wait, stableEnough: stableEnough, readySampleCount: readySampleCount) + } + + private static func isStableEnough(_ snapshot: Snapshot, configuration: Configuration) -> Bool { + if let completedAt = snapshot.startupRecoveryCompletedAt { + return snapshot.now - completedAt >= configuration.afterRecoveryDelaySeconds + } + if snapshot.startupRecoveryScheduled { + return false + } + if let engineStartedAt = snapshot.engineStartedAt { + return snapshot.now - engineStartedAt >= configuration.stableDelaySeconds + } + return false + } + + private static func readySampleCount(for snapshot: Snapshot) -> Int { + if snapshot.startupRecoveryCompletedAt != nil, + let recoveryCompletedSampleCount = snapshot.startupRecoveryCompletedSampleCount + { + return max(0, snapshot.sampleCount - recoveryCompletedSampleCount) + } + return snapshot.sampleCount + } +} + // // AVAudioEngine's tap runs on a realtime audio thread. ASRService is @MainActor, so we must NOT // touch its state directly inside the tap callback. This pipeline keeps all tap-side state diff --git a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift new file mode 100644 index 00000000..b757b0fa --- /dev/null +++ b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift @@ -0,0 +1,135 @@ +@testable import FluidVoice_Debug +import XCTest + +final class StartCueCaptureReadinessTests: XCTestCase { + private let configuration = StartCueCaptureReadiness.Configuration( + stableDelaySeconds: 0.45, + afterRecoveryDelaySeconds: 0.10, + timeoutSeconds: 1.75, + minimumSamples: 2_048 + ) + + func testRestartInvalidatesPreviousRecordingSession() { + var tracker = RecordingSessionTracker() + + let firstSessionID = tracker.beginSession() + let secondSessionID = tracker.beginSession() + + XCTAssertFalse(tracker.isActive(firstSessionID)) + XCTAssertTrue(tracker.isActive(secondSessionID)) + + tracker.clearSession() + + XCTAssertNil(tracker.currentID) + XCTAssertFalse(tracker.isActive(secondSessionID)) + } + + func testStaleSessionWaiterIsInactiveEvenWhenCaptureIsOtherwiseReady() { + let evaluation = StartCueCaptureReadiness.evaluate( + self.snapshot( + activeSessionID: 2, + requestedSessionID: 1, + sampleCount: 4_096, + engineStartedAt: 9.0 + ), + configuration: self.configuration + ) + + XCTAssertEqual(evaluation.decision, .inactive) + } + + func testPostRecoveryReadinessRequiresSamplesAfterRecovery() { + let waiting = StartCueCaptureReadiness.evaluate( + self.snapshot( + sampleCount: 4_096, + startupRecoveryCompletedAt: 10.0, + startupRecoveryCompletedSampleCount: 4_096 + ), + configuration: self.configuration + ) + + XCTAssertEqual(waiting.decision, .wait) + XCTAssertEqual(waiting.readySampleCount, 0) + + let ready = StartCueCaptureReadiness.evaluate( + self.snapshot( + sampleCount: 6_144, + startupRecoveryCompletedAt: 10.0, + startupRecoveryCompletedSampleCount: 4_096 + ), + configuration: self.configuration + ) + + XCTAssertEqual(ready.decision, .ready) + XCTAssertEqual(ready.readySampleCount, 2_048) + } + + func testTimeoutFallbackStillRequiresCurrentSessionSamplesAndIdleRouteRecovery() { + let staleSession = StartCueCaptureReadiness.evaluate( + self.snapshot( + activeSessionID: 2, + requestedSessionID: 1, + now: 12.0, + waitStartedAt: 10.0, + sampleCount: 4_096, + engineStartedAt: 11.9 + ), + configuration: self.configuration + ) + + XCTAssertEqual(staleSession.decision, .inactive) + + let activeSessionWithSamples = StartCueCaptureReadiness.evaluate( + self.snapshot( + now: 12.0, + waitStartedAt: 10.0, + sampleCount: 128, + engineStartedAt: 11.9 + ), + configuration: self.configuration + ) + + XCTAssertEqual(activeSessionWithSamples.decision, .timedOut(ready: true)) + + let routeRecoveryPending = StartCueCaptureReadiness.evaluate( + self.snapshot( + now: 12.0, + waitStartedAt: 10.0, + sampleCount: 4_096, + routeRecoveryIdle: false, + engineStartedAt: 9.0 + ), + configuration: self.configuration + ) + + XCTAssertEqual(routeRecoveryPending.decision, .timedOut(ready: false)) + } + + private func snapshot( + isRunning: Bool = true, + activeSessionID: UInt64? = 1, + requestedSessionID: UInt64 = 1, + now: TimeInterval = 10.2, + waitStartedAt: TimeInterval = 10.0, + sampleCount: Int, + routeRecoveryIdle: Bool = true, + startupRecoveryScheduled: Bool = false, + startupRecoveryCompletedAt: TimeInterval? = nil, + startupRecoveryCompletedSampleCount: Int? = nil, + engineStartedAt: TimeInterval? = nil + ) -> StartCueCaptureReadiness.Snapshot { + StartCueCaptureReadiness.Snapshot( + isRunning: isRunning, + activeSessionID: activeSessionID, + requestedSessionID: requestedSessionID, + now: now, + waitStartedAt: waitStartedAt, + sampleCount: sampleCount, + routeRecoveryIdle: routeRecoveryIdle, + startupRecoveryScheduled: startupRecoveryScheduled, + startupRecoveryCompletedAt: startupRecoveryCompletedAt, + startupRecoveryCompletedSampleCount: startupRecoveryCompletedSampleCount, + engineStartedAt: engineStartedAt + ) + } +} From 53da73873bbbfffa651773bfbf2ca23159ee02ac Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 00:54:56 -0500 Subject: [PATCH 07/10] fix: address audio readiness review follow-ups --- Sources/Fluid/Services/ASRService.swift | 39 +++++++++++++++++++ .../Fluid/Services/AudioDeviceService.swift | 38 ++++++++++++++++++ .../Fluid/Services/GlobalHotkeyManager.swift | 11 +++--- .../StartCueCaptureReadinessTests.swift | 25 ++++++++++++ 4 files changed, 108 insertions(+), 5 deletions(-) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index f76e0a1b..c4c88ae5 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -1087,6 +1087,10 @@ final class ASRService: ObservableObject { self.benchmarkLog("engine_reuse_retained hit=false reason=\(reason)") return } + guard self.canRetainStoppedEngineForReuse() else { + self.releaseStoppedEngine(reason: "\(reason)_unsafe_route") + return + } self.cancelStoppedEngineRelease(reason: "reschedule") self.stoppedEngineRetainedAt = Date().timeIntervalSince1970 @@ -1117,6 +1121,26 @@ final class ASRService: ObservableObject { } } + private func canRetainStoppedEngineForReuse() -> Bool { + guard SettingsStore.shared.syncAudioDevicesWithSystem else { + self.benchmarkLog("engine_reuse_retained hit=false reason=independent_device_binding") + return false + } + + let routeDevices = [ + self.getCurrentlyBoundInputDevice(), + AudioDevice.getDefaultInputDevice(), + AudioDevice.getDefaultOutputDevice(), + ].compactMap { $0 } + + if let bluetoothDevice = routeDevices.first(where: { AudioDevice.isBluetoothDevice($0) }) { + self.benchmarkLog("engine_reuse_retained hit=false reason=bluetooth_device device=\(bluetoothDevice.name)") + return false + } + + return true + } + /// Stops the recording session and returns the transcribed text. /// /// This method performs the complete transcription process: @@ -2023,7 +2047,10 @@ final class ASRService: ObservableObject { let isStartupEngineConfigurationRecovery = self.isStartupEngineConfigurationRecovery(reason: reason) if isStartupEngineConfigurationRecovery { self.startupEngineConfigurationRecoveryScheduled = true + self.startupEngineConfigurationRecoveryCompletedAt = nil + self.startupEngineConfigurationRecoveryCompletedSampleCount = nil } else { + self.clearStartupCaptureReadiness(reason: "route_recovery_replaced_by_\(reason)") self.audioCapturePipeline.setRecordingEnabled(false) self.audioLevelSubject.send(0.0) } @@ -2057,6 +2084,18 @@ final class ASRService: ObservableObject { return startAge >= 0 && startAge <= self.startupEngineConfigurationRecoveryWindowSeconds } + private func clearStartupCaptureReadiness(reason: String) { + if self.startupEngineConfigurationRecoveryScheduled || + self.startupEngineConfigurationRecoveryCompletedAt != nil || + self.startupEngineConfigurationRecoveryCompletedSampleCount != nil + { + self.benchmarkLog("startup_capture_readiness_reset reason=\(reason)") + } + self.startupEngineConfigurationRecoveryScheduled = false + self.startupEngineConfigurationRecoveryCompletedAt = nil + self.startupEngineConfigurationRecoveryCompletedSampleCount = nil + } + @MainActor private func recoverAudioRoute(reason: String) async { guard self.isRunning else { return } diff --git a/Sources/Fluid/Services/AudioDeviceService.swift b/Sources/Fluid/Services/AudioDeviceService.swift index 8e899d76..925dc4b3 100644 --- a/Sources/Fluid/Services/AudioDeviceService.swift +++ b/Sources/Fluid/Services/AudioDeviceService.swift @@ -115,6 +115,26 @@ enum AudioDevice { return self.listAllDevices().first { $0.uid == uid }?.id } + static func isBluetoothDevice(_ device: Device) -> Bool { + let nameLooksBluetooth = device.name.localizedCaseInsensitiveContains("airpods") || + device.name.localizedCaseInsensitiveContains("bluetooth") || + device.name.localizedCaseInsensitiveContains("beats") + if nameLooksBluetooth { + return true + } + + guard device.id != kAudioObjectUnknown else { return false } + + guard let transportType = self.getUInt32Property( + device.id, + selector: kAudioDevicePropertyTransportType, + scope: kAudioObjectPropertyScopeGlobal + ) else { return false } + + return transportType == kAudioDeviceTransportTypeBluetooth || + transportType == kAudioDeviceTransportTypeBluetoothLE + } + private static func getDefaultDeviceId(selector: AudioObjectPropertySelector) -> AudioObjectID? { var address = AudioObjectPropertyAddress( mSelector: selector, @@ -173,6 +193,24 @@ enum AudioDevice { return value?.takeRetainedValue() as String? } + private static func getUInt32Property( + _ devId: AudioObjectID, + selector: AudioObjectPropertySelector, + scope: AudioObjectPropertyScope + ) -> UInt32? { + var address = AudioObjectPropertyAddress( + mSelector: selector, + mScope: scope, + mElement: kAudioObjectPropertyElementMain + ) + + var value: UInt32 = 0 + var dataSize = UInt32(MemoryLayout.size) + let status = AudioObjectGetPropertyData(devId, &address, 0, nil, &dataSize, &value) + guard status == noErr else { return nil } + return value + } + private static func hasChannels(_ devId: AudioObjectID, scope: AudioObjectPropertyScope) -> Bool { var address = AudioObjectPropertyAddress( mSelector: kAudioDevicePropertyStreamConfiguration, diff --git a/Sources/Fluid/Services/GlobalHotkeyManager.swift b/Sources/Fluid/Services/GlobalHotkeyManager.swift index 6adc7e86..d9e09d88 100644 --- a/Sources/Fluid/Services/GlobalHotkeyManager.swift +++ b/Sources/Fluid/Services/GlobalHotkeyManager.swift @@ -1044,13 +1044,14 @@ final class GlobalHotkeyManager: NSObject { return } - if self.setupGlobalHotkey() { - self.isInitialized = true - self.startHealthCheckTimer() + self.setupGlobalHotkeyWithRetry() + if self.isInitialized { DebugLogger.shared.info("Event tap async recovery successful", source: "GlobalHotkeyManager") } else { - self.isInitialized = false - DebugLogger.shared.error("Event tap async recovery failed", source: "GlobalHotkeyManager") + DebugLogger.shared.warning( + "Event tap async recovery initial attempt failed; retry loop scheduled", + source: "GlobalHotkeyManager" + ) } } } diff --git a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift index b757b0fa..8390a433 100644 --- a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift +++ b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift @@ -1,4 +1,5 @@ @testable import FluidVoice_Debug +import CoreAudio import XCTest final class StartCueCaptureReadinessTests: XCTestCase { @@ -105,6 +106,30 @@ final class StartCueCaptureReadinessTests: XCTestCase { XCTAssertEqual(routeRecoveryPending.decision, .timedOut(ready: false)) } + func testBluetoothDeviceDetectionUsesNameFallbackForAggregateRoutes() { + let airPods = AudioDevice.Device( + id: AudioObjectID(kAudioObjectUnknown), + uid: "airpods", + name: "Jon's AirPods Pro", + hasInput: true, + hasOutput: true + ) + + XCTAssertTrue(AudioDevice.isBluetoothDevice(airPods)) + } + + func testBluetoothDeviceDetectionDoesNotFlagOrdinaryUnknownDeviceNames() { + let builtInMic = AudioDevice.Device( + id: AudioObjectID(kAudioObjectUnknown), + uid: "builtin", + name: "MacBook Pro Microphone", + hasInput: true, + hasOutput: false + ) + + XCTAssertFalse(AudioDevice.isBluetoothDevice(builtInMic)) + } + private func snapshot( isRunning: Bool = true, activeSessionID: UInt64? = 1, From 21a82d1ab1cd3c5b92482c59b84a40efcca725f3 Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 13:54:33 -0500 Subject: [PATCH 08/10] fix: require fresh samples after route recovery --- Sources/Fluid/Services/ASRService.swift | 111 ++++++++++++------ .../StartCueCaptureReadinessTests.swift | 68 +++++++++-- 2 files changed, 132 insertions(+), 47 deletions(-) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index c4c88ae5..5cd44e5e 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -549,7 +549,9 @@ final class ASRService: ObservableObject { (self.transcriptionProvider as? FluidAudioProvider)?.underlyingManager } #else - var asrManager: Any? { nil } + var asrManager: Any? { + nil + } #endif // Thread-safe buffer to prevent "Array mutation while enumerating" and memory corruption crashes @@ -576,18 +578,16 @@ final class ASRService: ObservableObject { private var engineConfigurationChangeObserver: NSObjectProtocol? private var audioRouteRecoveryTask: Task? private let audioRouteRecoveryDelayNanoseconds: UInt64 = 1_000_000_000 - private let startupEngineConfigurationRecoveryDelayNanoseconds: UInt64 = 100_000_000 - private let startupEngineConfigurationRecoveryWindowSeconds: TimeInterval = 2.0 + private let startupRouteRecoveryDelayNs: UInt64 = 100_000_000 + private let startupRouteRecoveryWindowSeconds: TimeInterval = 2.0 private let startupCaptureReadyStableDelaySeconds: TimeInterval = 0.45 - private let startupCaptureReadyAfterRecoveryDelaySeconds: TimeInterval = 0.10 + private let startupCapturePostRecoveryDelaySeconds: TimeInterval = 0.10 private let startupCaptureReadyTimeoutSeconds: TimeInterval = 1.75 private let startupCaptureReadyPollNanoseconds: UInt64 = 25_000_000 - private let startupCaptureReadyMinimumSamples = 2_048 + private let startupCaptureReadyMinimumSamples = 2048 private let stoppedEngineReuseGraceNanoseconds: UInt64 = 20_000_000_000 private var lastEngineStartCompletedAt: TimeInterval? - private var startupEngineConfigurationRecoveryScheduled = false - private var startupEngineConfigurationRecoveryCompletedAt: TimeInterval? - private var startupEngineConfigurationRecoveryCompletedSampleCount: Int? + private var startupRouteRecoveryTracker = StartupRouteRecoveryTracker() private var recordingSessionTracker = RecordingSessionTracker() private var stoppedEngineRetainedAt: TimeInterval? private var stoppedEngineReleaseTask: Task? @@ -604,7 +604,10 @@ final class ASRService: ObservableObject { private var didPauseMediaForThisSession: Bool = false private var audioLevelSubject = PassthroughSubject() - var audioLevelPublisher: AnyPublisher { self.audioLevelSubject.eraseToAnyPublisher() } + var audioLevelPublisher: AnyPublisher { + self.audioLevelSubject.eraseToAnyPublisher() + } + private var lastAudioLevelSentAt: TimeInterval = 0 var currentRecordingSessionID: UInt64? { @@ -899,9 +902,7 @@ final class ASRService: ObservableObject { self.benchmarkCompletedStreamingChunks = 0 self.benchmarkLastChunkSampleCount = 0 self.lastEngineStartCompletedAt = nil - self.startupEngineConfigurationRecoveryScheduled = false - self.startupEngineConfigurationRecoveryCompletedAt = nil - self.startupEngineConfigurationRecoveryCompletedSampleCount = nil + self.startupRouteRecoveryTracker.clear() self.streamingChunkAnalyticsSuccessCount = 0 self.lastStreamingChunkFailureAnalyticsAt = nil (self.transcriptionProvider as? FluidAudioProvider)?.resetStreamingPreviewCache() @@ -960,6 +961,7 @@ final class ASRService: ObservableObject { DebugLogger.shared.info("✅ START() completed successfully", source: "ASRService") } catch { self.recordingSessionTracker.clearSession() + self.clearStartupCaptureReadiness(reason: "start_failed") DebugLogger.shared.error("Failed to start ASR session: \(error)", source: "ASRService") // Resume media if we paused it before the failure @@ -999,7 +1001,7 @@ final class ASRService: ObservableObject { let waitStartedAt = Date().timeIntervalSince1970 let configuration = StartCueCaptureReadiness.Configuration( stableDelaySeconds: self.startupCaptureReadyStableDelaySeconds, - afterRecoveryDelaySeconds: self.startupCaptureReadyAfterRecoveryDelaySeconds, + afterRecoveryDelaySeconds: self.startupCapturePostRecoveryDelaySeconds, timeoutSeconds: self.startupCaptureReadyTimeoutSeconds, minimumSamples: self.startupCaptureReadyMinimumSamples ) @@ -1017,9 +1019,9 @@ final class ASRService: ObservableObject { waitStartedAt: waitStartedAt, sampleCount: sampleCount, routeRecoveryIdle: routeRecoveryIdle, - startupRecoveryScheduled: self.startupEngineConfigurationRecoveryScheduled, - startupRecoveryCompletedAt: self.startupEngineConfigurationRecoveryCompletedAt, - startupRecoveryCompletedSampleCount: self.startupEngineConfigurationRecoveryCompletedSampleCount, + startupRecoveryScheduled: self.startupRouteRecoveryTracker.isScheduled, + startupRecoveryCompletedAt: self.startupRouteRecoveryTracker.completedAt, + startupRecoveryCompletedSampleCount: self.startupRouteRecoveryTracker.completedSampleCount, engineStartedAt: self.lastEngineStartCompletedAt ), configuration: configuration @@ -1033,7 +1035,7 @@ final class ASRService: ObservableObject { ) return true - case .timedOut(let ready): + case let .timedOut(ready): DebugLogger.shared.warning( "Timed out waiting for capture-ready start cue (ready=\(ready), samples=\(sampleCount), readySamples=\(evaluation.readySampleCount), routeRecoveryIdle=\(routeRecoveryIdle), stableEnough=\(evaluation.stableEnough))", source: "ASRService" @@ -1190,6 +1192,7 @@ final class ASRService: ObservableObject { self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil self.isRecoveringAudioRoute = false + self.clearStartupCaptureReadiness(reason: "stop") // Capture media pause state before we reset it, for resuming at the end let shouldResumeMedia = SettingsStore.shared.pauseMediaDuringTranscription && self.didPauseMediaForThisSession @@ -1482,6 +1485,7 @@ final class ASRService: ObservableObject { func stopWithoutTranscription() async { self.recordingSessionTracker.clearSession() + self.clearStartupCaptureReadiness(reason: "stop_without_transcription") guard self.isRunning else { return } defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } @@ -2045,12 +2049,8 @@ final class ASRService: ObservableObject { self.audioRouteRecoveryTask?.cancel() let isStartupEngineConfigurationRecovery = self.isStartupEngineConfigurationRecovery(reason: reason) - if isStartupEngineConfigurationRecovery { - self.startupEngineConfigurationRecoveryScheduled = true - self.startupEngineConfigurationRecoveryCompletedAt = nil - self.startupEngineConfigurationRecoveryCompletedSampleCount = nil - } else { - self.clearStartupCaptureReadiness(reason: "route_recovery_replaced_by_\(reason)") + self.markStartupRouteRecoveryPending(reason: reason) + if isStartupEngineConfigurationRecovery == false { self.audioCapturePipeline.setRecordingEnabled(false) self.audioLevelSubject.send(0.0) } @@ -2071,7 +2071,7 @@ final class ASRService: ObservableObject { private func recoveryDelayNanoseconds(for reason: String) -> UInt64 { self.isStartupEngineConfigurationRecovery(reason: reason) - ? self.startupEngineConfigurationRecoveryDelayNanoseconds + ? self.startupRouteRecoveryDelayNs : self.audioRouteRecoveryDelayNanoseconds } @@ -2081,19 +2081,31 @@ final class ASRService: ObservableObject { else { return false } let startAge = Date().timeIntervalSince1970 - lastEngineStartCompletedAt - return startAge >= 0 && startAge <= self.startupEngineConfigurationRecoveryWindowSeconds + return startAge >= 0 && startAge <= self.startupRouteRecoveryWindowSeconds + } + + private func markStartupRouteRecoveryPending(reason: String) { + self.startupRouteRecoveryTracker.markScheduled() + self.benchmarkLog("startup_capture_readiness_recovery_pending reason=\(reason)") + } + + private func completeStartupRouteRecoveryIfNeeded(reason: String) { + guard self.startupRouteRecoveryTracker.isScheduled else { return } + let sampleCount = self.audioBuffer.count + self.startupRouteRecoveryTracker.markCompleted( + at: Date().timeIntervalSince1970, + sampleCount: sampleCount + ) + self.benchmarkLog( + "startup_capture_readiness_recovery_completed reason=\(reason) sampleBaseline=\(sampleCount)" + ) } private func clearStartupCaptureReadiness(reason: String) { - if self.startupEngineConfigurationRecoveryScheduled || - self.startupEngineConfigurationRecoveryCompletedAt != nil || - self.startupEngineConfigurationRecoveryCompletedSampleCount != nil - { + if self.startupRouteRecoveryTracker.hasState { self.benchmarkLog("startup_capture_readiness_reset reason=\(reason)") } - self.startupEngineConfigurationRecoveryScheduled = false - self.startupEngineConfigurationRecoveryCompletedAt = nil - self.startupEngineConfigurationRecoveryCompletedSampleCount = nil + self.startupRouteRecoveryTracker.clear() } @MainActor @@ -2131,10 +2143,7 @@ final class ASRService: ObservableObject { } DebugLogger.shared.info("Audio route recovery succeeded", source: "ASRService") - if reason == "engine configuration changed", self.startupEngineConfigurationRecoveryScheduled { - self.startupEngineConfigurationRecoveryCompletedAt = Date().timeIntervalSince1970 - self.startupEngineConfigurationRecoveryCompletedSampleCount = self.audioBuffer.count - } + self.completeStartupRouteRecoveryIfNeeded(reason: reason) } catch { DebugLogger.shared.error("Audio route recovery failed: \(error)", source: "ASRService") await self.stopWithoutTranscription() @@ -2480,7 +2489,7 @@ final class ASRService: ObservableObject { return nil } - // Device caching for change detection + /// Device caching for change detection private var cachedDeviceUIDs: Set = [] private func cacheCurrentDeviceList(_ devices: [AudioDevice.Device]) { @@ -3418,6 +3427,34 @@ struct RecordingSessionTracker { } } +struct StartupRouteRecoveryTracker { + private(set) var isScheduled = false + private(set) var completedAt: TimeInterval? + private(set) var completedSampleCount: Int? + + var hasState: Bool { + self.isScheduled || self.completedAt != nil || self.completedSampleCount != nil + } + + mutating func markScheduled() { + self.isScheduled = true + self.completedAt = nil + self.completedSampleCount = nil + } + + mutating func markCompleted(at completedAt: TimeInterval, sampleCount: Int) { + self.isScheduled = false + self.completedAt = completedAt + self.completedSampleCount = sampleCount + } + + mutating func clear() { + self.isScheduled = false + self.completedAt = nil + self.completedSampleCount = nil + } +} + enum StartCueCaptureReadiness { struct Configuration { let stableDelaySeconds: TimeInterval diff --git a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift index 8390a433..d74a7273 100644 --- a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift +++ b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift @@ -1,5 +1,5 @@ -@testable import FluidVoice_Debug import CoreAudio +@testable import FluidVoice_Debug import XCTest final class StartCueCaptureReadinessTests: XCTestCase { @@ -7,7 +7,7 @@ final class StartCueCaptureReadinessTests: XCTestCase { stableDelaySeconds: 0.45, afterRecoveryDelaySeconds: 0.10, timeoutSeconds: 1.75, - minimumSamples: 2_048 + minimumSamples: 2048 ) func testRestartInvalidatesPreviousRecordingSession() { @@ -30,7 +30,7 @@ final class StartCueCaptureReadinessTests: XCTestCase { self.snapshot( activeSessionID: 2, requestedSessionID: 1, - sampleCount: 4_096, + sampleCount: 4096, engineStartedAt: 9.0 ), configuration: self.configuration @@ -42,9 +42,9 @@ final class StartCueCaptureReadinessTests: XCTestCase { func testPostRecoveryReadinessRequiresSamplesAfterRecovery() { let waiting = StartCueCaptureReadiness.evaluate( self.snapshot( - sampleCount: 4_096, + sampleCount: 4096, startupRecoveryCompletedAt: 10.0, - startupRecoveryCompletedSampleCount: 4_096 + startupRecoveryCompletedSampleCount: 4096 ), configuration: self.configuration ) @@ -54,15 +54,15 @@ final class StartCueCaptureReadinessTests: XCTestCase { let ready = StartCueCaptureReadiness.evaluate( self.snapshot( - sampleCount: 6_144, + sampleCount: 6144, startupRecoveryCompletedAt: 10.0, - startupRecoveryCompletedSampleCount: 4_096 + startupRecoveryCompletedSampleCount: 4096 ), configuration: self.configuration ) XCTAssertEqual(ready.decision, .ready) - XCTAssertEqual(ready.readySampleCount, 2_048) + XCTAssertEqual(ready.readySampleCount, 2048) } func testTimeoutFallbackStillRequiresCurrentSessionSamplesAndIdleRouteRecovery() { @@ -72,7 +72,7 @@ final class StartCueCaptureReadinessTests: XCTestCase { requestedSessionID: 1, now: 12.0, waitStartedAt: 10.0, - sampleCount: 4_096, + sampleCount: 4096, engineStartedAt: 11.9 ), configuration: self.configuration @@ -96,7 +96,7 @@ final class StartCueCaptureReadinessTests: XCTestCase { self.snapshot( now: 12.0, waitStartedAt: 10.0, - sampleCount: 4_096, + sampleCount: 4096, routeRecoveryIdle: false, engineStartedAt: 9.0 ), @@ -106,6 +106,54 @@ final class StartCueCaptureReadinessTests: XCTestCase { XCTAssertEqual(routeRecoveryPending.decision, .timedOut(ready: false)) } + func testRouteRecoveryTrackerRequiresFreshSamplesAfterRecovery() { + var tracker = StartupRouteRecoveryTracker() + tracker.markScheduled() + tracker.markCompleted(at: 10.0, sampleCount: 4096) + + XCTAssertFalse(tracker.isScheduled) + XCTAssertTrue(tracker.hasState) + + let waiting = StartCueCaptureReadiness.evaluate( + self.snapshot( + sampleCount: 4096, + startupRecoveryCompletedAt: tracker.completedAt, + startupRecoveryCompletedSampleCount: tracker.completedSampleCount + ), + configuration: self.configuration + ) + + XCTAssertEqual(waiting.decision, .wait) + XCTAssertEqual(waiting.readySampleCount, 0) + } + + func testTimeoutAfterRecoveryDoesNotUsePreRecoverySamples() { + let evaluation = StartCueCaptureReadiness.evaluate( + self.snapshot( + now: 12.0, + waitStartedAt: 10.0, + sampleCount: 4096, + startupRecoveryCompletedAt: 10.1, + startupRecoveryCompletedSampleCount: 4096 + ), + configuration: self.configuration + ) + + XCTAssertEqual(evaluation.decision, .timedOut(ready: false)) + XCTAssertEqual(evaluation.readySampleCount, 0) + } + + func testRouteRecoveryTrackerClearsStaleBaselineWhenRecoveryIsReplaced() { + var tracker = StartupRouteRecoveryTracker() + tracker.markScheduled() + tracker.markCompleted(at: 9.0, sampleCount: 2048) + tracker.markScheduled() + + XCTAssertTrue(tracker.isScheduled) + XCTAssertNil(tracker.completedAt) + XCTAssertNil(tracker.completedSampleCount) + } + func testBluetoothDeviceDetectionUsesNameFallbackForAggregateRoutes() { let airPods = AudioDevice.Device( id: AudioObjectID(kAudioObjectUnknown), From 84115b771b63601f0acffc7a0f8b4abd93e0c477 Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 14:45:11 -0500 Subject: [PATCH 09/10] fix: avoid stale start cue recovery races --- Sources/Fluid/Services/ASRService.swift | 49 ++++++++++++++----- .../Fluid/Services/GlobalHotkeyManager.swift | 4 +- .../StartCueCaptureReadinessTests.swift | 49 +++++++++++++++++++ 3 files changed, 87 insertions(+), 15 deletions(-) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 5cd44e5e..5298b9ba 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -586,7 +586,7 @@ final class ASRService: ObservableObject { private let startupCaptureReadyPollNanoseconds: UInt64 = 25_000_000 private let startupCaptureReadyMinimumSamples = 2048 private let stoppedEngineReuseGraceNanoseconds: UInt64 = 20_000_000_000 - private var lastEngineStartCompletedAt: TimeInterval? + private var initialEngineStartCompletedAt: TimeInterval? private var startupRouteRecoveryTracker = StartupRouteRecoveryTracker() private var recordingSessionTracker = RecordingSessionTracker() private var stoppedEngineRetainedAt: TimeInterval? @@ -901,7 +901,7 @@ final class ASRService: ObservableObject { self.benchmarkStreamingChunkIndex = 0 self.benchmarkCompletedStreamingChunks = 0 self.benchmarkLastChunkSampleCount = 0 - self.lastEngineStartCompletedAt = nil + self.initialEngineStartCompletedAt = nil self.startupRouteRecoveryTracker.clear() self.streamingChunkAnalyticsSuccessCount = 0 self.lastStreamingChunkFailureAnalyticsAt = nil @@ -921,7 +921,7 @@ final class ASRService: ObservableObject { DebugLogger.shared.debug("✅ configureSession() completed", source: "ASRService") DebugLogger.shared.debug("🚀 Calling startEngine()...", source: "ASRService") - try self.startEngine() + try self.startEngine(context: .initialRecording) DebugLogger.shared.debug("✅ startEngine() completed", source: "ASRService") DebugLogger.shared.debug("🎧 Setting up engine tap...", source: "ASRService") @@ -1022,7 +1022,7 @@ final class ASRService: ObservableObject { startupRecoveryScheduled: self.startupRouteRecoveryTracker.isScheduled, startupRecoveryCompletedAt: self.startupRouteRecoveryTracker.completedAt, startupRecoveryCompletedSampleCount: self.startupRouteRecoveryTracker.completedSampleCount, - engineStartedAt: self.lastEngineStartCompletedAt + engineStartedAt: self.initialEngineStartCompletedAt ), configuration: configuration ) @@ -1885,7 +1885,7 @@ final class ASRService: ObservableObject { } } - private func startEngine() throws { + private func startEngine(context: EngineStartContext) throws { DebugLogger.shared.debug("🚀 startEngine() - ENTERED", source: "ASRService") var attempts = 0 var lastError: Error? @@ -1928,7 +1928,9 @@ final class ASRService: ObservableObject { ) try self.engine.start() - self.lastEngineStartCompletedAt = Date().timeIntervalSince1970 + if context == .initialRecording { + self.initialEngineStartCompletedAt = Date().timeIntervalSince1970 + } DebugLogger.shared.info("AVAudioEngine started successfully on attempt \(attempts + 1)", source: "ASRService") return } catch { @@ -2076,12 +2078,12 @@ final class ASRService: ObservableObject { } private func isStartupEngineConfigurationRecovery(reason: String) -> Bool { - guard reason == "engine configuration changed", - let lastEngineStartCompletedAt - else { return false } - - let startAge = Date().timeIntervalSince1970 - lastEngineStartCompletedAt - return startAge >= 0 && startAge <= self.startupRouteRecoveryWindowSeconds + StartupEngineConfigurationRecoveryPolicy.isStartupRecovery( + reason: reason, + initialEngineStartedAt: self.initialEngineStartCompletedAt, + now: Date().timeIntervalSince1970, + windowSeconds: self.startupRouteRecoveryWindowSeconds + ) } private func markStartupRouteRecoveryPending(reason: String) { @@ -2134,7 +2136,7 @@ final class ASRService: ObservableObject { do { try self.configureSession() - try self.startEngine() + try self.startEngine(context: .routeRecovery) try self.setupEngineTap() self.audioCapturePipeline.setRecordingEnabled(true) @@ -3405,6 +3407,11 @@ private extension ASRService { } } +private enum EngineStartContext { + case initialRecording + case routeRecovery +} + // MARK: - Audio capture pipeline struct RecordingSessionTracker { @@ -3455,6 +3462,22 @@ struct StartupRouteRecoveryTracker { } } +enum StartupEngineConfigurationRecoveryPolicy { + static func isStartupRecovery( + reason: String, + initialEngineStartedAt: TimeInterval?, + now: TimeInterval, + windowSeconds: TimeInterval + ) -> Bool { + guard reason == "engine configuration changed", + let initialEngineStartedAt + else { return false } + + let startAge = now - initialEngineStartedAt + return startAge >= 0 && startAge <= windowSeconds + } +} + enum StartCueCaptureReadiness { struct Configuration { let stableDelaySeconds: TimeInterval diff --git a/Sources/Fluid/Services/GlobalHotkeyManager.swift b/Sources/Fluid/Services/GlobalHotkeyManager.swift index d9e09d88..a483ca2f 100644 --- a/Sources/Fluid/Services/GlobalHotkeyManager.swift +++ b/Sources/Fluid/Services/GlobalHotkeyManager.swift @@ -1015,6 +1015,8 @@ final class GlobalHotkeyManager: NSObject { source: "GlobalHotkeyManager" ) + self.resetModifierOnlyShortcutTracking(reason: .tapDisabled) + guard self.tapRecoveryTask == nil else { DebugLogger.shared.debug( "Event tap recovery already scheduled; ignoring duplicate disabled event (\(reason))", @@ -1034,8 +1036,6 @@ final class GlobalHotkeyManager: NSObject { return } - self.resetModifierOnlyShortcutTracking(reason: .tapDisabled) - guard AXIsProcessTrusted() else { DebugLogger.shared.warning( "Event tap async recovery skipped because Accessibility is not trusted", diff --git a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift index d74a7273..e4336904 100644 --- a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift +++ b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift @@ -154,6 +154,55 @@ final class StartCueCaptureReadinessTests: XCTestCase { XCTAssertNil(tracker.completedSampleCount) } + func testStartupEngineConfigurationPolicyAcceptsInitialEngineWindow() { + XCTAssertTrue( + StartupEngineConfigurationRecoveryPolicy.isStartupRecovery( + reason: "engine configuration changed", + initialEngineStartedAt: 10.0, + now: 10.5, + windowSeconds: 2.0 + ) + ) + } + + func testStartupEngineConfigurationPolicyRejectsNonStartupEvents() { + XCTAssertFalse( + StartupEngineConfigurationRecoveryPolicy.isStartupRecovery( + reason: "input device changed", + initialEngineStartedAt: 10.0, + now: 10.5, + windowSeconds: 2.0 + ) + ) + XCTAssertFalse( + StartupEngineConfigurationRecoveryPolicy.isStartupRecovery( + reason: "engine configuration changed", + initialEngineStartedAt: nil, + now: 10.5, + windowSeconds: 2.0 + ) + ) + XCTAssertFalse( + StartupEngineConfigurationRecoveryPolicy.isStartupRecovery( + reason: "engine configuration changed", + initialEngineStartedAt: 10.0, + now: 12.1, + windowSeconds: 2.0 + ) + ) + } + + func testStartupEngineConfigurationPolicyDoesNotUseRouteRecoveryStartTime() { + XCTAssertFalse( + StartupEngineConfigurationRecoveryPolicy.isStartupRecovery( + reason: "engine configuration changed", + initialEngineStartedAt: 0.0, + now: 10.1, + windowSeconds: 2.0 + ) + ) + } + func testBluetoothDeviceDetectionUsesNameFallbackForAggregateRoutes() { let airPods = AudioDevice.Device( id: AudioObjectID(kAudioObjectUnknown), From fa95c81f50b77560e552a22f17f2bf3f79094769 Mon Sep 17 00:00:00 2001 From: Jon Patterson Date: Tue, 30 Jun 2026 15:45:37 -0500 Subject: [PATCH 10/10] fix: reuse cached startup recovery decision --- Sources/Fluid/Services/ASRService.swift | 24 +++++++++++++++---- .../StartCueCaptureReadinessTests.swift | 19 +++++++++++++++ 2 files changed, 38 insertions(+), 5 deletions(-) diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 5298b9ba..6e391f95 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -2056,7 +2056,9 @@ final class ASRService: ObservableObject { self.audioCapturePipeline.setRecordingEnabled(false) self.audioLevelSubject.send(0.0) } - let recoveryDelayNanoseconds = self.recoveryDelayNanoseconds(for: reason) + let recoveryDelayNanoseconds = self.recoveryDelayNanoseconds( + isStartupEngineConfigurationRecovery: isStartupEngineConfigurationRecovery + ) DebugLogger.shared.warning( "Audio route changed while recording; scheduling recovery (\(reason), delayMs=\(recoveryDelayNanoseconds / 1_000_000))", source: "ASRService" @@ -2071,10 +2073,12 @@ final class ASRService: ObservableObject { } } - private func recoveryDelayNanoseconds(for reason: String) -> UInt64 { - self.isStartupEngineConfigurationRecovery(reason: reason) - ? self.startupRouteRecoveryDelayNs - : self.audioRouteRecoveryDelayNanoseconds + private func recoveryDelayNanoseconds(isStartupEngineConfigurationRecovery: Bool) -> UInt64 { + StartupRouteRecoveryDelay.nanoseconds( + isStartupEngineConfigurationRecovery: isStartupEngineConfigurationRecovery, + startupDelayNanoseconds: self.startupRouteRecoveryDelayNs, + defaultDelayNanoseconds: self.audioRouteRecoveryDelayNanoseconds + ) } private func isStartupEngineConfigurationRecovery(reason: String) -> Bool { @@ -3478,6 +3482,16 @@ enum StartupEngineConfigurationRecoveryPolicy { } } +enum StartupRouteRecoveryDelay { + static func nanoseconds( + isStartupEngineConfigurationRecovery: Bool, + startupDelayNanoseconds: UInt64, + defaultDelayNanoseconds: UInt64 + ) -> UInt64 { + isStartupEngineConfigurationRecovery ? startupDelayNanoseconds : defaultDelayNanoseconds + } +} + enum StartCueCaptureReadiness { struct Configuration { let stableDelaySeconds: TimeInterval diff --git a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift index e4336904..23736623 100644 --- a/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift +++ b/Tests/FluidDictationIntegrationTests/StartCueCaptureReadinessTests.swift @@ -203,6 +203,25 @@ final class StartCueCaptureReadinessTests: XCTestCase { ) } + func testRouteRecoveryDelayUsesCachedStartupDecision() { + XCTAssertEqual( + StartupRouteRecoveryDelay.nanoseconds( + isStartupEngineConfigurationRecovery: true, + startupDelayNanoseconds: 100, + defaultDelayNanoseconds: 1000 + ), + 100 + ) + XCTAssertEqual( + StartupRouteRecoveryDelay.nanoseconds( + isStartupEngineConfigurationRecovery: false, + startupDelayNanoseconds: 100, + defaultDelayNanoseconds: 1000 + ), + 1000 + ) + } + func testBluetoothDeviceDetectionUsesNameFallbackForAggregateRoutes() { let airPods = AudioDevice.Device( id: AudioObjectID(kAudioObjectUnknown),