diff --git a/Sources/Fluid/ContentView.swift b/Sources/Fluid/ContentView.swift index 19878293..8ed9394b 100644 --- a/Sources/Fluid/ContentView.swift +++ b/Sources/Fluid/ContentView.swift @@ -2949,12 +2949,14 @@ struct ContentView: View { self.menuBarManager.showRecordingOverlayImmediately() } - if !self.isRecordingForCommand, !self.isRecordingForRewrite { - TranscriptionSoundPlayer.shared.playStartSound() - } + let shouldPlayStartSound = !self.isRecordingForCommand && !self.isRecordingForRewrite Task { - await self.asr.start() + await self.asr.start(beforeCaptureEnabled: { + if shouldPlayStartSound { + await self.playTranscriptionStartSoundBeforeCapture() + } + }) if !self.asr.isRunning { self.menuBarManager.hideRecordingOverlayImmediately(reason: "asr_start_failed") } @@ -3167,7 +3169,7 @@ struct ContentView: View { "ContentView: selected model for dictate hotkey=\(SettingsStore.shared.selectedSpeechModel.displayName)", source: "ContentView" ) - self.beginDictationRecording(for: .primary, mode: .dictate) + await self.beginDictationRecording(for: .primary, mode: .dictate) }, stopAndProcessCallback: { let route = self.currentDictationOutputRouteForHotkeyStop() @@ -3176,11 +3178,11 @@ struct ContentView: View { }, promptModeCallback: { DebugLogger.shared.info("Prompt mode triggered", source: "ContentView") - self.beginDictationRecording(for: .secondary, mode: .promptMode) + await self.beginDictationRecording(for: .secondary, mode: .promptMode) }, promptSelectionCallback: { selection in DebugLogger.shared.info("Prompt selection shortcut triggered", source: "ContentView") - self.beginDictationRecording(for: selection, mode: .promptMode) + await self.beginDictationRecording(for: selection, mode: .promptMode) }, commandModeCallback: { DebugLogger.shared.info("Command mode triggered", source: "ContentView") @@ -3199,9 +3201,10 @@ struct ContentView: View { "Starting voice recording for command", source: "ContentView" ) - TranscriptionSoundPlayer.shared.playStartSound() 
Task { - await self.asr.start() + await self.asr.start(beforeCaptureEnabled: { + await self.playTranscriptionStartSoundBeforeCapture() + }) } }, rewriteModeCallback: { @@ -3234,9 +3237,10 @@ struct ContentView: View { // Start recording immediately for the edit instruction DebugLogger.shared.info("Starting voice recording for edit mode", source: "ContentView") - TranscriptionSoundPlayer.shared.playStartSound() Task { - await self.asr.start() + await self.asr.start(beforeCaptureEnabled: { + await self.playTranscriptionStartSoundBeforeCapture() + }) } }, isDictateRecordingProvider: { @@ -3532,7 +3536,7 @@ extension ContentView { } } - private func beginDictationRecording(for slot: SettingsStore.DictationShortcutSlot, mode: ActiveRecordingMode) { + private func beginDictationRecording(for slot: SettingsStore.DictationShortcutSlot, mode: ActiveRecordingMode) async { DebugLogger.shared.debug("Begin dictation recording for slot \(slot.rawValue)", source: "ContentView") self.appBench("begin_recording slot=\(slot.rawValue) mode=\(mode.rawValue)") if self.isOnboardingVoicePlaygroundStepActive { @@ -3542,30 +3546,38 @@ extension ContentView { self.settings.playgroundUsed = false self.playgroundUsed = false } + self.appBench("capture_context_start") self.captureRecordingContext() - self.applyDictationPromptConfiguration(for: SettingsStore.shared.dictationPromptSelection(for: slot)) + self.appBench("capture_context_end") + self.appBench("pre_asr_state_start") self.applyDictationShortcutSelectionContext(for: slot) self.setActiveRecordingMode(mode) self.rewriteModeService.clearState() + self.appBench("pre_asr_state_end") + + self.appBench("prompt_config_start") + self.applyDictationPromptConfiguration(for: SettingsStore.shared.dictationPromptSelection(for: slot)) + self.appBench("prompt_config_end") self.appBench("overlay_mode_request mode=Dictation") self.menuBarManager.setOverlayMode(.dictation) self.menuBarManager.showRecordingOverlayImmediately() 
self.appBench("overlay_mode_requested mode=Dictation") - self.prewarmPrivateAIDictationIfNeeded(for: slot) - guard !self.asr.isRunning else { + let wasAlreadyRunning = self.asr.isRunning + if wasAlreadyRunning { self.appBench("asr_start_skipped reason=already_running") - return - } - if SettingsStore.shared.enableTranscriptionSounds { - TranscriptionSoundPlayer.shared.playStartSound() } - Task { + + if !wasAlreadyRunning { let asrStartStartedAt = ProcessInfo.processInfo.systemUptime DebugLogger.shared.benchmark("APP_BENCH", message: "asr_start_call", source: "AppBenchmark") - await self.asr.start() + await self.asr.start(beforeCaptureEnabled: { + await self.playTranscriptionStartSoundBeforeCapture(logBenchmarks: true) + }) if !self.asr.isRunning { + self.appBench("asr_start_failed") self.menuBarManager.hideRecordingOverlayImmediately(reason: "asr_start_failed") + return } DebugLogger.shared.benchmark( "APP_BENCH", @@ -3573,13 +3585,43 @@ extension ContentView { source: "AppBenchmark" ) } + + self.appBench("prewarm_private_ai_start") + self.prewarmPrivateAIDictationIfNeeded(for: slot) + self.appBench("prewarm_private_ai_end") } - private func beginDictationRecording(for selection: SettingsStore.DictationPromptSelection, mode: ActiveRecordingMode) { + private func beginDictationRecording(for selection: SettingsStore.DictationPromptSelection, mode: ActiveRecordingMode) async { let settings = SettingsStore.shared settings.setDictationPromptSelection(selection, for: .secondary) self.applyDictationPromptConfiguration(for: selection) - self.beginDictationRecording(for: .secondary, mode: mode) + await self.beginDictationRecording(for: .secondary, mode: mode) + } + + private func playTranscriptionStartSoundBeforeCapture(logBenchmarks: Bool = false) async { + guard SettingsStore.shared.enableTranscriptionSounds else { return } + + if logBenchmarks { + self.appBench("start_sound_start") + } + + let duration = TranscriptionSoundPlayer.shared.playStartSound() + let gateSeconds 
= min(max(duration + 0.04, 0), 1.0) + if logBenchmarks { + DebugLogger.shared.benchmark( + "APP_BENCH", + message: "start_sound_played durationMs=\(Int((duration * 1000).rounded())) gateMs=\(Int((gateSeconds * 1000).rounded()))", + source: "AppBenchmark" + ) + } + + if gateSeconds > 0 { + try? await Task.sleep(nanoseconds: UInt64(gateSeconds * 1_000_000_000)) + } + + if logBenchmarks { + self.appBench("start_sound_gate_done") + } } private func applyDictationPromptConfiguration(for selection: SettingsStore.DictationPromptSelection) { diff --git a/Sources/Fluid/Services/ASRService.swift b/Sources/Fluid/Services/ASRService.swift index 29d4d7b2..5942678c 100644 --- a/Sources/Fluid/Services/ASRService.swift +++ b/Sources/Fluid/Services/ASRService.swift @@ -94,6 +94,7 @@ final class ASRService: ObservableObject { @Published var downloadProgress: Double? = nil @Published var downloadingModelId: String? = nil // Tracks which model is currently being downloaded @Published private(set) var isCancellingModelDownload: Bool = false + @Published private(set) var isDictionaryTrainingCaptureActive: Bool = false private var isStarting: Bool = false // Guard against re-entrant start() calls private var hasCompletedFirstTranscription: Bool = false // Track if model has warmed up with first transcription @@ -101,6 +102,24 @@ final class ASRService: ObservableObject { private var hasPendingParakeetVocabularyReload: Bool = false private var vocabularyChangeObserver: NSObjectProtocol? + private struct CaptureEngineConfiguration: Equatable { + let syncAudioDevicesWithSystem: Bool + let preferredInputDeviceUID: String? + let preferredOutputDeviceUID: String? + } + + private let fastRestartWarmEngineEnabled = true + private let fastRestartWarmEngineHoldNanoseconds: UInt64 = 8_000_000_000 + private let fastStartPrewarmHoldNanoseconds: UInt64 = 2_000_000_000 + private var fastRestartWarmEngineShutdownTask: Task? + private var fastStartPrewarmTask: Task? 
+ private var fastStartPrewarmShutdownTask: Task? + private var idleCaptureEnginePrepareTask: Task? + private var isEngineWarmForFastRestart = false + private var isCaptureEnginePrewarmedForFastStart = false + private var isFastStartPrewarmingCaptureEngine = false + private var warmCaptureEngineConfiguration: CaptureEngineConfiguration? + // MARK: - Error Handling @Published var errorTitle: String = "Error" @@ -474,6 +493,9 @@ final class ASRService: ObservableObject { func resetTranscriptionProvider() { let newModel = SettingsStore.shared.selectedSpeechModel DebugLogger.shared.info("ASRService: Switching to '\(newModel.displayName)', resetting provider state...", source: "ASRService") + if !self.isRunning, self.isEngineWarmForFastRestart { + self.tearDownCaptureEngine(reason: "provider reset while warm", releaseAsync: true) + } self.isAsrReady = false self.modelsExistOnDisk = false @@ -649,6 +671,10 @@ final class ASRService: ObservableObject { } deinit { + self.fastRestartWarmEngineShutdownTask?.cancel() + self.fastStartPrewarmTask?.cancel() + self.fastStartPrewarmShutdownTask?.cancel() + self.idleCaptureEnginePrepareTask?.cancel() if let observer = self.vocabularyChangeObserver { NotificationCenter.default.removeObserver(observer) } @@ -739,6 +765,7 @@ final class ASRService: ObservableObject { // Initialize device list cache self.cacheCurrentDeviceList(AudioDevice.listInputDevices()) + self.scheduleIdleCaptureEnginePreparation(reason: "startup") // Check if models exist on disk and auto-load if present // This is done in a Task to support async model detection (e.g., AppleSpeechAnalyzerProvider) @@ -809,6 +836,9 @@ final class ASRService: ObservableObject { Task { @MainActor in self.micPermissionGranted = granted self.micStatus = granted ? 
.authorized : .denied + if granted { + self.scheduleIdleCaptureEnginePreparation(reason: "mic permission granted") + } } } } @@ -839,7 +869,11 @@ final class ASRService: ObservableObject { /// ## Errors /// If audio session configuration fails, the method will silently fail /// and `isRunning` will remain `false`. Check the debug logs for details. - func start() async { + func start( + forDictionaryTraining: Bool = false, + beforeCaptureEnabled: (@MainActor () async -> Void)? = nil + ) async { + let startBenchmarkStartedAt = Date().timeIntervalSince1970 DebugLogger.shared.info("🎀 START() called - beginning recording session", source: "ASRService") guard self.micStatus == .authorized else { @@ -858,6 +892,7 @@ final class ASRService: ObservableObject { self.isRecoveringAudioRoute = false DebugLogger.shared.debug("🧹 Clearing buffers and state", source: "ASRService") + let stateResetStartedAt = Date().timeIntervalSince1970 self.finalText.removeAll() self.audioBuffer.clear(keepingCapacity: true) // specific optimization for restart self.partialTranscription.removeAll() @@ -871,33 +906,58 @@ final class ASRService: ObservableObject { self.benchmarkStreamingChunkIndex = 0 self.benchmarkCompletedStreamingChunks = 0 self.benchmarkLastChunkSampleCount = 0 + DictationStartProbe.shared.markASRStart(session: self.benchmarkSessionID) self.streamingChunkAnalyticsSuccessCount = 0 self.lastStreamingChunkFailureAnalyticsAt = nil (self.transcriptionProvider as? 
FluidAudioProvider)?.resetStreamingPreviewCache() - self.audioCapturePipeline.setRecordingEnabled(true) + self.audioCapturePipeline.setRecordingEnabled(false) self.refreshWordBoostStatus() let dims = self.currentTranscriptionAnalyticsDimensions() self.benchmarkLog("recording_start model=\(dims.model) provider=\(dims.provider) supportsStreaming=\(SettingsStore.shared.selectedSpeechModel.supportsStreaming)") + self.benchmarkLog( + "start_state_reset elapsedMs=\(self.elapsedMilliseconds(since: stateResetStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) DebugLogger.shared.debug("βœ… Buffers cleared", source: "ASRService") self.isStarting = true defer { self.isStarting = false } + self.isDictionaryTrainingCaptureActive = false do { - DebugLogger.shared.debug("βš™οΈ Calling configureSession()...", source: "ASRService") - try self.configureSession() - DebugLogger.shared.debug("βœ… configureSession() completed", source: "ASRService") + let warmReuseStartedAt = Date().timeIntervalSince1970 + let reusedWarmEngine = self.reuseWarmCaptureEngineIfAvailable() + self.benchmarkLog( + "start_warm_reuse_check reused=\(reusedWarmEngine) elapsedMs=\(self.elapsedMilliseconds(since: warmReuseStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) + if reusedWarmEngine { + DebugLogger.shared.debug("βœ… Warm capture engine reused", source: "ASRService") + } else { + DebugLogger.shared.debug("βš™οΈ Calling configureSession()...", source: "ASRService") + let configureStartedAt = Date().timeIntervalSince1970 + try self.configureSession() + self.benchmarkLog( + "start_configure elapsedMs=\(self.elapsedMilliseconds(since: configureStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) + DebugLogger.shared.debug("βœ… configureSession() completed", source: "ASRService") - DebugLogger.shared.debug("πŸš€ Calling startEngine()...", source: "ASRService") - try self.startEngine() - 
DebugLogger.shared.debug("βœ… startEngine() completed", source: "ASRService") + DebugLogger.shared.debug("πŸš€ Calling startEngine()...", source: "ASRService") + let engineStartStartedAt = Date().timeIntervalSince1970 + try self.startEngine() + self.benchmarkLog( + "start_engine elapsedMs=\(self.elapsedMilliseconds(since: engineStartStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) + DebugLogger.shared.debug("βœ… startEngine() completed", source: "ASRService") + } - DebugLogger.shared.debug("🎧 Setting up engine tap...", source: "ASRService") - try self.setupEngineTap() - DebugLogger.shared.debug("βœ… Engine tap setup complete", source: "ASRService") + let captureCallbackStartedAt = Date().timeIntervalSince1970 + await beforeCaptureEnabled?() + self.benchmarkLog( + "start_capture_callback elapsedMs=\(self.elapsedMilliseconds(since: captureCallbackStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) // Pause system media AFTER successful audio setup but BEFORE setting isRunning // This ensures we only pause media when we know recording will succeed + let mediaPauseStartedAt = Date().timeIntervalSince1970 if SettingsStore.shared.pauseMediaDuringTranscription { let didPause = await MediaPlaybackService.shared.pauseIfPlaying() self.didPauseMediaForThisSession = didPause @@ -905,29 +965,54 @@ final class ASRService: ObservableObject { DebugLogger.shared.info("🎡 Paused system media for transcription", source: "ASRService") } } + self.benchmarkLog( + "start_media_pause enabled=\(SettingsStore.shared.pauseMediaDuringTranscription) " + + "didPause=\(self.didPauseMediaForThisSession) " + + "elapsedMs=\(self.elapsedMilliseconds(since: mediaPauseStartedAt)) " + + "totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) + + self.audioCapturePipeline.setRecordingEnabled(true) + DictationStartProbe.shared.markCaptureEnabled(session: self.benchmarkSessionID) self.isRunning = true + 
self.isDictionaryTrainingCaptureActive = forDictionaryTraining DebugLogger.shared.info("βœ… isRunning set to TRUE", source: "ASRService") // Start monitoring the currently bound device for disconnection + let deviceMonitorStartedAt = Date().timeIntervalSince1970 if let currentDevice = getCurrentlyBoundInputDevice() { DebugLogger.shared.debug("πŸ‘€ Starting device monitoring for: \(currentDevice.name)", source: "ASRService") self.startMonitoringDevice(currentDevice.id) } else { DebugLogger.shared.debug("ℹ️ No device to monitor", source: "ASRService") } + self.benchmarkLog( + "start_device_monitor elapsedMs=\(self.elapsedMilliseconds(since: deviceMonitorStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) // Only start streaming for models that support it (large Whisper models are too slow) let model = SettingsStore.shared.selectedSpeechModel - if model.supportsStreaming { + let streamingStartStartedAt = Date().timeIntervalSince1970 + if model.supportsStreaming, !forDictionaryTraining { DebugLogger.shared.debug("πŸ“‘ Starting streaming transcription...", source: "ASRService") self.benchmarkLog("streaming_timer_start intervalMs=\(Int((self.streamingChunkDurationSeconds * 1000).rounded())) minSamples=\(self.minimumStreamingPreviewSamples)") self.startStreamingTranscription() + } else if forDictionaryTraining { + DebugLogger.shared.debug("⏸️ Skipping streaming for dictionary training sample", source: "ASRService") } else { DebugLogger.shared.debug("⏸️ Skipping streaming - model '\(model.displayName)' does not support real-time chunk processing", source: "ASRService") } + self.benchmarkLog( + "start_streaming_setup elapsedMs=\(self.elapsedMilliseconds(since: streamingStartStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) + self.benchmarkLog( + "start_done reusedWarmEngine=\(reusedWarmEngine) totalMs=\(self.elapsedMilliseconds(since: startBenchmarkStartedAt))" + ) DebugLogger.shared.info("βœ… START() 
completed successfully", source: "ASRService") } catch { + self.isDictionaryTrainingCaptureActive = false + self.tearDownCaptureEngine(reason: "start failure", releaseAsync: true) DebugLogger.shared.error("Failed to start ASR session: \(error)", source: "ASRService") // Resume media if we paused it before the failure @@ -995,17 +1080,25 @@ final class ASRService: ObservableObject { /// final transcription pass. Use this for immediate stop cues that /// shouldn't wait on finalization. Only invoked when capture was actually /// running (i.e. not when `stop()` early-returns because `isRunning` is false). - func stop(onCaptureStopped: (@MainActor () -> Void)? = nil) async -> String { + func stop( + onCaptureStopped: (@MainActor () -> Void)? = nil, + forDictionaryTraining: Bool = false + ) async -> String { DebugLogger.shared.info("πŸ›‘ STOP() called - beginning shutdown sequence", source: "ASRService") self.lastCompletedAudioSnapshot = nil let stopStartedAt = Date().timeIntervalSince1970 self.benchmarkLog("stop_start ageMs=\(self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt)) bufferedSamples=\(self.audioBuffer.count)") guard self.isRunning else { + self.isDictionaryTrainingCaptureActive = false DebugLogger.shared.warning("⚠️ STOP() - not running, returning empty string", source: "ASRService") return "" } - defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } + let useDictionaryTrainingPath = forDictionaryTraining || self.isDictionaryTrainingCaptureActive + defer { + self.applyPendingParakeetVocabularyReloadIfNeeded() + self.isDictionaryTrainingCaptureActive = false + } self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil @@ -1021,7 +1114,9 @@ final class ASRService: ObservableObject { self.audioCapturePipeline.setRecordingEnabled(false) DebugLogger.shared.debug("βœ… Capture pipeline disabled", source: "ASRService") - await self.runFastPreviewStopGraceIfNeeded() + if !useDictionaryTrainingPath { + await 
self.runFastPreviewStopGraceIfNeeded() + } // CRITICAL: Set isRunning to false before teardown so in-flight chunks stop safely. DebugLogger.shared.debug("🚫 Setting isRunning = false...", source: "ASRService") @@ -1033,27 +1128,19 @@ final class ASRService: ObservableObject { self.stopMonitoringDevice() DebugLogger.shared.debug("βœ… Device monitoring stopped", source: "ASRService") - // Stop the audio engine to stop new audio from coming in - DebugLogger.shared.debug("🎧 Removing engine tap...", source: "ASRService") - self.removeEngineTap() - DebugLogger.shared.debug("βœ… Engine tap removed", source: "ASRService") - - DebugLogger.shared.debug("πŸ›‘ Calling engine.stop()...", source: "ASRService") - self.engine.stop() - DebugLogger.shared.debug("βœ… Engine stopped", source: "ASRService") + if useDictionaryTrainingPath || !self.fastRestartWarmEngineEnabled { + DebugLogger.shared.debug("πŸ›‘ Tearing down capture engine...", source: "ASRService") + self.tearDownCaptureEngine(reason: "recording stopped") + DebugLogger.shared.debug("βœ… Capture engine torn down", source: "ASRService") + } else { + self.scheduleFastRestartWarmEngineShutdown(reason: "recording stopped") + } // Capture has fully ended β€” invoke the callback so callers can play a // stop cue or release capture-dependent UI without waiting on the // (potentially slow) final transcription pass. 
await MainActor.run { onCaptureStopped?() } - // Recreate the engine instance instead of calling reset() to prevent format corruption - // VoiceInk approach: tearing down and rebuilding ensures fresh, valid audio format on restart - DebugLogger.shared.debug("πŸ—‘οΈ Deallocating old engine and creating fresh instance...", source: "ASRService") - self.engineStorage = nil // Explicitly release old engine - // New engine will be lazily created on next access via computed property - DebugLogger.shared.debug("βœ… Engine instance recreated", source: "ASRService") - // CRITICAL FIX: Await completion of streaming task AND any pending transcriptions // This prevents use-after-free crashes (EXC_BAD_ACCESS) when clearing buffer DebugLogger.shared.debug("⏳ Awaiting stopStreamingTimerAndAwait()...", source: "ASRService") @@ -1136,8 +1223,19 @@ final class ASRService: ObservableObject { let finalStartedAt = Date().timeIntervalSince1970 let result: ASRTranscriptionResult let finalSource: String - if let fluidProvider = provider as? FluidAudioProvider, - let cachedResult = await fluidProvider.transcribeCachedStreamingPreviewIfAvailable(pcm) + if useDictionaryTrainingPath { + if let fluidProvider = provider as? FluidAudioProvider { + result = try await self.transcriptionExecutor.run { [fluidProvider] in + try await fluidProvider.transcribeDictionaryTraining(pcm) + } + } else { + result = try await self.transcriptionExecutor.run { [provider] in + try await provider.transcribeFinal(pcm) + } + } + finalSource = "dictionaryTraining" + } else if let fluidProvider = provider as? 
FluidAudioProvider, + let cachedResult = await fluidProvider.transcribeCachedStreamingPreviewIfAvailable(pcm) { result = cachedResult finalSource = "livePreview" @@ -1175,11 +1273,17 @@ final class ASRService: ObservableObject { } // Do not update self.finalText here to avoid instant binding insert in playground - let cleanedText = ASRService.applyCustomDictionary(ASRService.removeFillerWords(result.text)) - self.recordWordBoostHitIfAny(transcribedText: cleanedText) - DebugLogger.shared.debug("After post-processing: '\(cleanedText)'", source: "ASRService") - self.benchmarkLog("stop_end result=success totalMs=\(self.elapsedMilliseconds(since: stopStartedAt)) recordingAgeMs=\(self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt)) cleanedChars=\(cleanedText.count)") - if SettingsStore.shared.saveTranscriptionHistory, + let textWithoutFillers = ASRService.removeFillerWords(result.text) + let outputText = useDictionaryTrainingPath + ? textWithoutFillers + : ASRService.applyCustomDictionary(textWithoutFillers) + if !useDictionaryTrainingPath { + self.recordWordBoostHitIfAny(transcribedText: outputText) + } + DebugLogger.shared.debug("After post-processing: '\(outputText)'", source: "ASRService") + self.benchmarkLog("stop_end result=success totalMs=\(self.elapsedMilliseconds(since: stopStartedAt)) recordingAgeMs=\(self.elapsedMilliseconds(since: self.benchmarkRecordingStartedAt)) cleanedChars=\(outputText.count)") + if !useDictionaryTrainingPath, + SettingsStore.shared.saveTranscriptionHistory, SettingsStore.shared.saveAudioWithTranscriptionHistory, !capturedPCM.isEmpty { @@ -1196,7 +1300,7 @@ final class ASRService: ObservableObject { DebugLogger.shared.info("🎡 Resumed system media after transcription", source: "ASRService") } - return cleanedText + return outputText } catch { DebugLogger.shared.error("ASR transcription failed: \(error)", source: "ASRService") DebugLogger.shared.error("Error details: \(error.localizedDescription)", source: "ASRService") @@ 
-1307,7 +1411,10 @@ final class ASRService: ObservableObject { func stopWithoutTranscription() async { guard self.isRunning else { return } - defer { self.applyPendingParakeetVocabularyReloadIfNeeded() } + defer { + self.applyPendingParakeetVocabularyReloadIfNeeded() + self.isDictionaryTrainingCaptureActive = false + } self.audioRouteRecoveryTask?.cancel() self.audioRouteRecoveryTask = nil @@ -1326,20 +1433,10 @@ final class ASRService: ObservableObject { // Stop monitoring device self.stopMonitoringDevice() - self.removeEngineTap() - DebugLogger.shared.debug("Engine tap removed", source: "ASRService") - - self.engine.stop() - DebugLogger.shared.debug("Engine stopped", source: "ASRService") - // Release old engine on a background thread β€” if the underlying device just died, // AVAudioEngine deallocation can block in CoreAudio's internal teardown. // No new engine is created here (it's lazy on next start()), so no overlap risk. - let oldEngine = self.engineStorage - self.engineStorage = nil - if let oldEngine { - DispatchQueue.global(qos: .utility).async { _ = oldEngine } - } + self.tearDownCaptureEngine(reason: "stop without transcription", releaseAsync: true) // CRITICAL FIX: Await completion of streaming task AND any pending transcriptions // This prevents use-after-free crashes (EXC_BAD_ACCESS) when clearing buffer @@ -1365,13 +1462,18 @@ final class ASRService: ObservableObject { } private func configureSession() throws { + let configureStartedAt = Date().timeIntervalSince1970 DebugLogger.shared.debug("πŸ”§ configureSession() - ENTERED", source: "ASRService") + let runningCheckStartedAt = Date().timeIntervalSince1970 if self.engine.isRunning { DebugLogger.shared.debug("⚠️ Engine is running, stopping before configuration", source: "ASRService") self.engine.stop() DebugLogger.shared.debug("βœ… Engine stopped", source: "ASRService") } + self.benchmarkLog( + "start_configure_running_check elapsedMs=\(self.elapsedMilliseconds(since: runningCheckStartedAt))" + ) // 
No need to call engine.reset() here - we created a fresh engine in stop() // Accessing the engine property will either return the existing fresh engine, @@ -1380,18 +1482,29 @@ final class ASRService: ObservableObject { // Force input node instantiation (ensures the underlying AUHAL AudioUnit exists) DebugLogger.shared.debug("πŸ“ Forcing input node instantiation...", source: "ASRService") + let inputNodeStartedAt = Date().timeIntervalSince1970 _ = self.engine.inputNode + self.benchmarkLog( + "start_configure_input_node elapsedMs=\(self.elapsedMilliseconds(since: inputNodeStartedAt))" + ) DebugLogger.shared.debug("Input node instantiated", source: "ASRService") // Force output node instantiation for output device binding DebugLogger.shared.debug("πŸ“ Forcing output node instantiation...", source: "ASRService") + let outputNodeStartedAt = Date().timeIntervalSince1970 _ = self.engine.outputNode + self.benchmarkLog( + "start_configure_output_node elapsedMs=\(self.elapsedMilliseconds(since: outputNodeStartedAt))" + ) DebugLogger.shared.debug("βœ… Output node instantiated", source: "ASRService") // NOTE: Device binding occurs in startEngine() BEFORE engine.prepare() // Per CoreAudio docs, device must be set before AudioUnit initialization (prepare) // Since sync mode is always ON, binding actually no-ops and uses system defaults + self.benchmarkLog( + "start_configure_done elapsedMs=\(self.elapsedMilliseconds(since: configureStartedAt))" + ) DebugLogger.shared.debug("βœ… configureSession() - COMPLETED", source: "ASRService") } @@ -1704,21 +1817,32 @@ final class ASRService: ObservableObject { } private func startEngine() throws { + let startEngineStartedAt = Date().timeIntervalSince1970 DebugLogger.shared.debug("πŸš€ startEngine() - ENTERED", source: "ASRService") var attempts = 0 var lastError: Error? 
while attempts < 3 { + var installedTapThisAttempt = false + var startedEngineThisAttempt = false do { // CRITICAL: Bind devices BEFORE prepare() - must be set before AudioUnit initialization // Note: This may fail for aggregate devices (Bluetooth, etc.) with OSStatus -10851 // In that case, we fall back to system defaults (same as sync mode) DebugLogger.shared.debug("🎚️ Binding input device (before prepare)...", source: "ASRService") + let inputBindStartedAt = Date().timeIntervalSince1970 let inputBindOk = self.bindPreferredInputDeviceIfNeeded() + self.benchmarkLog( + "start_engine_input_bind attempt=\(attempts + 1) ok=\(inputBindOk) elapsedMs=\(self.elapsedMilliseconds(since: inputBindStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) DebugLogger.shared.debug("βœ… Input device binding result: \(inputBindOk)", source: "ASRService") DebugLogger.shared.debug("πŸ”Š Binding output device (before prepare)...", source: "ASRService") + let outputBindStartedAt = Date().timeIntervalSince1970 let outputBindOk = self.bindPreferredOutputDeviceIfNeeded() + self.benchmarkLog( + "start_engine_output_bind attempt=\(attempts + 1) ok=\(outputBindOk) elapsedMs=\(self.elapsedMilliseconds(since: outputBindStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) DebugLogger.shared.debug("βœ… Output device binding result: \(outputBindOk)", source: "ASRService") // If binding failed (e.g., aggregate device), engine will use system defaults @@ -1732,25 +1856,90 @@ final class ASRService: ObservableObject { // Prepare the engine to allocate resources and establish format SYNCHRONOUSLY // This ensures the audio graph is fully initialized before we proceed DebugLogger.shared.debug("πŸ“‹ Preparing engine (allocating resources)...", source: "ASRService") + let prepareStartedAt = Date().timeIntervalSince1970 self.engine.prepare() + self.benchmarkLog( + "start_engine_prepare attempt=\(attempts + 1) 
elapsedMs=\(self.elapsedMilliseconds(since: prepareStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) DebugLogger.shared.debug("βœ… Engine prepared", source: "ASRService") // Log engine state before attempting to start + let formatProbeStartedAt = Date().timeIntervalSince1970 let inputNode = self.engine.inputNode let inputFormat = inputNode.inputFormat(forBus: 0) + let outputSampleRate = inputNode.outputFormat(forBus: 0).sampleRate + self.benchmarkLog( + "start_engine_format_probe attempt=\(attempts + 1) " + + "elapsedMs=\(self.elapsedMilliseconds(since: formatProbeStartedAt)) " + + "totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt)) " + + "sampleRate=\(Int(inputFormat.sampleRate.rounded())) " + + "channels=\(inputFormat.channelCount)" + ) DebugLogger.shared.debug( "(startEngine(): before engine.start attempt \(attempts + 1)) " + - "Engine IO device = \(inputNode.outputFormat(forBus: 0).sampleRate)Hz, " + + "Engine IO device = \(outputSampleRate)Hz, " + "Input format = \(inputFormat.sampleRate)Hz \(inputFormat.channelCount)ch", source: "ASRService" ) + DebugLogger.shared.debug("🎧 Setting up engine tap before engine.start()...", source: "ASRService") + let tapSetupStartedAt = Date().timeIntervalSince1970 + do { + try self.setupEngineTap() + installedTapThisAttempt = true + self.benchmarkLog( + "start_engine_tap_setup attempt=\(attempts + 1) " + + "preStart=true success=true " + + "elapsedMs=\(self.elapsedMilliseconds(since: tapSetupStartedAt)) " + + "totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) + DebugLogger.shared.debug("βœ… Pre-start engine tap setup complete", source: "ASRService") + } catch { + self.benchmarkLog( + "start_engine_tap_setup attempt=\(attempts + 1) " + + "preStart=true success=false " + + "elapsedMs=\(self.elapsedMilliseconds(since: tapSetupStartedAt)) " + + "totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt)) " + + "error=\(error.localizedDescription)" + ) + 
DebugLogger.shared.warning( + "⚠️ Pre-start tap setup failed; falling back to post-start tap setup: \(error.localizedDescription)", + source: "ASRService" + ) + } + + let engineStartCallStartedAt = Date().timeIntervalSince1970 try self.engine.start() + startedEngineThisAttempt = true + self.benchmarkLog( + "start_engine_start_call attempt=\(attempts + 1) elapsedMs=\(self.elapsedMilliseconds(since: engineStartCallStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) DebugLogger.shared.info("AVAudioEngine started successfully on attempt \(attempts + 1)", source: "ASRService") + + if installedTapThisAttempt == false { + DebugLogger.shared.debug("🎧 Setting up engine tap after engine.start() fallback...", source: "ASRService") + let fallbackTapStartedAt = Date().timeIntervalSince1970 + try self.setupEngineTap() + installedTapThisAttempt = true + self.benchmarkLog( + "start_engine_tap_setup attempt=\(attempts + 1) preStart=false success=true elapsedMs=\(self.elapsedMilliseconds(since: fallbackTapStartedAt)) totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) + DebugLogger.shared.debug("βœ… Post-start engine tap setup complete", source: "ASRService") + } + + self.benchmarkLog( + "start_engine_done attempt=\(attempts + 1) totalMs=\(self.elapsedMilliseconds(since: startEngineStartedAt))" + ) return } catch { lastError = error attempts += 1 + if installedTapThisAttempt { + self.removeEngineTap() + } + if startedEngineThisAttempt || self.engine.isRunning { + self.engine.stop() + } // Log the actual error from AVFoundation DebugLogger.shared.error( @@ -1790,17 +1979,278 @@ final class ASRService: ObservableObject { } private func removeEngineTap() { - self.engine.inputNode.removeTap(onBus: 0) + guard let engine = self.engineStorage as? 
AVAudioEngine else { return } + engine.inputNode.removeTap(onBus: 0) + } + + private func cancelFastRestartWarmEngineShutdown() { + self.fastRestartWarmEngineShutdownTask?.cancel() + self.fastRestartWarmEngineShutdownTask = nil + } + + private func cancelFastStartPrewarmShutdown() { + self.fastStartPrewarmShutdownTask?.cancel() + self.fastStartPrewarmShutdownTask = nil + } + + private func scheduleIdleCaptureEnginePreparation( + reason: String, + delayNanoseconds: UInt64 = 1_000_000_000 + ) { + guard self.fastRestartWarmEngineEnabled else { return } + guard self.micStatus == .authorized else { return } + guard self.idleCaptureEnginePrepareTask == nil else { return } + + self.idleCaptureEnginePrepareTask = Task { @MainActor [weak self] in + do { + try await Task.sleep(nanoseconds: delayNanoseconds) + } catch { + return + } + + guard let self else { return } + self.idleCaptureEnginePrepareTask = nil + guard self.micStatus == .authorized, + self.isRunning == false, + self.isStarting == false, + self.isEngineWarmForFastRestart == false, + self.fastStartPrewarmTask == nil, + self.engineStorage == nil + else { return } + + let startedAt = Date().timeIntervalSince1970 + do { + try self.configureSession() + if SettingsStore.shared.syncAudioDevicesWithSystem { + let prepareStartedAt = Date().timeIntervalSince1970 + self.engine.prepare() + self.benchmarkLog( + "fast_start_idle_prepare_engine_prepare skipped=false elapsedMs=\(self.elapsedMilliseconds(since: prepareStartedAt))" + ) + } else { + self.benchmarkLog("fast_start_idle_prepare_engine_prepare skipped=true reason=independent_device_binding") + } + self.benchmarkLog( + "fast_start_idle_prepare_ready reason=\(reason) elapsedMs=\(self.elapsedMilliseconds(since: startedAt))" + ) + } catch { + self.tearDownCaptureEngine(reason: "idle capture engine prepare failed", releaseAsync: true) + self.benchmarkLog( + "fast_start_idle_prepare_failed reason=\(reason) elapsedMs=\(self.elapsedMilliseconds(since: startedAt)) 
error=\(error.localizedDescription)" + ) + } + } + } + + @discardableResult + func prewarmCaptureEngineForFastStart(reason: String) -> Bool { + guard self.fastRestartWarmEngineEnabled else { return false } + guard self.micStatus == .authorized else { return false } + guard self.isRunning == false, self.isStarting == false else { return false } + guard self.isEngineWarmForFastRestart == false else { return false } + guard self.fastStartPrewarmTask == nil else { return true } + + self.idleCaptureEnginePrepareTask?.cancel() + self.idleCaptureEnginePrepareTask = nil + self.benchmarkLog("fast_start_prewarm_requested reason=\(reason)") + self.fastStartPrewarmTask = Task { @MainActor [weak self] in + guard let self else { return } + guard Task.isCancelled == false, + self.isRunning == false, + self.isStarting == false, + self.isEngineWarmForFastRestart == false + else { + self.fastStartPrewarmTask = nil + return + } + let startedAt = Date().timeIntervalSince1970 + do { + self.audioCapturePipeline.setRecordingEnabled(false) + self.isFastStartPrewarmingCaptureEngine = true + defer { self.isFastStartPrewarmingCaptureEngine = false } + try self.configureSession() + guard Task.isCancelled == false else { + self.tearDownCaptureEngine(reason: "fast start prewarm cancelled after configure", releaseAsync: true) + self.fastStartPrewarmTask = nil + return + } + try self.startEngine() + + guard Task.isCancelled == false else { + self.tearDownCaptureEngine(reason: "fast start prewarm cancelled before ready", releaseAsync: true) + self.fastStartPrewarmTask = nil + return + } + + self.isEngineWarmForFastRestart = true + self.isCaptureEnginePrewarmedForFastStart = true + self.warmCaptureEngineConfiguration = self.currentCaptureEngineConfiguration() + self.benchmarkLog("fast_start_prewarm_ready reason=\(reason) elapsedMs=\(self.elapsedMilliseconds(since: startedAt))") + self.scheduleFastStartPrewarmShutdown(reason: reason) + } catch { + self.tearDownCaptureEngine(reason: "fast start 
prewarm failed", releaseAsync: true) + self.benchmarkLog("fast_start_prewarm_failed reason=\(reason) elapsedMs=\(self.elapsedMilliseconds(since: startedAt)) error=\(error.localizedDescription)") + } + + self.fastStartPrewarmTask = nil + } + return true + } + + func cancelCaptureEngineFastStartPrewarm(reason: String) { + self.fastStartPrewarmTask?.cancel() + self.fastStartPrewarmTask = nil + self.cancelFastStartPrewarmShutdown() + + guard self.isRunning == false, self.isCaptureEnginePrewarmedForFastStart else { return } + self.benchmarkLog("fast_start_prewarm_cancel reason=\(reason)") + self.tearDownCaptureEngine(reason: "fast start prewarm cancelled: \(reason)", releaseAsync: true) + } + + private func scheduleFastStartPrewarmShutdown(reason: String) { + guard let currentEngine = self.engineStorage as? AVAudioEngine, + currentEngine.isRunning + else { return } + + self.cancelFastStartPrewarmShutdown() + let delay = self.fastStartPrewarmHoldNanoseconds + self.fastStartPrewarmShutdownTask = Task { @MainActor [weak self, weak currentEngine] in + do { + try await Task.sleep(nanoseconds: delay) + } catch { + return + } + + guard let self, + !self.isRunning, + self.isCaptureEnginePrewarmedForFastStart, + let storedEngine = self.engineStorage as? 
AVAudioEngine, + storedEngine === currentEngine + else { return } + + self.benchmarkLog("fast_start_prewarm_expired reason=\(reason)") + self.tearDownCaptureEngine(reason: "fast start prewarm expired", releaseAsync: true) + self.scheduleIdleCaptureEnginePreparation(reason: "fast start prewarm expired") + } + } + + private func currentCaptureEngineConfiguration() -> CaptureEngineConfiguration { + CaptureEngineConfiguration( + syncAudioDevicesWithSystem: SettingsStore.shared.syncAudioDevicesWithSystem, + preferredInputDeviceUID: SettingsStore.shared.preferredInputDeviceUID, + preferredOutputDeviceUID: SettingsStore.shared.preferredOutputDeviceUID + ) + } + + private func tearDownCaptureEngine(reason: String, releaseAsync: Bool = false) { + self.idleCaptureEnginePrepareTask?.cancel() + self.idleCaptureEnginePrepareTask = nil + self.cancelFastRestartWarmEngineShutdown() + self.cancelFastStartPrewarmShutdown() + self.isEngineWarmForFastRestart = false + self.isCaptureEnginePrewarmedForFastStart = false + self.isFastStartPrewarmingCaptureEngine = false + self.warmCaptureEngineConfiguration = nil + self.audioCapturePipeline.setRecordingEnabled(false) + self.removeEngineTap() + + let engineToStop = self.engineStorage as? AVAudioEngine + if engineToStop?.isRunning == true { + engineToStop?.stop() + } + + let oldEngine = self.engineStorage + self.engineStorage = nil + DebugLogger.shared.debug("Capture engine torn down: \(reason)", source: "ASRService") + + guard releaseAsync, let oldEngine else { return } + DispatchQueue.global(qos: .utility).async { _ = oldEngine } + } + + private func scheduleFastRestartWarmEngineShutdown(reason: String) { + guard self.fastRestartWarmEngineEnabled, + let currentEngine = self.engineStorage as? 
AVAudioEngine, + currentEngine.isRunning + else { + self.tearDownCaptureEngine(reason: reason) + return + } + + self.cancelFastRestartWarmEngineShutdown() + self.cancelFastStartPrewarmShutdown() + self.isEngineWarmForFastRestart = true + self.isCaptureEnginePrewarmedForFastStart = false + self.warmCaptureEngineConfiguration = self.currentCaptureEngineConfiguration() + let delay = self.fastRestartWarmEngineHoldNanoseconds + DebugLogger.shared.info("Keeping capture engine warm for fast restart: \(reason)", source: "ASRService") + + self.fastRestartWarmEngineShutdownTask = Task { @MainActor [weak self, weak currentEngine] in + do { + try await Task.sleep(nanoseconds: delay) + } catch { + return + } + + guard let self, + !self.isRunning, + self.isEngineWarmForFastRestart, + let storedEngine = self.engineStorage as? AVAudioEngine, + storedEngine === currentEngine + else { return } + + self.tearDownCaptureEngine(reason: "fast restart warm window expired") + self.scheduleIdleCaptureEnginePreparation( + reason: "fast restart warm window expired", + delayNanoseconds: 250_000_000 + ) + } + } + + private func reuseWarmCaptureEngineIfAvailable() -> Bool { + guard self.fastRestartWarmEngineEnabled, self.isEngineWarmForFastRestart else { return false } + + guard let warmEngine = self.engineStorage as? 
AVAudioEngine, + warmEngine.isRunning + else { + self.tearDownCaptureEngine(reason: "warm engine unavailable") + self.audioCapturePipeline.setRecordingEnabled(false) + return false + } + + guard self.warmCaptureEngineConfiguration == self.currentCaptureEngineConfiguration() else { + self.tearDownCaptureEngine(reason: "audio settings changed while warm") + self.audioCapturePipeline.setRecordingEnabled(false) + return false + } + + self.cancelFastRestartWarmEngineShutdown() + self.cancelFastStartPrewarmShutdown() + let reusedPrewarmedCapture = self.isCaptureEnginePrewarmedForFastStart + self.isEngineWarmForFastRestart = false + self.isCaptureEnginePrewarmedForFastStart = false + self.warmCaptureEngineConfiguration = nil + if self.isFastStartPrewarmingCaptureEngine == false { + DictationStartProbe.shared.markTapInstalled(session: self.benchmarkSessionID) + } + self.benchmarkLog("warm_engine_reuse reused=true prewarmed=\(reusedPrewarmedCapture)") + DebugLogger.shared.info("Reusing warm capture engine for fast restart", source: "ASRService") + return true } private func setupEngineTap() throws { + let tapStartedAt = Date().timeIntervalSince1970 DebugLogger.shared.debug("🎧 setupEngineTap() - ENTERED", source: "ASRService") + let inputNodeStartedAt = Date().timeIntervalSince1970 let input = self.engine.inputNode + self.benchmarkLog( + "start_tap_input_node elapsedMs=\(self.elapsedMilliseconds(since: inputNodeStartedAt))" + ) // On Intel Macs (especially after wake from sleep), the audio HAL may not have // finished initializing even after engine.start() returns. The format can be // temporarily 0Hz/0ch while the hardware negotiates with CoreAudio. // We retry a few times with small delays to handle this race condition. 
+ let formatWaitStartedAt = Date().timeIntervalSince1970 var inFormat = input.inputFormat(forBus: 0) var retryCount = 0 let maxRetries = 5 @@ -1832,6 +2282,9 @@ final class ASRService: ObservableObject { // Re-query the format inFormat = input.inputFormat(forBus: 0) } + self.benchmarkLog( + "start_tap_format_ready elapsedMs=\(self.elapsedMilliseconds(since: formatWaitStartedAt)) retryCount=\(retryCount) sampleRate=\(Int(inFormat.sampleRate.rounded())) channels=\(inFormat.channelCount)" + ) if retryCount > 0 { DebugLogger.shared.info( @@ -1848,14 +2301,28 @@ final class ASRService: ObservableObject { self.inputFormat = inFormat let pipeline = self.audioCapturePipeline DebugLogger.shared.debug("🎧 Installing tap on bus 0...", source: "ASRService") + let installTapStartedAt = Date().timeIntervalSince1970 input.installTap(onBus: 0, bufferSize: 4096, format: inFormat) { buffer, _ in pipeline.handle(buffer: buffer) } + self.benchmarkLog( + "start_tap_install elapsedMs=\(self.elapsedMilliseconds(since: installTapStartedAt)) totalMs=\(self.elapsedMilliseconds(since: tapStartedAt)) bufferSize=4096" + ) + if self.isFastStartPrewarmingCaptureEngine == false { + DictationStartProbe.shared.markTapInstalled(session: self.benchmarkSessionID) + } + self.benchmarkLog( + "start_tap_done totalMs=\(self.elapsedMilliseconds(since: tapStartedAt))" + ) DebugLogger.shared.debug("βœ… setupEngineTap() - COMPLETED", source: "ASRService") } private func scheduleAudioRouteRecovery(reason: String) { guard self.isRunning else { + if self.isEngineWarmForFastRestart || self.engineStorage != nil { + self.tearDownCaptureEngine(reason: "audio route changed while warm: \(reason)", releaseAsync: true) + self.scheduleIdleCaptureEnginePreparation(reason: "audio route changed while idle: \(reason)") + } self.audioLevelSubject.send(0.0) return } @@ -1895,19 +2362,11 @@ final class ASRService: ObservableObject { self.audioCapturePipeline.setRecordingEnabled(false) self.stopMonitoringDevice() - 
self.removeEngineTap() - self.engine.stop() - - let oldEngine = self.engineStorage - self.engineStorage = nil - if let oldEngine { - DispatchQueue.global(qos: .utility).async { _ = oldEngine } - } + self.tearDownCaptureEngine(reason: "audio route recovery", releaseAsync: true) do { try self.configureSession() try self.startEngine() - try self.setupEngineTap() self.audioCapturePipeline.setRecordingEnabled(true) if let currentDevice = self.getCurrentlyBoundInputDevice() { @@ -3203,12 +3662,17 @@ private final class AudioCapturePipeline { func setRecordingEnabled(_ enabled: Bool) { self.lock.lock() - defer { self.lock.unlock() } self.recordingEnabled = enabled + let shouldResetLevel = enabled == false if enabled == false { self.levelHistory.removeAll(keepingCapacity: true) self.smoothedLevel = 0.0 } + self.lock.unlock() + + if shouldResetLevel { + self.onLevel(0.0) + } } func handle(buffer: AVAudioPCMBuffer) { @@ -3216,11 +3680,12 @@ private final class AudioCapturePipeline { let enabled = self.recordingEnabled self.lock.unlock() - guard enabled else { - self.onLevel(0.0) - return - } + guard enabled else { return } + DictationStartProbe.shared.markFirstTapBuffer( + frameLength: Int(buffer.frameLength), + sampleRate: buffer.format.sampleRate + ) let mono16k = Self.toMono16k(floatBuffer: buffer) guard mono16k.isEmpty == false else { self.onLevel(0.0) @@ -3228,6 +3693,7 @@ private final class AudioCapturePipeline { } self.audioBuffer.append(mono16k) + DictationStartProbe.shared.markFirstAudio(sampleCount: mono16k.count) let level = self.calculateAudioLevel(mono16k) self.onLevel(level) } diff --git a/Sources/Fluid/Services/DictationStartProbe.swift b/Sources/Fluid/Services/DictationStartProbe.swift new file mode 100644 index 00000000..bcf46689 --- /dev/null +++ b/Sources/Fluid/Services/DictationStartProbe.swift @@ -0,0 +1,151 @@ +import CoreGraphics +import Foundation + +final class DictationStartProbe: @unchecked Sendable { + static let shared = DictationStartProbe() + 
+ private struct InputEvent { + let kind: String + let uptime: TimeInterval + } + + private let lock = NSLock() + private var lastInputEvent: InputEvent? + private var activeEvent: InputEvent? + private var activeTriggerLabel: String? + private var firstTapBufferLogged = false + private var firstAudioLogged = false + + private init() {} + + func markInputEvent(type: CGEventType, uptime: TimeInterval) { + self.lock.lock() + self.lastInputEvent = InputEvent(kind: Self.eventName(type), uptime: uptime) + self.lock.unlock() + } + + func markStartTrigger(label: String) { + let now = ProcessInfo.processInfo.systemUptime + self.lock.lock() + self.activeEvent = self.lastInputEvent + self.activeTriggerLabel = label + self.firstTapBufferLogged = false + self.firstAudioLogged = false + let event = self.activeEvent + self.lock.unlock() + + let eventDelta = Self.deltaMilliseconds(from: event?.uptime, to: now) + DebugLogger.shared.benchmark( + "START_LATENCY", + message: "trigger label=\(label) event=\(event?.kind ?? 
"unknown") eventToTriggerMs=\(eventDelta)", + source: "DictationStartProbe" + ) + } + + func markASRStart(session: Int) { + let now = ProcessInfo.processInfo.systemUptime + let snapshot = self.snapshot() + DebugLogger.shared.benchmark( + "START_LATENCY", + message: "asr_start_enter session=\(session) label=\(snapshot.label) eventToASRStartMs=\(Self.deltaMilliseconds(from: snapshot.eventUptime, to: now))", + source: "DictationStartProbe" + ) + } + + func markCaptureEnabled(session: Int) { + let now = ProcessInfo.processInfo.systemUptime + let snapshot = self.snapshot() + DebugLogger.shared.benchmark( + "START_LATENCY", + message: "capture_enabled session=\(session) label=\(snapshot.label) eventToCaptureEnabledMs=\(Self.deltaMilliseconds(from: snapshot.eventUptime, to: now))", + source: "DictationStartProbe" + ) + } + + func markTapInstalled(session: Int) { + let now = ProcessInfo.processInfo.systemUptime + let snapshot = self.snapshot() + DebugLogger.shared.benchmark( + "START_LATENCY", + message: "tap_installed session=\(session) label=\(snapshot.label) eventToTapInstalledMs=\(Self.deltaMilliseconds(from: snapshot.eventUptime, to: now))", + source: "DictationStartProbe" + ) + } + + func markFirstTapBuffer(frameLength: Int, sampleRate: Double) { + let now = ProcessInfo.processInfo.systemUptime + self.lock.lock() + guard self.firstTapBufferLogged == false else { + self.lock.unlock() + return + } + self.firstTapBufferLogged = true + let event = self.activeEvent + let label = self.activeTriggerLabel ?? "unknown" + self.lock.unlock() + + let callbackDelta = Self.deltaMilliseconds(from: event?.uptime, to: now) + let bufferDuration = Self.bufferDurationMilliseconds(frameLength: frameLength, sampleRate: sampleRate) + let estimatedFirstSampleDelta = callbackDelta >= 0 && bufferDuration >= 0 + ? 
callbackDelta - bufferDuration + : -1 + DebugLogger.shared.benchmark( + "START_LATENCY", + message: "first_tap_buffer label=\(label) frames=\(frameLength) " + + "sampleRate=\(Int(sampleRate.rounded())) event=\(event?.kind ?? "unknown") " + + "eventToFirstTapBufferMs=\(callbackDelta) bufferMs=\(bufferDuration) " + + "estimatedEventToFirstSampleMs=\(estimatedFirstSampleDelta)", + source: "DictationStartProbe" + ) + } + + func markFirstAudio(sampleCount: Int) { + let now = ProcessInfo.processInfo.systemUptime + self.lock.lock() + guard self.firstAudioLogged == false else { + self.lock.unlock() + return + } + self.firstAudioLogged = true + let event = self.activeEvent + let label = self.activeTriggerLabel ?? "unknown" + self.lock.unlock() + + DebugLogger.shared.benchmark( + "START_LATENCY", + message: "first_audio label=\(label) samples=\(sampleCount) event=\(event?.kind ?? "unknown") eventToFirstAudioMs=\(Self.deltaMilliseconds(from: event?.uptime, to: now))", + source: "DictationStartProbe" + ) + } + + private func snapshot() -> (eventUptime: TimeInterval?, label: String) { + self.lock.lock() + defer { self.lock.unlock() } + return (self.activeEvent?.uptime, self.activeTriggerLabel ?? 
"unknown") + } + + private static func deltaMilliseconds(from start: TimeInterval?, to end: TimeInterval) -> Int { + guard let start else { return -1 } + return Int(((end - start) * 1000).rounded()) + } + + private static func bufferDurationMilliseconds(frameLength: Int, sampleRate: Double) -> Int { + guard sampleRate > 0 else { return -1 } + return Int(((Double(frameLength) / sampleRate) * 1000).rounded()) + } + + private static func eventName(_ type: CGEventType) -> String { + switch type { + case .keyDown: return "keyDown" + case .keyUp: return "keyUp" + case .flagsChanged: return "flagsChanged" + case .leftMouseDown: return "leftMouseDown" + case .leftMouseUp: return "leftMouseUp" + case .rightMouseDown: return "rightMouseDown" + case .rightMouseUp: return "rightMouseUp" + case .otherMouseDown: return "otherMouseDown" + case .otherMouseUp: return "otherMouseUp" + default: return "event\(type.rawValue)" + } + } +} diff --git a/Sources/Fluid/Services/FluidAudioProvider.swift b/Sources/Fluid/Services/FluidAudioProvider.swift index 130b1fdd..1304e351 100644 --- a/Sources/Fluid/Services/FluidAudioProvider.swift +++ b/Sources/Fluid/Services/FluidAudioProvider.swift @@ -201,6 +201,10 @@ final class FluidAudioProvider: TranscriptionProvider { return ASRTranscriptionResult(text: result.text, confidence: result.confidence) } + func transcribeDictionaryTraining(_ samples: [Float]) async throws -> ASRTranscriptionResult { + try await self.transcribeStreaming(samples) + } + func transcribeFinal(_ samples: [Float]) async throws -> ASRTranscriptionResult { guard let manager = self.finalAsrManager ?? 
self.streamingAsrManager else { throw NSError( diff --git a/Sources/Fluid/Services/GlobalHotkeyManager.swift b/Sources/Fluid/Services/GlobalHotkeyManager.swift index c0272aa5..cccdda36 100644 --- a/Sources/Fluid/Services/GlobalHotkeyManager.swift +++ b/Sources/Fluid/Services/GlobalHotkeyManager.swift @@ -72,6 +72,7 @@ final class GlobalHotkeyManager: NSObject { private var pasteLastTranscriptionCallback: (() -> Void)? private var hotkeyMode: HotkeyActivationMode = SettingsStore.shared.hotkeyMode private let automaticTapThresholdSeconds: TimeInterval = 0.4 + private var primaryDictationShortcutPrewarmActive = false private struct ModifierOnlyShortcutBehavior { let shortcut: HotkeyShortcut @@ -346,6 +347,7 @@ final class GlobalHotkeyManager: NSObject { } func updatePrimaryShortcuts(_ newShortcuts: [HotkeyShortcut]) { + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "primary dictation shortcuts changed") self.primaryShortcuts = newShortcuts DebugLogger.shared.info("Updated transcription hotkeys", source: "GlobalHotkeyManager") } @@ -447,6 +449,7 @@ final class GlobalHotkeyManager: NSObject { @discardableResult private func setupGlobalHotkey() -> Bool { + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "event tap reset") self.cleanupEventTap() if !AXIsProcessTrusted() { @@ -534,6 +537,7 @@ final class GlobalHotkeyManager: NSObject { private func markOtherInputDuringModifierOnly() { guard self.modifierOnlyKeyDown else { return } self.otherKeyPressedDuringModifier = true + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "modifier combo input") if let pending = self.pendingHoldModeStart { pending.cancel() self.pendingHoldModeStart = nil @@ -542,6 +546,22 @@ final class GlobalHotkeyManager: NSObject { } } + private func prewarmPrimaryDictationShortcutIfNeeded(reason: String) { + guard self.asrService.isRunning == false else { + self.primaryDictationShortcutPrewarmActive = false + return + } + + self.primaryDictationShortcutPrewarmActive = 
self.asrService.prewarmCaptureEngineForFastStart(reason: reason) + } + + private func cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: String) { + guard self.primaryDictationShortcutPrewarmActive else { return } + + self.primaryDictationShortcutPrewarmActive = false + self.asrService.cancelCaptureEngineFastStartPrewarm(reason: reason) + } + private func mouseButton(from event: CGEvent) -> Int { Int(event.getIntegerValueField(.mouseEventButtonNumber)) } @@ -579,6 +599,7 @@ final class GlobalHotkeyManager: NSObject { setModeKeyPressed: { self.isKeyPressed = $0 }, onHoldStart: { self.startRecordingIfNeeded() }, onToggleRelease: { + self.primaryDictationShortcutPrewarmActive = false if self.asrService.isRunning { let isSameMode = self.isDictateRecordingProvider?() ?? false DebugLogger.shared.info( @@ -606,6 +627,7 @@ final class GlobalHotkeyManager: NSObject { if let tapRecoveryResult = self.handleTapDisableEvent(type: type, event: event) { return tapRecoveryResult } + DictationStartProbe.shared.markInputEvent(type: type, uptime: TimeInterval(event.timestamp) / 1_000_000_000) if self.isShortcutCaptureActiveProvider?() ?? 
false { self.resetModifierOnlyShortcutTracking() @@ -650,6 +672,7 @@ final class GlobalHotkeyManager: NSObject { } if handled { + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "cancel shortcut pressed") return nil // Consume event only if we did something } } @@ -710,11 +733,15 @@ final class GlobalHotkeyManager: NSObject { self.triggerPromptSelection(assignment.selection) } } + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "prompt shortcut pressed") return nil } // Check prompt mode hotkey - if self.handlePromptModeKeyDown(keyCode: keyCode, modifiers: eventModifiers) { return nil } + if self.handlePromptModeKeyDown(keyCode: keyCode, modifiers: eventModifiers) { + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "prompt mode shortcut pressed") + return nil + } // Check command mode hotkey first if self.commandModeShortcutEnabled, @@ -766,6 +793,7 @@ final class GlobalHotkeyManager: NSObject { self.triggerCommandMode() } } + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "command mode shortcut pressed") return nil } @@ -817,6 +845,7 @@ final class GlobalHotkeyManager: NSObject { self.triggerRewriteMode() } } + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "rewrite mode shortcut pressed") return nil } } @@ -824,10 +853,13 @@ final class GlobalHotkeyManager: NSObject { // Then check transcription hotkeys if let shortcut = self.primaryShortcuts.first(where: { $0.matches(keyCode: keyCode, modifiers: eventModifiers) }) { guard self.beginPrimaryShortcutPress(.keyboard(shortcut.keyCode)) else { return nil } + self.primaryDictationShortcutPrewarmActive = false self.handlePrimaryDictationTriggerDown() return nil } + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "primary dictation combo used by another key") + case .keyUp: // Prompt mode key up (press and hold mode) if self.handlePromptModeKeyUp(keyCode: keyCode) { return nil } @@ -905,6 +937,7 @@ final class GlobalHotkeyManager: NSObject { if 
self.handleMouseShortcutDown(event, modifiers: eventModifiers) { return nil } + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "primary dictation mouse combo used by another button") case .leftMouseUp, .rightMouseUp, .otherMouseUp: if self.handleMouseShortcutUp(event) { @@ -1071,6 +1104,7 @@ final class GlobalHotkeyManager: NSObject { for behavior: ModifierOnlyShortcutBehavior, message: String ) { + self.cancelModifierOnlyPrewarmIfNeeded(for: behavior, reason: message) guard self.pendingHoldModeType == behavior.holdModeType else { return } self.otherKeyPressedDuringModifier = true self.pendingHoldModeStart?.cancel() @@ -1079,6 +1113,22 @@ final class GlobalHotkeyManager: NSObject { DebugLogger.shared.info(message, source: "GlobalHotkeyManager") } + private func prewarmModifierOnlyCaptureIfNeeded(for behavior: ModifierOnlyShortcutBehavior) { + guard behavior.holdModeType == .transcription, + self.asrService.isRunning == false + else { return } + + self.prewarmPrimaryDictationShortcutIfNeeded(reason: "primary modifier down") + } + + private func cancelModifierOnlyPrewarmIfNeeded( + for behavior: ModifierOnlyShortcutBehavior, + reason: String + ) { + guard behavior.holdModeType == .transcription else { return } + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: reason) + } + private func handleAutomaticKeyRelease( for type: HotkeyHoldModeType, label: String, @@ -1306,6 +1356,9 @@ final class GlobalHotkeyManager: NSObject { if self.hotkeyMode == .hold { self.markHoldModeStartTriggered(for: behavior.holdModeType) } + if behavior.holdModeType == .transcription { + self.primaryDictationShortcutPrewarmActive = false + } behavior.onHoldStart() if self.hotkeyMode == .automatic { self.markAutomaticPressStarted(for: behavior.holdModeType) @@ -1329,6 +1382,8 @@ final class GlobalHotkeyManager: NSObject { if self.asrService.isRunning || didStart { DebugLogger.shared.info(behavior.holdReleaseMessage, source: "GlobalHotkeyManager") 
self.stopRecordingAfterRelease(for: behavior.holdModeType, label: self.label(for: behavior.holdModeType)) + } else { + self.cancelModifierOnlyPrewarmIfNeeded(for: behavior, reason: "modifier hold released before start") } } case .automatic: @@ -1350,10 +1405,17 @@ final class GlobalHotkeyManager: NSObject { DebugLogger.shared.debug(behavior.toggleIgnoredMessage, source: "GlobalHotkeyManager") } } + if behavior.holdModeType == .transcription, + self.primaryDictationShortcutPrewarmActive, + self.asrService.isRunning == false + { + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "automatic modifier released without start") + } case .toggle: if wasCleanPress { behavior.onToggleRelease() } else { + self.cancelModifierOnlyPrewarmIfNeeded(for: behavior, reason: behavior.toggleIgnoredMessage) DebugLogger.shared.debug(behavior.toggleIgnoredMessage, source: "GlobalHotkeyManager") } } @@ -1378,6 +1440,7 @@ final class GlobalHotkeyManager: NSObject { self.isRewriteKeyPressed = false self.isPromptAssignmentKeyPressed = false self.activePrimaryShortcutPress = nil + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "modifier tracking reset") if shouldStopActiveHold { switch reason { @@ -1550,6 +1613,7 @@ final class GlobalHotkeyManager: NSObject { self.otherKeyPressedDuringModifier = false self.modifierPressStartTime = Date() + self.prewarmModifierOnlyCaptureIfNeeded(for: behavior) self.scheduleModifierOnlyStart(for: behavior) return true } @@ -1591,6 +1655,7 @@ final class GlobalHotkeyManager: NSObject { self.otherKeyPressedDuringModifier = false self.modifierPressStartTime = Date() + self.prewarmModifierOnlyCaptureIfNeeded(for: behavior) self.scheduleModifierOnlyStart(for: behavior) return true } @@ -1681,6 +1746,7 @@ final class GlobalHotkeyManager: NSObject { if self.primaryShortcuts.contains(where: { $0.matchesMouse(button: mouseButton, modifiers: eventModifiers) }) { guard self.beginPrimaryShortcutPress(.mouse(mouseButton)) else { return true } + 
self.primaryDictationShortcutPrewarmActive = false self.handlePrimaryDictationTriggerDown() return true } @@ -1726,6 +1792,7 @@ final class GlobalHotkeyManager: NSObject { } private func triggerDictationMode() { + DictationStartProbe.shared.markStartTrigger(label: "dictationMode") Task { @MainActor [weak self] in guard let self = self else { return } guard self.canTriggerRecordingAction("Dictate mode hotkey") else { return } @@ -1755,6 +1822,7 @@ final class GlobalHotkeyManager: NSObject { } func setHotkeyMode(_ mode: HotkeyActivationMode) { + self.cancelPrimaryDictationShortcutPrewarmIfNeeded(reason: "hotkey mode changed") let shouldStopActivePress = self.hotkeyMode != .toggle && self.asrService.isRunning && (self.isKeyPressed || self.isPromptModeKeyPressed || self.isCommandModeKeyPressed || self.isRewriteKeyPressed || self.isPromptAssignmentKeyPressed) @@ -1786,6 +1854,10 @@ final class GlobalHotkeyManager: NSObject { DebugLogger.shared.debug("Ignoring \(label) - stop already processing", source: "GlobalHotkeyManager") return false } + guard !self.asrService.isDictionaryTrainingCaptureActive else { + DebugLogger.shared.debug("Ignoring \(label) - dictionary training capture is active", source: "GlobalHotkeyManager") + return false + } return true } @@ -1810,6 +1882,7 @@ final class GlobalHotkeyManager: NSObject { } private func startRecordingIfNeeded() { + DictationStartProbe.shared.markStartTrigger(label: "startRecordingIfNeeded") Task { @MainActor [weak self] in guard let self = self else { return } @@ -1835,6 +1908,10 @@ final class GlobalHotkeyManager: NSObject { DebugLogger.shared.debug("Ignoring stop - already processing", source: "GlobalHotkeyManager") return } + guard !self.asrService.isDictionaryTrainingCaptureActive else { + DebugLogger.shared.debug("Ignoring stop - dictionary training capture is active", source: "GlobalHotkeyManager") + return + } guard self.asrService.isRunning else { return @@ -1847,6 +1924,10 @@ final class GlobalHotkeyManager: 
NSObject { @MainActor private func stopRecordingInternal() async { guard self.asrService.isRunning else { return } + guard !self.asrService.isDictionaryTrainingCaptureActive else { + DebugLogger.shared.debug("Stop ignored - dictionary training capture is active", source: "GlobalHotkeyManager") + return + } guard !self.isProcessingStop else { DebugLogger.shared.debug("Stop already in progress, ignoring", source: "GlobalHotkeyManager") return diff --git a/Sources/Fluid/Services/LocalAPI/DictionaryAPIController.swift b/Sources/Fluid/Services/LocalAPI/DictionaryAPIController.swift index f0eeb714..99641ae9 100644 --- a/Sources/Fluid/Services/LocalAPI/DictionaryAPIController.swift +++ b/Sources/Fluid/Services/LocalAPI/DictionaryAPIController.swift @@ -132,6 +132,7 @@ struct DictionaryAPIController: LocalAPIRouteHandler { let incoming = try self.replacementEntries(from: payload) var stored = payload.mode == .replace ? [] : SettingsStore.shared.customDictionaryEntries + var incomingEntries: [SettingsStore.CustomDictionaryEntry] = [] for entry in incoming { let normalized = Self.storeEntry(from: entry) guard !normalized.triggers.isEmpty, @@ -144,10 +145,14 @@ struct DictionaryAPIController: LocalAPIRouteHandler { if let id = entry.id, existing.id == id { return true } return existing.replacement.caseInsensitiveCompare(normalized.replacement) == .orderedSame } - stored.append(normalized) + incomingEntries.removeAll { existing in + if let id = entry.id, existing.id == id { return true } + return existing.replacement.caseInsensitiveCompare(normalized.replacement) == .orderedSame + } + incomingEntries.append(normalized) } - SettingsStore.shared.customDictionaryEntries = stored + SettingsStore.shared.customDictionaryEntries = incomingEntries + stored ASRService.invalidateDictionaryCache() NotificationCenter.default.post(name: .parakeetVocabularyDidChange, object: nil) return self.getReplacements() diff --git a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift 
b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift index 5c7ced21..7cfd2172 100644 --- a/Sources/Fluid/Services/TranscriptionSoundPlayer.swift +++ b/Sources/Fluid/Services/TranscriptionSoundPlayer.swift @@ -11,11 +11,12 @@ final class TranscriptionSoundPlayer { private init() {} - func playStartSound() { - guard SettingsStore.shared.enableTranscriptionSounds else { return } + @discardableResult + func playStartSound() -> TimeInterval { + guard SettingsStore.shared.enableTranscriptionSounds else { return 0 } let selected = SettingsStore.shared.transcriptionStartSound - guard let soundName = selected.startSoundFileName else { return } - self.play(soundName: soundName) + guard let soundName = selected.startSoundFileName else { return 0 } + return self.play(soundName: soundName) } func playStopSound() { @@ -38,10 +39,11 @@ final class TranscriptionSoundPlayer { self.play(soundName: soundName, overrideVolume: volume) } - private func play(soundName: String, overrideVolume: Float? = nil) { + @discardableResult + private func play(soundName: String, overrideVolume: Float? 
= nil) -> TimeInterval { guard let url = Bundle.main.url(forResource: soundName, withExtension: "m4a") else { DebugLogger.shared.error("Missing sound resource: \(soundName).m4a", source: "TranscriptionSoundPlayer") - return + return 0 } let settings = SettingsStore.shared @@ -49,7 +51,7 @@ final class TranscriptionSoundPlayer { if settings.transcriptionSoundIndependentVolume { let currentSystemVol = Self.getSystemVolume() - guard currentSystemVol > 0.001 else { return } + guard currentSystemVol > 0.001 else { return 0 } // Save current system volume and temporarily set it to desired level self.savedSystemVolume = currentSystemVol Self.setSystemVolume(desiredVolume) @@ -71,16 +73,17 @@ final class TranscriptionSoundPlayer { } else { player.volume = desiredVolume } - player.play() + guard player.play() else { return 0 } + let duration = player.duration // Restore system volume after the sound finishes if settings.transcriptionSoundIndependentVolume, let saved = self.savedSystemVolume { - let duration = player.duration DispatchQueue.main.asyncAfter(deadline: .now() + duration + 0.05) { [weak self] in Self.setSystemVolume(saved) self?.savedSystemVolume = nil } } + return duration } catch { // Restore system volume on error if let saved = self.savedSystemVolume { @@ -91,6 +94,7 @@ final class TranscriptionSoundPlayer { "Failed to play sound \(soundName).m4a: \(error.localizedDescription)", source: "TranscriptionSoundPlayer" ) + return 0 } } diff --git a/Sources/Fluid/Theme/NativeButtonStyles.swift b/Sources/Fluid/Theme/NativeButtonStyles.swift index 06924650..9845e605 100644 --- a/Sources/Fluid/Theme/NativeButtonStyles.swift +++ b/Sources/Fluid/Theme/NativeButtonStyles.swift @@ -18,6 +18,7 @@ enum FluidButtonRole { case glass case compact case accent + case destructive case inline } @@ -77,6 +78,8 @@ extension View { self.buttonStyle(CompactButtonStyle(height: size.controlHeight)) case .accent: self.buttonStyle(AccentButtonStyle(compact: size.accentCompact)) + case 
.destructive: + self.buttonStyle(AccentButtonStyle(compact: size.accentCompact, tone: Color(nsColor: .systemRed))) case .inline: self.buttonStyle(InlineButtonStyle()) } @@ -389,9 +392,10 @@ struct CompactButtonStyle: ButtonStyle { struct AccentButtonStyle: ButtonStyle { var compact: Bool = false + var tone: Color? = nil func makeBody(configuration: Configuration) -> some View { - AccentButton(configuration: configuration, compact: self.compact) + AccentButton(configuration: configuration, compact: self.compact, tone: self.tone) } private struct AccentButton: View { @@ -399,12 +403,14 @@ struct AccentButtonStyle: ButtonStyle { @State private var isHovered = false let configuration: ButtonStyle.Configuration let compact: Bool + let tone: Color? private var shape: RoundedRectangle { RoundedRectangle(cornerRadius: self.compact ? 8 : self.theme.metrics.corners.md, style: .continuous) } var body: some View { + let tone = self.tone ?? self.theme.palette.accent self.configuration.label .fontWeight(.semibold) .padding(.horizontal, self.compact ? 12 : self.theme.metrics.spacing.lg) @@ -416,8 +422,8 @@ struct AccentButtonStyle: ButtonStyle { .fill( LinearGradient( colors: [ - self.theme.palette.accent, - self.theme.palette.accent.opacity(0.85), + tone, + tone.opacity(0.85), ], startPoint: .top, endPoint: .bottom @@ -429,7 +435,7 @@ struct AccentButtonStyle: ButtonStyle { .stroke(Color.white.opacity(self.isHovered ? 0.3 : 0.15), lineWidth: 1) ) .shadow( - color: self.theme.palette.accent.opacity(self.isHovered ? 0.5 : 0.3), + color: tone.opacity(self.isHovered ? 0.5 : 0.3), radius: self.isHovered ? 6 : 4, x: 0, y: self.isHovered ? 
3 : 2 diff --git a/Sources/Fluid/UI/CustomDictionaryView.swift b/Sources/Fluid/UI/CustomDictionaryView.swift index 36a7e10c..c1c37a2e 100644 --- a/Sources/Fluid/UI/CustomDictionaryView.swift +++ b/Sources/Fluid/UI/CustomDictionaryView.swift @@ -12,9 +12,13 @@ import UniformTypeIdentifiers struct CustomDictionaryView: View { @Environment(\.theme) private var theme + @Environment(\.accessibilityReduceMotion) private var reduceMotion + @EnvironmentObject private var appServices: AppServices + + private var asr: ASRService { self.appServices.asr } + @State private var entries: [SettingsStore.CustomDictionaryEntry] = SettingsStore.shared.customDictionaryEntries @State private var boostTerms: [ParakeetVocabularyStore.VocabularyConfig.Term] = [] - @State private var showAddSheet = false @State private var editingEntry: SettingsStore.CustomDictionaryEntry? @State private var showAddBoostSheet = false @State private var editingBoostTerm: EditableBoostTerm? @@ -24,23 +28,195 @@ struct CustomDictionaryView: View { @State private var vocabBoostingEnabled: Bool = SettingsStore.shared.vocabularyBoostingEnabled @State private var isBoostingInfoPresented = false + @State private var trainingReplacement = "" + @State private var trainingVariants: [String] = [] + @State private var trainingSampleCount = 0 + @State private var lastTrainingOutput = "" + @State private var lastTrainingOutputIsCovered = false + @State private var consecutiveCoveredCaptures = 0 + @State private var trainingStatusMessage = "Type the correct text." + @State private var trainingHasError = false + @State private var isTrainingActive = false + @State private var isTrainingStarting = false + @State private var isTrainingRecording = false + @State private var trainingStopRequestedDuringStart = false + @State private var isTrainingProcessing = false + @State private var replacementConfirmation: ReplacementConfirmation? 
+ @State private var composerMode: DictionaryComposerMode = .train + @State private var manualTriggersText = "" + @State private var manualReplacement = "" + @State private var isDictionaryExpanded = false + + private var normalizedTrainingReplacement: String { + self.trainingReplacement.trimmingCharacters(in: .whitespacesAndNewlines) + } + + private var trainingProgressText: String { + let count = self.trainingSampleCount + return "\(count) \(count == 1 ? "sample" : "samples") Β· up to \(CustomDictionaryTrainingMerge.maxSamples)" + } + + private var shouldShowTrainingStatus: Bool { + self.trainingHasError || ( + !self.trainingStatusMessage.isEmpty && + self.trainingStatusMessage != "Type the correct text." + ) + } + + private var canUseTrainingRecorderButton: Bool { + guard !self.trainingStopRequestedDuringStart, !self.isTrainingProcessing else { return false } + return self.isTrainingRecording || self.canRecordTrainingSample + } + + private var trainingRecorderTitle: String { + if self.trainingStopRequestedDuringStart { + return "Stopping..." + } + if self.isTrainingProcessing { + return "Working..." + } + if self.isTrainingStarting { + return "Starting..." + } + if self.isTrainingRecording { + return "Listening..." + } + if self.normalizedTrainingReplacement.isEmpty { + return "Record sample" + } + return self.trainingVariants.isEmpty ? "Say it once" : "Say it again" + } + + private var trainingRecorderDetail: String { + self.normalizedTrainingReplacement.isEmpty + ? "Type the correct text first." + : "Keep trying until FluidVoice understands you 3 times in a row." 
+ } + + private var trainingRecorderStatusText: String { + guard !self.lastTrainingOutput.isEmpty else { return "Record to check" } + if self.trainingAlreadyCorrectWithoutReplacement { + return "Already correct" + } + if self.trainingFinalOutputIsReady { + return "Ready to add" + } + return "\(self.trainingReadinessProgress)/\(CustomDictionaryTrainingMerge.readyCoveredCount) understood" + } + + private var trainingRecorderStatusColor: Color { + self.trainingFinalOutputIsReady || self.trainingAlreadyCorrectWithoutReplacement + ? self.theme.palette.success + : self.theme.palette.secondaryText + } + + private var trainingRecorderFillColor: Color { + self.trainingFinalOutputIsReady || self.trainingAlreadyCorrectWithoutReplacement + ? self.theme.palette.success + : self.theme.palette.accent + } + + private var trainingRecorderFillFraction: Double { + guard !self.lastTrainingOutput.isEmpty else { return 0 } + if self.trainingAlreadyCorrectWithoutReplacement { + return 1 + } + return Double(self.trainingReadinessProgress) / Double(CustomDictionaryTrainingMerge.readyCoveredCount) + } + + private var trainingFinalOutputIsReady: Bool { + !self.trainingAlreadyCorrectWithoutReplacement && + self.trainingOutputIsCovered && + self.consecutiveCoveredCaptures >= CustomDictionaryTrainingMerge.readyCoveredCount + } + + private var trainingAlreadyCorrectWithoutReplacement: Bool { + self.trainingVariants.isEmpty && + self.trainingOutputIsCovered && + !self.lastTrainingOutput.isEmpty && + self.lastTrainingOutput.caseInsensitiveCompare(self.normalizedTrainingReplacement) == .orderedSame && + self.consecutiveCoveredCaptures >= CustomDictionaryTrainingMerge.readyCoveredCount + } + + private var trainingReadinessProgress: Int { + guard !self.trainingAlreadyCorrectWithoutReplacement else { + return CustomDictionaryTrainingMerge.readyCoveredCount + } + guard self.trainingOutputIsCovered else { return 0 } + return min(self.consecutiveCoveredCaptures, 
CustomDictionaryTrainingMerge.readyCoveredCount) + } + + private var trainingOutputIsCovered: Bool { + self.lastTrainingOutputIsCovered + } + + private var trainingFinalOutputText: String { + guard !self.lastTrainingOutput.isEmpty else { return "Record to check" } + return self.trainingOutputIsCovered ? self.normalizedTrainingReplacement : self.lastTrainingOutput + } + + private var canStartTraining: Bool { + !self.normalizedTrainingReplacement.isEmpty && + !self.isTrainingRecording && + !self.isTrainingProcessing + } + + private var canRecordTrainingSample: Bool { + !self.normalizedTrainingReplacement.isEmpty && + !self.isTrainingProcessing && + !self.asr.isRunning && + self.trainingSampleCount < CustomDictionaryTrainingMerge.maxSamples + } + + private var canAddTrainedReplacement: Bool { + !self.normalizedTrainingReplacement.isEmpty && + !self.trainingVariants.isEmpty && + !self.isTrainingRecording && + !self.isTrainingProcessing + } + + private var trainedReplacementButtonTitle: String { + self.trainingAlreadyCorrectWithoutReplacement ? 
"No Replacement Needed" : "Add Replacement" + } + + private var shouldEmphasizeTrainedReplacementButton: Bool { + self.trainingFinalOutputIsReady && self.canAddTrainedReplacement + } + + private var manualTriggers: [String] { + CustomDictionaryManualEntry.parseTriggers(self.manualTriggersText) + } + + private var manualDuplicateTriggers: [String] { + self.manualTriggers.filter { self.allExistingTriggers().contains($0) } + } + + private var canAddManualReplacement: Bool { + !self.manualTriggers.isEmpty && + !self.manualReplacement.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty && + self.manualDuplicateTriggers.isEmpty + } + var body: some View { ScrollView(.vertical, showsIndicators: false) { VStack(alignment: .leading, spacing: self.theme.metrics.spacing.xl) { self.pageHeader VStack(alignment: .leading, spacing: self.theme.metrics.spacing.xxl) { - self.instantReplacementSection + self.trainReplacementSection + self.yourDictionarySection self.aiPostProcessingSection } } .frame(maxWidth: 860, alignment: .leading) .padding(self.theme.metrics.spacing.xl) } - .sheet(isPresented: self.$showAddSheet) { - AddDictionaryEntrySheet(existingTriggers: self.allExistingTriggers()) { newEntry in - self.entries.append(newEntry) - self.saveEntries() + .overlay { + if let confirmation = self.replacementConfirmation { + ReplacementConfirmationToast(confirmation: confirmation) + .padding(self.theme.metrics.spacing.xl) + .transition(.scale(scale: 0.92).combined(with: .opacity)) + .allowsHitTesting(false) } } .sheet(item: self.$editingEntry) { entry in @@ -73,6 +249,12 @@ struct CustomDictionaryView: View { .onAppear { self.loadBoostTerms() } + .onDisappear { + guard self.isTrainingRecording else { return } + Task { @MainActor in + await self.stopTrainingSample() + } + } } // MARK: - Page Header @@ -129,17 +311,424 @@ struct CustomDictionaryView: View { .frame(width: 34, height: 34) } - // MARK: - Instant Replacement + // MARK: - Teach Words - private var 
instantReplacementSection: some View { + private var trainReplacementSection: some View { ThemedCard(style: .standard, hoverEffect: false) { VStack(alignment: .leading, spacing: self.theme.metrics.spacing.lg) { HStack(alignment: .center, spacing: self.theme.metrics.spacing.md) { - self.settingsIconTile(systemName: "arrow.left.arrow.right") + self.settingsIconTile(systemName: "mic.fill") + + VStack(alignment: .leading, spacing: 3) { + Text("Teach Words") + .font(self.theme.typography.sectionTitle) + Text("Show FluidVoice the right spelling, by voice or by typing.") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + } + } + + self.dictionaryComposerModePicker + + Group { + switch self.composerMode { + case .train: + self.trainReplacementComposer + case .manual: + self.manualReplacementComposer + } + } + .frame(minHeight: 315, alignment: .topLeading) + } + } + .frame(maxWidth: .infinity, alignment: .leading) + } + + private var dictionaryComposerModePicker: some View { + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.sm) { + self.dictionaryComposerModeSegmented + + Text(self.composerMode.detail) + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + .fixedSize(horizontal: false, vertical: true) + } + } + + private var dictionaryComposerModeSegmented: some View { + HStack(spacing: 2) { + ForEach(DictionaryComposerMode.allCases) { mode in + DictionaryComposerModeTab( + mode: mode, + isSelected: self.composerMode == mode, + isDisabled: self.isTrainingRecording || self.isTrainingProcessing + ) { + self.selectComposerMode(mode) + } + } + } + .padding(3) + .background( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .fill(self.theme.palette.contentBackground.opacity(0.5)) + .overlay( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .stroke(self.theme.palette.cardBorder.opacity(0.25), lineWidth: 1) + ) + 
) + } + + private var trainReplacementComposer: some View { + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.sm) { + TextField("Type the correct text, e.g. FluidVoice", text: self.$trainingReplacement) + .textFieldStyle(.roundedBorder) + .disabled(self.isTrainingRecording || self.isTrainingProcessing) + .onChange(of: self.trainingReplacement) { oldValue, newValue in + self.handleTrainingReplacementChange(oldValue: oldValue, newValue: newValue) + } + + self.trainingRecorderPanel + + self.trainingFinalOutputPanel + + if !self.trainingVariants.isEmpty { + self.trainingHeardSection + } + + self.trainingFooter + + Spacer(minLength: 0) + + Button { + self.addTrainedReplacement() + } label: { + Label(self.trainedReplacementButtonTitle, systemImage: self.trainingAlreadyCorrectWithoutReplacement ? "checkmark" : "plus") + .frame(maxWidth: .infinity) + .frame(height: 38) + } + .fluidButton(.accent, size: .small) + .disabled(!self.canAddTrainedReplacement) + .opacity(self.canAddTrainedReplacement ? 1 : 0.45) + .overlay(self.trainedReplacementButtonReadyOutline) + .shadow( + color: self.shouldEmphasizeTrainedReplacementButton ? self.theme.palette.success.opacity(0.18) : .clear, + radius: self.shouldEmphasizeTrainedReplacementButton ? 14 : 0, + x: 0, + y: 5 + ) + .scaleEffect(self.shouldEmphasizeTrainedReplacementButton ? 1.006 : 1) + .animation(.spring(response: 0.28, dampingFraction: 0.72), value: self.shouldEmphasizeTrainedReplacementButton) + } + } + + private var trainedReplacementButtonReadyOutline: some View { + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .stroke( + self.shouldEmphasizeTrainedReplacementButton ? 
self.theme.palette.success.opacity(0.72) : .clear, + lineWidth: 1.5 + ) + .padding(-3) + .allowsHitTesting(false) + } + + private var manualReplacementComposer: some View { + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.md) { + ViewThatFits(in: .horizontal) { + HStack(alignment: .top, spacing: self.theme.metrics.spacing.md) { + self.manualTriggerField + self.manualReplacementField + } + + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.md) { + self.manualTriggerField + self.manualReplacementField + } + } + + if !self.manualDuplicateTriggers.isEmpty { + Label("Already used: \(self.manualDuplicateTriggers.joined(separator: ", "))", systemImage: "exclamationmark.triangle.fill") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.warning) + } + + if !self.manualTriggers.isEmpty || !self.manualReplacement.isEmpty { + FlowLayout(spacing: 6) { + ForEach(self.manualTriggers, id: \.self) { trigger in + DictionaryPreviewChip(text: trigger) + } + + Image(systemName: "arrow.right") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.tertiaryText) + + Text(self.manualReplacement.trimmingCharacters(in: .whitespacesAndNewlines)) + .font(self.theme.typography.captionStrong) + .foregroundStyle(self.theme.palette.accent) + } + } + + Spacer(minLength: 0) + + Button { + self.addManualReplacementIfValid() + } label: { + Label("Add Replacement", systemImage: "plus") + .frame(maxWidth: .infinity) + .frame(height: 38) + } + .fluidButton(.accent, size: .small) + .disabled(!self.canAddManualReplacement) + .opacity(self.canAddManualReplacement ? 
1 : 0.45) + } + } + + private var manualTriggerField: some View { + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.sm) { + Text("When FluidVoice hears") + .font(self.theme.typography.captionStrong) + TextField("fluid voice, fluid boys", text: self.$manualTriggersText) + .textFieldStyle(.roundedBorder) + .onSubmit { self.addManualReplacementIfValid() } + Text("Separate multiple versions with commas.") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + } + } + + private var manualReplacementField: some View { + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.sm) { + Text("Change it to") + .font(self.theme.typography.captionStrong) + TextField("FluidVoice", text: self.$manualReplacement) + .textFieldStyle(.roundedBorder) + .onSubmit { self.addManualReplacementIfValid() } + Text("This is what appears in your transcription.") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + } + } + + private var trainingRecorderPanel: some View { + HStack(alignment: .center, spacing: self.theme.metrics.spacing.md) { + VStack(alignment: .leading, spacing: 6) { + Text(self.trainingRecorderTitle) + .font(self.theme.typography.bodySmallStrong) + + Text(self.trainingRecorderDetail) + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + .lineLimit(2) + + self.trainingRecorderProgressRow + + HStack(spacing: 7) { + Text(self.trainingRecorderStatusText) + .font(self.theme.typography.captionStrong) + .foregroundStyle(self.trainingRecorderStatusColor) + .lineLimit(1) + + Text("Β· \(self.trainingProgressText) recorded") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.tertiaryText) + .lineLimit(1) + } + } + + Spacer() + + Button { + Task { + if self.isTrainingRecording { + await self.stopTrainingSample() + } else { + await self.startTrainingSample() + } + } + } label: { + Label(self.isTrainingRecording ? 
"Stop" : "Record", systemImage: self.isTrainingRecording ? "stop.fill" : "mic.fill") + } + .fluidButton(self.isTrainingRecording ? .destructive : .accent, size: .small) + .disabled(!self.canUseTrainingRecorderButton) + .opacity(self.canUseTrainingRecorderButton ? 1 : 0.45) + } + .padding(self.theme.metrics.spacing.md) + .background(self.trainingRecorderBackground) + } + + private var trainingRecorderBackground: some View { + GeometryReader { proxy in + let fillWidth = proxy.size.width * min(max(self.trainingRecorderFillFraction, 0), 1) + + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .fill(self.theme.palette.contentBackground.opacity(0.5)) + .overlay(alignment: .leading) { + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .fill(self.trainingRecorderFillColor.opacity(0.16)) + .frame(width: fillWidth) + } + .overlay( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .stroke(self.trainingRecorderBorderColor, lineWidth: 1) + ) + .animation(.easeOut(duration: 0.18), value: self.trainingRecorderFillFraction) + } + .allowsHitTesting(false) + } + + private var trainingRecorderBorderColor: Color { + self.trainingFinalOutputIsReady || self.trainingAlreadyCorrectWithoutReplacement + ? 
self.theme.palette.success.opacity(0.28) + : self.theme.palette.cardBorder.opacity(0.25) + } + + private var trainingRecorderProgressBar: some View { + GeometryReader { proxy in + let width = proxy.size.width * min(max(self.trainingRecorderFillFraction, 0), 1) + + ZStack(alignment: .leading) { + Capsule(style: .continuous) + .fill(self.theme.palette.cardBorder.opacity(0.35)) + + Capsule(style: .continuous) + .fill(self.trainingRecorderFillColor) + .frame(width: width) + } + } + .frame(height: 5) + .animation(.easeOut(duration: 0.18), value: self.trainingRecorderFillFraction) + .accessibilityHidden(true) + } + + private var trainingRecorderProgressRow: some View { + HStack(spacing: self.theme.metrics.spacing.sm) { + self.trainingRecorderProgressBar + + Text("\(self.trainingReadinessProgress)/\(CustomDictionaryTrainingMerge.readyCoveredCount)") + .font(self.theme.typography.captionStrong) + .foregroundStyle(self.trainingRecorderStatusColor) + .monospacedDigit() + .frame(width: 34, alignment: .trailing) + } + } + + private var trainingHeardSection: some View { + HStack(spacing: self.theme.metrics.spacing.sm) { + Text("Captured") + .font(self.theme.typography.captionStrong) + .foregroundStyle(self.theme.palette.secondaryText) + + HStack(spacing: 6) { + ForEach(Array(self.trainingVariants.prefix(5).enumerated()), id: \.element) { index, variant in + TrainingVariantChip(number: index + 1, variant: variant) { + self.removeTrainingVariant(variant) + } + } + + if self.trainingVariants.count > 5 { + Text("+\(self.trainingVariants.count - 5)") + .font(self.theme.typography.captionStrong) + .foregroundStyle(self.theme.palette.tertiaryText) + .padding(.horizontal, 7) + .padding(.vertical, 4) + .background( + RoundedRectangle(cornerRadius: 5, style: .continuous) + .fill(self.theme.palette.cardBackground.opacity(0.65)) + ) + } + } + + Spacer(minLength: 0) + } + .padding(.horizontal, self.theme.metrics.spacing.md) + .padding(.vertical, self.theme.metrics.spacing.sm) + .background( 
+ RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .fill(self.theme.palette.contentBackground.opacity(0.5)) + .overlay( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .stroke(self.theme.palette.cardBorder.opacity(0.25), lineWidth: 1) + ) + ) + } + + private var trainingFinalOutputPanel: some View { + HStack(alignment: .center, spacing: self.theme.metrics.spacing.md) { + VStack(alignment: .leading, spacing: 5) { + Text("Final output") + .font(self.theme.typography.captionStrong) + .foregroundStyle(self.theme.palette.secondaryText) + + Text(self.trainingFinalOutputText) + .font(self.theme.typography.bodySmallStrong) + .foregroundStyle(self.lastTrainingOutput.isEmpty ? self.theme.palette.tertiaryText : self.theme.palette.primaryText) + .lineLimit(1) + + if !self.lastTrainingOutput.isEmpty, self.lastTrainingOutput.caseInsensitiveCompare(self.trainingFinalOutputText) != .orderedSame { + Text("Heard: \(self.lastTrainingOutput)") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.tertiaryText) + .lineLimit(1) + } + } + + Spacer() + } + .padding(.horizontal, self.theme.metrics.spacing.md) + .padding(.vertical, self.theme.metrics.spacing.sm) + .background( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .fill(self.theme.palette.contentBackground.opacity(0.42)) + .overlay( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.md, style: .continuous) + .stroke( + self.trainingFinalOutputIsReady ? 
self.theme.palette.success.opacity(0.28) : self.theme.palette.cardBorder.opacity(0.22), + lineWidth: 1 + ) + ) + ) + } + + @ViewBuilder + private var trainingFooter: some View { + if self.shouldShowTrainingStatus || self.isTrainingActive || !self.trainingVariants.isEmpty { + HStack(spacing: self.theme.metrics.spacing.sm) { + if self.trainingHasError { + Label(self.trainingStatusMessage, systemImage: "exclamationmark.triangle.fill") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.warning) + } else if self.shouldShowTrainingStatus { + Text(self.trainingStatusMessage) + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + } + + if self.isTrainingActive || !self.trainingVariants.isEmpty || !self.normalizedTrainingReplacement.isEmpty { + Spacer() + + Button("Clear") { + self.resetTraining() + } + .fluidButton(.compact, size: .compact) + .disabled(self.isTrainingRecording || self.isTrainingProcessing) + .opacity(self.isTrainingRecording || self.isTrainingProcessing ? 
0.45 : 1) + } else { + Spacer(minLength: 0) + } + } + } + } + + // MARK: - Your Dictionary + + private var yourDictionarySection: some View { + ThemedCard(style: .standard, hoverEffect: false) { + VStack(alignment: .leading, spacing: self.theme.metrics.spacing.lg) { + HStack(alignment: .center, spacing: self.theme.metrics.spacing.md) { + self.settingsIconTile(systemName: "book.closed.fill") VStack(alignment: .leading, spacing: 3) { HStack(spacing: 6) { - Text("Instant Replacement") + Text("Your Dictionary") .font(self.theme.typography.sectionTitle) if !self.entries.isEmpty { Text("(\(self.entries.count))") @@ -147,7 +736,7 @@ struct CustomDictionaryView: View { .foregroundStyle(self.theme.palette.tertiaryText) } } - Text("Replace phrases that are consistently transcribed incorrectly.") + Text("Words and phrases FluidVoice will correct automatically.") .font(self.theme.typography.caption) .foregroundStyle(self.theme.palette.secondaryText) } @@ -155,22 +744,35 @@ struct CustomDictionaryView: View { Spacer() Button { - self.showAddSheet = true + withAnimation(self.reduceMotion ? nil : .easeOut(duration: 0.16)) { + self.isDictionaryExpanded.toggle() + } } label: { - Label("Add Replacement", systemImage: "plus") + Image(systemName: self.isDictionaryExpanded ? "chevron.up" : "chevron.down") + .font(.system(size: 12, weight: .semibold)) + .foregroundStyle(self.theme.palette.secondaryText) + .frame(width: 28, height: 28) + .background( + RoundedRectangle(cornerRadius: self.theme.metrics.corners.sm, style: .continuous) + .fill(self.theme.palette.contentBackground.opacity(0.45)) + ) } - .fluidButton(.accent, size: .small) + .buttonStyle(.plain) + .help(self.isDictionaryExpanded ? "Collapse dictionary" : "Expand dictionary") + .accessibilityLabel(self.isDictionaryExpanded ? "Collapse dictionary" : "Expand dictionary") } - if self.entries.isEmpty { - self.dictionaryEmptyState( - title: "No replacements yet", - detail: "Add a phrase and the text it should become." 
- ) { - self.showAddSheet = true + if self.isDictionaryExpanded { + if self.entries.isEmpty { + self.dictionaryEmptyState( + title: "No replacements yet", + detail: "Use Train Replacement or Manual Add above to create your first one." + ) + .frame(maxWidth: 760) + .frame(maxWidth: .infinity, alignment: .center) + } else { + self.entriesListView } - } else { - self.entriesListView } } } @@ -187,6 +789,8 @@ struct CustomDictionaryView: View { ) } } + .frame(maxWidth: 760) + .frame(maxWidth: .infinity, alignment: .center) } // MARK: - Custom Words @@ -290,6 +894,10 @@ struct CustomDictionaryView: View { .font(self.theme.typography.caption) .foregroundStyle(self.theme.palette.secondaryText) + Text("It can add close to a second to transcription time.") + .font(self.theme.typography.caption) + .foregroundStyle(self.theme.palette.secondaryText) + Text("If recognition gets worse, the model behaves unexpectedly, or you notice other issues after enabling it, turn Boosting off.") .font(self.theme.typography.caption) .foregroundStyle(self.theme.palette.secondaryText) @@ -301,7 +909,7 @@ struct CustomDictionaryView: View { private func dictionaryEmptyState( title: String, detail: String, - action: @escaping () -> Void + action: (() -> Void)? 
= nil ) -> some View { HStack(spacing: self.theme.metrics.spacing.sm) { Image(systemName: "plus.circle") @@ -316,10 +924,12 @@ struct CustomDictionaryView: View { .foregroundStyle(self.theme.palette.secondaryText) } - Spacer() + if let action { + Spacer() - Button("Add", action: action) - .fluidButton(.compact, size: .compact) + Button("Add", action: action) + .fluidButton(.compact, size: .compact) + } } .padding(self.theme.metrics.spacing.md) .background( @@ -341,6 +951,281 @@ struct CustomDictionaryView: View { NotificationCenter.default.post(name: .parakeetVocabularyDidChange, object: nil) } + private func addReplacementEntry(_ entry: SettingsStore.CustomDictionaryEntry) { + self.entries.insert(entry, at: 0) + self.saveEntries() + self.showReplacementConfirmation( + title: "Replacement added", + detail: "It is at the top of the list." + ) + } + + private func selectComposerMode(_ mode: DictionaryComposerMode) { + guard !self.isTrainingRecording, !self.isTrainingProcessing else { return } + self.composerMode = mode + } + + private func addManualReplacementIfValid() { + guard self.canAddManualReplacement else { return } + let entry = SettingsStore.CustomDictionaryEntry( + triggers: self.manualTriggers, + replacement: self.manualReplacement.trimmingCharacters(in: .whitespacesAndNewlines) + ) + self.addReplacementEntry(entry) + self.manualTriggersText = "" + self.manualReplacement = "" + } + + private func beginTrainingReplacement() { + guard self.canStartTraining else { return } + self.isTrainingActive = true + self.trainingHasError = false + self.trainingStatusMessage = "" + } + + private func startTrainingSample() async { + guard self.canRecordTrainingSample else { return } + self.isTrainingActive = true + self.trainingHasError = false + self.trainingStatusMessage = "" + self.trainingStopRequestedDuringStart = false + self.isTrainingStarting = true + self.isTrainingRecording = true + + await self.asr.start(forDictionaryTraining: true) + self.isTrainingStarting = 
/// Folds one training transcript into the in-progress training state.
///
/// The transcript is normalized with `CustomDictionaryTrainingMerge.normalizedTrigger`. When that
/// yields nothing the capture is reported as an error. Otherwise the capture is classified, in
/// this order, as:
/// 1. matching the intended replacement text,
/// 2. already captured in this session or already covered by a saved entry,
/// 3. rejected because the variant list is full, or
/// 4. a new variant, which is appended to `trainingVariants`.
///
/// - Parameter transcript: Raw text returned by the recognizer for one spoken sample.
private func addTrainingVariant(from transcript: String) {
    let sampleCap = CustomDictionaryTrainingMerge.maxSamples
    let readyCount = CustomDictionaryTrainingMerge.readyCoveredCount
    let readyMessage = "Looks ready. Add this replacement when you're ready."
    let maxedOutMessage = "Max samples reached. Add it or clear one."

    guard let heard = CustomDictionaryTrainingMerge.normalizedTrigger(transcript) else {
        self.trainingHasError = true
        self.trainingStatusMessage = "Nothing heard. Try again."
        self.lastTrainingOutput = ""
        self.lastTrainingOutputIsCovered = false
        self.consecutiveCoveredCaptures = 0
        return
    }

    self.lastTrainingOutput = heard
    self.trainingSampleCount = min(self.trainingSampleCount + 1, sampleCap)
    // Every outcome from here on is a non-error outcome.
    self.trainingHasError = false

    // 1. The recognizer already produced the intended text.
    if heard.caseInsensitiveCompare(self.normalizedTrainingReplacement) == .orderedSame {
        self.lastTrainingOutputIsCovered = true
        self.consecutiveCoveredCaptures += 1

        let isReady = self.consecutiveCoveredCaptures >= readyCount
        switch (isReady, self.trainingVariants.isEmpty) {
        case (true, true):
            self.trainingStatusMessage = "Looks good already. No replacement needed."
        case (true, false):
            self.trainingStatusMessage = readyMessage
        case (false, _):
            self.trainingStatusMessage = "Covered. Try a couple more."
        }
        return
    }

    // 2. A mishearing we already know about, from this session or from the saved dictionary.
    let alreadyCaptured = self.trainingVariants.contains(where: {
        $0.caseInsensitiveCompare(heard) == .orderedSame
    })
    let alreadySaved = self.savedDictionaryCovers(heard)

    if alreadyCaptured || alreadySaved {
        self.lastTrainingOutputIsCovered = true
        self.consecutiveCoveredCaptures += 1

        if self.consecutiveCoveredCaptures >= readyCount {
            self.trainingStatusMessage = readyMessage
        } else if alreadySaved {
            self.trainingStatusMessage = "Covered by your dictionary."
        } else {
            self.trainingStatusMessage = "Already captured. Try a couple more."
        }
        return
    }

    // 3 and 4. A mishearing that nothing covers yet: the covered streak is broken either way.
    self.lastTrainingOutputIsCovered = false
    self.consecutiveCoveredCaptures = 0

    if self.trainingVariants.count >= sampleCap {
        self.trainingStatusMessage = maxedOutMessage
        return
    }

    self.trainingVariants.append(heard)

    let reachedCap = self.trainingSampleCount >= sampleCap || self.trainingVariants.count >= sampleCap
    self.trainingStatusMessage = reachedCap
        ? maxedOutMessage
        : "New pronunciation captured. Add replacement to cover it."
}
/// Re-evaluates whether the most recent capture is still covered.
///
/// A capture counts as covered when it equals the intended replacement (case-insensitively), is
/// still present in `trainingVariants`, or is covered by a saved dictionary entry. When it is not
/// covered, or there is no capture at all, the consecutive-covered streak is reset to zero.
private func refreshLastTrainingCoverage() {
    let latest = self.lastTrainingOutput

    let stillCovered: Bool
    if latest.isEmpty {
        stillCovered = false
    } else if latest.caseInsensitiveCompare(self.normalizedTrainingReplacement) == .orderedSame {
        stillCovered = true
    } else if self.trainingVariants.contains(where: { $0.caseInsensitiveCompare(latest) == .orderedSame }) {
        stillCovered = true
    } else {
        stillCovered = self.savedDictionaryCovers(latest)
    }

    self.lastTrainingOutputIsCovered = stillCovered
    if !stillCovered {
        self.consecutiveCoveredCaptures = 0
    }
}
/// Reports whether a saved dictionary entry for the current training replacement already lists
/// `trigger`.
///
/// Both the candidate and the saved triggers are compared after
/// `CustomDictionaryTrainingMerge.normalizedTrigger`; entries are matched to the replacement
/// case-insensitively.
///
/// - Parameter trigger: The heard text to look up.
/// - Returns: `false` when the replacement is empty or `trigger` normalizes to nothing; otherwise
///   whether a matching saved trigger exists.
private func savedDictionaryCovers(_ trigger: String) -> Bool {
    let intendedText = self.normalizedTrainingReplacement
    guard !intendedText.isEmpty,
          let wantedKey = CustomDictionaryTrainingMerge.normalizedTrigger(trigger)
    else {
        return false
    }

    for entry in self.entries where entry.replacement.caseInsensitiveCompare(intendedText) == .orderedSame {
        let hasMatchingTrigger = entry.triggers.contains { savedTrigger in
            CustomDictionaryTrainingMerge.normalizedTrigger(savedTrigger) == wantedKey
        }
        if hasMatchingTrigger {
            return true
        }
    }
    return false
}
/// The two ways a dictionary replacement can be composed: by voice training or by typing.
private enum DictionaryComposerMode: CaseIterable, Identifiable {
    case train
    case manual

    /// Display strings for one mode, grouped so each mode is described in a single place.
    private struct Labels {
        let title: String
        let systemImage: String
        let detail: String
    }

    var id: Self { self }

    /// Short tab title.
    var title: String { self.labels.title }

    /// SF Symbol name shown next to the title.
    var systemImage: String { self.labels.systemImage }

    /// One-sentence explanation of the mode.
    var detail: String { self.labels.detail }

    private var labels: Labels {
        switch self {
        case .train:
            return Labels(
                title: "Train by Voice",
                systemImage: "mic.fill",
                detail: "Say it a few times so FluidVoice can catch the versions it hears."
            )
        case .manual:
            return Labels(
                title: "Add Manually",
                systemImage: "keyboard",
                detail: "Type the misheard text and the spelling you want."
            )
        }
    }
}
/// Normalization and merge rules for dictionary replacements built from spoken training samples.
enum CustomDictionaryTrainingMerge {
    static let recommendedSamples = 5
    /// Upper bound on the number of triggers kept for a single replacement.
    static let maxSamples = 20
    /// Number of consecutive covered captures after which training is reported as ready.
    static let readyCoveredCount = 3

    /// Punctuation stripped from both ends of a trigger: ASCII sentence punctuation and straight
    /// quotes, plus the curly double and single quotes.
    ///
    /// The curly quotes are written as Unicode scalar escapes so the set cannot be corrupted by a
    /// file-encoding round trip (a mis-decoded literal would strip unrelated characters such as
    /// `€` and fail to strip the quotes themselves).
    private static let edgePunctuation = CharacterSet(
        charactersIn: ".,!?;:\"'\u{201C}\u{201D}\u{2018}\u{2019}"
    )

    /// Everything trimmed from the edges of a trigger; built once instead of on every call.
    private static let edgeCharacters = CharacterSet.whitespacesAndNewlines.union(edgePunctuation)

    /// Returns `value` with surrounding whitespace and newlines removed. Case is preserved.
    static func normalizedReplacement(_ value: String) -> String {
        value.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    /// Returns the lowercased trigger with edge whitespace and punctuation removed.
    ///
    /// - Returns: `nil` when nothing is left after trimming.
    static func normalizedTrigger(_ value: String) -> String? {
        let trimmed = value.trimmingCharacters(in: self.edgeCharacters).lowercased()
        return trimmed.isEmpty ? nil : trimmed
    }

    /// Normalizes and de-duplicates `values`, preserving first-seen order.
    ///
    /// Values that normalize to nothing, or that equal the intended replacement
    /// (case-insensitively), are dropped. At most `maxSamples` triggers are returned.
    ///
    /// - Parameters:
    ///   - values: Raw trigger candidates.
    ///   - intendedReplacement: The text the triggers should be replaced with.
    static func normalizedTriggers(from values: [String], intendedReplacement: String) -> [String] {
        let replacement = self.normalizedReplacement(intendedReplacement)
        var seen: Set<String> = []
        var result: [String] = []
        result.reserveCapacity(min(values.count, self.maxSamples))

        for value in values {
            guard let trigger = self.normalizedTrigger(value),
                  trigger.caseInsensitiveCompare(replacement) != .orderedSame,
                  seen.insert(trigger).inserted
            else {
                continue
            }
            result.append(trigger)
            if result.count >= self.maxSamples {
                break
            }
        }

        return result
    }

    /// Merges newly trained triggers for `replacement` into `entries`.
    ///
    /// - All entries whose replacement matches case-insensitively are collapsed into one entry,
    ///   placed where the first of them was, keeping that entry's `id` and stored spelling.
    /// - If no entry matches, a new entry is inserted at the front.
    /// - Triggers that end up in the merged entry are removed from every other entry; an entry
    ///   left with no triggers is dropped.
    ///
    /// - Parameters:
    ///   - entries: The current dictionary entries.
    ///   - replacement: The intended text; surrounding whitespace is ignored.
    ///   - triggers: Raw trigger candidates captured for that text.
    /// - Returns: The updated entries, or `entries` unchanged when the replacement is empty or no
    ///   usable trigger remains after normalization.
    static func mergedEntries(
        current entries: [SettingsStore.CustomDictionaryEntry],
        replacement: String,
        triggers: [String]
    ) -> [SettingsStore.CustomDictionaryEntry] {
        let replacementText = self.normalizedReplacement(replacement)
        let incomingTriggers = self.normalizedTriggers(from: triggers, intendedReplacement: replacementText)
        guard !replacementText.isEmpty, !incomingTriggers.isEmpty else { return entries }

        let existingEntry = entries.first {
            $0.replacement.caseInsensitiveCompare(replacementText) == .orderedSame
        }
        // Keep the spelling that is already stored, if any, rather than the newly typed casing.
        let storedReplacementText = existingEntry?.replacement ?? replacementText

        let existingTriggers = entries
            .filter { $0.replacement.caseInsensitiveCompare(storedReplacementText) == .orderedSame }
            .flatMap(\.triggers)

        // Existing triggers come first, so they win if the combined list exceeds `maxSamples`.
        let combinedTriggers = self.normalizedTriggers(
            from: existingTriggers + incomingTriggers,
            intendedReplacement: storedReplacementText
        )
        let triggerKeys = Set(combinedTriggers)

        let mergedEntry: SettingsStore.CustomDictionaryEntry
        if let existingEntry {
            mergedEntry = SettingsStore.CustomDictionaryEntry(
                id: existingEntry.id,
                triggers: combinedTriggers,
                replacement: storedReplacementText
            )
        } else {
            mergedEntry = SettingsStore.CustomDictionaryEntry(
                triggers: combinedTriggers,
                replacement: storedReplacementText
            )
        }

        var didInsertMergedEntry = false
        var updatedEntries: [SettingsStore.CustomDictionaryEntry] = []
        updatedEntries.reserveCapacity(entries.count + (existingEntry == nil ? 1 : 0))

        for entry in entries {
            if entry.replacement.caseInsensitiveCompare(storedReplacementText) == .orderedSame {
                // The first matching entry's slot receives the merged entry; later ones vanish.
                if !didInsertMergedEntry {
                    updatedEntries.append(mergedEntry)
                    didInsertMergedEntry = true
                }
                continue
            }

            // A trigger may map to only one replacement: strip the ones the merged entry claimed.
            let remainingTriggers = entry.triggers.filter { trigger in
                guard let key = self.normalizedTrigger(trigger) else { return false }
                return !triggerKeys.contains(key)
            }
            guard !remainingTriggers.isEmpty else { continue }

            updatedEntries.append(
                SettingsStore.CustomDictionaryEntry(
                    id: entry.id,
                    triggers: remainingTriggers,
                    replacement: entry.replacement
                )
            )
        }

        if !didInsertMergedEntry {
            updatedEntries.insert(mergedEntry, at: 0)
        }

        return updatedEntries
    }
}
/// A compact chip showing one captured training variant with its ordinal and a remove button.
private struct TrainingVariantChip: View {
    let number: Int
    let variant: String
    let onDelete: () -> Void

    @Environment(\.theme) private var theme

    var body: some View {
        HStack(spacing: 4) {
            self.ordinalLabel
            self.variantLabel
            self.removeButton
        }
        .frame(maxWidth: 165)
        .padding(.horizontal, 7)
        .padding(.vertical, 4)
        .background(self.chipBackground)
    }

    /// The variant's position number, tinted with the accent color.
    private var ordinalLabel: some View {
        Text("\(self.number)")
            .font(self.theme.typography.captionSmall)
            .foregroundStyle(self.theme.palette.accent)
            .frame(minWidth: 11)
    }

    /// The captured text, limited to a single tail-truncated line.
    private var variantLabel: some View {
        Text(self.variant)
            .font(self.theme.typography.caption)
            .lineLimit(1)
            .truncationMode(.tail)
    }

    /// Borderless button that invokes `onDelete`.
    private var removeButton: some View {
        Button(action: self.onDelete) {
            Image(systemName: "xmark.circle.fill")
                .font(.system(size: 11, weight: .semibold))
                .foregroundStyle(self.theme.palette.tertiaryText)
        }
        .buttonStyle(.plain)
        .help("Remove \(self.variant)")
    }

    /// Filled rounded rectangle with a hairline border.
    private var chipBackground: some View {
        let outline = RoundedRectangle(cornerRadius: 5, style: .continuous)
            .stroke(self.theme.palette.cardBorder.opacity(0.35), lineWidth: 1)

        return RoundedRectangle(cornerRadius: 5, style: .continuous)
            .fill(self.theme.palette.cardBackground.opacity(0.85))
            .overlay(outline)
    }
}
/// Samples are trimmed, lowercased and de-duplicated, and a sample equal to the intended text is
/// not kept as a trigger.
func testDictionaryTrainingNormalizesSamplesAndIgnoresIntendedText() {
    let spokenSamples = [" Fluid Voice. ", "FluidVoice", "fluid voice", " "]

    let normalized = CustomDictionaryTrainingMerge.normalizedTriggers(
        from: spokenSamples,
        intendedReplacement: "FluidVoice"
    )

    XCTAssertEqual(normalized, ["fluid voice"])
}