diff --git a/Fluid.xcodeproj/project.pbxproj b/Fluid.xcodeproj/project.pbxproj
index d72a6a01..0dba9453 100644
--- a/Fluid.xcodeproj/project.pbxproj
+++ b/Fluid.xcodeproj/project.pbxproj
@@ -11,6 +11,7 @@
 		7C1C72F22EECBD1300E3BF4D /* SwiftWhisper in Frameworks */ = {isa = PBXBuildFile; productRef = 7C1C72F12EECBD1300E3BF4D /* SwiftWhisper */; };
 		7C3697892ED70F9C005874CE /* DynamicNotchKit in Frameworks */ = {isa = PBXBuildFile; productRef = 7C3697882ED70F9C005874CE /* DynamicNotchKit */; };
 		7C5AF14B2F15041600DE21B0 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = 7C5AF14A2F15041600DE21B0 /* MediaRemoteAdapter */; };
+		6D19DEDFE36041A385D02553 /* VoiceCommandProcessorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97032BFB139242FDA10E67BF /* VoiceCommandProcessorTests.swift */; };
 		7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */; };
 		7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; };
 		7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; };
@@ -32,6 +33,7 @@
 /* Begin PBXFileReference section */
 		7C078D8F2E3B339200FB7CAC /* FluidVoice Debug.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "FluidVoice Debug.app"; sourceTree = BUILT_PRODUCTS_DIR; };
 		7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
+		97032BFB139242FDA10E67BF /* VoiceCommandProcessorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VoiceCommandProcessorTests.swift; sourceTree = "<group>"; };
 		7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HotkeyShortcutTests.swift; sourceTree = "<group>"; };
 		7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = "<group>"; };
 		7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = "<group>"; };
@@ -104,6 +106,7 @@
 				7CDB0A272F3C4D5600FB7CAD /* Resources */,
 				7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */,
 				7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */,
+				97032BFB139242FDA10E67BF /* VoiceCommandProcessorTests.swift */,
 			);
 			path = FluidDictationIntegrationTests;
 			sourceTree = "<group>";
@@ -258,6 +261,7 @@
 				7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */,
 				7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */,
 				7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */,
+				6D19DEDFE36041A385D02553 /* VoiceCommandProcessorTests.swift in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
diff --git a/Sources/Fluid/Persistence/SettingsStore.swift b/Sources/Fluid/Persistence/SettingsStore.swift
index 936f530e..27168b52 100644
--- a/Sources/Fluid/Persistence/SettingsStore.swift
+++ b/Sources/Fluid/Persistence/SettingsStore.swift
@@ -2408,6 +2408,21 @@ final class SettingsStore: ObservableObject {
         }
     }
 
+    // MARK: - Voice Command Settings
+
+    var voiceCommandsEnabled: Bool {
+        get { self.defaults.bool(forKey: Keys.voiceCommandsEnabled) }
+        set { objectWillChange.send(); self.defaults.set(newValue, forKey: Keys.voiceCommandsEnabled) }
+    }
+
+    var voiceCommandScratchWordCount: Int {
+        get {
+            let raw = self.defaults.integer(forKey: Keys.voiceCommandScratchWordCount)
+            return raw > 0 ? raw : 1
+        }
+        set { objectWillChange.send(); self.defaults.set(max(1, newValue), forKey: Keys.voiceCommandScratchWordCount) }
+    }
+
     // MARK: - Rewrite Mode Settings
 
     var rewriteModeHotkeyShortcut: HotkeyShortcut {
@@ -4316,6 +4331,10 @@ private extension SettingsStore {
         static let commandModeLinkedToGlobal = "CommandModeLinkedToGlobal"
         static let commandModeShortcutEnabled = "CommandModeShortcutEnabled"
 
+        // Voice Command Keys
+        static let voiceCommandsEnabled = "VoiceCommandsEnabled"
+        static let voiceCommandScratchWordCount = "VoiceCommandScratchWordCount"
+
         // Prompt Mode Keys (Transcribe with Prompt)
         static let promptModeHotkeyShortcut = "PromptModeHotkeyShortcut"
         static let promptModeShortcutEnabled = "PromptModeShortcutEnabled"
@@ -4709,3 +4728,5 @@ extension SettingsStore {
         return newModel
     }
 }
+
+extension SettingsStore: VoiceCommandSettings {}
diff --git a/Sources/Fluid/Services/DictationPostProcessingService.swift b/Sources/Fluid/Services/DictationPostProcessingService.swift
index fd47ca0a..24be4342 100644
--- a/Sources/Fluid/Services/DictationPostProcessingService.swift
+++ b/Sources/Fluid/Services/DictationPostProcessingService.swift
@@ -22,7 +22,16 @@ final class DictationPostProcessingService {
 
     func process(_ inputText: String, dictationSlot: SettingsStore.DictationShortcutSlot = .primary) async throws -> Result {
         let trimmed = inputText.trimmingCharacters(in: .whitespacesAndNewlines)
-        guard !trimmed.isEmpty else {
+
+        let (voiceStripped, voicePendingAction) = VoiceCommandProcessor.detect(in: trimmed, settings: SettingsStore.shared)
+        if let commandAction = voicePendingAction, voiceStripped.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+            // Command-only utterance: apply the edit and bypass the LLM entirely.
+            let edited = VoiceCommandProcessor.apply(commandAction, to: "", settings: SettingsStore.shared)
+            return Result(text: edited, providerID: SettingsStore.shared.selectedProviderID, model: "")
+        }
+        let effectiveTrimmed = voiceStripped.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? trimmed : voiceStripped
+
+        guard !effectiveTrimmed.isEmpty else {
             return Result(text: "", providerID: SettingsStore.shared.selectedProviderID, model: "")
         }
 
@@ -49,7 +58,7 @@ final class DictationPostProcessingService {
             isPrivateAIProvider || PrivateAIIntegrationService.shouldHandleDictation(model: resolved.model)
         {
             let response = try await PrivateAIIntegrationService.shared.enhanceDictation(
-                trimmed,
+                effectiveTrimmed,
                 runtime: PrivateAIIntegrationService.RuntimeConfiguration(
                     selectedProviderID: resolved.providerID,
                     providerKey: resolved.providerKey,
@@ -67,7 +76,7 @@ final class DictationPostProcessingService {
                 )
             )
             return Result(
-                text: ASRService.applyGAAVFormatting(response.outputText),
+                text: self.applyPendingVoiceEdit(ASRService.applyGAAVFormatting(response.outputText), action: voicePendingAction),
                 providerID: resolved.providerID,
                 model: resolved.model
             )
@@ -77,7 +86,7 @@ final class DictationPostProcessingService {
         let systemPrompt = ""
         let userMessageContent = SettingsStore.renderDictationUserMessage(
             promptText: promptText,
-            transcript: trimmed
+            transcript: effectiveTrimmed
         )
 
         if resolved.providerID == "apple-intelligence" {
@@ -86,10 +95,10 @@ final class DictationPostProcessingService {
                 let provider = AppleIntelligenceProvider()
                 let output = try await provider.process(systemPrompt: systemPrompt, userText: userMessageContent)
                 guard !output.isEmpty else { throw AIProcessingError.emptyResponse }
-                return Result(text: ASRService.applyGAAVFormatting(output), providerID: resolved.providerID, model: resolved.model)
+                return Result(text: self.applyPendingVoiceEdit(ASRService.applyGAAVFormatting(output), action: voicePendingAction), providerID: resolved.providerID, model: resolved.model)
             }
             #endif
-            return Result(text: trimmed, providerID: resolved.providerID, model: resolved.model)
+            return Result(text: self.applyPendingVoiceEdit(effectiveTrimmed, action: voicePendingAction), providerID: resolved.providerID, model: resolved.model)
         }
 
         guard !resolved.model.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
@@ -131,12 +140,17 @@ final class DictationPostProcessingService {
             throw AIProcessingError.emptyResponse
         }
         return Result(
-            text: ASRService.applyGAAVFormatting(response.content),
+            text: self.applyPendingVoiceEdit(ASRService.applyGAAVFormatting(response.content), action: voicePendingAction),
             providerID: resolved.providerID,
             model: resolved.model
         )
     }
 
+    private func applyPendingVoiceEdit(_ text: String, action: EditAction?) -> String {
+        guard let action else { return text }
+        return VoiceCommandProcessor.apply(action, to: text, settings: SettingsStore.shared)
+    }
+
     private func resolveProvider(settings: SettingsStore, dictationSlot: SettingsStore.DictationShortcutSlot) -> ResolvedProvider {
         if settings.dictationPromptSelection(for: dictationSlot) == .privateAI,
            let modelID = PrivateAIProviderPromptFormat.verifiedModelID(settings: settings)
diff --git a/Sources/Fluid/Services/VoiceCommandProcessor.swift b/Sources/Fluid/Services/VoiceCommandProcessor.swift
new file mode 100644
index 00000000..f7da8c30
--- /dev/null
+++ b/Sources/Fluid/Services/VoiceCommandProcessor.swift
@@ -0,0 +1,154 @@
+import Foundation
+
+/// Read-only view of the settings the voice-command pipeline depends on.
+protocol VoiceCommandSettings {
+    /// When `false`, `VoiceCommandProcessor.detect` never reports a command.
+    var voiceCommandsEnabled: Bool { get }
+    /// Number of trailing words removed by the "scratch that" / "delete that" commands.
+    var voiceCommandScratchWordCount: Int { get }
+}
+
+/// An edit applied to the end of dictated text once a voice command is recognized.
+enum EditAction {
+    /// Remove trailing words. `VoiceCommandProcessor.apply` currently ignores the
+    /// associated count and uses `VoiceCommandSettings.voiceCommandScratchWordCount`.
+    case deleteLastWords(Int)
+    /// Uppercase the first character of the last word.
+    case capitalizeLastWord
+    /// Insert the string after the last word, ahead of its trailing punctuation.
+    case appendAfterLastWord(String)
+    /// Append a single "\n".
+    case insertNewline
+}
+
+/// Maps one or more spoken trigger phrases to the edit they request.
+struct VoiceCommand {
+    /// Lowercase trigger phrases whose words are separated by single spaces.
+    let phrases: [String]
+    let action: EditAction
+}
+
+/// Detects a voice command at the end of a transcript and applies the matching edit.
+enum VoiceCommandProcessor {
+    static let commands: [VoiceCommand] = [
+        .init(phrases: ["scratch that", "delete that"], action: .deleteLastWords(1)),
+        .init(phrases: ["capitalize that"], action: .capitalizeLastWord),
+        .init(phrases: ["slash that"], action: .appendAfterLastWord("/")),
+        .init(phrases: ["new line", "new paragraph"], action: .insertNewline),
+    ]
+
+    /// Looks for a known command phrase at the very end of `input`.
+    ///
+    /// Matching runs over the whitespace-separated tokens of the original input,
+    /// ignoring case and trailing punctuation and treating hyphens as word
+    /// separators, so ASR output such as "Scratch, that." or "scratch-that" still
+    /// matches. Exactly the tokens that form the phrase are stripped.
+    ///
+    /// - Returns: The remaining tokens joined by single spaces plus the requested
+    ///   action, or the unchanged input and `nil` when voice commands are disabled
+    ///   or no phrase ends the input.
+    static func detect(in input: String, settings: VoiceCommandSettings) -> (stripped: String, action: EditAction?) {
+        guard settings.voiceCommandsEnabled else { return (input, nil) }
+        if input.isEmpty { return ("", nil) }
+
+        // TODO(v2): support "literal new line" escape hatch
+        let tokens = self.tokenize(input)
+        let wordsPerToken = tokens.map { self.matchableWords(in: $0) }
+
+        for command in self.commands {
+            for phrase in command.phrases {
+                let phraseWords = phrase.split(separator: " ").map(String.init)
+                guard let consumed = self.trailingTokenCount(matching: phraseWords, in: wordsPerToken) else {
+                    continue
+                }
+                return (tokens.dropLast(consumed).joined(separator: " "), command.action)
+            }
+        }
+
+        return (input, nil)
+    }
+
+    /// Applies `action` to the end of `text`.
+    ///
+    /// Only the trailing word(s) are edited; whitespace and line breaks elsewhere in
+    /// `text` are kept, so formatted multi-line output is not flattened. The word
+    /// edits return "" when `text` contains no words.
+    static func apply(_ action: EditAction, to text: String, settings: VoiceCommandSettings) -> String {
+        switch action {
+        case .deleteLastWords:
+            // The configured count wins over the associated value; clamping keeps a
+            // non-positive setting from trapping.
+            let count = max(0, settings.voiceCommandScratchWordCount)
+            var remaining = Substring(text)
+            for _ in 0..<count {
+                guard let range = self.lastTokenRange(in: remaining) else { return "" }
+                remaining = remaining[..<range.lowerBound]
+            }
+            // Drop the whitespace that separated the removed words from the kept text.
+            while let last = remaining.last, last.isWhitespace { remaining.removeLast() }
+            return String(remaining)
+
+        case .capitalizeLastWord:
+            guard let range = self.lastTokenRange(in: Substring(text)) else { return "" }
+            let (stem, punct) = self.stripTrailingPunct(from: String(text[range]))
+            let capitalized = String(stem.prefix(1)).uppercased() + stem.dropFirst()
+            return text.replacingCharacters(in: range, with: capitalized + punct)
+
+        case let .appendAfterLastWord(suffix):
+            guard let range = self.lastTokenRange(in: Substring(text)) else { return "" }
+            let (stem, punct) = self.stripTrailingPunct(from: String(text[range]))
+            return text.replacingCharacters(in: range, with: stem + suffix + punct)
+
+        case .insertNewline:
+            return text + "\n"
+        }
+    }
+
+    /// Split on whitespace, filter empty.
+    private static func tokenize(_ s: String) -> [String] {
+        return s.split(whereSeparator: { $0.isWhitespace }).map(String.init)
+    }
+
+    /// Lowercased words a token contributes to phrase matching: trailing punctuation
+    /// is dropped and hyphens split the token ("Scratch-that." -> ["scratch", "that"]).
+    private static func matchableWords(in token: String) -> [String] {
+        let (stem, _) = self.stripTrailingPunct(from: token.lowercased())
+        return stem.split(separator: "-").map(String.init)
+    }
+
+    /// Number of trailing tokens whose words spell exactly `phraseWords`, or `nil`
+    /// when the input does not end with the phrase on a token boundary.
+    private static func trailingTokenCount(matching phraseWords: [String], in wordsPerToken: [[String]]) -> Int? {
+        var collected: [String] = []
+        var consumed = 0
+        for words in wordsPerToken.reversed() {
+            guard collected.count < phraseWords.count else { break }
+            collected.insert(contentsOf: words, at: 0)
+            consumed += 1
+        }
+        return collected == phraseWords ? consumed : nil
+    }
+
+    /// Range of the last whitespace-delimited token of `text`, or `nil` when `text`
+    /// is empty or all whitespace.
+    private static func lastTokenRange(in text: Substring) -> Range<String.Index>? {
+        guard let lastChar = text.lastIndex(where: { !$0.isWhitespace }) else { return nil }
+        let upper = text.index(after: lastChar)
+        let lower = text[..<upper].lastIndex(where: { $0.isWhitespace }).map { text.index(after: $0) }
+        return (lower ?? text.startIndex)..<upper
+    }
+
+    /// Returns (stem, trailingPunct) where trailingPunct is the trailing
+    /// punctuation characters stripped from the token (e.g. "word." -> ("word", "."))
+    /// Only strip common trailing punctuation: . , ! ? ; :
+    private static func stripTrailingPunct(from token: String) -> (stem: String, punct: String) {
+        let punctSet: Set<Character> = [".", ",", "!", "?", ";", ":"]
+        var stem = token
+        var punct = ""
+        while let last = stem.last, punctSet.contains(last) {
+            punct = String(last) + punct
+            stem.removeLast()
+        }
+        return (stem, punct)
+    }
+}
diff --git a/Tests/FluidDictationIntegrationTests/VoiceCommandProcessorTests.swift b/Tests/FluidDictationIntegrationTests/VoiceCommandProcessorTests.swift
new file mode 100644
index 00000000..99e6fbb8
--- /dev/null
+++ b/Tests/FluidDictationIntegrationTests/VoiceCommandProcessorTests.swift
@@ -0,0 +1,226 @@
+@testable import FluidVoice_Debug
+import XCTest
+
+@MainActor
+final class VoiceCommandProcessorTests: XCTestCase {
+    /// Runs `run`, then restores the two voice-command UserDefaults keys to their prior values.
+    private func withVoiceCommandSettingsRestored(_ run: () -> Void) {
+        let defaults = UserDefaults.standard
+        let keys = ["VoiceCommandsEnabled", "VoiceCommandScratchWordCount"]
+        let snapshot = Dictionary(uniqueKeysWithValues: keys.compactMap { k -> (String, Any)? in
+            guard let v = defaults.object(forKey: k) else { return nil }
+            return (k, v)
+        })
+        defer {
+            for k in keys {
+                if let v = snapshot[k] { defaults.set(v, forKey: k) } else { defaults.removeObject(forKey: k) }
+            }
+        }
+        run()
+    }
+
+    func testDetect_featureDisabled_returnsOriginalAndNilAction() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = false
+            let result = VoiceCommandProcessor.detect(in: "scratch that", settings: SettingsStore.shared)
+            XCTAssertEqual(result.stripped, "scratch that")
+            XCTAssertNil(result.action)
+        }
+    }
+
+    func testDetect_commandOnly_scratchThat_returnsEmptyStrippedAndDeleteAction() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "scratch that", settings: SettingsStore.shared)
+            XCTAssertEqual(result.stripped, "")
+            if case .deleteLastWords = result.action {} else {
+                XCTFail("expected .deleteLastWords, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_commandOnly_deleteThat_isSynonymForScratchThat() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "delete that", settings: SettingsStore.shared)
+            if case .deleteLastWords = result.action {} else {
+                XCTFail("expected .deleteLastWords, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_mixed_trailingCommand_stripsCommandFromTranscript() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "send the report scratch that", settings: SettingsStore.shared)
+            XCTAssertEqual(result.stripped, "send the report")
+            if case .deleteLastWords = result.action {} else {
+                XCTFail("expected .deleteLastWords, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_midSentence_commandNotFired() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "scratch that memo please", settings: SettingsStore.shared)
+            XCTAssertNil(result.action)
+        }
+    }
+
+    func testDetect_wordBoundary_capitalizePrefix_doesNotFire() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "capitalize that letter", settings: SettingsStore.shared)
+            XCTAssertNil(result.action)
+        }
+    }
+
+    func testDetect_asrVariance_punctuatedCommand_normalizes() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "scratch, that", settings: SettingsStore.shared)
+            if case .deleteLastWords = result.action {} else {
+                XCTFail("expected .deleteLastWords, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_trailingPunctuation_stillDetectsCommand() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "Send the report. Scratch that.", settings: SettingsStore.shared)
+            XCTAssertEqual(result.stripped, "Send the report.")
+            if case .deleteLastWords = result.action {} else {
+                XCTFail("expected .deleteLastWords, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_hyphenatedCommand_stripsOnlyTheCommand() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "send the report scratch-that", settings: SettingsStore.shared)
+            XCTAssertEqual(result.stripped, "send the report")
+            if case .deleteLastWords = result.action {} else {
+                XCTFail("expected .deleteLastWords, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testApply_deleteLastWords_removesOneWord() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            SettingsStore.shared.voiceCommandScratchWordCount = 1
+            let result = VoiceCommandProcessor.apply(.deleteLastWords(1), to: "call Monday", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "call")
+        }
+    }
+
+    func testApply_deleteLastWords_oneWordUtterance_returnsEmpty() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            SettingsStore.shared.voiceCommandScratchWordCount = 1
+            let result = VoiceCommandProcessor.apply(.deleteLastWords(1), to: "Monday", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "")
+        }
+    }
+
+    func testApply_capitalizeLastWord_uppercasesFirstLetter() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.apply(.capitalizeLastWord, to: "call monday", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "call Monday")
+        }
+    }
+
+    func testApply_capitalizeLastWord_alreadyCapitalized_unchanged() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.apply(.capitalizeLastWord, to: "call Monday", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "call Monday")
+        }
+    }
+
+    func testApply_capitalizeLastWord_preservesLineBreaks() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.apply(.capitalizeLastWord, to: "first line\nsecond monday", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "first line\nsecond Monday")
+        }
+    }
+
+    func testApply_appendAfterLastWord_slash_noSpaceAdded() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.apply(.appendAfterLastWord("/"), to: "src", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "src/")
+        }
+    }
+
+    func testApply_insertNewline_appendsNewline() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.apply(.insertNewline, to: "first item", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "first item\n")
+        }
+    }
+
+    func testApply_punctuationOnToken_stripsAndReattaches() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            SettingsStore.shared.voiceCommandScratchWordCount = 1
+            let result = VoiceCommandProcessor.apply(.deleteLastWords(1), to: "send the report.", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "send the")
+        }
+    }
+
+    func testApply_scratchWordCount_configurable_deletesTwoWords() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            SettingsStore.shared.voiceCommandScratchWordCount = 2
+            let result = VoiceCommandProcessor.apply(.deleteLastWords(1), to: "one two three", settings: SettingsStore.shared)
+            XCTAssertEqual(result, "one")
+        }
+    }
+
+    func testDetect_commandOnly_capitalizeOnly_returnsCorrectAction() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "capitalize that", settings: SettingsStore.shared)
+            if case .capitalizeLastWord = result.action {} else {
+                XCTFail("expected .capitalizeLastWord, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_commandOnly_slashThat_returnsCorrectAction() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "slash that", settings: SettingsStore.shared)
+            if case .appendAfterLastWord = result.action {} else {
+                XCTFail("expected .appendAfterLastWord, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_commandOnly_newLine_returnsInsertNewlineAction() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "new line", settings: SettingsStore.shared)
+            if case .insertNewline = result.action {} else {
+                XCTFail("expected .insertNewline, got \(String(describing: result.action))")
+            }
+        }
+    }
+
+    func testDetect_commandOnly_newParagraph_synonymForNewLine() {
+        self.withVoiceCommandSettingsRestored {
+            SettingsStore.shared.voiceCommandsEnabled = true
+            let result = VoiceCommandProcessor.detect(in: "new paragraph", settings: SettingsStore.shared)
+            if case .insertNewline = result.action {} else {
+                XCTFail("expected .insertNewline, got \(String(describing: result.action))")
+            }
+        }
+    }
+}