-
-
Notifications
You must be signed in to change notification settings - Fork 349
feat: voice editing commands — scratch/capitalize/slash/new line (#406) #476
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,122 @@ | ||
| import Foundation | ||
|
|
||
| protocol VoiceCommandSettings { | ||
| var voiceCommandsEnabled: Bool { get } | ||
| var voiceCommandScratchWordCount: Int { get } | ||
| } | ||
|
|
||
| enum EditAction { | ||
| case deleteLastWords(Int) | ||
| case capitalizeLastWord | ||
| case appendAfterLastWord(String) | ||
| case insertNewline | ||
| } | ||
|
|
||
| struct VoiceCommand { | ||
| let phrases: [String] | ||
| let action: EditAction | ||
| } | ||
|
|
||
| enum VoiceCommandProcessor { | ||
| static let commands: [VoiceCommand] = [ | ||
| .init(phrases: ["scratch that", "delete that"], action: .deleteLastWords(1)), | ||
| .init(phrases: ["capitalize that"], action: .capitalizeLastWord), | ||
| .init(phrases: ["slash that"], action: .appendAfterLastWord("/")), | ||
| .init(phrases: ["new line", "new paragraph"], action: .insertNewline), | ||
| ] | ||
|
|
||
| static func detect(in input: String, settings: VoiceCommandSettings) -> (stripped: String, action: EditAction?) { | ||
| guard settings.voiceCommandsEnabled else { return (input, nil) } | ||
| if input.isEmpty { return ("", nil) } | ||
|
|
||
| // TODO(v2): support "literal new line" escape hatch | ||
| let normalized = self.normalizeForMatching(input) | ||
| .replacingOccurrences(of: ", ", with: " ") | ||
| .replacingOccurrences(of: "-", with: " ") | ||
|
|
||
| for command in self.commands { | ||
| for phrase in command.phrases { | ||
| guard normalized.hasSuffix(phrase) else { continue } | ||
|
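There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
When the ASR includes punctuation at the end of the utterance, such as "scratch that.", the normalized text no longer ends with the bare phrase, so the `normalized.hasSuffix(phrase)` check fails and the command is not detected. Useful? React with 👍 / 👎. |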
||
|
|
||
| let phraseStart = normalized.index(normalized.endIndex, offsetBy: -phrase.count) | ||
| let atWordBoundary = phraseStart == normalized.startIndex | ||
| || normalized[normalized.index(before: phraseStart)] == " " | ||
| guard atWordBoundary else { continue } | ||
|
|
||
| let stripped = self.stripPhraseSuffix(phrase, from: input) | ||
| return (stripped, command.action) | ||
| } | ||
| } | ||
|
|
||
| return (input, nil) | ||
| } | ||
|
|
||
/// Applies an edit action to `text` and returns the edited text.
///
/// - Parameters:
///   - action: The edit to perform.
///   - text: The text to edit.
///   - settings: Supplies `voiceCommandScratchWordCount`, the number of trailing
///     words removed by `.deleteLastWords`.
/// - Returns: The edited text. Every case except `.insertNewline` splits `text`
///   on whitespace and rejoins the words with single spaces, so runs of
///   whitespace (including newlines) in `text` are flattened by those cases.
static func apply(_ action: EditAction, to text: String, settings: VoiceCommandSettings) -> String {
    switch action {
    case .deleteLastWords:
        // The word count is read from settings; the case's associated value is
        // not consulted here.
        // Clamp to zero: `removeLast(_:)` requires a non-negative count and
        // traps otherwise, so a negative setting must not reach it.
        let count = max(0, settings.voiceCommandScratchWordCount)
        var tokens = self.tokenize(text)
        // Also covers empty input: 0 >= 0 yields "".
        guard count < tokens.count else { return "" }
        tokens.removeLast(count)
        return tokens.joined(separator: " ")

    case .capitalizeLastWord:
        var tokens = self.tokenize(text)
        guard let last = tokens.last else { return "" }
        let (stem, punct) = self.stripTrailingPunct(from: last)
        // A last token made only of punctuation has nothing to capitalize;
        // hand the text back untouched.
        guard !stem.isEmpty else { return text }
        let capitalized = String(stem.prefix(1)).uppercased() + stem.dropFirst()
        tokens[tokens.count - 1] = capitalized + punct
        return tokens.joined(separator: " ")

    case let .appendAfterLastWord(suffix):
        var tokens = self.tokenize(text)
        guard let last = tokens.last else { return "" }
        // Insert the suffix between the word and its trailing punctuation,
        // e.g. "word." + "/" -> "word/.".
        let (stem, punct) = self.stripTrailingPunct(from: last)
        tokens[tokens.count - 1] = stem + suffix + punct
        return tokens.joined(separator: " ")

    case .insertNewline:
        return text + "\n"
    }
}
|
|
||
/// Produces the canonical form used for phrase matching: lowercased, with each
/// run of whitespace reduced to a single space and none at either end.
private static func normalizeForMatching(_ s: String) -> String {
    let words = s.lowercased().split { $0.isWhitespace }
    return words.joined(separator: " ")
}
|
|
||
/// Breaks `s` into its whitespace-separated words; empty pieces are dropped.
private static func tokenize(_ s: String) -> [String] {
    let pieces = s.split { $0.isWhitespace }
    return pieces.map { String($0) }
}
|
|
||
/// Splits a token into its word part and its run of trailing punctuation,
/// e.g. "word." -> ("word", "."). Only the common marks . , ! ? ; : count as
/// trailing punctuation; anything else ends the run.
private static func stripTrailingPunct(from token: String) -> (stem: String, punct: String) {
    let trailingMarks: Set<Character> = [".", ",", "!", "?", ";", ":"]
    // Walk backwards from the end to find where the punctuation run begins.
    var cut = token.endIndex
    while cut > token.startIndex {
        let previous = token.index(before: cut)
        guard trailingMarks.contains(token[previous]) else { break }
        cut = previous
    }
    return (String(token[..<cut]), String(token[cut...]))
}
|
|
||
| /// Remove the matched phrase plus any preceding whitespace from the end of the | ||
| /// original (un-normalized) input. The phrase may appear in the original with | ||
| /// different casing/spacing/punctuation than the normalized form, so we strip | ||
| /// a word-count's worth of trailing tokens equal to the phrase's word count. | ||
| private static func stripPhraseSuffix(_ phrase: String, from input: String) -> String { | ||
| let phraseWordCount = phrase.split(separator: " ").count | ||
| var tokens = self.tokenize(input) | ||
| guard tokens.count >= phraseWordCount else { return "" } | ||
| tokens.removeLast(phraseWordCount) | ||
|
Comment on lines
+116
to
+119
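There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For the hyphen variants that detection accepts, such as "scratch-that", the original input holds the phrase as a single token, so removing the phrase's word count of trailing tokens also deletes the word before the command. Useful? React with 👍 / 👎. |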
||
| return tokens.joined(separator: " ") | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This hook only executes inside
DictationPostProcessingService, but normal hotkey dictation in ContentView.stopAndProcessTranscription calls processTextWithAI directly when AI is configured (ContentView.swift:2147) and otherwise uses transcribedText unchanged (ContentView.swift:2193); repo-wide search shows this service is only called by LocalAPI/InferenceAPIController.swift:88. As a result, enabling VoiceCommandsEnabled in the app will not process "... scratch that" or "new line" for the main dictation workflow, only for the local /v1/postprocess API. Useful? React with 👍 / 👎.