Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

103 changes: 41 additions & 62 deletions apps/voxit/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ use eframe::{
};
#[cfg(target_os = "macos")] use enigo::{Direction, Enigo, Key, Keyboard, Settings};
#[cfg(target_os = "macos")] use global_hotkey::GlobalHotKeyManager;
use realtime::{RealtimeEvent, RealtimeSession};
use tracing_appender::rolling::{RollingFileAppender, Rotation};
use tracing_subscriber::EnvFilter;
#[cfg(target_os = "macos")]
Expand All @@ -46,8 +45,8 @@ use voxit_audio::{InputDevice, Recorder};
use voxit_core::{
auth::{self, AuthRecord, AuthStatus},
config::Config,
openai::{self, RewriteState},
realtime,
inference::{self, RewriteState},
realtime::{self, RealtimeEvent, RealtimeSession},
transcript::TranscriptAssembler,
};
use voxit_macos::{MicrophonePermissionState, PermissionSettingsPane, TargetApp};
Expand Down Expand Up @@ -119,8 +118,8 @@ struct VoxitApp {
auth_event_tx: Sender<AuthEvent>,
realtime_event_rx: Receiver<RealtimeEvent>,
realtime_event_tx: Sender<RealtimeEvent>,
inference_tx: Sender<openai::InferenceEvent>,
inference_rx: Receiver<openai::InferenceEvent>,
inference_tx: Sender<inference::InferenceEvent>,
inference_rx: Receiver<inference::InferenceEvent>,
is_recording: bool,
is_window_visible: bool,
state: String,
Expand Down Expand Up @@ -166,8 +165,8 @@ impl VoxitApp {
auth_event_tx: Sender<AuthEvent>,
realtime_event_rx: Receiver<RealtimeEvent>,
realtime_event_tx: Sender<RealtimeEvent>,
inference_tx: Sender<openai::InferenceEvent>,
inference_rx: Receiver<openai::InferenceEvent>,
inference_tx: Sender<inference::InferenceEvent>,
inference_rx: Receiver<inference::InferenceEvent>,
hotkey_mode_u8: Arc<AtomicU8>,
) -> Self {
let start_hidden = config.ui.start_hidden;
Expand Down Expand Up @@ -608,7 +607,7 @@ impl VoxitApp {
fn handle_inference_events(&mut self) {
while let Ok(event) = self.inference_rx.try_recv() {
match event {
openai::InferenceEvent::Pass2Completed { total_ms, raw_transcript } => {
inference::InferenceEvent::Pass2Completed { total_ms, raw_transcript } => {
self.is_finalizing = false;
self.transcription_result = raw_transcript;
self.state = "FinalizingPass2".to_string();
Expand All @@ -627,7 +626,7 @@ impl VoxitApp {
self.status = format!("Pass2 completed in {total_ms} ms. {paste_status}");
}
},
openai::InferenceEvent::RewriteCompleted { total_ms, result } => {
inference::InferenceEvent::RewriteCompleted { total_ms, result } => {
self.is_rewriting = false;

if self.ignore_rewrite_result {
Expand Down Expand Up @@ -669,7 +668,7 @@ impl VoxitApp {
},
}
},
openai::InferenceEvent::Failed(err) => {
inference::InferenceEvent::Failed(err) => {
self.is_finalizing = false;
self.is_rewriting = false;
self.status = format!("OpenAI failed: {err}");
Expand Down Expand Up @@ -738,49 +737,25 @@ impl VoxitApp {
);
}

match auth::access_token() {
Ok((api_key, account_id)) => {
let realtime_config = realtime::RealtimeSessionConfig {
model: self.config.openai.realtime_model.clone(),
sample_rate_hz: self.config.audio.realtime_target_rate_hz,
noise_reduction: self.config.openai.realtime.noise_reduction.clone(),
};

match realtime::start_realtime_session(
api_key,
account_id,
realtime_config,
chunk_rx,
self.realtime_event_tx.clone(),
) {
Ok(session) => {
self.realtime_session = Some(session);
},
Err(err) => {
let realtime_status = format!(
"Realtime unavailable ({err}). Recording continues with Pass2 finalize."
);

self.status = if used_fallback {
format!("{fallback_prefix} {realtime_status}")
} else {
realtime_status
};
},
}
},
Err(err) => {
let auth_status = format!(
"Realtime unavailable because auth is missing ({err}). Recording continues with Pass2 finalize."
);

self.status = if used_fallback {
format!("{fallback_prefix} {auth_status}")
} else {
auth_status
};
let realtime_config = realtime::RealtimeSessionConfig {
model: self.config.openai.realtime_model.clone(),
sample_rate_hz: self.config.audio.realtime_target_rate_hz,
noise_reduction: self.config.openai.realtime.noise_reduction.clone(),
};
let realtime_status = match inference::start_realtime_session(
realtime_config,
chunk_rx,
self.realtime_event_tx.clone(),
) {
Ok(session) => {
self.realtime_session = Some(session);

None
},
}
Err(err) => Some(format!(
"Realtime unavailable ({err}). Recording continues with Pass2 finalize."
)),
};

self.is_recording = true;
self.is_finalizing = false;
Expand All @@ -794,6 +769,10 @@ impl VoxitApp {
started_status = format!("{fallback_prefix} {started_status}");
}

if let Some(realtime_status) = realtime_status {
started_status = format!("{started_status} {realtime_status}");
}

self.status = started_status;
},
Err(err) => {
Expand Down Expand Up @@ -867,12 +846,12 @@ impl VoxitApp {
self.is_finalizing = true;
self.status = "Finalizing Pass2 transcript...".to_string();
thread::spawn(move || {
let outcome = openai::transcribe_only(&wav, &model)
.map(|(raw_transcript, total_ms)| openai::InferenceEvent::Pass2Completed {
let outcome = inference::transcribe_only(&wav, &model)
.map(|(raw_transcript, total_ms)| inference::InferenceEvent::Pass2Completed {
total_ms,
raw_transcript,
})
.unwrap_or_else(openai::InferenceEvent::Failed);
.unwrap_or_else(inference::InferenceEvent::Failed);
let _ = tx.send(outcome);
});
}
Expand All @@ -896,12 +875,12 @@ impl VoxitApp {
let raw = self.transcription_result.clone();

thread::spawn(move || {
let outcome = openai::rewrite_only(&raw, &model)
.map(|(result, total_ms)| openai::InferenceEvent::RewriteCompleted {
let outcome = inference::rewrite_only(&raw, &model)
.map(|(result, total_ms)| inference::InferenceEvent::RewriteCompleted {
total_ms,
result,
})
.unwrap_or_else(openai::InferenceEvent::Failed);
.unwrap_or_else(inference::InferenceEvent::Failed);
let _ = tx.send(outcome);
});
}
Expand Down Expand Up @@ -1232,8 +1211,8 @@ impl VoxitApp {
auth_event_tx: Sender<AuthEvent>,
realtime_event_rx: Receiver<RealtimeEvent>,
realtime_event_tx: Sender<RealtimeEvent>,
inference_tx: Sender<openai::InferenceEvent>,
inference_rx: Receiver<openai::InferenceEvent>,
inference_tx: Sender<inference::InferenceEvent>,
inference_rx: Receiver<inference::InferenceEvent>,
hotkey_mode_u8: Arc<AtomicU8>,
_hotkey_manager: Option<GlobalHotKeyManager>,
_tray_icon: TrayIcon,
Expand Down Expand Up @@ -1469,7 +1448,7 @@ fn run_ui() -> Result<()> {
let (command_tx, command_rx) = mpsc::channel::<AppCommand>();
let (auth_event_tx, auth_event_rx) = mpsc::channel::<AuthEvent>();
let (realtime_event_tx, realtime_event_rx) = mpsc::channel::<RealtimeEvent>();
let (inference_tx, inference_rx) = mpsc::channel::<openai::InferenceEvent>();
let (inference_tx, inference_rx) = mpsc::channel::<inference::InferenceEvent>();
let initial_hotkey =
if app_config.hotkey.mode == "hold" { HotkeyMode::Hold } else { HotkeyMode::Toggle };
let hotkey_mode = Arc::new(AtomicU8::new(initial_hotkey.as_u8()));
Expand Down Expand Up @@ -1541,7 +1520,7 @@ fn run_ui() -> Result<()> {
let (_command_tx, command_rx) = mpsc::channel::<AppCommand>();
let (auth_event_tx, auth_event_rx) = mpsc::channel::<AuthEvent>();
let (realtime_event_tx, realtime_event_rx) = mpsc::channel::<RealtimeEvent>();
let (inference_tx, inference_rx) = mpsc::channel::<openai::InferenceEvent>();
let (inference_tx, inference_rx) = mpsc::channel::<inference::InferenceEvent>();
let initial_hotkey =
if app_config.hotkey.mode == "hold" { HotkeyMode::Hold } else { HotkeyMode::Toggle };
let hotkey_mode = Arc::new(AtomicU8::new(initial_hotkey.as_u8()));
Expand Down
8 changes: 8 additions & 0 deletions packages/voxit-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ voxit-realtime = ["dep:futures-util", "dep:http", "dep:tokio", "dep:tokio-tungst
base64 = { version = "0.22" }
directories = { version = "6.0" }
futures-util = { version = "0.3", optional = true }
hound = { version = "3.5" }
http = { version = "1.3", optional = true }
keyring = { version = "3.6", features = ["apple-native"] }
rand = { version = "0.10" }
Expand All @@ -31,3 +32,10 @@ tracing = { version = "0.1" }
url = { version = "2.5" }
voxit-audio = { path = "../voxit-audio" }
webbrowser = { version = "1.1" }

[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = { version = "0.10" }
core-foundation-sys = { version = "0.8" }
objc2-foundation = { version = "0.3.2", default-features = false, features = ["NSObjCRuntime", "NSObject", "NSString", "alloc", "std"] }
objc2-local-authentication = { version = "0.3.2", default-features = false, features = ["LAContext", "alloc", "std"] }
security-framework-sys = { version = "2.16" }
Loading
Loading