From 0db7ca8a6ba43ec9505d8bdf3aac5b38e6cead53 Mon Sep 17 00:00:00 2001 From: Davide Morelli Date: Tue, 16 Jun 2026 14:54:52 +0200 Subject: [PATCH 1/2] MicCapture: survive input format/route changes (Meet, earbuds) The mic captured the input format once, created an AVAudioFile in that format, and wrote raw tap buffers directly. When a conferencing app reconfigures the shared input device mid-stream (e.g. Google Meet's 3-channel voice-processing mode), or the default input device changes (plugging in earbuds), the tap buffers stop matching the file format, every write fails with CoreAudio -50, and the mic track dies silently. Two independent safety nets, mirroring the existing SystemAudioCapture: - writeBuffer() normalises every tap buffer to the file's fixed processingFormat via AVAudioConverter (block-based form, handles sample-rate/channel changes), with a direct-write fast path when the format is unchanged. - An AVAudioEngineConfigurationChange observer (delivered on .main, so it serialises with stop()) rebinds the input and reinstalls the tap on a route change, with a bounded retry for a transient zero sample-rate while the route settles. applyInputDevice() no longer force-unwraps inputNode.audioUnit (can be nil during route churn). Co-Authored-By: Claude Opus 4.8 --- swift/Sources/AudioCapture.swift | 200 +++++++++++++++++++++++++++---- 1 file changed, 174 insertions(+), 26 deletions(-) diff --git a/swift/Sources/AudioCapture.swift b/swift/Sources/AudioCapture.swift index ce4130e..e882094 100644 --- a/swift/Sources/AudioCapture.swift +++ b/swift/Sources/AudioCapture.swift @@ -37,6 +37,23 @@ class MicCapture { private var audioFile: AVAudioFile? private(set) var startHostTime: UInt64 = 0 + // Fixed processing format of the output file. Every tap buffer is normalised to + // this before writing, so a mid-stream format change cannot break the writes the + // way a fixed AVAudioFile did before (Meet flipping the shared input into + // 3-channel voice-processing mode made every write fail with -50, killing the mic). + private var fileFormat: AVAudioFormat? + private var audioConverter: AVAudioConverter? + + // Remembered so an explicitly named device can be re-applied after a route change. + private var micDeviceName: String? + + // Re-bind the input + reinstall the tap when the audio route changes. The token is + // kept so stop() can deregister it. isStopping guards a notification already queued + // on .main behind stop(): the observer is delivered on .main and stop() runs on + // .main (the SIGINT/SIGTERM sources), so they serialise and the flag alone is enough. + private var configObserver: NSObjectProtocol? + private var isStopping = false + // Mute support — guarded by os_unfair_lock (tap callback is on AVAudioEngine thread) private var _isMuted = false private var _muteLock = os_unfair_lock_s() @@ -66,32 +83,68 @@ class MicCapture { } func start(outputPath: String, deviceName: String?) throws { - let input = engine.inputNode - - // If deviceName specified, find and set the audio device - if let name = deviceName { - let deviceID = try findInputDevice(named: name) - var id = deviceID - let err = AudioUnitSetProperty( - input.audioUnit!, - kAudioOutputUnitProperty_CurrentDevice, - kAudioUnitScope_Global, 0, - &id, UInt32(MemoryLayout.size)) - if err != noErr { - throw MicError.cannotSetDevice(name) - } - } + micDeviceName = deviceName + try applyInputDevice() - let format = input.outputFormat(forBus: 0) + let format = engine.inputNode.outputFormat(forBus: 0) guard format.sampleRate > 0 else { throw MicError.noInputAvailable } let url = URL(fileURLWithPath: outputPath) - audioFile = try AVAudioFile(forWriting: url, - settings: format.settings, - commonFormat: .pcmFormatFloat32, - interleaved: true) + let file = try AVAudioFile(forWriting: url, + settings: format.settings, + commonFormat: .pcmFormatFloat32, + interleaved: true) + audioFile = file + fileFormat = file.processingFormat + + guard installMicTap() else { + throw MicError.noInputAvailable + } + + // Earbuds plugged in mid-recording, or a conferencing app reconfiguring the + // shared input format, posts this notification (the engine stops itself first). + // Rebind + restart so the mic keeps recording instead of silently dying. + configObserver = NotificationCenter.default.addObserver( + forName: .AVAudioEngineConfigurationChange, + object: engine, + queue: .main + ) { [weak self] _ in self?.handleConfigChange() } + + try engine.start() + fputs("Recording microphone audio to \(outputPath)...\n", stderr) + } + + /// Re-apply the explicitly named input device, if one was requested. When no device + /// was named the engine follows the system default input — which is exactly what + /// makes "switch to earbuds mid-recording" work, so we deliberately do nothing. + private func applyInputDevice() throws { + guard let name = micDeviceName else { return } + // audioUnit can be nil while the node is in flux during a route change; treat + // that as "cannot set device" rather than force-unwrapping into a crash. + guard let audioUnit = engine.inputNode.audioUnit else { + throw MicError.cannotSetDevice(name) + } + let deviceID = try findInputDevice(named: name) + var id = deviceID + let err = AudioUnitSetProperty( + audioUnit, + kAudioOutputUnitProperty_CurrentDevice, + kAudioUnitScope_Global, 0, + &id, UInt32(MemoryLayout.size)) + if err != noErr { + throw MicError.cannotSetDevice(name) + } + } + + /// Install the capture tap using the input node's current format. Returns false if + /// no input is available yet (sample rate 0), e.g. mid-route-change. + @discardableResult + private func installMicTap() -> Bool { + let input = engine.inputNode + let format = input.outputFormat(forBus: 0) + guard format.sampleRate > 0 else { return false } input.installTap(onBus: 0, bufferSize: 4096, format: format) { [weak self] buffer, time in guard let self else { return } @@ -117,17 +170,112 @@ class MicCapture { os_unfair_lock_unlock(&self._lastLoudTimeLock) } } - do { - try self.audioFile?.write(from: buffer) - } catch { - fputs("Mic write error: \(error)\n", stderr) + self.writeBuffer(buffer) + } + return true + } + + /// Write a tap buffer to the file, converting to the file's fixed format whenever + /// the incoming format differs (Meet voice-processing mode, or a new device with a + /// different sample rate or channel count). The fast path — unchanged format — + /// writes directly, exactly as the original code did. On a sample-rate change the + /// converter is rebuilt; any sub-buffer of tail samples left inside the old + /// converter is dropped (sub-10ms, inaudible for transcription). + private func writeBuffer(_ buffer: AVAudioPCMBuffer) { + guard let audioFile, let fileFormat else { return } + do { + if buffer.format == fileFormat { + try audioFile.write(from: buffer) + return + } + if audioConverter?.inputFormat != buffer.format { + audioConverter = AVAudioConverter(from: buffer.format, to: fileFormat) + } + guard let converter = audioConverter else { + fputs("Mic converter unavailable for format \(buffer.format)\n", stderr) + return + } + let ratio = fileFormat.sampleRate / buffer.format.sampleRate + let capacity = AVAudioFrameCount(Double(buffer.frameLength) * ratio) + 16 + guard let outBuffer = AVAudioPCMBuffer(pcmFormat: fileFormat, + frameCapacity: capacity) else { return } + var fed = false + var convError: NSError? + let status = converter.convert(to: outBuffer, error: &convError) { _, outStatus in + if fed { + outStatus.pointee = .noDataNow + return nil + } + fed = true + outStatus.pointee = .haveData + return buffer + } + if status == .error || convError != nil { + fputs("Mic convert error: \(convError?.description ?? "unknown")\n", stderr) + return + } + if outBuffer.frameLength > 0 { + try audioFile.write(from: outBuffer) } + } catch { + fputs("Mic write error: \(error)\n", stderr) + } + } + + /// Rebind the input and reinstall the tap after an audio route change. Runs on + /// .main, so it never overlaps stop(); removeTap quiesces the render thread before + /// we touch the tap, and the file/converter stay valid across the swap. + /// + /// Known limitation: the mic file is written continuously with no silence padding, + /// so if the input drops out for a moment during a device switch (e.g. plugging in + /// earbuds), the post-switch mic audio is appended immediately and the mic track + /// drifts slightly earlier relative to the system track. Acceptable for + /// transcription/diarization, and far better than the previous behaviour (losing + /// all post-switch mic audio). The common conferencing-app case reconfigures only + /// the *format*, not the device, so the converter absorbs it with no gap at all. + private func handleConfigChange() { + guard !isStopping else { return } + fputs("[MIC_RECONFIG] Audio route changed; rebinding microphone input.\n", stderr) + engine.inputNode.removeTap(onBus: 0) + engine.stop() + do { + try applyInputDevice() // re-pin a named device; no-op when following default + } catch { + fputs("Mic reconfigure: \(error) — falling back to default input.\n", stderr) + } + reinstallAndStart() + } + + /// Reinstall the tap and restart the engine. If the input is not ready yet + /// (sampleRate 0 while a route is still settling), retry on .main a bounded number + /// of times rather than relying on another notification arriving — a single + /// transient zero-rate change must not kill the mic for the rest of the session. + private func reinstallAndStart(attempt: Int = 0) { + guard !isStopping else { return } + guard installMicTap() else { + let maxAttempts = 20 // ~5s at 0.25s spacing + if attempt < maxAttempts { + DispatchQueue.main.asyncAfter(deadline: .now() + 0.25) { [weak self] in + self?.reinstallAndStart(attempt: attempt + 1) + } + } else { + fputs("Mic reconfigure: input did not return after \(maxAttempts) retries; mic stopped.\n", stderr) + } + return + } + do { + try engine.start() + } catch { + fputs("Mic reconfigure: failed to restart engine: \(error)\n", stderr) } - try engine.start() - fputs("Recording microphone audio to \(outputPath)...\n", stderr) } func stop() { + isStopping = true + if let token = configObserver { + NotificationCenter.default.removeObserver(token) + configObserver = nil + } engine.inputNode.removeTap(onBus: 0) engine.stop() audioFile = nil From c2728c62c1edb7a2b4aee1fd9004d7b0872be8b0 Mon Sep 17 00:00:00 2001 From: Pascal Berrang Date: Tue, 23 Jun 2026 16:48:11 +0200 Subject: [PATCH 2/2] Throttle mic write-error logging and unify mic log message style Gate the per-buffer write/convert error logs in writeBuffer behind a flag that resets on each successful write, so a persistent failure logs once instead of flooding stderr at the tap rate (~10+/sec). Also unify the mic diagnostics to a consistent "Mic : ..." prefix: fold "converter unavailable" into "Mic convert error", and restyle the "[MIC_RECONFIG]" route-change line to "Mic reconfigure:" to match its siblings. --- swift/Sources/AudioCapture.swift | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/swift/Sources/AudioCapture.swift b/swift/Sources/AudioCapture.swift index e882094..149c713 100644 --- a/swift/Sources/AudioCapture.swift +++ b/swift/Sources/AudioCapture.swift @@ -44,6 +44,11 @@ class MicCapture { private var fileFormat: AVAudioFormat? private var audioConverter: AVAudioConverter? + // Set when a write/convert failure has been logged; reset on the next successful write + // so a persistent failure logs once instead of once per tap buffer (~10+/sec). Touched + // only from writeBuffer on the tap (render) thread, so it needs no locking. + private var loggedWriteError = false + // Remembered so an explicitly named device can be re-applied after a route change. private var micDeviceName: String? @@ -186,13 +191,14 @@ class MicCapture { do { if buffer.format == fileFormat { try audioFile.write(from: buffer) + loggedWriteError = false return } if audioConverter?.inputFormat != buffer.format { audioConverter = AVAudioConverter(from: buffer.format, to: fileFormat) } guard let converter = audioConverter else { - fputs("Mic converter unavailable for format \(buffer.format)\n", stderr) + logWriteErrorOnce("Mic convert error: converter unavailable for format \(buffer.format)") return } let ratio = fileFormat.sampleRate / buffer.format.sampleRate @@ -211,17 +217,27 @@ class MicCapture { return buffer } if status == .error || convError != nil { - fputs("Mic convert error: \(convError?.description ?? "unknown")\n", stderr) + logWriteErrorOnce("Mic convert error: \(convError?.description ?? "unknown")") return } if outBuffer.frameLength > 0 { try audioFile.write(from: outBuffer) + loggedWriteError = false } } catch { - fputs("Mic write error: \(error)\n", stderr) + logWriteErrorOnce("Mic write error: \(error)") } } + /// Log a recurring write/convert failure only once until the next successful write, so a + /// persistent bad state (e.g. an unconvertible format) doesn't flood stderr at the tap + /// rate. Reset by writeBuffer on every successful write. Render-thread only — no locking. + private func logWriteErrorOnce(_ message: String) { + guard !loggedWriteError else { return } + loggedWriteError = true + fputs(message + "\n", stderr) + } + /// Rebind the input and reinstall the tap after an audio route change. Runs on /// .main, so it never overlaps stop(); removeTap quiesces the render thread before /// we touch the tap, and the file/converter stay valid across the swap. @@ -235,7 +251,7 @@ class MicCapture { /// the *format*, not the device, so the converter absorbs it with no gap at all. private func handleConfigChange() { guard !isStopping else { return } - fputs("[MIC_RECONFIG] Audio route changed; rebinding microphone input.\n", stderr) + fputs("Mic reconfigure: audio route changed; rebinding microphone input.\n", stderr) engine.inputNode.removeTap(onBus: 0) engine.stop() do {