diff --git a/src/app/core/services/audio-playing.service.ts b/src/app/core/services/audio-playing.service.ts index 90148d81..cd7c5c2a 100644 --- a/src/app/core/services/audio-playing.service.ts +++ b/src/app/core/services/audio-playing.service.ts @@ -22,7 +22,7 @@ import {AudioPlayingService as AudioPlayingServiceInterface} from './interfaces/ providedIn: 'root', }) export class AudioPlayingService implements AudioPlayingServiceInterface { - private audioContext = new AudioContext({sampleRate: 22000}); + private audioContext = new AudioContext({sampleRate: 24000}); private lastAudioTime = 0; private scheduledAudioSources = new Set(); @@ -74,7 +74,7 @@ export class AudioPlayingService implements AudioPlayingServiceInterface { const buffer = this.audioContext.createBuffer( 1, float32Array.length, - 22000, + 24000, ); buffer.copyToChannel(float32Array, 0); diff --git a/src/app/core/services/audio-recording.service.ts b/src/app/core/services/audio-recording.service.ts index e02ef0e3..4beaf5c7 100644 --- a/src/app/core/services/audio-recording.service.ts +++ b/src/app/core/services/audio-recording.service.ts @@ -36,7 +36,8 @@ export class AudioRecordingService implements AudioRecordingServiceInterface { try { this.stream = await navigator.mediaDevices.getUserMedia({audio: true}); - this.audioContext = new AudioContext(); + // Hint capture rate; worklet still resamples to 16 kHz if ignored + this.audioContext = new AudioContext({sampleRate: 16000}); await this.audioContext.audioWorklet.addModule( this.audioWorkletModulePath); diff --git a/src/app/core/services/stream-chat.service.ts b/src/app/core/services/stream-chat.service.ts index a80babd4..676f0abb 100644 --- a/src/app/core/services/stream-chat.service.ts +++ b/src/app/core/services/stream-chat.service.ts @@ -99,7 +99,7 @@ export class StreamChatService implements StreamChatServiceInterface { const request: LiveRequest = { blob: { - mime_type: 'audio/pcm', + mime_type: 'audio/pcm;rate=16000', data: combinedBuffer, }, }; diff --git a/src/app/core/services/video.service.ts b/src/app/core/services/video.service.ts index 76f5c511..7354d45b 100644 --- a/src/app/core/services/video.service.ts +++ b/src/app/core/services/video.service.ts @@ -111,7 +111,7 @@ export class VideoService implements VideoServiceInterface { resolve(blob); else reject(new Error('Failed to create image blob')); - }, 'image/png'); + }, 'image/jpeg', 0.8); } catch (error) { reject(error); } diff --git a/src/app/core/services/websocket.service.ts b/src/app/core/services/websocket.service.ts index 4aa984e3..d31aaa14 100644 --- a/src/app/core/services/websocket.service.ts +++ b/src/app/core/services/websocket.service.ts @@ -98,16 +98,26 @@ export class WebSocketService implements WebSocketServiceInterface { private handleIncomingEvent(message: any) { const msg = JSON.parse(message) as Event; - if ( - msg['content'] && - msg['content']['parts'] && - msg['content']['parts'][0]['inlineData'] - ) { - const pcmBytes = this.base64ToUint8Array( - msg['content']['parts'][0]['inlineData']['data'], - ); - this.audioBuffer.push(pcmBytes); - } else { + const parts = msg?.['content']?.['parts']; + + if (!Array.isArray(parts)) { + this.messages$.next(message); + return; + } + + // Extract audio from any part; forward the event if it has other content + let hasNonAudioContent = false; + for (const part of parts) { + const inlineData = part?.['inlineData']; + const mimeType: string|undefined = inlineData?.['mimeType']; + if (inlineData?.['data'] && mimeType?.startsWith('audio/')) { + this.audioBuffer.push(this.base64ToUint8Array(inlineData['data'])); + } else { + hasNonAudioContent = true; + } + } + + if (hasNonAudioContent) { this.messages$.next(message); } } diff --git a/src/assets/audio-processor.js b/src/assets/audio-processor.js index cec74f83..d815aab1 100644 --- a/src/assets/audio-processor.js +++ b/src/assets/audio-processor.js @@ -17,7 +17,7 @@ class AudioProcessor extends AudioWorkletProcessor { constructor() { super(); - this.targetSampleRate = 22000; // Change to your desired rate + this.targetSampleRate = 16000; // Live API expects 16 kHz PCM input this.originalSampleRate = sampleRate; // Browser's sample rate this.resampleRatio = this.originalSampleRate / this.targetSampleRate; } @@ -40,9 +40,15 @@ class AudioProcessor extends AudioWorkletProcessor { const newLength = Math.round(audioData.length / this.resampleRatio); const resampled = new Float32Array(newLength); + // Linear interpolation resampling (higher quality than nearest neighbor) + const lastIndex = audioData.length - 1; for (let i = 0; i < newLength; i++) { - const srcIndex = Math.floor(i * this.resampleRatio); - resampled[i] = audioData[srcIndex]; // Nearest neighbor resampling + const srcPos = i * this.resampleRatio; + const srcIndex = Math.floor(srcPos); + const nextIndex = Math.min(srcIndex + 1, lastIndex); + const frac = srcPos - srcIndex; + resampled[i] = + audioData[srcIndex] * (1 - frac) + audioData[nextIndex] * frac; } return resampled; }