From af163dcaf39459c07e935fc77d627b2ed5b906a1 Mon Sep 17 00:00:00 2001 From: Dimirolz Date: Thu, 13 Nov 2025 16:45:34 +0400 Subject: [PATCH] feat(openai): support input audio noise reduction --- .changeset/violet-tigers-count.md | 5 +++++ plugins/openai/src/realtime/api_proto.ts | 6 ++++++ plugins/openai/src/realtime/realtime_model.ts | 12 ++++++++---- 3 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 .changeset/violet-tigers-count.md diff --git a/.changeset/violet-tigers-count.md b/.changeset/violet-tigers-count.md new file mode 100644 index 000000000..856bdf944 --- /dev/null +++ b/.changeset/violet-tigers-count.md @@ -0,0 +1,5 @@ +--- +"@livekit/agents-plugin-openai": minor +--- + +Allow agents to pass OpenAI's `input_audio_noise_reduction` setting through the realtime session payload. diff --git a/plugins/openai/src/realtime/api_proto.ts b/plugins/openai/src/realtime/api_proto.ts index 4bc026f77..5c78d64bc 100644 --- a/plugins/openai/src/realtime/api_proto.ts +++ b/plugins/openai/src/realtime/api_proto.ts @@ -113,6 +113,10 @@ export type InputAudioTranscription = { prompt?: string; }; +export interface InputAudioNoiseReduction { + type?: 'near_field' | 'far_field'; +} + export interface InputTextContent { type: 'input_text'; text: string; @@ -196,6 +200,7 @@ export interface SessionResource { input_audio_format: AudioFormat; // default: "pcm16" output_audio_format: AudioFormat; // default: "pcm16" input_audio_transcription: InputAudioTranscription | null; + input_audio_noise_reduction: InputAudioNoiseReduction | null; turn_detection: TurnDetectionType | null; tools: Tool[]; tool_choice: ToolChoice; // default: "auto" @@ -273,6 +278,7 @@ export interface SessionUpdateEvent extends BaseClientEvent { input_audio_format: AudioFormat; output_audio_format: AudioFormat; input_audio_transcription: InputAudioTranscription | null; + input_audio_noise_reduction: InputAudioNoiseReduction | null; turn_detection: TurnDetectionType | null; tools: Tool[]; tool_choice: ToolChoice; diff --git a/plugins/openai/src/realtime/realtime_model.ts b/plugins/openai/src/realtime/realtime_model.ts index a67d3bf58..d2b06766a 100644 --- a/plugins/openai/src/realtime/realtime_model.ts +++ b/plugins/openai/src/realtime/realtime_model.ts @@ -42,7 +42,7 @@ interface RealtimeOptions { temperature: number; toolChoice?: llm.ToolChoice; inputAudioTranscription?: api_proto.InputAudioTranscription | null; - // TODO(shubhra): add inputAudioNoiseReduction + inputAudioNoiseReduction?: api_proto.InputAudioNoiseReduction | null; turnDetection?: api_proto.TurnDetectionType | null; maxResponseOutputTokens?: number | 'inf'; speed?: number; @@ -102,6 +102,7 @@ const DEFAULT_TURN_DETECTION: api_proto.TurnDetectionType = { const DEFAULT_INPUT_AUDIO_TRANSCRIPTION: api_proto.InputAudioTranscription = { model: 'gpt-4o-mini-transcribe', }; +const DEFAULT_INPUT_AUDIO_NOISE_REDUCTION: api_proto.InputAudioNoiseReduction | null = null; const DEFAULT_TOOL_CHOICE: llm.ToolChoice = 'auto'; const DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS: number | 'inf' = 'inf'; @@ -124,6 +125,7 @@ const DEFAULT_REALTIME_MODEL_OPTIONS = { voice: 'marin', temperature: DEFAULT_TEMPERATURE, inputAudioTranscription: DEFAULT_INPUT_AUDIO_TRANSCRIPTION, + inputAudioNoiseReduction: DEFAULT_INPUT_AUDIO_NOISE_REDUCTION, turnDetection: DEFAULT_TURN_DETECTION, toolChoice: DEFAULT_TOOL_CHOICE, maxResponseOutputTokens: DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS, @@ -149,7 +151,7 @@ export class RealtimeModel extends llm.RealtimeModel { baseURL?: string; modalities?: Modality[]; inputAudioTranscription?: api_proto.InputAudioTranscription | null; - // TODO(shubhra): add inputAudioNoiseReduction + inputAudioNoiseReduction?: api_proto.InputAudioNoiseReduction | null; turnDetection?: api_proto.TurnDetectionType | null; speed?: number; // TODO(shubhra): add openai tracing options @@ -243,6 +245,7 @@ export class RealtimeModel extends llm.RealtimeModel { turnDetection = AZURE_DEFAULT_TURN_DETECTION, temperature = 0.8, speed, + inputAudioNoiseReduction = DEFAULT_INPUT_AUDIO_NOISE_REDUCTION, }: { azureDeployment: string; azureEndpoint?: string; @@ -252,7 +255,7 @@ export class RealtimeModel extends llm.RealtimeModel { baseURL?: string; voice?: string; inputAudioTranscription?: api_proto.InputAudioTranscription; - // TODO(shubhra): add inputAudioNoiseReduction + inputAudioNoiseReduction?: api_proto.InputAudioNoiseReduction | null; turnDetection?: api_proto.TurnDetectionType; temperature?: number; speed?: number; @@ -284,6 +287,7 @@ export class RealtimeModel extends llm.RealtimeModel { return new RealtimeModel({ voice, inputAudioTranscription, + inputAudioNoiseReduction, turnDetection, temperature, speed, @@ -417,7 +421,7 @@ export class RealtimeSession extends llm.RealtimeSession { modalities: modalities, turn_detection: this.oaiRealtimeModel._options.turnDetection, input_audio_transcription: this.oaiRealtimeModel._options.inputAudioTranscription, - // TODO(shubhra): add inputAudioNoiseReduction + input_audio_noise_reduction: this.oaiRealtimeModel._options.inputAudioNoiseReduction, temperature: this.oaiRealtimeModel._options.temperature, tool_choice: toOaiToolChoice(this.oaiRealtimeModel._options.toolChoice), max_response_output_tokens: