diff --git a/packages/api/src/hooks/useSpeechToSpeech.ts b/packages/api/src/hooks/useSpeechToSpeech.ts
new file mode 100644
index 0000000000..4f529a2c08
--- /dev/null
+++ b/packages/api/src/hooks/useSpeechToSpeech.ts
@@ -0,0 +1,3 @@
+import useSpeechToSpeech from '../providers/SpeechToSpeech/useSpeechToSpeech';
+
+export default useSpeechToSpeech;
diff --git a/packages/api/src/hooks/useVoiceActivities.ts b/packages/api/src/hooks/useVoiceActivities.ts
new file mode 100644
index 0000000000..d65e142b17
--- /dev/null
+++ b/packages/api/src/hooks/useVoiceActivities.ts
@@ -0,0 +1,11 @@
+import { isVoiceActivity, type WebChatActivity } from 'botframework-webchat-core';
+import { useSelector } from './internal/WebChatReduxContext';
+
+const activitiesSelector = (state: { activities: WebChatActivity[] }) => state.activities;
+
+const of = (predicate: (activity: WebChatActivity) => boolean) => (state: { activities: WebChatActivity[] }) =>
+  activitiesSelector(state).filter(predicate);
+
+export default function useVoiceActivities(): [WebChatActivity[]] {
+  return [useSelector(of(activity => isVoiceActivity(activity)))];
+}
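For reference, the activities this hook selects are `event` activities carrying a `value.voiceLiveEvent` payload (see the `isVoiceActivity` guard added in `packages/core` below). A representative shape, assembled from the events handled elsewhere in this diff — the `id` and `from` fields are illustrative, not prescribed:

```ts
import type { WebChatActivity } from 'botframework-webchat-core';

// Illustrative only: a bot-originated audio delta as selected by useVoiceActivities.
const sampleVoiceActivity = {
  type: 'event',
  id: 'a-00001',
  from: { id: 'bot' },
  channelData: { 'webchat:sequence-id': 1 },
  value: {
    voiceLiveEvent: {
      type: 'response.audio.delta',
      delta: '…base64-encoded PCM16 audio…'
    }
  }
} as unknown as WebChatActivity;
```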
diff --git a/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
new file mode 100644
index 0000000000..0ccf1a6f32
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx
@@ -0,0 +1,134 @@
+import React, { useCallback, useEffect, useMemo, useRef, useState, type ReactNode } from 'react';
+import { isVoiceActivity, WebChatActivity } from 'botframework-webchat-core';
+import { useAudioPlayer } from './private/useAudioPlayer';
+import { useRecorder } from './private/useRecorder';
+import { useDebouncedNotifications, usePostActivity, useVoiceActivities } from '../../hooks';
+import SpeechToSpeechContext from './private/Context';
+import { SpeechState } from './types/SpeechState';
+
+export const SpeechToSpeechComposer: React.FC<{ readonly children: ReactNode }> = ({ children }) => {
+  const [voiceActivities] = useVoiceActivities();
+  const postActivity = usePostActivity();
+  const [{ connectivitystatus }] = useDebouncedNotifications();
+  const { playAudio, stopAudio, isPlaying } = useAudioPlayer();
+
+  const lastProcessedIndexRef = useRef(0);
+
+  // Remove once the activity protocol changes land; we would then get this as part of the signal activity.
+  const [speechState, setSpeechState] = useState<SpeechState>('idle');
+
+  const isConnected = useMemo(() => connectivitystatus?.message === 'connected', [connectivitystatus]);
+
+  const sendAudioChunk = useCallback(
+    (base64: string) => {
+      postActivity({
+        type: 'event',
+        name: 'stream.chunk',
+        value: { voiceLiveEvent: { type: 'input_audio_buffer.append', audio: base64 } }
+      } as any);
+    },
+    [postActivity]
+  );
+
+  const { recording, setRecording: baseSetRecording } = useRecorder(sendAudioChunk);
+
+  const cancelActiveResponse = useCallback(() => {
+    if (isPlaying) {
+      postActivity({
+        type: 'event',
+        value: { voiceLiveEvent: { type: 'response.cancel' } }
+      } as any);
+    }
+  }, [isPlaying, postActivity]);
+
+  const handleVoiceActivity = useCallback(
+    (activity: WebChatActivity) => {
+      if (!isVoiceActivity(activity)) {
+        return;
+      }
+
+      const { voiceLiveEvent } = activity.value;
+
+      switch (voiceLiveEvent.type) {
+        case 'input_audio_buffer.speech_started':
+          stopAudio();
+          setSpeechState('listening');
+          break;
+        case 'input_audio_buffer.speech_stopped':
+          setSpeechState('processing');
+          break;
+        case 'response.audio.delta':
+          if (voiceLiveEvent.delta && recording) {
+            playAudio(voiceLiveEvent.delta);
+          }
+          break;
+        case 'response.done':
+          if (!isPlaying) {
+            setSpeechState('listening');
+          }
+          break;
+        default:
+          break;
+      }
+    },
+    [isPlaying, playAudio, recording, stopAudio]
+  );
+
+  useEffect(() => {
+    const startIndex = lastProcessedIndexRef.current;
+
+    if (!voiceActivities.length || startIndex >= voiceActivities.length) {
+      return;
+    }
+
+    // If not recording, skip processing voice activities but update ref
+    // so next time we start recording, we only process new activities.
+    if (!recording) {
+      lastProcessedIndexRef.current = voiceActivities.length;
+      return;
+    }
+
+    for (let i = startIndex; i < voiceActivities.length; i++) {
+      // eslint-disable-next-line security/detect-object-injection
+      handleVoiceActivity(voiceActivities[i]);
+    }
+
+    if (isPlaying && speechState !== 'bot_speaking') {
+      setSpeechState('bot_speaking');
+    } else if (!isPlaying && speechState === 'bot_speaking') {
+      setSpeechState('listening');
+    }
+
+    lastProcessedIndexRef.current = voiceActivities.length;
+  }, [voiceActivities, recording, postActivity, isPlaying, playAudio, speechState, stopAudio, handleVoiceActivity]);
+
+  const setRecording = useCallback(
+    (shouldRecord: boolean) => {
+      if (!isConnected) {
+        return;
+      }
+
+      if (!recording) {
+        setSpeechState('listening');
+      } else {
+        stopAudio();
+        cancelActiveResponse();
+        setSpeechState('idle');
+      }
+
+      baseSetRecording(shouldRecord);
+    },
+    [isConnected, recording, baseSetRecording, stopAudio, cancelActiveResponse]
+  );
+
+  const contextValue = useMemo(
+    () => ({
+      recording,
+      setRecording,
+      speechState
+    }),
+    [recording, setRecording, speechState]
+  );
+
+  return <SpeechToSpeechContext.Provider value={contextValue}>{children}</SpeechToSpeechContext.Provider>;
+};
diff --git a/packages/api/src/providers/SpeechToSpeech/private/Context.ts b/packages/api/src/providers/SpeechToSpeech/private/Context.ts
new file mode 100644
index 0000000000..ce85310246
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/Context.ts
@@ -0,0 +1,14 @@
+import { createContext } from 'react';
+import { SpeechState } from '../types/SpeechState';
+
+type SpeechToSpeechContextType = {
+  recording: boolean;
+  setRecording: (recording: boolean) => void;
+  speechState: SpeechState;
+};
+
+const SpeechToSpeechContext = createContext<SpeechToSpeechContextType>(undefined!);
+
+export default SpeechToSpeechContext;
+
+export type { SpeechToSpeechContextType };
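A minimal sketch of how a host might wire this provider up — `PushToTalkButton` and the import paths are hypothetical and not part of this change, and in a real host `<SpeechToSpeechComposer>` must itself render under Web Chat's API composer so that `usePostActivity` and the notification hooks resolve:

```tsx
import React from 'react';
import { SpeechToSpeechComposer } from './SpeechToSpeechComposer';
import useSpeechToSpeech from '../useSpeechToSpeech';

// Hypothetical push-to-talk toggle built on the new context.
const PushToTalkButton = () => {
  const [{ recording, setRecording, speechState }] = useSpeechToSpeech();

  return (
    <button onClick={() => setRecording(!recording)} type="button">
      {recording ? `Stop (${speechState})` : 'Talk'}
    </button>
  );
};

// Any component calling useSpeechToSpeech() must sit under the composer.
const SpeechPanel = () => (
  <SpeechToSpeechComposer>
    <PushToTalkButton />
  </SpeechToSpeechComposer>
);

export default SpeechPanel;
```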
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx
new file mode 100644
index 0000000000..8c1d42cb08
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx
@@ -0,0 +1,282 @@
+/** @jest-environment @happy-dom/jest-environment */
+///
+///
+
+import { render, type RenderResult } from '@testing-library/react';
+import React, { type ComponentType } from 'react';
+import { useAudioPlayer } from './useAudioPlayer';
+
+// Mock AudioContext and related APIs
+const mockAudioContext = {
+  sampleRate: 24000,
+  currentTime: 0,
+  destination: {},
+  state: 'running',
+  resume: jest.fn().mockResolvedValue(undefined),
+  close: jest.fn().mockResolvedValue(undefined),
+  createBuffer: jest.fn(),
+  createBufferSource: jest.fn()
+};
+
+const mockAudioBuffer = {
+  duration: 0.1, // 100ms
+  getChannelData: jest.fn().mockReturnValue(new Float32Array(2400))
+};
+
+const mockBufferSource = {
+  buffer: null,
+  connect: jest.fn(),
+  start: jest.fn(),
+  stop: jest.fn(),
+  disconnect: jest.fn(),
+  onended: null
+};
+
+// Mock global AudioContext
+global.AudioContext = jest.fn(() => mockAudioContext) as any;
+global.atob = jest.fn(str => str); // Simple mock for base64 decode
+
+type UseAudioPlayerReturn = ReturnType<typeof useAudioPlayer>;
+
+describe('useAudioPlayer', () => {
+  let HookApp: ComponentType;
+  let hookData: UseAudioPlayerReturn | undefined;
+  let renderResult: RenderResult;
+  const originalAudioContext = global.AudioContext;
+
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockAudioContext.currentTime = 0;
+    mockAudioContext.createBuffer.mockReturnValue(mockAudioBuffer);
+    mockAudioContext.createBufferSource.mockReturnValue(mockBufferSource);
+    mockBufferSource.buffer = null;
+    mockBufferSource.onended = null;
+
+    HookApp = () => {
+      hookData = useAudioPlayer();
+      return null;
+    };
+  });
+
+  afterEach(() => {
+    global.AudioContext = originalAudioContext;
+  });
+
+  describe('Initialization', () => {
+    test('should initialize with correct default values', () => {
+      render(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(false);
+      expect(typeof hookData?.playAudio).toBe('function');
+      expect(typeof hookData?.stopAudio).toBe('function');
+    });
+
+    test('should create AudioContext on first playAudio call', () => {
+      render(<HookApp />);
+
+      hookData?.playAudio('dGVzdA=='); // base64 for 'test'
+
+      expect(AudioContext).toHaveBeenCalledWith({ sampleRate: 24000 });
+    });
+
+    test('should reuse existing AudioContext on subsequent calls', () => {
+      render(<HookApp />);
+
+      hookData?.playAudio('dGVzdA==');
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(AudioContext).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe('Audio playback', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should process base64 audio data correctly', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(global.atob).toHaveBeenCalledWith('dGVzdA==');
+      expect(mockAudioContext.createBuffer).toHaveBeenCalledWith(1, expect.any(Number), 24000);
+      expect(mockAudioContext.createBufferSource).toHaveBeenCalled();
+    });
+
+    test('should set up audio buffer source correctly', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(mockBufferSource.connect).toHaveBeenCalledWith(mockAudioContext.destination);
+      expect(mockBufferSource.start).toHaveBeenCalled();
+      expect(mockBufferSource.buffer).toBe(mockAudioBuffer);
+    });
+
+    test('should resume AudioContext if needed', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      expect(mockAudioContext.resume).toHaveBeenCalled();
+    });
+
+    test('should queue multiple audio chunks correctly', () => {
+      mockAudioBuffer.duration = 0.1; // 100ms
+
+      hookData?.playAudio('dGVzdA==');
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(2);
+      // First chunk starts at currentTime (0), second at 0.1
+      expect(mockBufferSource.start).toHaveBeenNthCalledWith(1, 0);
+      expect(mockBufferSource.start).toHaveBeenNthCalledWith(2, 0.1);
+    });
+  });
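+
+  // isPlaying is derived from refs during render and does not trigger a re-render on its
+  // own, so the tests below call rerender() to re-read it after playAudio()/stopAudio().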
+
+  describe('isPlaying state', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should return true when audio is queued for playback', () => {
+      mockAudioContext.currentTime = 0;
+      mockAudioBuffer.duration = 0.1;
+
+      hookData?.playAudio('dGVzdA==');
+      renderResult.rerender(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(true);
+    });
+
+    test('should return false when no audio is queued', () => {
+      expect(hookData?.isPlaying).toBe(false);
+    });
+
+    test('should handle multiple chunks and playing state', () => {
+      mockAudioContext.currentTime = 0.05; // In the middle of first chunk
+      mockAudioBuffer.duration = 0.1;
+
+      hookData?.playAudio('dGVzdA=='); // 0 - 0.1
+      hookData?.playAudio('dGVzdDI='); // 0.1 - 0.2
+      renderResult.rerender(<HookApp />);
+
+      expect(hookData?.isPlaying).toBe(true);
+    });
+  });
+
+  describe('Audio cleanup', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should clean up buffer source on ended', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      // Simulate audio ended
+      if (mockBufferSource.onended) {
+        mockBufferSource.onended();
+      }
+
+      expect(mockBufferSource.disconnect).toHaveBeenCalled();
+      expect(mockBufferSource.buffer).toBeNull();
+    });
+
+    test('should stop all audio and close context', () => {
+      hookData?.playAudio('dGVzdA==');
+
+      hookData?.stopAudio();
+      renderResult.rerender(<HookApp />);
+
+      expect(mockAudioContext.close).toHaveBeenCalled();
+      expect(hookData?.isPlaying).toBe(false);
+    });
+  });
+
+  describe('Error handling', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle invalid base64 data gracefully', () => {
+      expect(() => {
+        hookData?.playAudio('invalid-base64!@#');
+      }).not.toThrow();
+    });
+
+    test('should handle AudioContext creation failure', () => {
+      global.AudioContext = jest.fn(() => {
+        throw new Error('AudioContext not supported');
+      }) as any;
+
+      expect(() => {
+        hookData?.playAudio('dGVzdA==');
+      }).toThrow('AudioContext not supported');
+    });
+
+    test('should handle missing audio context in isPlaying', () => {
+      // Before any audio is played, audioCtxRef should be null
+      expect(hookData?.isPlaying).toBe(false);
+    });
+  });
+
+  describe('Real-world scenarios', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle streaming audio chunks', () => {
+      mockAudioBuffer.duration = 0.05; // 50ms chunks
+
+      // Simulate streaming 5 chunks
+      for (let i = 0; i < 5; i++) {
+        hookData?.playAudio(`chunk${i}`);
+      }
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(5);
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(true);
+    });
+
+    test('should handle playback interruption', () => {
+      hookData?.playAudio('dGVzdA==');
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(true);
+
+      hookData?.stopAudio();
+      renderResult.rerender(<HookApp />);
+      expect(hookData?.isPlaying).toBe(false);
+      expect(mockAudioContext.close).toHaveBeenCalled();
+    });
+
+    test('should handle resume after stop', () => {
+      // Play, stop, then play again
+      hookData?.playAudio('dGVzdA==');
+      hookData?.stopAudio();
+      hookData?.playAudio('dGVzdDI=');
+
+      expect(AudioContext).toHaveBeenCalledTimes(2); // New context after stop
+    });
+  });
+
+  describe('Performance considerations', () => {
+    beforeEach(() => {
+      renderResult = render(<HookApp />);
+    });
+
+    test('should handle large audio data', () => {
+      const largeBase64 = 'A'.repeat(10000);
+
+      expect(() => {
+        hookData?.playAudio(largeBase64);
+      }).not.toThrow();
+    });
+
+    test('should handle rapid successive calls', () => {
+      for (let i = 0; i < 100; i++) {
+        // Ensure the mock "base64" data has an even length as Int16Array (which represents 16-bit audio samples) requires the underlying data to be in multiples of 2 bytes
+        hookData?.playAudio(`chunk${i}`.padEnd(8, ' '));
+      }
+
+      expect(mockBufferSource.start).toHaveBeenCalledTimes(100);
+    });
+  });
+});
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts
new file mode 100644
index 0000000000..6216932a8c
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts
@@ -0,0 +1,69 @@
+import { useRef, useCallback } from 'react';
+
+const SAMPLE_RATE = 24000;
+const INT16_SCALE = 32768;
+
+export function useAudioPlayer() {
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const nextPlayTimeRef = useRef(0);
+
+  const initAudio = useCallback(() => {
+    if (!audioCtxRef.current) {
+      audioCtxRef.current = new AudioContext({ sampleRate: SAMPLE_RATE });
+    }
+    return audioCtxRef.current;
+  }, []);
+
+  const playAudio = useCallback(
+    (base64: string) => {
+      const audioCtx = initAudio();
+      audioCtx.resume?.();
+
+      try {
+        const bytes = Uint8Array.from(atob(base64), c => c.charCodeAt(0));
+        const int16 = new Int16Array(bytes.buffer);
+        const float32 = new Float32Array(int16.length);
+
+        for (let i = 0; i < int16.length; i++) {
+          // eslint-disable-next-line security/detect-object-injection
+          float32[i] = int16[i] / INT16_SCALE;
+        }
+
+        const buffer = audioCtx.createBuffer(1, float32.length, SAMPLE_RATE);
+        buffer.getChannelData(0).set(float32);
+
+        const src = audioCtx.createBufferSource();
+        src.buffer = buffer;
+        src.connect(audioCtx.destination);
+
+        // Clear buffer when finished
+        src.onended = () => {
+          src.disconnect();
+          src.buffer = null;
+        };
+
+        nextPlayTimeRef.current = Math.max(nextPlayTimeRef.current, audioCtx.currentTime);
+        src.start(nextPlayTimeRef.current);
+        nextPlayTimeRef.current += buffer.duration;
+      } catch (error) {
+        console.warn('botframework-webchat: Error during audio playback in useAudioPlayer:', error);
+      }
+    },
+    [initAudio]
+  );
+
+  const stopAudio = useCallback(() => {
+    nextPlayTimeRef.current = 0;
+
+    if (audioCtxRef.current) {
+      audioCtxRef.current.close();
+      audioCtxRef.current = null;
+    }
+  }, []);
+
+  return {
+    playAudio,
+    stopAudio,
+    isPlaying: audioCtxRef.current ? audioCtxRef.current.currentTime < nextPlayTimeRef.current : false
+  };
+}
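The playback path assumes 24 kHz mono 16-bit little-endian PCM. As a standalone sanity check of the decode step above (not part of the diff): the `nextPlayTimeRef` bookkeeping schedules each decoded buffer at the tail of the previous one, or at `currentTime` once playback has caught up, which is what keeps streamed chunks gapless.

```ts
// Decode one PCM16 sample the way playAudio does (platform-typical little-endian).
const bytes = Uint8Array.from([0x00, 0xc0]); // 0xc000 as little-endian Int16 === -16384
const [sample] = new Int16Array(bytes.buffer);

console.log(sample); // -16384
console.log(sample / 32768); // -0.5 — the normalized Float32 value

// Duration follows from the sample rate: a 4800-byte chunk holds 2400 samples,
// and 2400 / 24000 Hz = 0.1 s of audio per chunk.
```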
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useContext.ts b/packages/api/src/providers/SpeechToSpeech/private/useContext.ts
new file mode 100644
index 0000000000..50926b0a12
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useContext.ts
@@ -0,0 +1,15 @@
+import { useContext } from 'react';
+
+import SpeechToSpeechContext from './Context';
+
+import type { SpeechToSpeechContextType } from './Context';
+
+export default function useSpeechToSpeechContext(thrownOnUndefined = true): SpeechToSpeechContextType {
+  const contextValue = useContext(SpeechToSpeechContext);
+
+  if (thrownOnUndefined && !contextValue) {
+    throw new Error('botframework-webchat internal: This hook can only be used under <SpeechToSpeechComposer>.');
+  }
+
+  return contextValue;
+}
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx
new file mode 100644
index 0000000000..01368ceda2
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx
@@ -0,0 +1,160 @@
+/** @jest-environment @happy-dom/jest-environment */
+///
+
+import { act, render, waitFor, type RenderResult } from '@testing-library/react';
+import React, { type ComponentType } from 'react';
+import { useRecorder } from './useRecorder';
+
+// --- Mocks ---
+
+const mockTrack = {
+  stop: jest.fn()
+};
+
+const mockMediaStream = {
+  getTracks: jest.fn(() => [mockTrack])
+};
+
+const mockMediaDevices = {
+  getUserMedia: jest.fn().mockResolvedValue(mockMediaStream)
+};
+
+const mockWorkletPort = {
+  postMessage: jest.fn(),
+  onmessage: null as ((event: { data: any }) => void) | null
+};
+
+const mockWorkletNode = {
+  connect: jest.fn(),
+  disconnect: jest.fn(),
+  port: mockWorkletPort
+};
+
+const mockAudioContext = {
+  state: 'running',
+  resume: jest.fn().mockResolvedValue(undefined),
+  createMediaStreamSource: jest.fn(() => ({
+    connect: jest.fn()
+  })),
+  destination: {},
+  audioWorklet: {
+    addModule: jest.fn().mockResolvedValue(undefined)
+  }
+};
+
+// --- Global Mocks Setup ---
+
+Object.defineProperty(global.navigator, 'mediaDevices', {
+  value: mockMediaDevices,
+  writable: true
+});
+
+global.AudioContext = jest.fn(() => mockAudioContext as any);
+global.AudioWorkletNode = jest.fn(() => mockWorkletNode as any);
+global.Blob = jest.fn((parts, options) => ({ parts, type: options?.type })) as any;
+global.URL.createObjectURL = jest.fn(() => 'blob:http://localhost/mock-url');
+global.URL.revokeObjectURL = jest.fn();
+global.btoa = jest.fn(str => `btoa(${str})`);
+
+// --- Tests ---
+
+describe('useRecorder', () => {
+  let onAudioChunk: jest.Mock;
+  let HookApp: ComponentType<{ onAudioChunk: (base64: string) => void }>;
+  let hookData: ReturnType<typeof useRecorder> | undefined;
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars
+  let renderResult: RenderResult;
+
+  beforeEach(() => {
+    // Clear all mocks before each test
+    jest.clearAllMocks();
+    onAudioChunk = jest.fn();
+    hookData = undefined;
+    mockWorkletPort.onmessage = null;
+    (mockAudioContext.state as any) = 'running';
+
+    HookApp = ({ onAudioChunk }) => {
+      hookData = useRecorder(onAudioChunk);
+      return null;
+    };
+  });
+
+  test('should be initially not recording', () => {
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+    expect(hookData?.recording).toBe(false);
+  });
+
+  test('should start recording when setRecording(true) is called', async () => {
+    renderResult = render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(true));
+
+    expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledTimes(1);
+    expect(global.AudioContext).toHaveBeenCalledTimes(1);
+    expect(mockAudioContext.audioWorklet.addModule).toHaveBeenCalledTimes(1);
+    expect(global.AudioWorkletNode).toHaveBeenCalledWith(expect.anything(), 'audio-recorder');
+    expect(mockWorkletNode.connect).toHaveBeenCalledTimes(1);
+    expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' });
+  });
+
+  test('should stop recording when setRecording(false) is called', async () => {
+    renderResult = render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    // Start recording
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(true));
+
+    // Stop recording
+    act(() => {
+      hookData?.setRecording(false);
+    });
+
+    await waitFor(() => expect(hookData?.recording).toBe(false));
+
+    expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'STOP' });
+    expect(mockWorkletNode.disconnect).toHaveBeenCalledTimes(1);
+    expect(mockTrack.stop).toHaveBeenCalledTimes(1);
+  });
+
+  test('should process audio chunks sent from the worklet', async () => {
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(mockWorkletPort.onmessage).not.toBeNull());
+
+    // Simulate a message from the audio worklet
+    const mockAudioData = new Float32Array([0.1, 0.2, -0.1]);
+    act(() => {
+      mockWorkletPort.onmessage!({
+        data: {
+          eventType: 'audio',
+          audioData: mockAudioData
+        }
+      });
+    });
+
+    await waitFor(() => expect(onAudioChunk).toHaveBeenCalledTimes(1));
+    expect(global.btoa).toHaveBeenCalled();
+  });
+
+  test('should handle suspended audio context by resuming it', async () => {
+    (mockAudioContext.state as any) = 'suspended';
+    render(<HookApp onAudioChunk={onAudioChunk} />);
+
+    act(() => {
+      hookData?.setRecording(true);
+    });
+
+    await waitFor(() => expect(mockAudioContext.resume).toHaveBeenCalledTimes(1));
+  });
+});
diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts
new file mode 100644
index 0000000000..b9930cada1
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts
@@ -0,0 +1,128 @@
+import { useRef, useState, useCallback } from 'react';
+
+const audioProcessorCode = `
+class AudioRecorderProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super()
+    this.recording = false
+    this.buffer = []
+    this.port.onmessage = e => {
+      if (e.data.command === 'START') this.recording = true
+      else if (e.data.command === 'STOP') {
+        this.recording = false
+        if (this.buffer.length) this.sendBuffer()
+      }
+    }
+  }
+  sendBuffer() {
+    if (this.buffer.length) {
+      this.port.postMessage({
+        eventType: 'audio',
+        audioData: new Float32Array(this.buffer)
+      })
+      this.buffer = []
+    }
+  }
+  process(inputs) {
+    if (inputs[0]?.length && this.recording) {
+      this.buffer.push(...inputs[0][0])
+      if (this.buffer.length >= 2400) this.sendBuffer()
+    }
+    return true
+  }
+}
+registerProcessor('audio-recorder', AudioRecorderProcessor)
+`;
+
+const INT16_MIN = -32768;
+const INT16_MAX = 32767;
+const INT16_SCALE = 32767;
+
+export function useRecorder(onAudioChunk: (base64: string) => void) {
+  const [recording, setRecordingInternal] = useState(false);
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  const workletRef = useRef<AudioWorkletNode | null>(null);
+  const streamRef = useRef<MediaStream | null>(null);
+
+  const initAudio = useCallback(async () => {
+    if (audioCtxRef.current) {
+      return;
+    }
+    const audioCtx = new AudioContext({ sampleRate: 24000 });
+    const blob = new Blob([audioProcessorCode], {
+      type: 'application/javascript'
+    });
+    // eslint-disable-next-line no-restricted-properties
+    const url = URL.createObjectURL(blob);
+    await audioCtx.audioWorklet.addModule(url);
+    URL.revokeObjectURL(url);
+    // eslint-disable-next-line require-atomic-updates
+    audioCtxRef.current = audioCtx;
+  }, []);
+
+  const startRecording = useCallback(async () => {
+    await initAudio();
+    const audioCtx = audioCtxRef.current!;
+    if (audioCtx.state === 'suspended') {
+      await audioCtx.resume();
+    }
+    const stream = await navigator.mediaDevices.getUserMedia({
+      audio: {
+        channelCount: 1,
+        sampleRate: 24000,
+        echoCancellation: true
+      }
+    });
+    streamRef.current = stream;
+    const source = audioCtx.createMediaStreamSource(stream);
+    const worklet = new AudioWorkletNode(audioCtx, 'audio-recorder');
+
+    worklet.port.onmessage = e => {
+      if (e.data.eventType === 'audio') {
+        const float32 = e.data.audioData;
+        const int16 = new Int16Array(float32.length);
+        for (let i = 0; i < float32.length; i++) {
+          // eslint-disable-next-line security/detect-object-injection
+          int16[i] = Math.max(INT16_MIN, Math.min(INT16_MAX, float32[i] * INT16_SCALE));
+        }
+        const base64 = btoa(String.fromCharCode(...new Uint8Array(int16.buffer)));
+        onAudioChunk(base64);
+      }
+    };
+
+    source.connect(worklet);
+    worklet.connect(audioCtx.destination);
+    worklet.port.postMessage({ command: 'START' });
+    workletRef.current = worklet;
+    setRecordingInternal(true);
+  }, [initAudio, onAudioChunk]);
+
+  const stopRecording = useCallback(() => {
+    if (workletRef.current) {
+      workletRef.current.port.postMessage({ command: 'STOP' });
+      workletRef.current.disconnect();
+      workletRef.current = null;
+    }
+    if (streamRef.current) {
+      streamRef.current.getTracks().forEach(track => track.stop());
+      streamRef.current = null;
+    }
+    setRecordingInternal(false);
+  }, []);
+
+  const setRecording = useCallback(
+    async (shouldRecord: boolean) => {
+      if (!shouldRecord && recording) {
+        stopRecording();
+      } else if (shouldRecord && !recording) {
+        await startRecording();
+      }
+    },
+    [recording, startRecording, stopRecording]
+  );
+
+  return {
+    recording,
+    setRecording
+  };
+}
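The capture path is the mirror image of playback: Float32 samples from the worklet are clamped, scaled to Int16 and base64-encoded. A standalone sketch of that step under the same constants (the values are worked examples, not part of the diff):

```ts
const INT16_MIN = -32768;
const INT16_MAX = 32767;
const INT16_SCALE = 32767;

// Clamp-and-scale exactly as the worklet message handler does.
const encode = (sample: number): number =>
  Math.max(INT16_MIN, Math.min(INT16_MAX, sample * INT16_SCALE));

console.log(encode(0.5)); // 16383.5 — truncated to 16383 when stored in an Int16Array
console.log(encode(1.2)); // 32767 — out-of-range input clamps at the positive rail
console.log(encode(-1.2)); // -32768 — and at the negative rail

// The worklet flushes every 2400 samples — one 100 ms chunk at 24 kHz — which is
// the cadence of the 'input_audio_buffer.append' events posted by the composer.
```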
diff --git a/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts b/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts
new file mode 100644
index 0000000000..62d5cc8c13
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/types/SpeechState.ts
@@ -0,0 +1 @@
+export type SpeechState = 'idle' | 'listening' | 'processing' | 'bot_speaking';
diff --git a/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts b/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts
new file mode 100644
index 0000000000..d7ac3fac44
--- /dev/null
+++ b/packages/api/src/providers/SpeechToSpeech/useSpeechToSpeech.ts
@@ -0,0 +1,6 @@
+import { SpeechToSpeechContextType } from './private/Context';
+import useSpeechToSpeechContext from './private/useContext';
+
+export default function useSpeechToSpeech(): readonly [SpeechToSpeechContextType] {
+  return [useSpeechToSpeechContext()];
+}
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
index e635e6a060..a81d494a07 100644
--- a/packages/core/src/index.ts
+++ b/packages/core/src/index.ts
@@ -42,6 +42,7 @@ import getOrgSchemaMessage from './utils/getOrgSchemaMessage';
 import isForbiddenPropertyName from './utils/isForbiddenPropertyName';
 import onErrorResumeNext from './utils/onErrorResumeNext';
 import singleToArray from './utils/singleToArray';
+import isVoiceActivity from './utils/voiceActivity/isVoiceActivity';
 
 export {
   CLEAR_SUGGESTED_ACTIONS,
@@ -96,6 +97,7 @@ export {
   getActivityLivestreamingMetadata,
   getOrgSchemaMessage,
   isForbiddenPropertyName,
+  isVoiceActivity,
   markActivity,
   onErrorResumeNext,
   parseAction,
diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts
new file mode 100644
index 0000000000..c8d744595e
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts
@@ -0,0 +1,88 @@
+import isVoiceActivity from './isVoiceActivity';
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// Mock activity factory for testing
+const createMockActivity = (type: string = 'event', value?: any): WebChatActivity => ({
+  type: type as any,
+  id: 'test-activity-id',
+  from: { id: 'test-user' },
+  channelData: {
+    'webchat:sequence-id': 1
+  },
+  ...(value && { value })
+});
+
+const createMockVoiceActivity = (voiceEventType: string, additionalProps?: any): WebChatActivity =>
+  createMockActivity('event', {
+    voiceLiveEvent: {
+      type: voiceEventType,
+      ...additionalProps
+    }
+  });
+
+describe('isVoiceActivity', () => {
+  describe('Valid voice activities', () => {
+    test('should return true for event activity with voiceLiveEvent', () => {
+      const activity = createMockVoiceActivity('response.audio.delta', { delta: 'audiodata' });
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+
+    test('should return true for voice activity with minimal voiceLiveEvent', () => {
+      const activity = createMockActivity('event', { voiceLiveEvent: {} });
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+
+  describe('Invalid activities', () => {
+    const testCases = [
+      // Invalid by activity type
+      {
+        name: 'message activity with voiceLiveEvent',
+        activity: () => createMockActivity('message', { voiceLiveEvent: { type: 'response.audio.delta' } })
+      },
+      {
+        name: 'typing activity',
+        activity: () => createMockActivity('typing')
+      },
+      {
+        name: 'event activity with non-object value',
+        activity: () => ({ ...createMockActivity('event'), value: 'not an object' })
+      }
+    ];
+
+    test.each(testCases)('should return false for $name', ({ activity }) => {
+      const result = isVoiceActivity(activity());
+
+      expect(result).toBe(false);
+    });
+  });
+
+  describe('Real-world voice event types', () => {
+    const voiceEventTypes = [
+      'input_audio_buffer.append',
+      'input_audio_buffer.speech_started',
+      'input_audio_buffer.speech_stopped',
+      'conversation.item.input_audio_transcription.completed',
+      'response.audio.delta',
+      'response.audio_transcript.delta',
+      'response.audio_transcript.done',
+      'response.done',
+      'session.update',
+      'response.cancel'
+    ];
+
+    test.each(voiceEventTypes)('should return true for voice event type: %s', eventType => {
+      const activity = createMockVoiceActivity(eventType);
+
+      const result = isVoiceActivity(activity);
+
+      expect(result).toBe(true);
+    });
+  });
+});
diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts
new file mode 100644
index 0000000000..e16154e590
--- /dev/null
+++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts
@@ -0,0 +1,14 @@
+import { WebChatActivity } from '../../types/WebChatActivity';
+
+// This is an interim type guard until the activity protocol is ratified.
+const isVoiceActivity = (
+  activity: WebChatActivity
+): activity is WebChatActivity & {
+  value: { voiceLiveEvent: any };
+} =>
+  activity.type === 'event' &&
+  !!activity.value &&
+  typeof activity.value === 'object' &&
+  'voiceLiveEvent' in activity.value;
+
+export default isVoiceActivity;
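A small usage sketch of the new guard — the narrowing is what lets callers read `value.voiceLiveEvent` without casting (the function below is illustrative, not part of the diff):

```ts
import { isVoiceActivity, type WebChatActivity } from 'botframework-webchat-core';

function describeActivity(activity: WebChatActivity): string {
  if (isVoiceActivity(activity)) {
    // Narrowed: activity.value.voiceLiveEvent is safely accessible here.
    return `voice event: ${activity.value.voiceLiveEvent.type}`;
  }

  return `non-voice activity: ${activity.type}`;
}
```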