From 9aa6b47775b221e984ea68f4536915034b1099a4 Mon Sep 17 00:00:00 2001 From: shash-hq Date: Mon, 15 Dec 2025 16:51:12 +0530 Subject: [PATCH] Add Realtime API Customer Support Agent example --- examples/realtime_agent/realtime_agent.ipynb | 252 +++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 examples/realtime_agent/realtime_agent.ipynb diff --git a/examples/realtime_agent/realtime_agent.ipynb b/examples/realtime_agent/realtime_agent.ipynb new file mode 100644 index 0000000000..e8a33c05b6 --- /dev/null +++ b/examples/realtime_agent/realtime_agent.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Building an Interruptible Customer Support Agent with OpenAI Realtime API\n", + "\n", + "This notebook demonstrates how to build a voice-based customer support agent using the **OpenAI Realtime API (Beta)**.\n", + "\n", + "## Key Features Demonstrated\n", + "1. **WebSocket Connection**: Persistent, low-latency stateful connection.\n", + "2. **Tool Use**: The agent can look up mock data (Order Status).\n", + "3. **Interruption Handling**: The client handles `input_audio_buffer.speech_started` to cancel the AI's response when the user interrupts.\n", + "\n", + "## Prerequisites\n", + "- OpenAI API Key with access to `gpt-4o-realtime-preview`.\n", + "- Python 3.10+\n", + "- `websockets` 14 or newer (needed for the `additional_headers` argument); `asyncio`, `json` and `logging` ship with Python\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import json\n", + "import logging\n", + "import websockets\n", + "import traceback\n", + "\n", + "# Configuring logging to see the events live\n", + "logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')\n", + "logger = logging.getLogger(\"RealtimeAgent\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. 
# Mock order database backing the demo tool. Keys are lower-case order IDs.
MOCK_DB = {
    "order_123": {"status": "shipped", "delivery_date": "2024-12-20"},
    "order_456": {"status": "processing", "delivery_date": "2024-12-25"},
}


def get_order_status(order_id: str) -> str:
    """Look up an order by ID and return the record as a JSON string.

    The ID is produced by the model from spoken input, so it is normalised
    (surrounding whitespace stripped, lower-cased) before the lookup; without
    this, "Order_123" or " order_123" would miss the "order_123" entry.

    Args:
        order_id: The order identifier, e.g. ``"order_123"``. Anything that
            is not a string is treated as an unknown order rather than
            raising.

    Returns:
        A JSON object string: the matching ``MOCK_DB`` record, or
        ``{"status": "not_found"}`` when there is no such order.
    """
    print(f"\n[Tool] Looking up order {order_id}...")
    # Non-string IDs map to None, which is never a key in MOCK_DB; this also
    # keeps unhashable values (lists, dicts) away from the dict lookup.
    lookup_key = order_id.strip().lower() if isinstance(order_id, str) else None
    result = MOCK_DB.get(lookup_key, {"status": "not_found"})
    return json.dumps(result)


# Tool definitions sent to the model in the `session.update` event.
tools_schema = [
    {
        "type": "function",
        "name": "get_order_status",
        "description": "Get the status of a customer order",
        "parameters": {
            "type": "object",
            "properties": {
                "order_id": {
                    "type": "string",
                    "description": "The order ID, e.g. order_123",
                },
            },
            "required": ["order_id"],
        },
    },
]
class RealtimeAgent:
    """Minimal WebSocket client for the OpenAI Realtime API with tool calling.

    The agent opens the socket, configures the session (server-side voice
    activity detection plus the tool definitions), and then reacts to server
    events: it cancels an in-progress response when the user starts speaking
    and executes function calls requested by the model.

    Args:
        url: Base WebSocket endpoint; ``?model=<model>`` is appended to it.
        api_key: OpenAI API key, sent as a bearer token on connect.
        model: Realtime model name placed in the query string.
        tools: Tool definitions for ``session.update``. Defaults to the
            module-level ``tools_schema``.
        tool_handlers: Mapping of tool name to a callable that receives the
            decoded arguments as keyword arguments and returns the output
            string. Defaults to ``{"get_order_status": get_order_status}``.
    """

    def __init__(
        self,
        url="wss://api.openai.com/v1/realtime",
        api_key=None,
        model="gpt-4o-realtime-preview-2024-10-01",
        tools=None,
        tool_handlers=None,
    ):
        self.url = f"{url}?model={model}"
        self.api_key = api_key
        self.ws = None
        self.should_stop = False

        # The module-level defaults are resolved only when nothing is passed,
        # so the class can be used with any tool set.
        self.tools = tools_schema if tools is None else tools
        self.tool_handlers = (
            {"get_order_status": get_order_status}
            if tool_handlers is None
            else dict(tool_handlers)
        )

        # Same named logger the notebook configures at the top of the file.
        self._log = logging.getLogger("RealtimeAgent")
        # True between `response.created` and `response.done`.
        self._response_active = False
        # Set when a tool result still needs a `response.create` once the
        # response that issued the call has finished.
        self._followup_pending = False
        # call_id -> function name, learned from `response.output_item.added`.
        self._call_names = {}

    async def connect(self):
        """Open the WebSocket and send the initial session configuration.

        Raises:
            ValueError: If no API key was provided.
            Exception: Whatever the connection attempt or the first send
                raises; it is logged and re-raised unchanged.
        """
        if not self.api_key:
            # Fail before sending a meaningless "Bearer None" header.
            raise ValueError("An OpenAI API key is required to connect.")

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "OpenAI-Beta": "realtime=v1",
        }
        try:
            self._log.info("Connecting to %s...", self.url)
            # NOTE(review): `additional_headers` is the keyword of the newer
            # websockets asyncio client; older releases used `extra_headers`.
            self.ws = await websockets.connect(self.url, additional_headers=headers)
            self._log.info("Connected!")
            await self.initialize_session()
        except Exception as exc:
            self._log.error("Connection failed: %s", exc)
            raise

    async def initialize_session(self):
        """Send initial session configuration."""
        event = {
            "type": "session.update",
            "session": {
                "modalities": ["text", "audio"],
                "instructions": "You are a helpful customer support agent. Check order status when asked.",
                "voice": "alloy",
                "turn_detection": {"type": "server_vad"},
                "tools": self.tools,
                "tool_choice": "auto",
            },
        }
        await self.send_event(event)

    async def send_event(self, event):
        """Serialise ``event`` to JSON and send it; a no-op when not connected."""
        if self.ws is None:
            self._log.warning("Not connected; dropping %s event.", event.get("type"))
            return
        await self.ws.send(json.dumps(event))

    async def run_loop(self):
        """Main loop to receive messages.

        Frames that are not JSON objects are logged and skipped so a single
        bad frame does not end the session. Any other error ends the loop and
        is logged, never raised, so awaiting the listener task is always safe.
        """
        try:
            async for message in self.ws:
                if self.should_stop:
                    break
                try:
                    data = json.loads(message)
                except (TypeError, ValueError):
                    self._log.warning("Skipping frame that is not valid JSON.")
                    continue
                if not isinstance(data, dict):
                    self._log.warning("Skipping JSON frame that is not an object.")
                    continue
                await self.handle_message(data)
        except Exception as exc:
            self._log.exception("Loop error: %s", exc)

    async def handle_message(self, data):
        """Dispatch one decoded server event.

        Args:
            data: The event as a dict; its ``"type"`` key selects the action.
        """
        event_type = data.get("type")

        if event_type == "response.created":
            self._response_active = True

        elif event_type == "response.done":
            self._response_active = False
            if self._followup_pending:
                # The response that requested the tool has finished; now the
                # model can be asked to speak the tool result.
                self._followup_pending = False
                await self.send_event({"type": "response.create"})

        elif event_type == "response.output_item.added":
            item = data.get("item") or {}
            if item.get("type") == "function_call" and item.get("call_id"):
                self._call_names[item["call_id"]] = item.get("name")

        elif event_type == "input_audio_buffer.speech_started":
            # The user is talking: do not queue a spoken follow-up over them.
            # The tool output already sits in the conversation for the next turn.
            self._followup_pending = False
            if self._response_active:
                # Only cancel when something is actually in progress; a cancel
                # with no active response is answered with an `error` event.
                self._log.warning("[INTERRUPTION] User started speaking! Canceling current response.")
                await self.send_event({"type": "response.cancel"})

        elif event_type == "response.function_call_arguments.done":
            await self._handle_tool_call(data)

        elif event_type == "response.audio.delta":
            # Here you would play audio bytes
            pass

        elif event_type == "error":
            self._log.error("Error: %s", data.get("error"))

    async def _handle_tool_call(self, data):
        """Run the requested tool and return its output to the conversation."""
        call_id = data.get("call_id")
        # NOTE(review): the event may not always carry `name`; fall back to
        # the name recorded when the function_call item was added.
        recorded_name = self._call_names.pop(call_id, None)
        name = data.get("name") or recorded_name

        output = self._execute_tool(name, data.get("arguments"))

        # Every call gets an output, including failures, so the model is never
        # left waiting on a function call that has no result.
        await self.send_event({
            "type": "conversation.item.create",
            "item": {
                "type": "function_call_output",
                "call_id": call_id,
                "output": output,
            },
        })

        if self._response_active:
            # Defer until `response.done`: a new response cannot be started
            # while the one that issued this call is still in progress.
            self._followup_pending = True
        else:
            # Trigger model to read the result
            await self.send_event({"type": "response.create"})

    def _execute_tool(self, name, raw_arguments):
        """Return the tool's output string, or a JSON error payload on failure."""
        handler = self.tool_handlers.get(name)
        if handler is None:
            return json.dumps({"error": f"Unknown tool: {name}"})

        try:
            args = json.loads(raw_arguments) if raw_arguments else {}
        except (TypeError, ValueError):
            return json.dumps({"error": "Tool arguments were not valid JSON."})
        if not isinstance(args, dict):
            return json.dumps({"error": "Tool arguments must be a JSON object."})

        try:
            return handler(**args)
        except Exception as exc:
            self._log.exception("Tool %s failed", name)
            return json.dumps({"error": f"Tool {name} failed: {exc}"})
Running the Agent\n", + "\n", + "Replace `YOUR_API_KEY` below. This block runs the agent for 10 seconds to demonstrate the connection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# REPLACE WITH YOUR KEY\n", + "API_KEY = \"YOUR_API_KEY_HERE\"\n", + "\n", + "async def main():\n", + " if \"YOUR_API_KEY\" in API_KEY:\n", + " print(\"Please set a valid API Key first!\")\n", + " return\n", + "\n", + " agent = RealtimeAgent(api_key=API_KEY)\n", + " await agent.connect()\n", + " \n", + " # Run the listener in background\n", + " listener_task = asyncio.create_task(agent.run_loop())\n", + " \n", + " try:\n", + " # Simulate a conversation starter if you like, or just wait for VAD\n", + " # For this demo, we just keep it alive for 10 seconds\n", + " print(\"Agent is listening (server VAD)... Speak now if you have a mic setup!\")\n", + " print(\"(Or manually trigger response.create if using mock)\")\n", + " await asyncio.sleep(10)\n", + " finally:\n", + " agent.should_stop = True\n", + " await agent.ws.close()\n", + " await listener_task\n", + "\n", + "# In Jupyter, we can await main directly\n", + "# await main()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file