diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/README.md b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/README.md new file mode 100644 index 0000000000..802a62dd71 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/README.md @@ -0,0 +1,99 @@ +# Speechmatics TTS Python Extension + +This extension provides text-to-speech functionality using the Speechmatics TTS API. + +## Features + +- Low-latency speech synthesis (sub-150ms) +- High-quality, natural-sounding voices +- HTTP REST API integration +- Multiple voice options (UK and US English) +- Support for WAV and MP3 output formats +- Production-grade reliability + +## Prerequisites + +- Speechmatics API key +- Python 3.10+ +- aiohttp package + +## Configuration + +The extension can be configured through your property.json: + +```json +{ + "params": { + "api_key": "your-api-key-here", + "voice_id": "sarah", + "output_format": "wav", + "sample_rate": 16000, + "base_url": "https://preview.tts.speechmatics.com" + } +} +``` + +### Configuration Options + +**Parameters inside `params` object:** +- `api_key` (required): Speechmatics API key +- `voice_id` (required): Voice identifier (sarah, theo, megan, jack) +- `output_format` (optional): Audio format - "wav" or "mp3" (default: "wav") +- `sample_rate` (optional): Sample rate in Hz that the extension reports for the synthesized audio (default: 16000); it is not sent to the API, so it must match the rate the API actually returns +- `base_url` (optional): API base URL (default: "https://preview.tts.speechmatics.com") + +### Available Voices + +| Voice ID | Description | +|----------|-------------| +| `sarah` | English Female (UK) | +| `theo` | English Male (UK) | +| `megan` | English Female (US) | +| `jack` | English Male (US) | + +## Getting Started + +### 1. Get API Key + +Create an API key at the [Speechmatics Portal](https://portal.speechmatics.com/). + +### 2. Set Environment Variable + +```bash +export SPEECHMATICS_API_KEY=your-api-key-here +``` + +### 3. 
Configure Extension + +Update your `property.json` with the desired voice and settings. + +## API Details + +- **Endpoint**: `https://preview.tts.speechmatics.com/generate/{voice_id}` +- **Method**: POST +- **Authentication**: Bearer token +- **Latency**: Sub-150ms +- **Sample Rate**: 16kHz mono (optimized for voice agents) + +## Architecture + +This extension follows the TEN Framework TTS2 HTTP extension pattern: + +- `extension.py`: Main extension class inheriting from `AsyncTTS2HttpExtension` +- `speechmatics_tts.py`: Client implementation with HTTP API integration +- `config.py`: Configuration model with validation +- `addon.py`: Extension addon registration + +## License + +Apache 2.0 + +## Contributing + +Contributions are welcome! Please submit issues and pull requests to the TEN Framework repository. + +## Links + +- [Speechmatics TTS Documentation](https://docs.speechmatics.com/text-to-speech/quickstart) +- [Speechmatics Portal](https://portal.speechmatics.com/) +- [TEN Framework](https://github.com/TEN-framework/ten-framework) diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/__init__.py b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/__init__.py new file mode 100644 index 0000000000..0413aa9b81 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/__init__.py @@ -0,0 +1,8 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from . import addon + +__all__ = ["addon"] diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/addon.py b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/addon.py new file mode 100644 index 0000000000..3e749f97c9 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/addon.py @@ -0,0 +1,19 @@ +# +# This file is part of TEN Framework, an open source project. 
from ten_runtime import (
    Addon,
    register_addon_as_extension,
    TenEnv,
)


@register_addon_as_extension("speechmatics_tts_python")
class SpeechmaticsTTSExtensionAddon(Addon):
    """Addon registered under the name ``speechmatics_tts_python``."""

    def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
        """Create a ``SpeechmaticsTTSExtension`` named *name* and report it.

        The new instance is passed to ``ten_env.on_create_instance_done``
        together with the *context* value received from the caller.
        """
        # Imported here rather than at module top, so the extension module is
        # only loaded once an instance is actually requested.
        from .extension import SpeechmaticsTTSExtension

        ten_env.log_info("SpeechmaticsTTSExtensionAddon on_create_instance")
        instance = SpeechmaticsTTSExtension(name)
        ten_env.on_create_instance_done(instance, context)
from typing import Any
import copy
from pydantic import Field
from pathlib import Path
from ten_ai_base import utils
from ten_ai_base.tts2_http import AsyncTTS2HttpConfig


class SpeechmaticsTTSConfig(AsyncTTS2HttpConfig):
    """Configuration model for the Speechmatics TTS extension.

    Vendor settings (``api_key``, ``voice_id`` and the optional keys) are kept
    in the free-form ``params`` dictionary; ``dump`` and ``dump_path`` are
    separate top-level fields.
    """

    dump: bool = Field(default=False, description="Speechmatics TTS dump")
    # Default dump file sits next to this module.
    dump_path: str = Field(
        description="Speechmatics TTS dump path",
        default_factory=lambda: str(
            Path(__file__).parent / "speechmatics_tts_in.pcm"
        ),
    )
    params: dict[str, Any] = Field(
        description="Speechmatics TTS params", default_factory=dict
    )

    def update_params(self) -> None:
        """Do nothing; ``params`` is used exactly as supplied."""
        return None

    def to_str(self, sensitive_handling: bool = True) -> str:
        """Render the config as text.

        When *sensitive_handling* is true, the ``api_key`` entry of ``params``
        is passed through ``utils.encrypt`` on a deep copy before rendering,
        so this instance is never modified.
        """
        if sensitive_handling:
            masked = copy.deepcopy(self)
            if masked.params and "api_key" in masked.params:
                masked.params["api_key"] = utils.encrypt(
                    masked.params["api_key"]
                )
            return f"{masked}"
        return f"{self}"

    def validate(self) -> None:
        """Check that the mandatory ``params`` entries are present and non-empty.

        Raises:
            ValueError: if ``api_key`` or ``voice_id`` is missing or falsy
                (``api_key`` is checked first).
        """
        required = (
            ("api_key", "API key is required for Speechmatics TTS"),
            ("voice_id", "Voice ID is required for Speechmatics TTS"),
        )
        for key, complaint in required:
            if not self.params.get(key):
                raise ValueError(complaint)
"""
Speechmatics TTS Extension

Text-to-speech extension backed by the Speechmatics TTS HTTP API.
"""

from ten_ai_base.tts2_http import (
    AsyncTTS2HttpExtension,
    AsyncTTS2HttpConfig,
    AsyncTTS2HttpClient,
)
from ten_runtime import AsyncTenEnv

from .config import SpeechmaticsTTSConfig
from .speechmatics_tts import SpeechmaticsTTSClient


class SpeechmaticsTTSExtension(AsyncTTS2HttpExtension):
    """
    TTS extension for the Speechmatics HTTP API.

    Supplies the vendor-specific hooks (config, client, vendor name, sample
    rate) on top of ``AsyncTTS2HttpExtension``.
    """

    def __init__(self, name: str) -> None:
        super().__init__(name)
        # Declared with the concrete types for editor support; both start
        # out as None.
        self.config: SpeechmaticsTTSConfig | None = None
        self.client: SpeechmaticsTTSClient | None = None

    # ------------------------------------------------------------
    # Hooks implemented for the base class
    # ------------------------------------------------------------

    async def create_config(self, config_json_str: str) -> AsyncTTS2HttpConfig:
        """Parse *config_json_str* into a ``SpeechmaticsTTSConfig``."""
        parsed = SpeechmaticsTTSConfig.model_validate_json(config_json_str)
        return parsed

    async def create_client(
        self, config: AsyncTTS2HttpConfig, ten_env: AsyncTenEnv
    ) -> AsyncTTS2HttpClient:
        """Build a ``SpeechmaticsTTSClient`` from *config* and *ten_env*."""
        client = SpeechmaticsTTSClient(config=config, ten_env=ten_env)
        return client

    def vendor(self) -> str:
        """Name of the TTS vendor."""
        return "speechmatics"

    def synthesize_audio_sample_rate(self) -> int:
        """Sample rate taken from ``params['sample_rate']`` (16000 if absent)."""
        # NOTE(review): this value is only reported here; the client never
        # sends it to the API — confirm it matches what the API returns.
        params = self.config.params
        return params.get("sample_rate", 16000)
b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/manifest.json @@ -0,0 +1,57 @@ +{ + "type": "extension", + "name": "speechmatics_tts_python", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.11" + }, + { + "type": "system", + "name": "ten_ai_base", + "version": "0.7" + } + ], + "package": { + "include": [ + "manifest.json", + "property.json", + "**.py", + "README.md", + "requirements.txt" + ] + }, + "api": { + "interface": [ + { + "import_uri": "../../system/ten_ai_base/api/tts-interface.json" + } + ], + "property": { + "properties": { + "params": { + "type": "object", + "properties": { + "api_key": { + "type": "string" + }, + "voice_id": { + "type": "string" + }, + "output_format": { + "type": "string" + }, + "sample_rate": { + "type": "int64" + }, + "base_url": { + "type": "string" + } + } + } + } + } + } +} diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/property.json b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/property.json new file mode 100644 index 0000000000..94b3589c6b --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/property.json @@ -0,0 +1,9 @@ +{ + "params": { + "api_key": "${env:SPEECHMATICS_API_KEY}", + "voice_id": "sarah", + "output_format": "wav", + "sample_rate": 16000, + "base_url": "https://preview.tts.speechmatics.com" + } +} diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/requirements.txt b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/requirements.txt new file mode 100644 index 0000000000..f68b8c0636 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/requirements.txt @@ -0,0 +1,3 @@ +aiohttp>=3.8.0 +pydantic>=2.0.0 +pytest==8.3.4 diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/speechmatics_tts.py 
from typing import Any, AsyncIterator, Tuple
import asyncio
import aiohttp
from ten_runtime import AsyncTenEnv
from ten_ai_base.const import LOG_CATEGORY_VENDOR
from ten_ai_base.struct import TTS2HttpResponseEventType
from ten_ai_base.tts2_http import AsyncTTS2HttpClient

from .config import SpeechmaticsTTSConfig


class _SpeechmaticsAPIError(RuntimeError):
    """Non-200 reply from the API; keeps the HTTP status for retry/auth checks."""

    def __init__(self, status: int, body: str) -> None:
        # Message text is unchanged from the plain RuntimeError used before.
        super().__init__(f"Speechmatics TTS API error: {status} - {body}")
        self.status = status


class SpeechmaticsTTSClient(AsyncTTS2HttpClient):
    """Speechmatics TTS client.

    Sends one POST per request to ``{base_url}/generate/{voice_id}`` and
    streams the response body back in chunks.
    """

    # HTTP statuses for which repeating the same request can succeed.
    _RETRYABLE_STATUSES = frozenset({408, 429, 500, 502, 503, 504})

    def __init__(
        self,
        config: SpeechmaticsTTSConfig,
        ten_env: AsyncTenEnv,
    ):
        """Store the config and environment and open the HTTP session.

        Raises:
            RuntimeError: if the aiohttp session cannot be created.
        """
        super().__init__()
        self.config = config
        self.ten_env: AsyncTenEnv = ten_env
        self._is_cancelled = False
        self.session: aiohttp.ClientSession | None = None

        # Retry configuration: up to max_retries extra attempts, with a
        # delay that doubles after every failure.
        self.max_retries = 3
        self.retry_delay = 0.1

        try:
            self.session = aiohttp.ClientSession()

            self.ten_env.log_info(
                f"Speechmatics TTS client initialized with voice: {config.params.get('voice_id')}",
                category=LOG_CATEGORY_VENDOR,
            )
        except Exception as e:
            ten_env.log_error(
                f"Error when initializing Speechmatics TTS: {e}",
                category=LOG_CATEGORY_VENDOR,
            )
            raise RuntimeError(
                f"Error when initializing Speechmatics TTS: {e}"
            ) from e

    async def cancel(self):
        """Flag the in-flight request as cancelled; ``get`` reacts to the flag."""
        self.ten_env.log_debug("Speechmatics TTS: cancel() called.")
        self._is_cancelled = True

    async def get(
        self, text: str, request_id: str
    ) -> AsyncIterator[Tuple[bytes | None, TTS2HttpResponseEventType]]:
        """Synthesize *text* and yield ``(payload, event)`` pairs.

        Audio chunks are yielded as ``RESPONSE`` events. The stream always
        finishes with exactly one terminal event: ``END`` on success,
        ``FLUSH`` when ``cancel`` was called, or ``ERROR`` /
        ``INVALID_KEY_ERROR`` (payload is the UTF-8 error message) on failure.

        Raises:
            RuntimeError: if the HTTP session is not available.
        """
        self._is_cancelled = False

        if not self.session:
            self.ten_env.log_error(
                f"Speechmatics TTS: session not initialized for request_id: {request_id}",
                category=LOG_CATEGORY_VENDOR,
            )
            raise RuntimeError(
                f"Speechmatics TTS: session not initialized for request_id: {request_id}"
            )

        if not text.strip():
            self.ten_env.log_warn(
                f"Speechmatics TTS: empty text for request_id: {request_id}",
                category=LOG_CATEGORY_VENDOR,
            )
            yield None, TTS2HttpResponseEventType.END
            return

        stream = self._synthesize_with_retry(text, request_id)
        try:
            async for chunk in stream:
                if self._is_cancelled:
                    break

                self.ten_env.log_debug(
                    f"Speechmatics TTS: sending audio chunk, length: {len(chunk)}, request_id: {request_id}",
                    category=LOG_CATEGORY_VENDOR,
                )

                if chunk:
                    yield bytes(chunk), TTS2HttpResponseEventType.RESPONSE

            # Decide the terminal event after the loop, so a cancel that
            # arrives once the last chunk was delivered still produces FLUSH
            # instead of ending the stream with no terminal event.
            if self._is_cancelled:
                self.ten_env.log_debug(
                    f"Cancellation detected, sending flush event for request_id: {request_id}"
                )
                yield None, TTS2HttpResponseEventType.FLUSH
            else:
                self.ten_env.log_debug(
                    f"Speechmatics TTS: synthesis completed for request_id: {request_id}",
                    category=LOG_CATEGORY_VENDOR,
                )
                yield None, TTS2HttpResponseEventType.END

        except Exception as e:
            error_message = str(e)
            self.ten_env.log_error(
                f"Speechmatics TTS error: {error_message}, request_id: {request_id}",
                category=LOG_CATEGORY_VENDOR,
            )

            # Prefer the real HTTP status when the error carries one; the
            # substring test is only a fallback, because "401" can also
            # appear by chance inside an unrelated error body.
            status = getattr(e, "status", None)
            mentions_auth = "authentication" in error_message.lower()
            if status is None:
                auth_failed = "401" in error_message or mentions_auth
            else:
                auth_failed = status == 401 or mentions_auth

            if auth_failed:
                yield error_message.encode(
                    "utf-8"
                ), TTS2HttpResponseEventType.INVALID_KEY_ERROR
            else:
                yield error_message.encode(
                    "utf-8"
                ), TTS2HttpResponseEventType.ERROR
        finally:
            # Close the upstream generator now (and with it the HTTP
            # response) rather than leaving it suspended until collected.
            await stream.aclose()

    async def _synthesize(self, text: str) -> AsyncIterator[bytes]:
        """POST *text* to the API once and yield the response body in chunks.

        Raises:
            RuntimeError: if the session is missing, or (as
                ``_SpeechmaticsAPIError``) if the API answers with a status
                other than 200.
        """
        from urllib.parse import quote

        # Explicit check instead of ``assert``, which is stripped under -O.
        if self.session is None:
            raise RuntimeError("Speechmatics TTS: session not initialized")

        params = self.config.params
        voice_id = params["voice_id"]
        output_format = params.get("output_format", "wav")
        base_url = params.get(
            "base_url", "https://preview.tts.speechmatics.com"
        )

        # Tolerate a trailing slash on base_url and escape the path segment;
        # the query string is left to aiohttp so the value is encoded too.
        url = f"{str(base_url).rstrip('/')}/generate/{quote(str(voice_id), safe='')}"
        query = {"output_format": str(output_format)} if output_format else None

        headers = {
            "Authorization": f"Bearer {params['api_key']}",
            "Content-Type": "application/json",
        }
        payload = {"text": text}

        self.ten_env.log_debug(
            f"Speechmatics TTS: requesting synthesis, voice: {voice_id}, format: {output_format}",
            category=LOG_CATEGORY_VENDOR,
        )

        async with self.session.post(
            url, params=query, headers=headers, json=payload
        ) as response:
            if response.status != 200:
                error_text = await response.text()
                raise _SpeechmaticsAPIError(response.status, error_text)

            # NOTE(review): the body is forwarded untouched. With "wav" it
            # presumably starts with a RIFF header and "mp3" is compressed
            # audio — confirm the consumer expects that rather than raw PCM.
            async for chunk in response.content.iter_chunked(4096):
                if chunk:
                    yield chunk

    @staticmethod
    def _is_retryable(error: Exception) -> bool:
        """Tell whether repeating the request could plausibly succeed."""
        status = getattr(error, "status", None)
        if status is not None:
            return status in SpeechmaticsTTSClient._RETRYABLE_STATUSES
        # No HTTP status: retry only transport-level failures, never
        # programming or configuration errors such as a missing key.
        return isinstance(
            error, (aiohttp.ClientError, asyncio.TimeoutError, OSError)
        )

    async def _synthesize_with_retry(
        self, text: str, request_id: str
    ) -> AsyncIterator[bytes]:
        """Yield audio chunks, retrying transient failures with backoff.

        At most ``max_retries`` extra attempts are made. The error is
        re-raised at once when it is not retryable, or when audio from the
        failed attempt was already yielded: restarting from the beginning
        would replay that audio to the consumer. If ``cancel`` was called,
        the generator stops quietly instead of retrying or raising.
        """
        failures = 0

        while True:
            delivered = False
            stream = self._synthesize(text)
            try:
                async for chunk in stream:
                    delivered = True
                    yield chunk
                return  # Success, exit retry loop
            except Exception as e:
                if self._is_cancelled:
                    return
                failures += 1
                if (
                    delivered
                    or failures > self.max_retries
                    or not self._is_retryable(e)
                ):
                    raise
                self.ten_env.log_warn(
                    f"Speechmatics TTS: retry {failures}/{self.max_retries} after error: {e}, request_id: {request_id}",
                    category=LOG_CATEGORY_VENDOR,
                )
            finally:
                # Release the HTTP response of this attempt before waiting.
                await stream.aclose()

            await asyncio.sleep(self.retry_delay * (2 ** (failures - 1)))

    async def clean(self):
        """Close the HTTP session, if any, and forget it."""
        self.ten_env.log_debug("Speechmatics TTS: clean() called.")
        # Detach first so a later get() reports "session not initialized"
        # instead of posting on a closed session.
        session, self.session = self.session, None
        if session is not None and not session.closed:
            await session.close()

    def get_extra_metadata(self) -> dict[str, Any]:
        """Return extra metadata for TTFB metrics."""
        return {
            "voice_id": self.config.params.get("voice_id", ""),
            "output_format": self.config.params.get("output_format", "wav"),
        }
a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/__init__.py b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/__init__.py new file mode 100644 index 0000000000..da402faf43 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/__init__.py @@ -0,0 +1,5 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/bootstrap b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/bootstrap new file mode 100755 index 0000000000..1a54df5c55 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/bootstrap @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e + +cd "$(dirname "${BASH_SOURCE[0]}")/../.." +pip install -r requirements.txt diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/bootstrap_and_start b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/bootstrap_and_start new file mode 100755 index 0000000000..89aaef454b --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/bootstrap_and_start @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +cd "$(dirname "${BASH_SOURCE[0]}")/../.." + +./tests/bin/bootstrap +./tests/bin/start diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/start b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/start new file mode 100755 index 0000000000..b736ea0de1 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/bin/start @@ -0,0 +1,21 @@ +#!/bin/bash + +set -e + +cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
+ +export PYTHONPATH=.ten/app:.ten/app/ten_packages/system/ten_runtime_python/lib:.ten/app/ten_packages/system/ten_runtime_python/interface:.ten/app/ten_packages/system/ten_ai_base/interface:$PYTHONPATH + +# If the Python app imports some modules that are compiled with a different +# version of libstdc++ (ex: PyTorch), the Python app may encounter confusing +# errors. To solve this problem, we can preload the correct version of +# libstdc++. +# +# export LD_PRELOAD=/lib/x86_64-linux-gnu/libstdc++.so.6 +# +# Another solution is to make sure the module 'ten_runtime_python' is imported +# _after_ the module that requires another version of libstdc++ is imported. +# +# Refer to https://github.com/pytorch/pytorch/issues/102360?from_wecom=1#issuecomment-1708989096 + +pytest -s tests/ "$@" diff --git a/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/test_config.py b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/test_config.py new file mode 100644 index 0000000000..23330fc469 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/speechmatics_tts_python/tests/test_config.py @@ -0,0 +1,111 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. 
import pytest
from speechmatics_tts_python.config import SpeechmaticsTTSConfig


def _make_config(**overrides):
    """Build a config holding the two mandatory params plus *overrides*."""
    params = {"api_key": "test_key", "voice_id": "sarah"}
    params.update(overrides)
    return SpeechmaticsTTSConfig(params=params)


def test_config_creation():
    """A config built from the mandatory params exposes them unchanged."""
    cfg = _make_config()
    assert cfg.params["api_key"] == "test_key"
    assert cfg.params["voice_id"] == "sarah"


def test_config_validation_missing_api_key():
    """validate() rejects params without an API key."""
    cfg = SpeechmaticsTTSConfig(params={"voice_id": "sarah"})
    with pytest.raises(ValueError, match="API key is required"):
        cfg.validate()


def test_config_validation_missing_voice_id():
    """validate() rejects params without a voice ID."""
    cfg = SpeechmaticsTTSConfig(params={"api_key": "test_key"})
    with pytest.raises(ValueError, match="Voice ID is required"):
        cfg.validate()


def test_config_validation_success():
    """validate() accepts params holding both mandatory entries."""
    _make_config().validate()  # Must not raise


def test_config_to_str_sensitive_handling():
    """to_str() with sensitive handling never shows the raw API key."""
    cfg = _make_config(api_key="secret_key_12345")
    rendered = cfg.to_str(sensitive_handling=True)
    assert "secret_key_12345" not in rendered


def test_config_default_values():
    """dump is off by default and dump_path points at the default file."""
    cfg = _make_config()
    assert cfg.dump is False
    assert "speechmatics_tts_in.pcm" in cfg.dump_path


def test_config_with_optional_params():
    """Optional params are stored exactly as given."""
    cfg = _make_config(
        voice_id="megan",
        output_format="mp3",
        sample_rate=24000,
        base_url="https://custom.api.com",
    )
    assert cfg.params["output_format"] == "mp3"
    assert cfg.params["sample_rate"] == 24000
    assert cfg.params["base_url"] == "https://custom.api.com"


def test_config_voice_options():
    """Every documented voice ID is kept as given."""
    for voice in ("sarah", "theo", "megan", "jack"):
        cfg = _make_config(voice_id=voice)
        assert cfg.params["voice_id"] == voice


def test_config_output_formats():
    """Both documented output formats are kept as given."""
    for fmt in ("wav", "mp3"):
        cfg = _make_config(output_format=fmt)
        assert cfg.params["output_format"] == fmt
import pytest
from speechmatics_tts_python.extension import SpeechmaticsTTSExtension
from speechmatics_tts_python.config import SpeechmaticsTTSConfig


def _new_extension():
    """Fresh extension instance with the name used throughout these tests."""
    return SpeechmaticsTTSExtension("test_extension")


def _extension_with(**overrides):
    """Extension whose config holds the mandatory params plus *overrides*."""
    params = {"api_key": "test_key", "voice_id": "sarah"}
    params.update(overrides)
    ext = _new_extension()
    ext.config = SpeechmaticsTTSConfig(params=params)
    return ext


def test_extension_creation():
    """The extension can be instantiated and reports its vendor."""
    ext = _new_extension()
    assert ext is not None
    assert ext.vendor() == "speechmatics"


def test_extension_vendor():
    """vendor() returns the Speechmatics vendor name."""
    assert _new_extension().vendor() == "speechmatics"


def test_extension_sample_rate_default():
    """Without a sample_rate param the rate is 16000."""
    assert _extension_with().synthesize_audio_sample_rate() == 16000


def test_extension_sample_rate_custom():
    """A sample_rate param is returned as the synthesis rate."""
    ext = _extension_with(sample_rate=24000)
    assert ext.synthesize_audio_sample_rate() == 24000


@pytest.mark.asyncio
async def test_create_config():
    """create_config() parses a JSON string into a SpeechmaticsTTSConfig."""
    ext = _new_extension()
    raw = '{"params": {"api_key": "test_key", "voice_id": "sarah"}}'
    cfg = await ext.create_config(raw)
    assert isinstance(cfg, SpeechmaticsTTSConfig)
    assert cfg.params["api_key"] == "test_key"
    assert cfg.params["voice_id"] == "sarah"


def test_extension_inheritance():
    """The extension derives from AsyncTTS2HttpExtension."""
    from ten_ai_base.tts2_http import AsyncTTS2HttpExtension

    assert isinstance(_new_extension(), AsyncTTS2HttpExtension)


def test_extension_config_types():
    """The same extension accepts a config for each documented voice."""
    ext = _new_extension()

    for voice in ("sarah", "theo", "megan", "jack"):
        ext.config = SpeechmaticsTTSConfig(
            params={"api_key": "test_key", "voice_id": voice}
        )
        assert ext.config.params["voice_id"] == voice


def test_extension_config_validation():
    """A config holding both mandatory params validates on the extension."""
    ext = _extension_with()
    ext.config.validate()  # Must not raise


def test_extension_with_all_params():
    """All supported params can be set together."""
    ext = _extension_with(
        voice_id="megan",
        output_format="mp3",
        sample_rate=24000,
        base_url="https://custom.api.com",
    )
    assert ext.config.params["voice_id"] == "megan"
    assert ext.config.params["output_format"] == "mp3"
    assert ext.synthesize_audio_sample_rate() == 24000