From c031d4bbf974c4c2ad028fcdec9646beaafdb5f7 Mon Sep 17 00:00:00 2001
From: HuyNVQ <nguyenhuy1431997@gmail.com>
Date: Thu, 12 Feb 2026 17:31:19 +0700
Subject: [PATCH] feat(extension): add Blaze STT and TTS extensions

---
 .../extension/blaze_stt_python/README.md      | 121 ++++++
 .../extension/blaze_stt_python/__init__.py    |  12 +
 .../extension/blaze_stt_python/blaze_stt.py   | 348 +++++++++++++++
 .../extension/blaze_stt_python/manifest.json  |  83 ++++
 .../extension/blaze_stt_python/property.json  |  11 +
 .../blaze_stt_python/requirements.txt         |   3 +
 .../blaze_stt_python/tests/__init__.py        |   4 +
 .../blaze_stt_python/tests/conftest.py        | 111 +++++
 .../blaze_stt_python/tests/pytest.ini         |   7 +
 .../blaze_stt_python/tests/requirements.txt   |   4 +
 .../blaze_stt_python/tests/test_blaze_stt.py  | 350 ++++++++++++++++
 .../extension/blaze_tts_python/README.md      | 125 ++++++
 .../extension/blaze_tts_python/__init__.py    |  12 +
 .../extension/blaze_tts_python/blaze_tts.py   | 396 ++++++++++++++++++
 .../extension/blaze_tts_python/manifest.json  |  86 ++++
 .../extension/blaze_tts_python/property.json  |  12 +
 .../blaze_tts_python/requirements.txt         |   3 +
 .../blaze_tts_python/tests/__init__.py        |   4 +
 .../blaze_tts_python/tests/conftest.py        |  60 +++
 .../blaze_tts_python/tests/pytest.ini         |   7 +
 .../blaze_tts_python/tests/requirements.txt   |   4 +
 .../blaze_tts_python/tests/test_blaze_tts.py  | 375 +++++++++++++++++
 22 files changed, 2138 insertions(+)
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/README.md
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/__init__.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/blaze_stt.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/manifest.json
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/property.json
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/requirements.txt
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/__init__.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/conftest.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/pytest.ini
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/requirements.txt
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/test_blaze_stt.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/README.md
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/__init__.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/blaze_tts.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/manifest.json
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/property.json
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/requirements.txt
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/__init__.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/conftest.py
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/pytest.ini
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/requirements.txt
 create mode 100644 ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/test_blaze_tts.py

diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/README.md b/ai_agents/agents/ten_packages/extension/blaze_stt_python/README.md
new file mode 100644
index 0000000000..54687ab6c5
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/README.md
@@ -0,0 +1,121 @@
+# Blaze STT Extension for TEN Framework
+
+Blaze Speech-to-Text (STT) extension for [TEN Framework](https://github.com/TEN-framework/ten-framework).
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+Or install dependencies directly:
+
+```bash
+pip install httpx pydantic
+```
+
+## Configuration
+
+### Environment Variables
+
+Set the following environment variables:
+
+```bash
+export BLAZE_STT_API_URL="http://localhost:8000"
+export BLAZE_STT_API_KEY="your-api-key-here"  # Optional
+```
+
+### Property.json (TEN Framework)
+
+The extension includes a `property.json` file with default configuration that TEN framework can use:
+
+```json
+{
+    "params": {
+        "api_url": "${env:BLAZE_STT_API_URL}",
+        "api_key": "${env:BLAZE_STT_API_KEY}",
+        "language": "vi",
+        "enable_segments": false,
+        "enable_refinement": false,
+        "timeout": 3600
+    }
+}
+```
+
+TEN framework will automatically read this file and use environment variables for configuration.
+
+## Usage
+
+### As TEN Framework Extension
+
+```python
+from blaze_stt_python import BlazeSTTExtension
+
+# Initialize extension from a dict config (e.g. from TEN framework); the dict key is "language", not "default_language"
+stt = BlazeSTTExtension(config={
+    "api_url": "http://localhost:8000",
+    "api_key": "your-api-key",
+    "language": "vi",
+})
+
+# Process audio using TEN framework interface
+result = stt.process({
+    "audio_data": audio_bytes,
+    "audio_content_type": "audio/wav",
+    "language": "vi",
+})
+
+print(result["transcription"])
+
+# Get extension metadata
+metadata = stt.get_metadata()
+print(metadata)
+```
+
+### As Direct Extension
+
+```python
+from blaze_stt_python import BlazeSTTExtension, BlazeSTTConfig
+
+# Initialize extension
+config = BlazeSTTConfig(
+    api_url="http://localhost:8000",
+    api_key="your-api-key",
+    default_language="vi",
+)
+stt = BlazeSTTExtension(config=config)
+
+# Transcribe audio
+result = stt.transcribe(
+    audio_data=audio_bytes,
+    audio_content_type="audio/wav",
+    language="vi",
+)
+
+print(result["transcription"])
+```
+
+## API Reference
+
+### BlazeSTTExtension
+
+**TEN Framework Interface Methods:**
+- `process(input_data)` - Process audio and return transcription (TEN framework interface)
+- `get_metadata()` - Get extension metadata (TEN framework interface)
+
+**Direct Methods:**
+
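+- `transcribe(audio_data, audio_file, audio_content_type, language, enable_segments, enable_refinement, lazy_process)` - Transcribe audio data (bytes) or file (UploadFile); provide at least one of `audio_data` or `audio_file` (`audio_file` is used if both are given)
+- `get_job_status(job_id)` - Get the status (and the transcription, once available) of a job started with `lazy_process=True`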
+
+## Supported Formats
+
+- `audio/wav` - WAV format
+- `audio/mpeg` - MP3 format
+- `audio/webm` - WebM format
+- `audio/ogg` - OGG format
+
+## License
+
+This extension is provided as-is for use with the TEN Framework and Blaze services.
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/__init__.py b/ai_agents/agents/ten_packages/extension/blaze_stt_python/__init__.py
new file mode 100644
index 0000000000..f76f61db82
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/__init__.py
@@ -0,0 +1,12 @@
+"""
+Blaze STT Extension for TEN Framework
+
+This extension provides Speech-to-Text (STT) functionality using Blaze API.
+Implements TEN framework extension interface.
+"""
+
+from .blaze_stt import BlazeSTTExtension, BlazeSTTConfig
+
+__all__ = ["BlazeSTTExtension", "BlazeSTTConfig"]
+__version__ = "1.0.0"
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/blaze_stt.py b/ai_agents/agents/ten_packages/extension/blaze_stt_python/blaze_stt.py
new file mode 100644
index 0000000000..e5800f2575
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/blaze_stt.py
@@ -0,0 +1,348 @@
+"""
+Blaze STT Extension Implementation
+
+This extension wraps the Blaze STT API endpoint for use in TEN framework.
+"""
+
+import os
+import logging
+from typing import Optional, Dict, Any, BinaryIO, Union
+from io import BytesIO
+
+import httpx
+from pydantic import BaseModel, Field
+
+# Import UploadFile for multipart support
+try:
+    from fastapi import UploadFile
+except ImportError:
+    # Fallback if fastapi is not available
+    UploadFile = None
+
+logger = logging.getLogger(__name__)
+
+
+class BlazeSTTConfig(BaseModel):
+    """Settings for the Blaze STT client; api_url and api_key default to BLAZE_STT_* env vars."""
+    # default_factory defers the env lookup to instantiation time instead of import time.
+    api_url: str = Field(
+        default_factory=lambda: os.getenv("BLAZE_STT_API_URL", "http://localhost:8000"),
+        description="Blaze STT API base URL"
+    )
+    api_key: Optional[str] = Field(
+        default_factory=lambda: os.getenv("BLAZE_STT_API_KEY", None),
+        description="API key for authentication (Bearer token)"
+    )
+    timeout: int = Field(
+        default=3600,
+        description="Request timeout in seconds"
+    )
+    enable_segments: bool = Field(
+        default=False,
+        description="Split audio into segments with timestamps"
+    )
+    enable_refinement: bool = Field(
+        default=False,
+        description="Apply post-processing refinement to improve accuracy"
+    )
+    default_language: str = Field(
+        default="vi",
+        description="Default language code (e.g., 'vi' for Vietnamese)"
+    )
+
+
+class BlazeSTTExtension:
+    """
+    Blaze STT Extension for TEN Framework
+    
+    This extension provides Speech-to-Text functionality by wrapping
+    the Blaze STT API endpoint: /v1/stt/execute
+    
+    Implements TEN framework extension interface with process() and get_metadata() methods.
+    """
+    
+    def __init__(self, config: Optional[Union[BlazeSTTConfig, Dict[str, Any]]] = None):
+        """
+        Initialize Blaze STT Extension
+        
+        Args:
+            config: BlazeSTTConfig object, or a dict (e.g. from TEN framework) whose
+                   "language" key maps to default_language. None, and dict keys that
+                   are absent or None, fall back to the BlazeSTTConfig defaults.
+        """
+        if config is None:
+            self.config = BlazeSTTConfig()
+        elif isinstance(config, dict):
+            # Pass on only supplied values so BlazeSTTConfig remains the single source
+            # of defaults and an explicit None never fails str/int/bool validation.
+            field_names = {"language": "default_language"}
+            keys = ("api_url", "api_key", "language", "enable_segments", "enable_refinement", "timeout")
+            self.config = BlazeSTTConfig(**{
+                field_names.get(key, key): config[key]
+                for key in keys
+                if config.get(key) is not None
+            })
+        else:
+            self.config = config
+        
+        self.base_url = self.config.api_url.rstrip("/")
+        self.endpoint = f"{self.base_url}/v1/stt/execute"
+        
+        logger.info(f"Blaze STT Extension initialized with API URL: {self.base_url}")
+    
+    def transcribe(
+        self,
+        audio_data: Optional[bytes] = None,
+        audio_file: Optional[UploadFile] = None,
+        audio_content_type: Optional[str] = None,
+        language: Optional[str] = None,
+        enable_segments: Optional[bool] = None,
+        enable_refinement: Optional[bool] = None,
+        lazy_process: bool = False,
+    ) -> Dict[str, Any]:
+        """
+        Transcribe audio data to text
+        
+        Similar to API endpoint /v1/stt/execute which accepts:
+        - UploadFile via multipart/form-data (field name: audio_file)
+        - Binary data in request body with Content-Type header
+        
+        Args:
+            audio_data: Binary audio data (bytes). Required if audio_file is None.
+            audio_file: FastAPI UploadFile object (sent as multipart/form-data).
+                       If provided, audio_data is ignored.
+            audio_content_type: MIME type. If omitted: "audio/wav" for bytes; for files, the file's own type or its extension.
+            language: Language code (e.g., 'vi' for Vietnamese). Defaults to config default.
+            enable_segments: Split audio into segments with timestamps
+            enable_refinement: Apply post-processing refinement
+            lazy_process: If True, process in background (returns job_id). If False, returns result immediately.
+        
+        Returns:
+            Dict containing transcription result or job information
+            
+        Raises:
+            httpx.HTTPError: If the API request fails
+            ValueError: If both audio_data and audio_file are None, or if audio_data is empty
+        """
+        if audio_file is None and audio_data is None:
+            raise ValueError("Either audio_data or audio_file must be provided")
+        
+        if audio_file is not None and audio_data is not None:
+            logger.warning("Both audio_file and audio_data provided. audio_file will be used.")
+        
+        # Use provided values or fall back to config defaults
+        language = language or self.config.default_language
+        enable_segments = enable_segments if enable_segments is not None else self.config.enable_segments
+        enable_refinement = enable_refinement if enable_refinement is not None else self.config.enable_refinement
+        
+        # Prepare headers
+        headers = {}
+        if self.config.api_key:
+            headers["Authorization"] = f"Bearer {self.config.api_key}"
+        
+        # Prepare query parameters
+        params = {
+            "language": language,
+            "enable_segments": str(enable_segments).lower(),
+            "enable_refinement": str(enable_refinement).lower(),
+            "lazy_process": str(lazy_process).lower(),
+        }
+        
+        try:
+            with httpx.Client(timeout=self.config.timeout) as client:
+                if audio_file is not None:
+                    if UploadFile is None:
+                        raise ImportError("fastapi is required to use audio_file parameter. Install with: pip install fastapi")
+                    
+                    # Reset file pointer if needed
+                    if hasattr(audio_file.file, 'seek'):
+                        audio_file.file.seek(0)
+                    
+                    # Get filename and content type
+                    filename = getattr(audio_file, 'filename', 'audio.mp3') or 'audio.mp3'
+                    content_type = audio_content_type or getattr(audio_file, 'content_type', None)
+                    
+                    # Infer from the extension when the type is missing or generic
+                    if not content_type or content_type == "application/octet-stream":
+                        ext = os.path.splitext(filename)[1].lower()
+                        known = {".wav": "audio/wav", ".mp3": "audio/mpeg", ".mpeg": "audio/mpeg",
+                                 ".webm": "audio/webm", ".ogg": "audio/ogg"}
+                        # Unknown extension: keep a generic type if one was given, else assume MP3
+                        content_type = known.get(ext, content_type or "audio/mpeg")
+                    
+                    files = {
+                        "audio_file": (filename, audio_file.file, content_type)
+                    }
+                    
+                    response = client.post(
+                        self.endpoint,
+                        files=files,
+                        headers=headers,
+                        params=params,
+                    )
+                
+                else:
+                    if not audio_data:
+                        raise ValueError("audio_data cannot be empty")
+                    
+                    content_type = audio_content_type or "audio/wav"
+                    headers["Content-Type"] = content_type
+                    
+                    response = client.post(
+                        self.endpoint,
+                        content=audio_data,
+                        headers=headers,
+                        params=params,
+                    )
+                
+                response.raise_for_status()
+                result = response.json()
+                
+                # Handle response format from service
+                # Response structure:
+                # - lazy_process=False: {"job_status": "completed", "result": {"data": {"transcription": "..."}}}
+                # - lazy_process=True: {"job_id": "...", "job_status": "processing"}
+                
+                # Extract transcription from nested result.data structure if available
+                transcription = ""
+                if result.get("result") and isinstance(result["result"], dict):
+                    result_data = result["result"].get("data", {})
+                    if isinstance(result_data, dict):
+                        transcription = result_data.get("transcription", "")
+                
+                # Return normalized format
+                return {
+                    "transcription": transcription,
+                    "job_id": result.get("job_id"),
+                    "job_status": result.get("job_status", "processing"),
+                    "raw_result": result,  # Include full result for advanced use cases
+                }
+        
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Blaze STT API error: {e.response.status_code} - {e.response.text}")
+            raise
+        except httpx.RequestError as e:
+            logger.error(f"Blaze STT request error: {str(e)}")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error in Blaze STT: {str(e)}")
+            raise
+    
+    def get_job_status(self, job_id: str) -> Dict[str, Any]:
+        """
+        Get status of a transcription job
+        
+        Args:
+            job_id: Job ID returned from transcribe with lazy_process=True (percent-encoded into the URL path)
+        
+        Returns:
+            Dict containing job status and result if available
+            Format: {
+                "job_id": "...",
+                "job_status": "processing" | "completed" | "failed",
+                "transcription": "...",  # Extracted from result.data.transcription
+                "result": {...}  # Full result structure
+            }
+        """
+        from urllib.parse import quote
+        headers = {}
+        if self.config.api_key:
+            headers["Authorization"] = f"Bearer {self.config.api_key}"
+        # Percent-encode job_id so characters such as "/", "?" or "#" cannot alter the request path.
+        endpoint = f"{self.base_url}/v1/stt/{quote(str(job_id), safe='')}"
+        try:
+            with httpx.Client(timeout=30) as client:
+                response = client.get(endpoint, headers=headers)
+                response.raise_for_status()
+                result = response.json()
+                
+                # Extract transcription from nested result.data structure if available
+                transcription = ""
+                if result.get("result") and isinstance(result["result"], dict):
+                    result_data = result["result"].get("data", {})
+                    if isinstance(result_data, dict):
+                        transcription = result_data.get("transcription", "")
+                
+                # Return normalized format
+                return {
+                    "job_id": result.get("job_id", job_id),
+                    "job_status": result.get("job_status", "processing"),
+                    "transcription": transcription,
+                    "result": result.get("result"),
+                    "raw_result": result,  # Include full result for advanced use cases
+                }
+        
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Blaze STT job status error: {e.response.status_code} - {e.response.text}")
+            raise
+        except httpx.RequestError as e:
+            logger.error(f"Blaze STT request error: {str(e)}")
+            raise
+    
+    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Dict-in/dict-out wrapper around transcribe()
+        
+        Args:
+            input_data: Request dict; recognised keys:
+                - audio_data (bytes): Required and non-empty
+                - audio_content_type (str): Optional, "audio/wav" when absent
+                - language (str): Optional, None when absent
+                - enable_segments (bool): Optional, None when absent
+                - enable_refinement (bool): Optional, None when absent
+                - lazy_process (bool): Optional, False when absent
+        
+        Returns:
+            Dict with keys:
+                - transcription (str): Text from transcribe(), "" if missing
+                - job_id: transcribe()'s "job_id" value (may be None)
+                - status (str): transcribe()'s "job_status", "completed" if missing
+                - raw_result (dict): The whole dict returned by transcribe()
+        
+        Raises:
+            ValueError: If "audio_data" is missing or empty
+        """
+        fetch = input_data.get
+        payload = fetch("audio_data")
+        if not payload:
+            raise ValueError("audio_data is required in input_data")
+        outcome = self.transcribe(
+            audio_data=payload,
+            audio_content_type=fetch("audio_content_type", "audio/wav"),
+            language=fetch("language"),
+            enable_segments=fetch("enable_segments"),
+            enable_refinement=fetch("enable_refinement"),
+            lazy_process=fetch("lazy_process", False),
+        )
+        return {
+            "transcription": outcome.get("transcription", ""),
+            "job_id": outcome.get("job_id"),
+            "status": outcome.get("job_status", "completed"),
+            "raw_result": outcome,
+        }
+    
+    def get_metadata(self) -> Dict[str, Any]:
+        """
+        Return static metadata describing this extension
+        
+        Returns:
+            Dict with name, version, description, capabilities, supported formats/languages and config_schema
+        """
+        return {
+            "name": "blaze_stt_python",
+            "version": "1.0.0",
+            "description": "Blaze Speech-to-Text extension for TEN framework",
+            "capabilities": ["stt", "transcription", "speech_to_text"],
+            "supported_formats": ["audio/wav", "audio/mpeg", "audio/webm", "audio/ogg"],
+            "supported_languages": ["vi", "en"],
+            # One entry per key that __init__ reads from a dict config
+            "config_schema": {
+                "api_url": {"type": "string", "required": False, "default": "http://localhost:8000"},
+                "api_key": {"type": "string", "required": False},
+                "language": {"type": "string", "required": False, "default": "vi"},
+                "enable_segments": {"type": "boolean", "required": False, "default": False},
+                "enable_refinement": {"type": "boolean", "required": False, "default": False},
+                "timeout": {"type": "number", "required": False, "default": 3600},
+            },
+        }
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/manifest.json b/ai_agents/agents/ten_packages/extension/blaze_stt_python/manifest.json
new file mode 100644
index 0000000000..2acb59ae8d
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/manifest.json
@@ -0,0 +1,83 @@
+{
+  "type": "extension",
+  "name": "blaze_stt_python",
+  "version": "1.0.0",
+  "display_name": {
+    "locales": {
+      "en-US": {
+        "content": "Blaze STT Extension"
+      },
+      "vi-VN": {
+        "content": "Blaze STT Extension"
+      }
+    }
+  },
+  "description": {
+    "locales": {
+      "en-US": {
+        "content": "Blaze Speech-to-Text extension for TEN Framework"
+      },
+      "vi-VN": {
+        "content": "Extension chuyển đổi giọng nói thành văn bản cho TEN Framework"
+      }
+    }
+  },
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "ten_runtime_python",
+      "version": "0.11"
+    },
+    {
+      "type": "system",
+      "name": "ten_ai_base",
+      "version": "0.7"
+    }
+  ],
+  "package": {
+    "include": [
+      "manifest.json",
+      "property.json",
+      "BUILD.gn",
+      "**.tent",
+      "**.py",
+      "README.md",
+      "requirements.txt"
+    ]
+  },
+  "api": {
+    "interface": [
+      {
+        "import_uri": "../../system/ten_ai_base/api/stt-interface.json"
+      }
+    ],
+    "property": {
+      "properties": {
+        "params": {
+          "type": "object",
+          "properties": {
+            "api_url": {
+              "type": "string"
+            },
+            "api_key": {
+              "type": "string"
+            },
+            "language": {
+              "type": "string"
+            },
+            "enable_segments": {
+              "type": "boolean"
+            },
+            "enable_refinement": {
+              "type": "boolean"
+            },
+            "timeout": {
+              "type": "number"
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/property.json b/ai_agents/agents/ten_packages/extension/blaze_stt_python/property.json
new file mode 100644
index 0000000000..200877eaf5
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/property.json
@@ -0,0 +1,11 @@
+{
+    "params": {
+        "api_url": "${env:BLAZE_STT_API_URL}",
+        "api_key": "${env:BLAZE_STT_API_KEY}",
+        "language": "vi",
+        "enable_segments": false,
+        "enable_refinement": false,
+        "timeout": 3600
+    }
+}
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/requirements.txt b/ai_agents/agents/ten_packages/extension/blaze_stt_python/requirements.txt
new file mode 100644
index 0000000000..e0a3d7ee1a
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/requirements.txt
@@ -0,0 +1,3 @@
+httpx>=0.24.0
+pydantic>=2.0.0
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/__init__.py b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/__init__.py
new file mode 100644
index 0000000000..4b0485c84e
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/__init__.py
@@ -0,0 +1,4 @@
+"""
+Tests for Blaze STT Extension
+"""
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/conftest.py b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/conftest.py
new file mode 100644
index 0000000000..a8ba55fb8a
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/conftest.py
@@ -0,0 +1,111 @@
+"""
+Pytest fixtures for Blaze STT Extension tests
+"""
+import io
+import pytest
+from typing import Optional
+
+try:
+    from fastapi import UploadFile
+except ImportError:
+    UploadFile = None
+
+
+@pytest.fixture
+def sample_audio_bytes():
+    """Return a 44-byte PCM WAV header that declares zero bytes of audio data"""
+    header_fields = [
+        b'RIFF',  # ChunkID
+        b'\x24\x00\x00\x00',  # ChunkSize (36)
+        b'WAVE',  # Format
+        b'fmt ',  # Subchunk1ID
+        b'\x10\x00\x00\x00',  # Subchunk1Size (16)
+        b'\x01\x00',  # AudioFormat (1 = PCM)
+        b'\x01\x00',  # NumChannels (1 = mono)
+        b'\x44\xac\x00\x00',  # SampleRate (44100)
+        b'\x88\x58\x01\x00',  # ByteRate
+        b'\x02\x00',  # BlockAlign
+        b'\x10\x00',  # BitsPerSample (16)
+        b'data',  # Subchunk2ID
+        b'\x00\x00\x00\x00',  # Subchunk2Size (0 for empty)
+    ]
+    return b''.join(header_fields)
+
+
+@pytest.fixture
+def mock_upload_file(sample_audio_bytes):
+    """Stand-in object exposing the filename, file and content_type attributes of an UploadFile"""
+    if UploadFile is None:
+        pytest.skip("fastapi not installed")
+    
+    # Duck-typed substitute; no real fastapi.UploadFile is constructed
+    class MockUploadFile:
+        def __init__(self, filename: str, file_obj, content_type: str):
+            self.content_type = content_type
+            self.file = file_obj
+            self.filename = filename
+    
+    stream = io.BytesIO(sample_audio_bytes)
+    stream.seek(0)
+    
+    return MockUploadFile(
+        "test_audio.wav",
+        stream,
+        "audio/wav",
+    )
+
+
+@pytest.fixture
+def mock_config():
+    """Dict-style configuration accepted by BlazeSTTExtension(config=...)"""
+    return dict(
+        api_url="http://localhost:8000",
+        api_key="test-api-key",
+        language="vi",
+        enable_segments=False,
+        enable_refinement=False,
+        timeout=3600,
+    )
+
+
+@pytest.fixture
+def mock_api_response_completed():
+    """Canned API payload for a finished job: transcription nested under result.data"""
+    # Assemble inside-out: data -> result envelope -> top-level response
+    data = {
+        "transcription": "Xin chào, đây là test transcription",
+        "is_successful": True,
+    }
+    result = {"status_code": 200, "error": "", "data": data}
+    
+    return {
+        "job_status": "completed",
+        "result": result,
+    }
+
+
+@pytest.fixture
+def mock_api_response_processing():
+    """Canned API payload for a job that is still running (no result yet)"""
+    return dict(
+        job_id="test-job-id-123",
+        job_status="processing",
+    )
+
+
+@pytest.fixture
+def mock_api_response_job_status():
+    """Canned payload for a get_job_status call on a finished job"""
+    # Assemble inside-out: data -> result envelope -> top-level response
+    data = {
+        "transcription": "Xin chào, đây là test transcription",
+        "is_successful": True,
+    }
+    result = {"status_code": 200, "error": "", "data": data}
+    
+    return {
+        "job_id": "test-job-id-123",
+        "job_status": "completed",
+        "result": result,
+    }
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/pytest.ini b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/pytest.ini
new file mode 100644
index 0000000000..0770e6b0fe
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+# testpaths is resolved relative to this file's directory, which is already tests/
+testpaths = .
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -v --tb=short
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/requirements.txt b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/requirements.txt
new file mode 100644
index 0000000000..ae8bbe77c0
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/requirements.txt
@@ -0,0 +1,4 @@
+pytest>=7.0.0
+pytest-mock>=3.10.0
+httpx>=0.24.0
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/test_blaze_stt.py b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/test_blaze_stt.py
new file mode 100644
index 0000000000..1d22d38001
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_stt_python/tests/test_blaze_stt.py
@@ -0,0 +1,350 @@
+"""
+Unit tests for BlazeSTTExtension
+"""
+import pytest
+from unittest.mock import Mock, patch, MagicMock
+import httpx
+
+from blaze_stt_python import BlazeSTTExtension, BlazeSTTConfig
+
+
+class TestBlazeSTTExtension:
+    """Test suite for BlazeSTTExtension"""
+    
+    def test_init_with_config_dict(self, mock_config):
+        """Test initialization with dict config"""
+        stt = BlazeSTTExtension(config=mock_config)
+        assert stt.config.api_url == "http://localhost:8000"
+        assert stt.config.api_key == "test-api-key"
+        assert stt.config.default_language == "vi"
+        assert stt.endpoint == "http://localhost:8000/v1/stt/execute"
+    
+    def test_init_with_config_object(self):
+        """Test initialization with BlazeSTTConfig object"""
+        config = BlazeSTTConfig(
+            api_url="http://test.com",
+            api_key="test-key",
+            default_language="en",
+        )
+        stt = BlazeSTTExtension(config=config)
+        assert stt.config.api_url == "http://test.com"
+        assert stt.config.api_key == "test-key"
+        assert stt.config.default_language == "en"
+    
+    def test_init_with_env_vars(self, monkeypatch):
+        """Test initialization with environment variables"""
+        monkeypatch.setenv("BLAZE_STT_API_URL", "http://env-test.com")
+        monkeypatch.setenv("BLAZE_STT_API_KEY", "env-key")
+        
+        stt = BlazeSTTExtension(config=None)
+        assert stt.config.api_url == "http://env-test.com"
+        assert stt.config.api_key == "env-key"
+    
+    @patch('httpx.Client')
+    def test_transcribe_with_bytes(self, mock_client_class, sample_audio_bytes, mock_api_response_completed):
+        """Test transcribe() with bytes (binary mode)"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_completed
+        mock_response.raise_for_status = Mock()
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        # Initialize extension
+        stt = BlazeSTTExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+        })
+        
+        # Call transcribe
+        result = stt.transcribe(
+            audio_data=sample_audio_bytes,
+            audio_content_type="audio/wav",
+            language="vi",
+        )
+        
+        # Verify request was made correctly
+        mock_client.post.assert_called_once()
+        call_args = mock_client.post.call_args
+        
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/stt/execute"
+        
+        # Check content (binary data)
+        assert call_args[1]["content"] == sample_audio_bytes
+        
+        # Check headers
+        headers = call_args[1]["headers"]
+        assert headers["Content-Type"] == "audio/wav"
+        assert headers["Authorization"] == "Bearer test-key"
+        
+        # Check params
+        params = call_args[1]["params"]
+        assert params["language"] == "vi"
+        assert params["enable_segments"] == "false"
+        assert params["enable_refinement"] == "false"
+        assert params["lazy_process"] == "false"
+        
+        # Verify result
+        assert result["transcription"] == "Xin chào, đây là test transcription"
+        assert result["job_status"] == "completed"
+    
+    @patch('httpx.Client')
+    def test_transcribe_with_upload_file(self, mock_client_class, mock_upload_file, mock_api_response_completed):
+        """Test transcribe() with UploadFile (multipart mode)"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_completed
+        mock_response.raise_for_status = Mock()
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        # Initialize extension
+        stt = BlazeSTTExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+        })
+        
+        # Call transcribe with UploadFile
+        result = stt.transcribe(
+            audio_file=mock_upload_file,
+            language="vi",
+        )
+        
+        # Verify request was made correctly
+        mock_client.post.assert_called_once()
+        call_args = mock_client.post.call_args
+        
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/stt/execute"
+        
+        # Check files (multipart)
+        assert "files" in call_args[1]
+        files = call_args[1]["files"]
+        assert "audio_file" in files
+        assert files["audio_file"][0] == "test_audio.wav"
+        
+        # Check headers (no Content-Type for multipart)
+        headers = call_args[1]["headers"]
+        assert "Content-Type" not in headers
+        assert headers["Authorization"] == "Bearer test-key"
+        
+        # Verify result
+        assert result["transcription"] == "Xin chào, đây là test transcription"
+        assert result["job_status"] == "completed"
+    
+    @patch('httpx.Client')
+    def test_transcribe_lazy_process(self, mock_client_class, sample_audio_bytes, mock_api_response_processing):
+        """Test transcribe() with lazy_process=True"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_processing
+        mock_response.raise_for_status = Mock()
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        # Initialize extension
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        # Call transcribe with lazy_process=True
+        result = stt.transcribe(
+            audio_data=sample_audio_bytes,
+            lazy_process=True,
+        )
+        
+        # Verify lazy_process parameter
+        call_args = mock_client.post.call_args
+        params = call_args[1]["params"]
+        assert params["lazy_process"] == "true"
+        
+        # Verify result
+        assert result["job_id"] == "test-job-id-123"
+        assert result["job_status"] == "processing"
+    
+    def test_transcribe_no_input(self):
+        """Test transcribe() with no input raises ValueError"""
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        with pytest.raises(ValueError, match="Either audio_data or audio_file must be provided"):
+            stt.transcribe()
+    
+    def test_transcribe_empty_bytes(self):
+        """Test transcribe() with empty bytes raises ValueError"""
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        with pytest.raises(ValueError, match="audio_data cannot be empty"):
+            stt.transcribe(audio_data=b"")
+    
+    @patch('httpx.Client')
+    def test_get_job_status(self, mock_client_class, mock_api_response_job_status):
+        """Test get_job_status()"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_job_status
+        mock_response.raise_for_status = Mock()
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        # Initialize extension
+        stt = BlazeSTTExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+        })
+        
+        # Call get_job_status
+        result = stt.get_job_status("test-job-id-123")
+        
+        # Verify request was made correctly
+        mock_client.get.assert_called_once()
+        call_args = mock_client.get.call_args
+        
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/stt/test-job-id-123"
+        
+        # Check headers
+        headers = call_args[1]["headers"]
+        assert headers["Authorization"] == "Bearer test-key"
+        
+        # Verify result
+        assert result["job_id"] == "test-job-id-123"
+        assert result["job_status"] == "completed"
+        assert result["transcription"] == "Xin chào, đây là test transcription"
+    
+    def test_process_method(self, sample_audio_bytes, mock_api_response_completed):
+        """Test process() method (TEN framework interface)"""
+        with patch('httpx.Client') as mock_client_class:
+            # Setup mock response
+            mock_response = Mock()
+            mock_response.json.return_value = mock_api_response_completed
+            mock_response.raise_for_status = Mock()
+            
+            mock_client = Mock()
+            mock_client.__enter__ = Mock(return_value=mock_client)
+            mock_client.__exit__ = Mock(return_value=False)
+            mock_client.post.return_value = mock_response
+            mock_client_class.return_value = mock_client
+            
+            # Initialize extension
+            stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+            
+            # Call process
+            result = stt.process({
+                "audio_data": sample_audio_bytes,
+                "audio_content_type": "audio/wav",
+                "language": "vi",
+            })
+            
+            # Verify result format
+            assert result["transcription"] == "Xin chào, đây là test transcription"
+            assert result["status"] == "completed"
+            assert "job_id" in result
+    
+    def test_process_method_missing_audio_data(self):
+        """Test process() method raises error when audio_data is missing"""
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        with pytest.raises(ValueError, match="audio_data is required in input_data"):
+            stt.process({})
+    
+    def test_get_metadata(self):
+        """Test get_metadata() method"""
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        metadata = stt.get_metadata()
+        
+        assert metadata["name"] == "blaze_stt_python"
+        assert metadata["version"] == "1.0.0"
+        assert "stt" in metadata["capabilities"]
+        assert "transcription" in metadata["capabilities"]
+        assert "speech_to_text" in metadata["capabilities"]
+        assert "audio/wav" in metadata["supported_formats"]
+        assert "vi" in metadata["supported_languages"]
+    
+    @patch('httpx.Client')
+    def test_transcribe_with_enable_segments(self, mock_client_class, sample_audio_bytes, mock_api_response_completed):
+        """Test transcribe() with enable_segments=True"""
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_completed
+        mock_response.raise_for_status = Mock()
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        result = stt.transcribe(
+            audio_data=sample_audio_bytes,
+            enable_segments=True,
+        )
+        
+        call_args = mock_client.post.call_args
+        params = call_args[1]["params"]
+        assert params["enable_segments"] == "true"
+    
+    @patch('httpx.Client')
+    def test_transcribe_with_enable_refinement(self, mock_client_class, sample_audio_bytes, mock_api_response_completed):
+        """Test transcribe() with enable_refinement=True"""
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_completed
+        mock_response.raise_for_status = Mock()
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        result = stt.transcribe(
+            audio_data=sample_audio_bytes,
+            enable_refinement=True,
+        )
+        
+        call_args = mock_client.post.call_args
+        params = call_args[1]["params"]
+        assert params["enable_refinement"] == "true"
+    
+    @patch('httpx.Client')
+    def test_transcribe_http_error(self, mock_client_class, sample_audio_bytes):
+        """Test transcribe() handles HTTP errors"""
+        # Setup mock response with error
+        mock_response = Mock()
+        mock_response.status_code = 400
+        mock_response.text = "Bad Request"
+        mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
+            "Bad Request",
+            request=Mock(),
+            response=mock_response
+        )
+        
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+        
+        stt = BlazeSTTExtension(config={"api_url": "http://localhost:8000"})
+        
+        with pytest.raises(httpx.HTTPStatusError):
+            stt.transcribe(audio_data=sample_audio_bytes)
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/README.md b/ai_agents/agents/ten_packages/extension/blaze_tts_python/README.md
new file mode 100644
index 0000000000..b44fae58a3
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/README.md
@@ -0,0 +1,125 @@
+# Blaze TTS Extension for TEN Framework
+
+Blaze Text-to-Speech (TTS) extension for [TEN Framework](https://github.com/TEN-framework/ten-framework).
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+Or install dependencies directly:
+
+```bash
+pip install httpx pydantic
+```
+
+## Configuration
+
+### Environment Variables
+
+Set the following environment variables:
+
+```bash
+export BLAZE_TTS_API_URL="http://localhost:8000"
+export BLAZE_TTS_API_KEY="your-api-key-here"  # Optional
+```
+
+### Property.json (TEN Framework)
+
+The extension ships a `property.json` file containing the default configuration that the TEN framework loads:
+
+```json
+{
+    "params": {
+        "api_url": "${env:BLAZE_TTS_API_URL}",
+        "api_key": "${env:BLAZE_TTS_API_KEY}",
+        "language": "vi",
+        "speaker_id": null,
+        "audio_speed": 1.0,
+        "audio_quality": 64,
+        "timeout": 3600
+    }
+}
+```
+
+The TEN framework reads this file automatically and fills each `${env:...}` placeholder from the matching environment variable.
+
+## Usage
+
+### As TEN Framework Extension
+
+```python
+from blaze_tts_python import BlazeTTSExtension
+
+# Initialize extension (can accept dict config from TEN framework)
+tts = BlazeTTSExtension(config={
+    "api_url": "http://localhost:8000",
+    "api_key": "your-api-key",
+    "speaker_id": "speaker-123",
+})
+
+# Synthesize text using TEN framework interface
+result = tts.process({
+    "text": "Xin chào",
+    "speaker_id": "speaker-123",
+    "language": "vi",
+})
+
+audio_bytes = result.get("audio_data")
+
+# Get extension metadata
+metadata = tts.get_metadata()
+print(metadata)
+```
+
+### As Direct Extension
+
+```python
+from blaze_tts_python import BlazeTTSExtension, BlazeTTSConfig
+
+# Initialize extension
+config = BlazeTTSConfig(
+    api_url="http://localhost:8000",
+    api_key="your-api-key",
+    default_language="vi",
+)
+tts = BlazeTTSExtension(config=config)
+
+# Synthesize text
+result = tts.synthesize(
+    text="Xin chào",
+    speaker_id="speaker-123",
+    language="vi",
+)
+
+# Download audio
+job_id = result["id"]
+audio_bytes = tts.download_audio(job_id)
+```
+
+## API Reference
+
+### BlazeTTSExtension
+
+**TEN Framework Interface Methods:**
+- `process(input_data)` - Process text and return audio (TEN framework interface)
+- `get_metadata()` - Get extension metadata (TEN framework interface)
+
+**Direct Methods:**
+
+- `synthesize(text, speaker_id, language, audio_speed, audio_quality, ...)` - Synthesize text to speech
+- `get_speakers()` - Get list of available speakers
+- `download_audio(job_id, output_path)` - Download generated audio
+- `get_job_info(job_id)` - Get TTS job information
+
+## Supported Formats
+
+- `wav` - WAV format
+- `mp3` - MP3 format
+- `ogg` - OGG format
+
+## License
+
+This extension is provided as-is for use with the TEN Framework and Blaze services.
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/__init__.py b/ai_agents/agents/ten_packages/extension/blaze_tts_python/__init__.py
new file mode 100644
index 0000000000..a454f4eff6
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/__init__.py
@@ -0,0 +1,12 @@
+"""
+Blaze TTS Extension for TEN Framework
+
+This extension provides Text-to-Speech (TTS) functionality using Blaze API.
+Implements TEN framework extension interface.
+"""
+
+from .blaze_tts import BlazeTTSExtension, BlazeTTSConfig
+
+__all__ = ["BlazeTTSExtension", "BlazeTTSConfig"]
+__version__ = "1.0.0"
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/blaze_tts.py b/ai_agents/agents/ten_packages/extension/blaze_tts_python/blaze_tts.py
new file mode 100644
index 0000000000..0a72553715
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/blaze_tts.py
@@ -0,0 +1,396 @@
+"""
+Blaze TTS Extension Implementation
+
+This extension wraps the Blaze TTS API endpoint for use in TEN framework.
+"""
+
+import os
+import logging
+from typing import Optional, Dict, Any, Union
+from enum import Enum
+
+import httpx
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+
+class AudioFormat(str, Enum):
+    """Values accepted for the `audio_format` field of the TTS request payload"""
+    WAV = "wav"
+    MP3 = "mp3"
+    OGG = "ogg"
+
+
+class MediaType(str, Enum):
+    """Values accepted for the `media_type` field of the TTS request payload"""
+    AUDIO_OGG_CODECS_OPUS = "audio/ogg; codecs=opus"
+    AUDIO_MP3 = "audio/mp3"
+    AUDIO_WAV = "audio/wav"
+
+
+class Normalization(str, Enum):
+    """Values accepted for the `normalization` field of the TTS request payload"""
+    NO = "no"
+    YES = "yes"
+
+
+class Model(str, Enum):
+    """Model identifiers accepted for the `model` field of the TTS request payload"""
+    V1_5_FLASH = "v1.5_flash"
+    V1_5_PRO = "v1.5_pro"
+
+
+class BlazeTTSConfig(BaseModel):
+    """Configuration for Blaze TTS Extension"""
+    
+    api_url: str = Field(
+        default=os.getenv("BLAZE_TTS_API_URL", "http://localhost:8000"),
+        description="Blaze TTS API base URL"
+    )
+    api_key: Optional[str] = Field(
+        default=os.getenv("BLAZE_TTS_API_KEY", None),
+        description="API key for authentication (Bearer token)"
+    )
+    timeout: int = Field(
+        default=3600,
+        description="Request timeout in seconds"
+    )
+    default_language: str = Field(
+        default="vi",
+        description="Default language code (e.g., 'vi' for Vietnamese)"
+    )
+    default_speaker_id: Optional[str] = Field(
+        default=None,
+        description="Default speaker ID"
+    )
+    default_audio_speed: float = Field(
+        default=1.0,
+        description="Default audio speed multiplier"
+    )
+    default_audio_quality: int = Field(
+        default=64,
+        description="Default audio quality (kbps)"
+    )
+
+
+class BlazeTTSExtension:
+    """
+    Blaze TTS Extension for TEN Framework
+    
+    This extension provides Text-to-Speech functionality by wrapping
+    the Blaze TTS API endpoint: /v1/tts
+    
+    Implements TEN framework extension interface with process() and get_metadata() methods.
+    """
+    
+    def __init__(self, config: Optional[Union[BlazeTTSConfig, Dict[str, Any]]] = None):
+        """
+        Initialize Blaze TTS Extension
+        
+        Args:
+            config: Configuration object (BlazeTTSConfig) or dict from TEN framework.
+                   If None, uses environment variables.
+                   If dict, converts to BlazeTTSConfig.
+        """
+        if config is None:
+            self.config = BlazeTTSConfig()
+        elif isinstance(config, dict):
+            # Convert dict from TEN framework to BlazeTTSConfig
+            self.config = BlazeTTSConfig(
+                api_url=config.get("api_url", "http://localhost:8000"),
+                api_key=config.get("api_key"),
+                default_language=config.get("language", "vi"),
+                default_speaker_id=config.get("speaker_id"),
+                default_audio_speed=config.get("audio_speed", 1.0),
+                default_audio_quality=config.get("audio_quality", 64),
+                timeout=config.get("timeout", 3600),
+            )
+        else:
+            self.config = config
+        
+        self.base_url = self.config.api_url.rstrip("/")
+        self.endpoint = f"{self.base_url}/v1/tts"
+        
+        logger.info(f"Blaze TTS Extension initialized with API URL: {self.base_url}")
+    
+    def synthesize(
+        self,
+        text: str,
+        speaker_id: Optional[str] = None,
+        language: Optional[str] = None,
+        audio_speed: Optional[float] = None,
+        audio_quality: Optional[int] = None,
+        audio_format: Union[AudioFormat, str] = AudioFormat.WAV,
+        media_type: Union[MediaType, str] = MediaType.AUDIO_OGG_CODECS_OPUS,
+        normalization: Union[Normalization, str] = Normalization.NO,
+        model: Union[Model, str] = Model.V1_5_PRO,
+    ) -> Dict[str, Any]:
+        """
+        Synthesize text to speech
+        
+        Args:
+            text: Text to synthesize
+            speaker_id: Speaker/voice ID. Required if not set in config.
+            language: Language code (e.g., 'vi' for Vietnamese). Defaults to config default.
+            audio_speed: Audio speed multiplier (default: 1.0)
+            audio_quality: Audio quality in kbps (default: 64)
+            audio_format: Audio format (wav, mp3, ogg)
+            media_type: Media type
+            normalization: Normalization option (no, yes)
+            model: Model version to use
+        
+        Returns:
+            Dict containing TTS result with job_id or audio URL
+            
+        Raises:
+            httpx.HTTPError: If the API request fails
+            ValueError: If text is empty or speaker_id is missing
+        """
+        if not text:
+            raise ValueError("text cannot be empty")
+        
+        speaker_id = speaker_id or self.config.default_speaker_id
+        if not speaker_id:
+            raise ValueError("speaker_id is required (either as parameter or in config)")
+        
+        # Use provided values or fall back to config defaults
+        language = language or self.config.default_language
+        audio_speed = audio_speed if audio_speed is not None else self.config.default_audio_speed
+        audio_quality = audio_quality if audio_quality is not None else self.config.default_audio_quality
+        
+        # Convert enum to string if needed
+        if isinstance(audio_format, AudioFormat):
+            audio_format = audio_format.value
+        if isinstance(media_type, MediaType):
+            media_type = media_type.value
+        if isinstance(normalization, Normalization):
+            normalization = normalization.value
+        if isinstance(model, Model):
+            model = model.value
+        
+        # Prepare request payload
+        payload = {
+            "query": text,
+            "language": language,
+            "audio_speed": audio_speed,
+            "audio_quality": audio_quality,
+            "audio_format": audio_format,
+            "speaker_id": speaker_id,
+            "media_type": media_type,
+            "normalization": normalization,
+            "model": model,
+        }
+        
+        # Prepare headers
+        headers = {
+            "Content-Type": "application/json",
+        }
+        
+        if self.config.api_key:
+            headers["Authorization"] = f"Bearer {self.config.api_key}"
+        
+        try:
+            with httpx.Client(timeout=self.config.timeout) as client:
+                response = client.post(
+                    self.endpoint,
+                    json=payload,
+                    headers=headers,
+                )
+                response.raise_for_status()
+                return response.json()
+        
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Blaze TTS API error: {e.response.status_code} - {e.response.text}")
+            raise
+        except httpx.RequestError as e:
+            logger.error(f"Blaze TTS request error: {str(e)}")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error in Blaze TTS: {str(e)}")
+            raise
+    
+    def get_speakers(self) -> Dict[str, Any]:
+        """
+        Get list of available speakers/voices
+        
+        Returns:
+            Dict containing list of speakers
+        """
+        headers = {}
+        if self.config.api_key:
+            headers["Authorization"] = f"Bearer {self.config.api_key}"
+        
+        endpoint = f"{self.base_url}/v1/tts/list-speaker-ids"
+        
+        try:
+            with httpx.Client(timeout=30) as client:
+                response = client.get(endpoint, headers=headers)
+                response.raise_for_status()
+                return response.json()
+        
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Blaze TTS speakers error: {e.response.status_code} - {e.response.text}")
+            raise
+        except httpx.RequestError as e:
+            logger.error(f"Blaze TTS request error: {str(e)}")
+            raise
+    
+    def download_audio(self, job_id: str, output_path: Optional[str] = None) -> bytes:
+        """
+        Download generated audio file
+        
+        Args:
+            job_id: Job ID returned from synthesize
+            output_path: Optional path to save the audio file. If None, returns bytes.
+        
+        Returns:
+            Audio file bytes
+        """
+        headers = {}
+        if self.config.api_key:
+            headers["Authorization"] = f"Bearer {self.config.api_key}"
+        
+        endpoint = f"{self.base_url}/v1/tts/{job_id}/download"
+        
+        try:
+            with httpx.Client(timeout=self.config.timeout) as client:
+                response = client.get(endpoint, headers=headers)
+                response.raise_for_status()
+                
+                audio_bytes = response.content
+                
+                if output_path:
+                    with open(output_path, "wb") as f:
+                        f.write(audio_bytes)
+                    logger.info(f"Audio saved to {output_path}")
+                
+                return audio_bytes
+        
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Blaze TTS download error: {e.response.status_code} - {e.response.text}")
+            raise
+        except httpx.RequestError as e:
+            logger.error(f"Blaze TTS request error: {str(e)}")
+            raise
+    
+    def get_job_info(self, job_id: str) -> Dict[str, Any]:
+        """
+        Get information about a TTS job
+        
+        Args:
+            job_id: Job ID returned from synthesize
+        
+        Returns:
+            Dict containing job information
+        """
+        headers = {}
+        if self.config.api_key:
+            headers["Authorization"] = f"Bearer {self.config.api_key}"
+        
+        endpoint = f"{self.base_url}/v1/tts/{job_id}/info"
+        
+        try:
+            with httpx.Client(timeout=30) as client:
+                response = client.get(endpoint, headers=headers)
+                response.raise_for_status()
+                return response.json()
+        
+        except httpx.HTTPStatusError as e:
+            logger.error(f"Blaze TTS job info error: {e.response.status_code} - {e.response.text}")
+            raise
+        except httpx.RequestError as e:
+            logger.error(f"Blaze TTS request error: {str(e)}")
+            raise
+    
+    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Process input according to TEN framework interface
+        
+        This method implements the TEN framework extension interface.
+        
+        Args:
+            input_data: Input dict with:
+                - text (str): Required. Text to synthesize
+                - speaker_id (str): Optional. Speaker ID (default: from config)
+                - language (str): Optional. Language code (default: from config)
+                - audio_speed (float): Optional. Audio speed (default: 1.0)
+                - audio_quality (int): Optional. Audio quality in kbps (default: 64)
+                - audio_format (str): Optional. Audio format (default: "wav")
+                - download_audio (bool): Optional. Download audio immediately (default: True)
+        
+        Returns:
+            Output dict with:
+                - audio_data (bytes): Audio bytes if download_audio=True
+                - job_id (str): Job ID
+                - format (str): Audio format
+                - status (str): Job status
+        """
+        text = input_data.get("text")
+        if not text:
+            raise ValueError("text is required in input_data")
+        
+        result = self.synthesize(
+            text=text,
+            speaker_id=input_data.get("speaker_id"),
+            language=input_data.get("language"),
+            audio_speed=input_data.get("audio_speed", 1.0),
+            audio_quality=input_data.get("audio_quality", 64),
+            audio_format=input_data.get("audio_format", "wav"),
+            media_type=input_data.get("media_type", MediaType.AUDIO_OGG_CODECS_OPUS),
+            normalization=input_data.get("normalization", "no"),
+            model=input_data.get("model", Model.V1_5_PRO),
+        )
+        
+        job_id = result.get("id") or result.get("job_id")
+        
+        # If immediate result requested, download audio
+        if job_id and input_data.get("download_audio", True):
+            try:
+                audio_bytes = self.download_audio(job_id)
+                return {
+                    "audio_data": audio_bytes,
+                    "job_id": job_id,
+                    "format": input_data.get("audio_format", "mp3"),
+                    "status": "completed",
+                    "size_bytes": len(audio_bytes),
+                }
+            except Exception as e:
+                # If download fails, return job_id for later retrieval
+                return {
+                    "job_id": job_id,
+                    "status": "processing",
+                    "error": str(e),
+                }
+        
+        return {
+            "job_id": job_id,
+            "status": "processing",
+        }
+    
+    def get_metadata(self) -> Dict[str, Any]:
+        """
+        Return extension metadata for TEN framework
+        
+        This method implements the TEN framework extension interface.
+        
+        Returns:
+            Dict with extension information
+        """
+        return {
+            "name": "blaze_tts_python",
+            "version": "1.0.0",
+            "description": "Blaze Text-to-Speech extension for TEN framework",
+            "capabilities": ["tts", "synthesis", "text_to_speech"],
+            "supported_formats": ["mp3", "wav", "ogg"],
+            "supported_languages": ["vi", "en"],
+            "config_schema": {
+                "api_url": {"type": "string", "required": False, "default": "http://localhost:8000"},
+                "api_key": {"type": "string", "required": False},
+                "language": {"type": "string", "required": False, "default": "vi"},
+                "speaker_id": {"type": "string", "required": False},
+                "audio_speed": {"type": "float", "required": False, "default": 1.0},
+                "audio_quality": {"type": "integer", "required": False, "default": 64},
+            },
+        }
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/manifest.json b/ai_agents/agents/ten_packages/extension/blaze_tts_python/manifest.json
new file mode 100644
index 0000000000..a25b313a23
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/manifest.json
@@ -0,0 +1,86 @@
+{
+  "type": "extension",
+  "name": "blaze_tts_python",
+  "version": "1.0.0",
+  "display_name": {
+    "locales": {
+      "en-US": {
+        "content": "Blaze TTS Extension"
+      },
+      "vi-VN": {
+        "content": "Blaze TTS Extension"
+      }
+    }
+  },
+  "description": {
+    "locales": {
+      "en-US": {
+        "content": "Blaze Text-to-Speech extension for TEN Framework"
+      },
+      "vi-VN": {
+        "content": "Extension chuyển đổi văn bản thành giọng nói cho TEN Framework"
+      }
+    }
+  },
+  "dependencies": [
+    {
+      "type": "system",
+      "name": "ten_runtime_python",
+      "version": "0.11"
+    },
+    {
+      "type": "system",
+      "name": "ten_ai_base",
+      "version": "0.7"
+    }
+  ],
+  "package": {
+    "include": [
+      "manifest.json",
+      "property.json",
+      "BUILD.gn",
+      "**.tent",
+      "**.py",
+      "README.md",
+      "requirements.txt"
+    ]
+  },
+  "api": {
+    "interface": [
+      {
+        "import_uri": "../../system/ten_ai_base/api/tts-interface.json"
+      }
+    ],
+    "property": {
+      "properties": {
+        "params": {
+          "type": "object",
+          "properties": {
+            "api_url": {
+              "type": "string"
+            },
+            "api_key": {
+              "type": "string"
+            },
+            "language": {
+              "type": "string"
+            },
+            "speaker_id": {
+              "type": "string"
+            },
+            "audio_speed": {
+              "type": "float64"
+            },
+            "audio_quality": {
+              "type": "int64"
+            },
+            "timeout": {
+              "type": "int64"
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/property.json b/ai_agents/agents/ten_packages/extension/blaze_tts_python/property.json
new file mode 100644
index 0000000000..25ca345113
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/property.json
@@ -0,0 +1,12 @@
+{
+    "params": {
+        "api_url": "${env:BLAZE_TTS_API_URL}",
+        "api_key": "${env:BLAZE_TTS_API_KEY}",
+        "language": "vi",
+        "speaker_id": null,
+        "audio_speed": 1.0,
+        "audio_quality": 64,
+        "timeout": 3600
+    }
+}
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/requirements.txt b/ai_agents/agents/ten_packages/extension/blaze_tts_python/requirements.txt
new file mode 100644
index 0000000000..e0a3d7ee1a
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/requirements.txt
@@ -0,0 +1,3 @@
+httpx>=0.24.0
+pydantic>=2.0.0
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/__init__.py b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/__init__.py
new file mode 100644
index 0000000000..48a47bcf6a
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/__init__.py
@@ -0,0 +1,4 @@
+"""
+Tests for Blaze TTS Extension
+"""
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/conftest.py b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/conftest.py
new file mode 100644
index 0000000000..644eedb0bb
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/conftest.py
@@ -0,0 +1,60 @@
+"""
+Pytest fixtures for Blaze TTS Extension tests
+"""
+import pytest
+
+
+@pytest.fixture
+def mock_config():
+    """Return a dict config (URL, key, language, speaker, speed, quality, timeout) for BlazeTTSExtension."""
+    return {
+        "api_url": "http://localhost:8000",
+        "api_key": "test-api-key",
+        "language": "vi",
+        "speaker_id": "test-speaker-123",
+        "audio_speed": 1.0,
+        "audio_quality": 64,
+        "timeout": 3600,
+    }
+
+
+@pytest.fixture
+def mock_api_response_synthesize():
+    """Return the JSON payload the tests use as a completed synthesize response."""
+    return {
+        "job_id": "test-tts-job-123",
+        "job_status": "completed",
+        "audio_url": "https://example.com/audio/test-tts-job-123.mp3",
+    }
+
+
+@pytest.fixture
+def mock_api_response_speakers():
+    """Return a speaker-listing payload: two Vietnamese speakers under the "list_speakers" key."""
+    return {
+        "list_speakers": [
+            {
+                "id": "speaker-1",
+                "name": "Vietnamese Female",
+                "language": "vi",
+                "gender": "female",
+            },
+            {
+                "id": "speaker-2",
+                "name": "Vietnamese Male",
+                "language": "vi",
+                "gender": "male",
+            },
+        ]
+    }
+
+
+@pytest.fixture
+def mock_api_response_job_info():
+    """Return a completed job-info payload (same keys and values as mock_api_response_synthesize)."""
+    return {
+        "job_id": "test-tts-job-123",
+        "job_status": "completed",
+        "audio_url": "https://example.com/audio/test-tts-job-123.mp3",
+    }
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/pytest.ini b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/pytest.ini
new file mode 100644
index 0000000000..0770e6b0fe
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/pytest.ini
@@ -0,0 +1,7 @@
+[pytest]
+testpaths = .
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts = -v --tb=short
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/requirements.txt b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/requirements.txt
new file mode 100644
index 0000000000..ae8bbe77c0
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/requirements.txt
@@ -0,0 +1,4 @@
+pytest>=7.0.0
+pytest-mock>=3.10.0
+httpx>=0.24.0
+
diff --git a/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/test_blaze_tts.py b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/test_blaze_tts.py
new file mode 100644
index 0000000000..913c58312c
--- /dev/null
+++ b/ai_agents/agents/ten_packages/extension/blaze_tts_python/tests/test_blaze_tts.py
@@ -0,0 +1,375 @@
+"""
+Unit tests for BlazeTTSExtension
+"""
+import pytest
+from unittest.mock import Mock, patch
+import httpx
+
+from blaze_tts_python import BlazeTTSExtension, BlazeTTSConfig
+
+
+class TestBlazeTTSExtension:
+    """Unit tests for BlazeTTSExtension; every HTTP call goes through a patched httpx.Client."""
+
+    def test_init_with_config_dict(self, mock_config):
+        """Dict config is mapped onto tts.config and the /v1/tts endpoint is derived from api_url"""
+        tts = BlazeTTSExtension(config=mock_config)
+        assert tts.config.api_url == "http://localhost:8000"
+        assert tts.config.api_key == "test-api-key"
+        assert tts.config.default_language == "vi"
+        assert tts.config.default_speaker_id == "test-speaker-123"
+        assert tts.endpoint == "http://localhost:8000/v1/tts"
+
+    def test_init_with_config_object(self):
+        """A BlazeTTSConfig instance is accepted as config and its fields are preserved"""
+        config = BlazeTTSConfig(
+            api_url="http://test.com",
+            api_key="test-key",
+            default_language="en",
+            default_speaker_id="speaker-456",
+        )
+        tts = BlazeTTSExtension(config=config)
+        assert tts.config.api_url == "http://test.com"
+        assert tts.config.api_key == "test-key"
+        assert tts.config.default_language == "en"
+        assert tts.config.default_speaker_id == "speaker-456"
+
+    def test_init_with_env_vars(self, monkeypatch):
+        """With config=None, api_url and api_key come from the BLAZE_TTS_* environment variables"""
+        monkeypatch.setenv("BLAZE_TTS_API_URL", "http://env-test.com")
+        monkeypatch.setenv("BLAZE_TTS_API_KEY", "env-key")
+
+        tts = BlazeTTSExtension(config=None)
+        assert tts.config.api_url == "http://env-test.com"
+        assert tts.config.api_key == "env-key"
+
+    @patch('httpx.Client')
+    def test_synthesize(self, mock_client_class, mock_api_response_synthesize):
+        """synthesize() POSTs text/speaker/language with a Bearer header and returns the JSON body"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_synthesize
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        # Initialize extension
+        tts = BlazeTTSExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+            "speaker_id": "test-speaker-123",
+        })
+
+        # Call synthesize
+        result = tts.synthesize(
+            text="Xin chào",
+            speaker_id="test-speaker-123",
+            language="vi",
+        )
+
+        # Verify request was made correctly
+        mock_client.post.assert_called_once()
+        call_args = mock_client.post.call_args
+
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/tts"
+
+        # Check JSON body
+        json_data = call_args[1]["json"]
+        assert json_data["text"] == "Xin chào"
+        assert json_data["speaker_id"] == "test-speaker-123"
+        assert json_data["language"] == "vi"
+
+        # Check headers
+        headers = call_args[1]["headers"]
+        assert headers["Authorization"] == "Bearer test-key"
+
+        # Verify result
+        assert result["job_id"] == "test-tts-job-123"
+        assert result["job_status"] == "completed"
+        assert result["audio_url"] == "https://example.com/audio/test-tts-job-123.mp3"
+
+    def test_synthesize_empty_text(self):
+        """Test synthesize() with empty text raises ValueError"""
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        with pytest.raises(ValueError, match="text cannot be empty"):
+            tts.synthesize(text="", speaker_id="test-speaker")
+
+    def test_synthesize_missing_speaker_id(self):
+        """Test synthesize() without speaker_id raises ValueError"""
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        with pytest.raises(ValueError, match="speaker_id is required"):
+            tts.synthesize(text="Hello")
+
+    @patch('httpx.Client')
+    def test_synthesize_with_default_speaker_id(self, mock_client_class, mock_api_response_synthesize):
+        """Test synthesize() uses default speaker_id from config"""
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_synthesize
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        tts = BlazeTTSExtension(config={
+            "api_url": "http://localhost:8000",
+            "speaker_id": "default-speaker",
+        })
+
+        tts.synthesize(text="Hello")
+
+        call_args = mock_client.post.call_args
+        json_data = call_args[1]["json"]
+        assert json_data["speaker_id"] == "default-speaker"
+
+    @patch('httpx.Client')
+    def test_get_speakers(self, mock_client_class, mock_api_response_speakers):
+        """get_speakers() GETs /v1/tts/list-speaker-ids with a Bearer header and returns the JSON body"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_speakers
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        # Initialize extension
+        tts = BlazeTTSExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+        })
+
+        # Call get_speakers
+        result = tts.get_speakers()
+
+        # Verify request was made correctly
+        mock_client.get.assert_called_once()
+        call_args = mock_client.get.call_args
+
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/tts/list-speaker-ids"
+
+        # Check headers
+        headers = call_args[1]["headers"]
+        assert headers["Authorization"] == "Bearer test-key"
+
+        # Verify result
+        assert len(result["list_speakers"]) == 2
+        assert result["list_speakers"][0]["id"] == "speaker-1"
+        assert result["list_speakers"][1]["id"] == "speaker-2"
+
+    @patch('httpx.Client')
+    def test_get_job_info(self, mock_client_class, mock_api_response_job_info):
+        """get_job_info() GETs /v1/tts/<job_id>/info with a Bearer header and returns the JSON body"""
+        # Setup mock response
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_job_info
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        # Initialize extension
+        tts = BlazeTTSExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+        })
+
+        # Call get_job_info
+        result = tts.get_job_info("test-tts-job-123")
+
+        # Verify request was made correctly
+        mock_client.get.assert_called_once()
+        call_args = mock_client.get.call_args
+
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/tts/test-tts-job-123/info"
+
+        # Check headers
+        headers = call_args[1]["headers"]
+        assert headers["Authorization"] == "Bearer test-key"
+
+        # Verify result
+        assert result["job_id"] == "test-tts-job-123"
+        assert result["job_status"] == "completed"
+        assert result["audio_url"] == "https://example.com/audio/test-tts-job-123.mp3"
+
+    @patch('httpx.Client')
+    def test_download_audio(self, mock_client_class):
+        """download_audio() GETs /v1/tts/<job_id>/download and returns the raw response bytes"""
+        # Setup mock response with audio bytes
+        mock_audio_bytes = b"fake audio data"
+        mock_response = Mock()
+        mock_response.content = mock_audio_bytes
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.get.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        # Initialize extension
+        tts = BlazeTTSExtension(config={
+            "api_url": "http://localhost:8000",
+            "api_key": "test-key",
+        })
+
+        # Call download_audio
+        audio_data = tts.download_audio("test-tts-job-123")
+
+        # Verify request was made correctly
+        mock_client.get.assert_called_once()
+        call_args = mock_client.get.call_args
+
+        # Check endpoint
+        assert call_args[0][0] == "http://localhost:8000/v1/tts/test-tts-job-123/download"
+
+        # Check headers
+        headers = call_args[1]["headers"]
+        assert headers["Authorization"] == "Bearer test-key"
+
+        # Verify result
+        assert audio_data == mock_audio_bytes
+
+    def test_process_method(self, mock_api_response_synthesize):
+        """Test process() method (TEN framework interface)"""
+        with patch('httpx.Client') as mock_client_class:
+            # One mock response serves the synthesize POST and any follow-up audio GET
+            mock_response = Mock()
+            mock_response.json.return_value = mock_api_response_synthesize
+            mock_response.raise_for_status = Mock()
+            mock_response.content = b"fake audio data"  # real bytes: a bare Mock has no len()
+            mock_client = Mock()
+            mock_client.__enter__ = Mock(return_value=mock_client)
+            mock_client.__exit__ = Mock(return_value=False)
+            mock_client.post.return_value = mock_response
+            mock_client_class.return_value = mock_client
+            mock_client.get.return_value = mock_response  # presumably used by the download step
+            # Initialize extension
+            tts = BlazeTTSExtension(config={
+                "api_url": "http://localhost:8000",
+                "speaker_id": "test-speaker",
+            })
+
+            # Call process
+            result = tts.process({
+                "text": "Xin chào",
+                "speaker_id": "test-speaker",
+                "language": "vi",
+            })
+
+            # Verify result format
+            assert result["job_id"] == "test-tts-job-123"
+            assert result["status"] == "completed"
+            assert "audio_url" in result
+
+    def test_process_method_missing_text(self):
+        """Test process() method raises error when text is missing"""
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        with pytest.raises(ValueError, match="text is required in input_data"):
+            tts.process({})
+
+    def test_get_metadata(self):
+        """get_metadata() reports name, version, capabilities, formats and languages"""
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        metadata = tts.get_metadata()
+
+        assert metadata["name"] == "blaze_tts_python"
+        assert metadata["version"] == "1.0.0"
+        assert "tts" in metadata["capabilities"]
+        assert "text_to_speech" in metadata["capabilities"]
+        assert "wav" in metadata["supported_formats"]
+        assert "vi" in metadata["supported_languages"]
+
+    @patch('httpx.Client')
+    def test_synthesize_with_audio_speed(self, mock_client_class, mock_api_response_synthesize):
+        """Test synthesize() with custom audio_speed"""
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_synthesize
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        tts.synthesize(
+            text="Hello",
+            speaker_id="test-speaker",
+            audio_speed=1.5,
+        )
+
+        call_args = mock_client.post.call_args
+        json_data = call_args[1]["json"]
+        assert json_data["audio_speed"] == 1.5
+
+    @patch('httpx.Client')
+    def test_synthesize_with_audio_quality(self, mock_client_class, mock_api_response_synthesize):
+        """Test synthesize() with custom audio_quality"""
+        mock_response = Mock()
+        mock_response.json.return_value = mock_api_response_synthesize
+        mock_response.raise_for_status = Mock()
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        tts.synthesize(
+            text="Hello",
+            speaker_id="test-speaker",
+            audio_quality=128,
+        )
+
+        call_args = mock_client.post.call_args
+        json_data = call_args[1]["json"]
+        assert json_data["audio_quality"] == 128
+
+    @patch('httpx.Client')
+    def test_synthesize_http_error(self, mock_client_class):
+        """synthesize() lets httpx.HTTPStatusError from raise_for_status() propagate"""
+        # Setup mock response with error
+        mock_response = Mock()
+        mock_response.status_code = 400
+        mock_response.text = "Bad Request"
+        mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
+            "Bad Request",
+            request=Mock(),
+            response=mock_response
+        )
+
+        mock_client = Mock()
+        mock_client.__enter__ = Mock(return_value=mock_client)
+        mock_client.__exit__ = Mock(return_value=False)
+        mock_client.post.return_value = mock_response
+        mock_client_class.return_value = mock_client
+
+        tts = BlazeTTSExtension(config={"api_url": "http://localhost:8000"})
+
+        with pytest.raises(httpx.HTTPStatusError):
+            tts.synthesize(text="Hello", speaker_id="test-speaker")
+