diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py index 4680bea28..980ef4164 100644 --- a/backend/app/core/providers.py +++ b/backend/app/core/providers.py @@ -14,6 +14,7 @@ class Provider(str, Enum): LANGFUSE = "langfuse" GOOGLE = "google" SARVAMAI = "sarvamai" + ELEVENLABS = "elevenlabs" @dataclass @@ -34,6 +35,7 @@ class ProviderConfig: ), Provider.GOOGLE: ProviderConfig(required_fields=["api_key"]), Provider.SARVAMAI: ProviderConfig(required_fields=["api_key"]), + Provider.ELEVENLABS: ProviderConfig(required_fields=["api_key"]), } diff --git a/backend/app/models/llm/request.py b/backend/app/models/llm/request.py index 0a8c33818..97e0e5867 100644 --- a/backend/app/models/llm/request.py +++ b/backend/app/models/llm/request.py @@ -7,7 +7,6 @@ from pydantic import HttpUrl, model_validator from sqlalchemy.dialects.postgresql import JSONB from sqlmodel import Field, Index, SQLModel, text - from app.core.util import now @@ -55,8 +54,8 @@ class STTLLMParams(SQLModel): class TTSLLMParams(SQLModel): model: str - voice: str - language: str + voice: str | None = None + language: str | None = None response_format: Literal["mp3", "wav", "ogg"] | None = "wav" @@ -194,7 +193,9 @@ class NativeCompletionConfig(SQLModel): Supports any LLM provider's native API format. """ - provider: Literal["openai-native", "google-native", "sarvamai-native"] = Field( + provider: Literal[ + "openai-native", "google-native", "sarvamai-native", "elevenlabs-native" + ] = Field( ..., description="Native provider type (e.g., openai-native)", ) @@ -214,7 +215,7 @@ class KaapiCompletionConfig(SQLModel): Supports multiple providers: OpenAI, Claude, Gemini, etc. 
""" - provider: Literal["openai", "google", "sarvamai"] = Field( + provider: Literal["openai", "google", "sarvamai", "elevenlabs"] = Field( ..., description="LLM provider (openai, google, sarvamai)" ) diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index 2a5f7dee2..d5bc70119 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -351,6 +351,7 @@ def execute_llm_call( session=session, project_id=project_id, config_id=config.id ) config_blob, error = resolve_config_blob(config_crud, config) + logger.info(f"----the resolved config blob is {config_blob}") if error: return BlockResult(error=error) else: @@ -520,7 +521,7 @@ def execute_job( callback_url_str = str(request.callback_url) if request.callback_url else None logger.info( - f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}" + f"[execute_job] Starting LLM job execution | job_id={job_id}, task_id={task_id}, callback_url {callback_url_str}" ) try: @@ -548,6 +549,8 @@ def execute_job( include_provider_raw_response=request.include_provider_raw_response, ) + logger.info(f"[execute_job] results: {result.error}") + if result.success: callback_response = APIResponse.success_response( data=result.response, metadata=result.metadata diff --git a/backend/app/services/llm/mappers.py b/backend/app/services/llm/mappers.py index b9e1ebae4..b4ee02dd0 100644 --- a/backend/app/services/llm/mappers.py +++ b/backend/app/services/llm/mappers.py @@ -1,8 +1,59 @@ """Parameter mappers for converting Kaapi-abstracted parameters to provider-specific formats.""" import litellm +import logging from app.models.llm import KaapiCompletionConfig, NativeCompletionConfig +logger = logging.getLogger(__name__) +# BCP-47 language tag → ElevenLabs ISO 639-3 code (Indic + English) +BCP47_TO_ELEVENLABS_LANG: dict[str, str] = { + "en-IN": "en", + "hi-IN": "hi", + "bn-IN": "bn", # Bengali + "ta-IN": "ta", + "te-IN": "te", + "mr-IN": "mr", + "gu-IN": "gu", + 
"kn-IN": "kn", + "ml-IN": "ml", + "pa-IN": "pa", + # "od-IN": "or", # Not supported by Elevenlabs explicitly but works in auto detect mode + "as-IN": "as", + "ur-IN": "ur", + "ne-IN": "ne", + "sd-IN": "sd", +} + +ELEVENLABS_VOICE_TO_ID: dict[str, str] = { + "Sarah": "EXAVITQu4vr4xnSDxMaL", + "George": "JBFqnCBsd6RMkjVDRZzb", + "Callum": "N2lVS1w4EtoT3dr4eOWO", + "Liam": "TX3LPaxmHKxFdv7VOQHJ", +} + + +def voice_to_id(voice: str) -> str | None: + """ + Convert voice to its corresponding voice_id + + Returns: + voice_id associated with a voice + """ + + return ELEVENLABS_VOICE_TO_ID.get(voice) + + +def bcp47_to_elevenlabs_lang(bcp47_code: str) -> str | None: + """Convert a BCP-47 language tag to an ElevenLabs ISO 639-3 language code. + + Args: + bcp47_code: BCP-47 language tag (e.g. "en-IN", "hi-IN", "ta-IN") + + Returns: + ISO 639-3 code (e.g. "eng", "hin", "tam") or None if unsupported + """ + return BCP47_TO_ELEVENLABS_LANG.get(bcp47_code) + def map_kaapi_to_openai_params(kaapi_params: dict) -> tuple[dict, list[str]]: """Map Kaapi-abstracted parameters to OpenAI API parameters. @@ -76,7 +127,9 @@ def map_kaapi_to_openai_params(kaapi_params: dict) -> tuple[dict, list[str]]: return openai_params, warnings -def map_kaapi_to_google_params(kaapi_params: dict) -> tuple[dict, list[str]]: +def map_kaapi_to_google_params( + kaapi_params: dict, completion_type: str +) -> tuple[dict, list[str]]: """Map Kaapi-abstracted parameters to Google AI (Gemini) API parameters. 
This mapper transforms standardized Kaapi parameters into Google-specific @@ -84,6 +137,7 @@ def map_kaapi_to_google_params(kaapi_params: dict) -> tuple[dict, list[str]]: Args: kaapi_params: Dictionary with standardized Kaapi parameters + completion_type: Type of completion ("text", "stt", or "tts") Supported Mapping: - model → model @@ -103,46 +157,74 @@ def map_kaapi_to_google_params(kaapi_params: dict) -> tuple[dict, list[str]]: if not model: return {}, ["Missing required 'model' parameter"] - google_params["model"] = kaapi_params.get("model") + google_params["model"] = model - # Instructions for STT prompts - instructions = kaapi_params.get("instructions") - if instructions: - google_params["instructions"] = instructions + if completion_type == "text": + # Text completion - instructions, temperature, reasoning, knowledge_base_ids + instructions = kaapi_params.get("instructions") + if instructions: + google_params["instructions"] = instructions - temperature = kaapi_params.get("temperature") + temperature = kaapi_params.get("temperature") + if temperature is not None: + google_params["temperature"] = temperature - if temperature is not None: - google_params["temperature"] = temperature + reasoning = kaapi_params.get("reasoning") + if reasoning: + google_params["reasoning"] = reasoning - # TTS Config - voice = kaapi_params.get("voice") - if voice: - google_params["voice"] = voice + # Warn about unsupported parameters + if kaapi_params.get("knowledge_base_ids"): + # TODO: Will take up later, when we add google filesearch tool support + warnings.append( + "Parameter 'knowledge_base_ids' is not supported by Google AI and was ignored." 
+ ) - language = kaapi_params.get("language") - if language: - google_params["language"] = language + elif completion_type == "tts": + # TTS mode - voice, language, response_format + voice = kaapi_params.get("voice") + if voice: + google_params["voice"] = voice - response_format = kaapi_params.get("response_format") - if response_format: - google_params["response_format"] = response_format + language = kaapi_params.get("language") + if language: + google_params["language"] = language - reasoning = kaapi_params.get("reasoning") - if reasoning: - google_params["reasoning"] = reasoning - - # Warn about unsupported parameters - if kaapi_params.get("knowledge_base_ids"): - # TODO: Will take up later, when we add google filesearch tool support - warnings.append( - "Parameter 'knowledge_base_ids' is not supported by Google AI and was ignored." - ) + response_format = kaapi_params.get("response_format") + if response_format: + google_params["response_format"] = response_format + + elif completion_type == "stt": + # STT mode - instructions, temperature, input_language, output_language, response_format + instructions = kaapi_params.get("instructions") + if instructions: + google_params["instructions"] = instructions + + temperature = kaapi_params.get("temperature") + if temperature is not None: + google_params["temperature"] = temperature + + input_language = kaapi_params.get("input_language") + if input_language: + google_params["input_language"] = input_language + + output_language = kaapi_params.get("output_language") + if output_language: + google_params["output_language"] = output_language + + response_format = kaapi_params.get("response_format") + if response_format: + google_params["response_format"] = response_format + + else: + return {}, [f"Unsupported completion type '{completion_type}' for Google AI"] return google_params, warnings -def map_kaapi_to_sarvam_params(kaapi_params: dict) -> tuple[dict, list[str]]: +def map_kaapi_to_sarvam_params( + kaapi_params: dict, 
completion_type: str +) -> tuple[dict, list[str]]: """Map Kaapi-abstracted parameters to SarvamAI API parameters. Handles both STTLLMParams and TTSLLMParams. @@ -152,6 +234,7 @@ def map_kaapi_to_sarvam_params(kaapi_params: dict) -> tuple[dict, list[str]]: Args: kaapi_params: Dictionary with standardized Kaapi parameters + completion_type: Type of completion ("stt" or "tts") Returns: Tuple of: @@ -167,44 +250,59 @@ def map_kaapi_to_sarvam_params(kaapi_params: dict) -> tuple[dict, list[str]]: return {}, ["Missing required 'model' parameter"] sarvam_params["model"] = model - # Determine if STT or TTS based on presence of specific params - voice = kaapi_params.get("voice") - input_language = kaapi_params.get("input_language") - - if voice is not None: + if completion_type == "tts": # TTS mode - map TTSLLMParams - sarvam_params["speaker"] = voice - + # Required: target_language_code (API requirement) language = kaapi_params.get("language") if not language: return {}, ["Missing required 'language' parameter for TTS"] sarvam_params["target_language_code"] = language + # Optional: speaker (has API default: Shubh for v3, Anushka for v2) + voice = kaapi_params.get("voice") + if voice: + sarvam_params["speaker"] = voice + + # Optional: output_audio_codec response_format = kaapi_params.get("response_format") if response_format: # Map audio format to SarvamAI codec - format_mapping = {"mp3": "mp3", "wav": "wav", "ogg": "ogg"} + # Supported: mp3, linear16, mulaw, alaw, opus, flac, aac, wav + format_mapping = { + "mp3": "mp3", + "wav": "wav", + "ogg": "opus", # Map ogg to opus (closest match) + } sarvam_params["output_audio_codec"] = format_mapping.get( response_format, "wav" ) - elif input_language is not None or kaapi_params.get("output_language") is not None: + elif completion_type == "stt": # STT mode - map STTLLMParams + input_language = kaapi_params.get("input_language") output_language = kaapi_params.get("output_language") - transcription_mode = "transcribe" + # Set 
language_code (optional, defaults to "unknown" for auto-detection) if input_language == "auto": sarvam_params["language_code"] = "unknown" elif input_language: sarvam_params["language_code"] = input_language + else: + # Default to "unknown" for auto-detection if not provided + sarvam_params["language_code"] = "unknown" + + # Set mode only for saaras:v3 model (not for saarika:v2.5) + # mode parameter: transcribe, translate, verbatim, translit, or codemix + if model and "saaras" in model: + transcription_mode = "transcribe" - if output_language is None: - output_language = input_language + if output_language is None: + output_language = input_language - if output_language == "en-IN" and input_language != output_language: - transcription_mode = "translate" + if output_language == "en-IN" and input_language != output_language: + transcription_mode = "translate" - sarvam_params["mode"] = transcription_mode + sarvam_params["mode"] = transcription_mode # Warn about unsupported STT parameters instructions = kaapi_params.get("instructions") @@ -225,9 +323,117 @@ def map_kaapi_to_sarvam_params(kaapi_params: dict) -> tuple[dict, list[str]]: "Parameter 'response_format' is not supported by SarvamAI STT and was ignored" ) + else: + return {}, [f"Unsupported completion type '{completion_type}' for SarvamAI"] + return sarvam_params, warnings +def map_kaapi_to_elevenlabs_params( + kaapi_params: dict, completion_type: str +) -> tuple[dict, list[str]]: + """ + Map Kaapi-abstracted parameters to ElevenLab API params + Handles both STTLLMParams and TTSLLMParams. 
+ + STTLLMParams: model, instructions, input_language, output_language, response_format, temperature + TTSLLMParams: model, voice, language, response_format + + Args: + kaapi_params: Dictionary with standardized Kaapi parameters + completion_type: Type of completion ("stt" or "tts") + + Returns: + Tuple of: + - Dictionary of ELevenlabs API parameters + - List of warnings for unsupported parameters + + """ + elevenlabs_params = {} + warnings = [] + + model_id = kaapi_params.get("model") + if not model_id: + return {}, ["Missing required 'model' parameter"] + elevenlabs_params["model_id"] = model_id + + if completion_type == "tts": + # TTS Mode - map TTSLLMParams + voice = kaapi_params.get("voice") + if not voice: + return {}, ["Missing required 'voice' parameter for TTS"] + + voice_id = voice_to_id(voice) + if not voice_id: + return {}, [f"Unsupported voice '{voice}' for ElevenLabs TTS"] + elevenlabs_params["voice_id"] = voice_id + + language = kaapi_params.get("language") + if language: + elevenlabs_lang = bcp47_to_elevenlabs_lang(language) + if not elevenlabs_lang: + warnings.append( + f"Unsupported language '{language}' for ElevenLabs TTS, using default" + ) + else: + elevenlabs_params["language_code"] = elevenlabs_lang + + response_format = kaapi_params.get("response_format") + if response_format: + # Map audio format to Elevenlabs codec + # supports mp3, wav and opus (ogg maps to opus) + format_mapping = { + "mp3": "mp3_44100_128", + "wav": "wav_24000", + "ogg": "opus_48000_128", # Map ogg to opus + } + elevenlabs_params["output_format"] = format_mapping.get( + response_format, "mp3_44100_128" + ) + + elif completion_type == "stt": + # STT mode - map STTLLMParams + input_language = kaapi_params.get("input_language") + output_language = kaapi_params.get("output_language") + + if input_language == "auto": + elevenlabs_params["language_code"] = None + elif input_language: + elevenlabs_lang = bcp47_to_elevenlabs_lang(input_language) + if elevenlabs_lang: + 
elevenlabs_params["language_code"] = elevenlabs_lang + else: + warnings.append( + f"Unsupported language '{input_language}' for ElevenLabs STT, defaulting to auto-detect" + ) + + if output_language and output_language != input_language: + warnings.append( + "Parameter 'output_language' is not supported by ElevenLabs STT. " + "ElevenLabs only supports transcription, not translation. " + "The audio will be transcribed in its original language." + ) + + temperature = kaapi_params.get("temperature") + if temperature is not None: + elevenlabs_params["temperature"] = temperature + + response_format = kaapi_params.get("response_format") + if response_format: + warnings.append("Kaapi only supports 'txt' as the default response format.") + + # Warn about unsupported STT parameters + instructions = kaapi_params.get("instructions") + if instructions: + warnings.append( + "Parameter 'instructions' is not supported by ElevenLabs STT and was ignored." + ) + else: + return {}, [f"Unsupported completion type '{completion_type}' for ElevenLabs"] + + return elevenlabs_params, warnings + + def transform_kaapi_config_to_native( kaapi_config: KaapiCompletionConfig, ) -> tuple[NativeCompletionConfig, list[str]]: @@ -243,6 +449,7 @@ def transform_kaapi_config_to_native( - NativeCompletionConfig with provider-native parameters ready for API - List of warnings for suppressed/ignored parameters """ + # TODO change from magic string to enums if kaapi_config.provider == "openai": mapped_params, warnings = map_kaapi_to_openai_params(kaapi_config.params) return ( @@ -253,7 +460,9 @@ def transform_kaapi_config_to_native( ) if kaapi_config.provider == "google": - mapped_params, warnings = map_kaapi_to_google_params(kaapi_config.params) + mapped_params, warnings = map_kaapi_to_google_params( + kaapi_config.params, kaapi_config.type + ) return ( NativeCompletionConfig( provider="google-native", params=mapped_params, type=kaapi_config.type @@ -262,7 +471,9 @@ def transform_kaapi_config_to_native( ) 
if kaapi_config.provider == "sarvamai": - mapped_params, warnings = map_kaapi_to_sarvam_params(kaapi_config.params) + mapped_params, warnings = map_kaapi_to_sarvam_params( + kaapi_config.params, kaapi_config.type + ) return ( NativeCompletionConfig( provider="sarvamai-native", params=mapped_params, type=kaapi_config.type @@ -270,4 +481,17 @@ def transform_kaapi_config_to_native( warnings, ) + if kaapi_config.provider == "elevenlabs": + mapped_params, warnings = map_kaapi_to_elevenlabs_params( + kaapi_config.params, kaapi_config.type + ) + return ( + NativeCompletionConfig( + provider="elevenlabs-native", + params=mapped_params, + type=kaapi_config.type, + ), + warnings, + ) + raise ValueError(f"Unsupported provider: {kaapi_config.provider}") diff --git a/backend/app/services/llm/providers/eai.py b/backend/app/services/llm/providers/eai.py new file mode 100644 index 000000000..de14039bd --- /dev/null +++ b/backend/app/services/llm/providers/eai.py @@ -0,0 +1,315 @@ +import base64 +import logging +import os +import uuid +from typing import Any + +from elevenlabs import ElevenLabs, SpeechToTextConvertResponse + + +from app.models.llm import ( + NativeCompletionConfig, + LLMCallResponse, + QueryParams, + TextOutput, + LLMResponse, + Usage, + TextContent, +) +from app.models.llm.response import AudioOutput +from app.models.llm.request import AudioContent +from app.services.llm.providers.base import BaseProvider + + +logger = logging.getLogger(__name__) + + +class ElevenlabsAIProvider(BaseProvider): + def __init__(self, client: ElevenLabs): + """Initialize Elevenlabs provider with client. 
+ + Args: + client: ElevenLabs client instance + """ + super().__init__(client) + self.client = client + + @staticmethod + def create_client(credentials: dict[str, Any]) -> Any: + if "api_key" not in credentials: + raise ValueError("API Key for Elevenlabs Not Set") + return ElevenLabs(api_key=credentials["api_key"]) + + def _parse_input( + self, query_input: Any, completion_type: str, provider: str + ) -> str: + if completion_type == "stt": + if isinstance(query_input, str) and os.path.exists(query_input): + return query_input + else: + raise ValueError(f"{provider} STT requires a valid file path as input") + elif completion_type == "tts": + if isinstance(query_input, str): + return query_input + else: + raise ValueError(f"{provider} TTS requires a text string as input") + raise ValueError( + f"Unsupported completion type '{completion_type}' for {provider}" + ) + + def _execute_stt( + self, + completion_config: NativeCompletionConfig, + resolved_input: str, + include_provider_raw_response: bool = False, + ) -> tuple[LLMCallResponse | None, str | None]: + """Execute speech-to-text completion using Elevenlabs. 
+ + Args: + completion_config: Configuration for the completion request (with already-mapped params) + resolved_input: File path to the audio input + include_provider_raw_response: Whether to include raw provider response + + Returns: + Tuple of (response, error_message) + """ + provider_name = completion_config.provider + params = completion_config.params + + # Extract already-mapped parameters from the mapper + model_id = params.get("model_id") or "scribe_v2" + if not model_id: + return None, "Missing 'model_id' in native params for Elevenlabs STT" + + language_code = params.get("language_code") + temperature = params.get("temperature") + + # Parse and validate input + parsed_input_path = self._parse_input( + query_input=resolved_input, + completion_type="stt", + provider=provider_name, + ) + + try: + # Build optional kwargs + stt_kwargs: dict[str, Any] = {} + if language_code: + stt_kwargs["language_code"] = language_code + if temperature is not None: + stt_kwargs["temperature"] = temperature + + with open(parsed_input_path, "rb") as audio_file: + # Call ElevenLabs transcribe with all mapped parameters + elevenlabs_response: SpeechToTextConvertResponse = ( + self.client.speech_to_text.convert( + file=audio_file, model_id=model_id, **stt_kwargs + ) + ) + + # Estimate token usage (not directly provided by Elevenlabs STT) + input_tokens_estimate = 0 + output_tokens_estimate = len(elevenlabs_response.text.split()) + total_tokens_estimate = input_tokens_estimate + output_tokens_estimate + transcription_id = elevenlabs_response.transcription_id or str(uuid.uuid4()) + llm_response = LLMCallResponse( + response=LLMResponse( + provider_response_id=transcription_id, + conversation_id=None, + provider=provider_name, + model=model_id, + output=TextOutput( + content=TextContent(value=elevenlabs_response.text) + ), + ), + usage=Usage( + input_tokens=input_tokens_estimate, + output_tokens=output_tokens_estimate, + total_tokens=total_tokens_estimate, + reasoning_tokens=None, + 
), + ) + + if include_provider_raw_response: + llm_response.provider_raw_response = elevenlabs_response.model_dump() + + logger.info( + f"[_execute_stt] Successfully transcribed audio | " + f"request_id={elevenlabs_response.transcription_id}, model={model_id}, provider={provider_name}" + ) + return llm_response, None + + except Exception as e: + error_message = f"Elevenlabs STT transcription failed: {str(e)}" + logger.error( + f"[_execute_stt] {error_message} | provider={provider_name}", + exc_info=True, + ) + return None, error_message + + def _execute_tts( + self, + completion_config: NativeCompletionConfig, + resolved_input: str, + include_provider_raw_response: bool = False, + ) -> tuple[LLMCallResponse | None, str | None]: + """Execute text-to-speech completion using Elevenlabs. + + Args: + completion_config: Configuration for the completion request (with already-mapped params) + resolved_input: Text string to convert to speech + include_provider_raw_response: Whether to include raw provider response + + Returns: + Tuple of (response, error_message) + """ + provider_name = completion_config.provider + params = completion_config.params + + # Extract already-mapped parameters from the mapper + # Use 'or' to handle both missing keys and falsy values + model_id = params.get("model_id") or "eleven_v3" + voice_id = params.get("voice_id") or "EXAVITQu4vr4xnSDxMaL" + + if not model_id: + return None, "Missing 'model_id' in native params for Elevenlabs TTS" + if not voice_id: + return None, "Missing 'voice_id' in native params for Elevenlabs TTS" + + output_format = params.get("output_format", "mp3_44100_128") + language_code = params.get("language_code") + voice_settings = params.get("voice_settings") + + # Parse and validate input + parsed_text = self._parse_input( + query_input=resolved_input, + completion_type="tts", + provider=provider_name, + ) + + try: + # Build optional kwargs + tts_kwargs: dict[str, Any] = {} + if language_code: + tts_kwargs["language_code"] = 
language_code + if voice_settings: + tts_kwargs["voice_settings"] = voice_settings + + # Call Elevenlabs TTS API + audio_iterator = self.client.text_to_speech.convert( + voice_id=voice_id, + text=parsed_text, + model_id=model_id, + output_format=output_format, + **tts_kwargs, + ) + + # Elevenlabs returns an iterator of audio bytes; collect and base64-encode + audio_bytes = b"".join(audio_iterator) + if not audio_bytes: + return None, "Elevenlabs TTS returned no audio data" + + audio_base64 = base64.b64encode(audio_bytes).decode("utf-8") + + # Derive mime type from output_format (e.g. "mp3_44100_128" -> "audio/mpeg") + codec = output_format.split("_")[0] + mime_type_map = { + "mp3": "audio/mpeg", + "pcm": "audio/pcm", + "wav": "audio/wav", + "opus": "audio/opus", + "ulaw": "audio/basic", + "alaw": "audio/alaw", + } + mime_type = mime_type_map.get(codec, f"audio/{codec}") + + # Estimate token usage (not directly provided by Elevenlabs TTS) + input_tokens_estimate = len(parsed_text.split()) + output_tokens_estimate = 0 # Audio output, no tokens + total_tokens_estimate = input_tokens_estimate + + llm_response = LLMCallResponse( + response=LLMResponse( + provider_response_id=str(uuid.uuid4()), + conversation_id=None, + provider=provider_name, + model=model_id, + output=AudioOutput( + content=AudioContent( + format="base64", + value=audio_base64, + mime_type=mime_type, + ) + ), + ), + usage=Usage( + input_tokens=input_tokens_estimate, + output_tokens=output_tokens_estimate, + total_tokens=total_tokens_estimate, + reasoning_tokens=None, + ), + ) + + if include_provider_raw_response: + llm_response.provider_raw_response = { + "audio_bytes_length": len(audio_bytes), + "output_format": output_format, + } + + logger.info( + f"[_execute_tts] Successfully converted text to speech | " + f"provider={provider_name}, model={model_id}, voice_id={voice_id}, output_format={output_format}" + ) + return llm_response, None + + except Exception as e: + error_message = f"Elevenlabs TTS 
conversion failed: {str(e)}" + logger.error( + f"[_execute_tts] {error_message} | provider={provider_name}", + exc_info=True, + ) + return None, error_message + + def execute( + self, + completion_config: NativeCompletionConfig, + query: QueryParams, # noqa: ARG002 - Required by base class interface, unused for STT/TTS + resolved_input: str, + include_provider_raw_response: bool = False, + ) -> tuple[LLMCallResponse | None, str | None]: + provider_name = completion_config.provider + try: + completion_type = completion_config.type + + if completion_type == "stt": + return self._execute_stt( + completion_config=completion_config, + resolved_input=resolved_input, + include_provider_raw_response=include_provider_raw_response, + ) + elif completion_type == "tts": + return self._execute_tts( + completion_config=completion_config, + resolved_input=resolved_input, + include_provider_raw_response=include_provider_raw_response, + ) + else: + return ( + None, + f"Unsupported completion type '{completion_type}' for ElevenlabsAIProvider", + ) + + except ValueError as e: + error_message = f"Input validation error: {str(e)}" + logger.error( + f"[ElevenlabsAIProvider.execute] {error_message} | provider={provider_name}", + exc_info=True, + ) + return None, error_message + except Exception as e: + error_message = "Unexpected error occurred during Elevenlabs execution" + logger.error( + f"[ElevenlabsAIProvider.execute] {error_message}: {str(e)} | provider={provider_name}", + exc_info=True, + ) + return None, error_message diff --git a/backend/app/services/llm/providers/gai.py b/backend/app/services/llm/providers/gai.py index 05fa46fc1..a20ebda2e 100644 --- a/backend/app/services/llm/providers/gai.py +++ b/backend/app/services/llm/providers/gai.py @@ -181,7 +181,7 @@ def _execute_stt( reasoning_tokens = response.usage_metadata.thoughts_token_count or 0 else: logger.warning( - f"[GoogleAIProvider._execute_stt] Response missing usage_metadata, using zeros" + 
f"[GoogleAIProvider._execute_stt] Response missing usage_metadata, using zeros | provider={provider}" ) input_tokens = 0 output_tokens = 0 @@ -208,7 +208,8 @@ def _execute_stt( llm_response.provider_raw_response = response.model_dump() logger.info( - f"[GoogleAIProvider._execute_stt] Successfully generated STT response: {response.response_id}" + f"[GoogleAIProvider._execute_stt] Successfully generated STT response | " + f"request_id={response.response_id}, provider={provider}, model={model}" ) return llm_response, None @@ -299,7 +300,7 @@ def _execute_tts( if response_format and response_format != "wav": # Need to convert from WAV to requested format logger.info( - f"[GoogleAIProvider._execute_tts] Converting audio from WAV to {response_format}" + f"[GoogleAIProvider._execute_tts] Converting audio from WAV to {response_format} | provider={provider}" ) if response_format == "mp3": @@ -321,11 +322,11 @@ def _execute_tts( actual_format = "ogg" else: logger.warning( - f"[GoogleAIProvider._execute_tts] Unsupported response_format '{response_format}', returning native WAV" + f"[GoogleAIProvider._execute_tts] Unsupported response_format '{response_format}', returning native WAV | provider={provider}" ) response_format = "wav" logger.info( - f"[GoogleAIProvider._execute_tts] Audio conversion successful: {actual_format.upper()} ({len(raw_audio_bytes)} bytes)" + f"[GoogleAIProvider._execute_tts] Audio conversion successful: {actual_format.upper()} ({len(raw_audio_bytes)} bytes) | provider={provider}" ) response_mime_type = f"audio/{response_format}" @@ -337,7 +338,7 @@ def _execute_tts( reasoning_tokens = response.usage_metadata.thoughts_token_count or 0 else: logger.warning( - f"[GoogleAIProvider._execute_tts] Response missing usage_metadata, using zeros" + f"[GoogleAIProvider._execute_tts] Response missing usage_metadata, using zeros | provider={provider}" ) input_tokens = 0 output_tokens = 0 @@ -371,8 +372,8 @@ def _execute_tts( llm_response.provider_raw_response = 
response.model_dump() logger.info( - f"[GoogleAIProvider._execute_tts] Successfully generated TTS response: " - f"{response.response_id}, audio_size={len(raw_audio_bytes)} bytes" + f"[GoogleAIProvider._execute_tts] Successfully generated TTS response | " + f"request_id={response.response_id}, provider={provider}, model={model}, audio_size={len(raw_audio_bytes)} bytes" ) return llm_response, None @@ -425,7 +426,7 @@ def _execute_text( reasoning_tokens = response.usage_metadata.thoughts_token_count or 0 else: logger.warning( - f"[GoogleAIProvider._execute_text] Response missing usage_metadata, using zeros" + f"[GoogleAIProvider._execute_text] Response missing usage_metadata, using zeros | provider={completion_config.provider}" ) input_tokens = 0 output_tokens = 0 @@ -450,7 +451,8 @@ def _execute_text( llm_response.provider_raw_response = response.model_dump(mode="json") logger.info( - f"[GoogleAIProvider._execute_text] Successfully generated text response: {response.response_id}" + f"[GoogleAIProvider._execute_text] Successfully generated text response | " + f"request_id={response.response_id}, provider={completion_config.provider}, model={model}" ) return llm_response, None @@ -491,6 +493,7 @@ def execute( except Exception as e: error_message = "Unexpected error occurred" logger.error( - f"[GoogleAIProvider.execute] {error_message}: {str(e)}", exc_info=True + f"[GoogleAIProvider.execute] {error_message}: {str(e)} | provider={completion_config.provider}", + exc_info=True, ) return None, error_message diff --git a/backend/app/services/llm/providers/oai.py b/backend/app/services/llm/providers/oai.py index 392487eea..758f4bc7d 100644 --- a/backend/app/services/llm/providers/oai.py +++ b/backend/app/services/llm/providers/oai.py @@ -126,7 +126,8 @@ def execute( llm_response.provider_raw_response = response.model_dump() logger.info( - f"[OpenAIProvider.execute] Successfully generated response: {response.id}" + f"[OpenAIProvider.execute] Successfully generated response | " 
+ f"request_id={response.id}, provider={completion_config.provider}, model={response.model}" ) return llm_response, None @@ -141,7 +142,7 @@ def execute( error_message = handle_openai_error(e) logger.error( - f"[OpenAIProvider.execute] OpenAI API error: {error_message}", + f"[OpenAIProvider.execute] OpenAI API error: {error_message} | provider={completion_config.provider}", exc_info=True, ) return None, error_message @@ -149,6 +150,7 @@ def execute( except Exception as e: error_message = "Unexpected error occurred" logger.error( - f"[OpenAIProvider.execute] {error_message}: {str(e)}", exc_info=True + f"[OpenAIProvider.execute] {error_message}: {str(e)} | provider={completion_config.provider}", + exc_info=True, ) return None, error_message diff --git a/backend/app/services/llm/providers/registry.py b/backend/app/services/llm/providers/registry.py index b6b7d3af1..9f4538ae1 100644 --- a/backend/app/services/llm/providers/registry.py +++ b/backend/app/services/llm/providers/registry.py @@ -5,25 +5,34 @@ from app.services.llm.providers.oai import OpenAIProvider from app.services.llm.providers.gai import GoogleAIProvider from app.services.llm.providers.sai import SarvamAIProvider +from app.services.llm.providers.eai import ElevenlabsAIProvider logger = logging.getLogger(__name__) class LLMProvider: - OPENAI_NATIVE = "openai-native" OPENAI = "openai" + SARVAMAI = "sarvamai" + ELEVENLABS = "elevenlabs" + GOOGLE = "google" # Future constants for native providers: # CLAUDE_NATIVE = "claude-native" + OPENAI_NATIVE = "openai-native" GOOGLE_NATIVE = "google-native" SARVAMAI_NATIVE = "sarvamai-native" + ELEVENLABS_NATIVE = "elevenlabs-native" _registry: dict[str, type[BaseProvider]] = { - OPENAI_NATIVE: OpenAIProvider, OPENAI: OpenAIProvider, + GOOGLE: GoogleAIProvider, + SARVAMAI: SarvamAIProvider, + ELEVENLABS: ElevenlabsAIProvider, # Future native providers: # CLAUDE_NATIVE: ClaudeProvider, + OPENAI_NATIVE: OpenAIProvider, GOOGLE_NATIVE: GoogleAIProvider, SARVAMAI_NATIVE: 
SarvamAIProvider, + ELEVENLABS_NATIVE: ElevenlabsAIProvider, } @classmethod diff --git a/backend/app/services/llm/providers/sai.py b/backend/app/services/llm/providers/sai.py index c2984e6aa..8ebb9b10a 100644 --- a/backend/app/services/llm/providers/sai.py +++ b/backend/app/services/llm/providers/sai.py @@ -1,10 +1,8 @@ import logging import os +import uuid from typing import Any - from sarvamai import SarvamAI - - from app.models.llm import ( NativeCompletionConfig, LLMCallResponse, @@ -90,14 +88,23 @@ def _execute_stt( ) try: + # Build kwargs for API call, only including non-None parameters + stt_kwargs = { + "file": None, # Will be set below + "model": model, + } + + if language_code: + stt_kwargs["language_code"] = language_code + + # mode only applies to saaras:v3 model + if mode: + stt_kwargs["mode"] = mode + with open(parsed_input_path, "rb") as audio_file: - # Call SarvamAI transcribe with all mapped parameters - sarvam_response = self.client.speech_to_text.transcribe( - file=audio_file, - model=model, - language_code=language_code, - mode=mode, - ) + # Call SarvamAI transcribe with mapped parameters + stt_kwargs["file"] = audio_file + sarvam_response = self.client.speech_to_text.transcribe(**stt_kwargs) # Estimate token usage (not directly provided by SarvamAI STT) input_tokens_estimate = 0 @@ -106,7 +113,8 @@ def _execute_stt( llm_response = LLMCallResponse( response=LLMResponse( - provider_response_id=sarvam_response.request_id or "unknown", + provider_response_id=sarvam_response.request_id + or str(uuid.uuid4()), conversation_id=None, provider=provider_name, model=model, @@ -127,13 +135,16 @@ def _execute_stt( logger.info( f"[_execute_stt] Successfully transcribed audio | " - f"request_id={sarvam_response.request_id}, model={model}, mode={mode}" + f"request_id={sarvam_response.request_id}, provider={provider_name} model={model}, mode={mode}" ) return llm_response, None except Exception as e: error_message = f"SarvamAI STT transcription failed: {str(e)}" 
- logger.error(f"[_execute_stt] {error_message}", exc_info=True) + logger.error( + f"[_execute_stt] {error_message} | provider={provider_name}", + exc_info=True, + ) return None, error_message def _execute_tts( @@ -167,8 +178,9 @@ def _execute_tts( "Missing 'target_language_code' in native params for SarvamAI TTS", ) - speaker = params.get("speaker") - output_audio_codec = params.get("output_audio_codec") + # Optional parameters (have API defaults) + speaker = params.get("speaker") # Defaults: Shubh (v3) / Anushka (v2) + output_audio_codec = params.get("output_audio_codec") # Has API default # Parse and validate input parsed_text = self._parse_input( @@ -178,14 +190,21 @@ def _execute_tts( ) try: - # Call SarvamAI TTS with all mapped parameters - sarvam_response = self.client.text_to_speech.convert( - text=parsed_text, - target_language_code=target_language_code, - model=model, - speaker=speaker, - output_audio_codec=output_audio_codec, - ) + # Build kwargs for API call, only including non-None parameters + tts_kwargs = { + "text": parsed_text, + "target_language_code": target_language_code, + "model": model, + } + + if speaker: + tts_kwargs["speaker"] = speaker + + if output_audio_codec: + tts_kwargs["output_audio_codec"] = output_audio_codec + + # Call SarvamAI TTS with mapped parameters + sarvam_response = self.client.text_to_speech.convert(**tts_kwargs) # SarvamAI returns a list of base64-encoded audio strings # For single text input, take the first audio @@ -201,7 +220,8 @@ def _execute_tts( llm_response = LLMCallResponse( response=LLMResponse( - provider_response_id=sarvam_response.request_id or "unknown", + provider_response_id=sarvam_response.request_id + or str(uuid.uuid4()), conversation_id=None, provider=provider_name, model=model, @@ -226,13 +246,16 @@ def _execute_tts( logger.info( f"[_execute_tts] Successfully converted text to speech | " - f"request_id={sarvam_response.request_id}, model={model}, speaker={speaker}" + 
f"request_id={sarvam_response.request_id}, provider={provider_name}, model={model}, speaker={speaker}" ) return llm_response, None except Exception as e: error_message = f"SarvamAI TTS conversion failed: {str(e)}" - logger.error(f"[_execute_tts] {error_message}", exc_info=True) + logger.error( + f"[_execute_tts] {error_message} | provider={provider_name}", + exc_info=True, + ) return None, error_message def execute( @@ -242,6 +265,7 @@ def execute( resolved_input: str, include_provider_raw_response: bool = False, ) -> tuple[LLMCallResponse | None, str | None]: + provider_name = completion_config.provider try: completion_type = completion_config.type @@ -265,11 +289,15 @@ def execute( except ValueError as e: error_message = f"Input validation error: {str(e)}" - logger.error(f"[SarvamAIProvider.execute] {error_message}", exc_info=True) + logger.error( + f"[SarvamAIProvider.execute] {error_message} | provider={provider_name}", + exc_info=True, + ) return None, error_message except Exception as e: error_message = "Unexpected error occurred during SarvamAI execution" logger.error( - f"[SarvamAIProvider.execute] {error_message}: {str(e)}", exc_info=True + f"[SarvamAIProvider.execute] {error_message}: {str(e)} | provider={provider_name}", + exc_info=True, ) return None, error_message diff --git a/backend/app/tests/services/llm/providers/test_eai.py b/backend/app/tests/services/llm/providers/test_eai.py new file mode 100644 index 000000000..3069f35ca --- /dev/null +++ b/backend/app/tests/services/llm/providers/test_eai.py @@ -0,0 +1,513 @@ +""" +Tests for the ElevenLabs provider (STT and TTS). 
+""" + +import base64 +import pytest +from unittest.mock import MagicMock, patch +from types import SimpleNamespace + +from app.models.llm import ( + NativeCompletionConfig, + QueryParams, +) +from app.services.llm.providers.eai import ElevenlabsAIProvider + + +def mock_elevenlabs_stt_response( + text: str = "Hello world", + language_code: str = "eng", + transcription_id: str = "txn_stt_123", +) -> SimpleNamespace: + """Create a mock ElevenLabs STT response object.""" + response = SimpleNamespace( + text=text, + language_code=language_code, + language_probability=0.98, + transcription_id=transcription_id, + words=[], + model_dump=lambda: { + "text": text, + "language_code": language_code, + "language_probability": 0.98, + "transcription_id": transcription_id, + "words": [], + }, + ) + return response + + +class TestElevenlabsProviderSTT: + """Test cases for ElevenlabsAIProvider STT functionality.""" + + @pytest.fixture + def mock_client(self): + """Create a mock ElevenLabs client.""" + client = MagicMock() + client.speech_to_text = MagicMock() + return client + + @pytest.fixture + def provider(self, mock_client): + """Create an ElevenlabsAIProvider instance with mock client.""" + return ElevenlabsAIProvider(client=mock_client) + + @pytest.fixture + def stt_config(self): + """Create a basic STT completion config.""" + return NativeCompletionConfig( + provider="elevenlabs-native", + type="stt", + params={ + "model_id": "scribe_v1", + "language_code": "hin", + }, + ) + + @pytest.fixture + def query_params(self): + """Create basic query parameters.""" + return QueryParams(input="Test audio input") + + @pytest.fixture + def temp_audio_file(self, tmp_path): + """Create a temporary audio file for testing.""" + audio_file = tmp_path / "test_audio.wav" + audio_file.write_bytes(b"fake audio data") + return str(audio_file) + + def test_stt_success_basic_transcription( + self, provider, mock_client, stt_config, query_params, temp_audio_file + ): + """Test successful STT 
transcription with Hindi audio.""" + mock_response = mock_elevenlabs_stt_response( + text="namaste duniya", language_code="hin" + ) + mock_client.speech_to_text.convert.return_value = mock_response + + result, error = provider.execute(stt_config, query_params, temp_audio_file) + + assert error is None + assert result is not None + assert result.response.output.content.value == "namaste duniya" + assert result.response.model == "scribe_v1" + assert result.response.provider == "elevenlabs-native" + assert result.response.provider_response_id == "txn_stt_123" + assert result.usage.output_tokens == 2 + + def test_stt_auto_detect_language( + self, provider, mock_client, query_params, temp_audio_file + ): + """Test STT without language_code lets ElevenLabs auto-detect.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="stt", + params={"model_id": "scribe_v1"}, + ) + mock_response = mock_elevenlabs_stt_response(text="Detected text") + mock_client.speech_to_text.convert.return_value = mock_response + + result, error = provider.execute(config, query_params, temp_audio_file) + + assert error is None + assert result is not None + call_kwargs = mock_client.speech_to_text.convert.call_args.kwargs + assert "language_code" not in call_kwargs + + def test_stt_with_temperature( + self, provider, mock_client, query_params, temp_audio_file + ): + """Test STT passes temperature to the API.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="stt", + params={ + "model_id": "scribe_v1", + "language_code": "eng", + "temperature": 0.5, + }, + ) + mock_response = mock_elevenlabs_stt_response(text="Hello") + mock_client.speech_to_text.convert.return_value = mock_response + + result, error = provider.execute(config, query_params, temp_audio_file) + + assert error is None + call_kwargs = mock_client.speech_to_text.convert.call_args.kwargs + assert call_kwargs["temperature"] == 0.5 + + def test_stt_uses_default_model_when_missing( + self, 
provider, mock_client, query_params, temp_audio_file + ): + """Test STT uses default model (scribe_v2) when model_id is not provided.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="stt", + params={"language_code": "eng"}, + ) + mock_response = mock_elevenlabs_stt_response(text="Default model test") + mock_client.speech_to_text.convert.return_value = mock_response + + result, error = provider.execute(config, query_params, temp_audio_file) + + assert error is None + assert result is not None + # Verify the default model was used + call_kwargs = mock_client.speech_to_text.convert.call_args.kwargs + assert call_kwargs["model_id"] == "scribe_v2" + + def test_stt_invalid_file_path( + self, provider, mock_client, stt_config, query_params + ): + """Test STT with non-existent file path.""" + result, error = provider.execute( + stt_config, query_params, "/nonexistent/path/audio.wav" + ) + + assert result is None + assert error is not None + + def test_stt_api_exception( + self, provider, mock_client, stt_config, query_params, temp_audio_file + ): + """Test STT when ElevenLabs API raises an exception.""" + mock_client.speech_to_text.convert.side_effect = Exception( + "API rate limit exceeded" + ) + + result, error = provider.execute(stt_config, query_params, temp_audio_file) + + assert result is None + assert error is not None + assert "API rate limit exceeded" in error + + def test_stt_include_provider_raw_response( + self, provider, mock_client, stt_config, query_params, temp_audio_file + ): + """Test STT with include_provider_raw_response flag.""" + mock_response = mock_elevenlabs_stt_response(text="Test transcript") + mock_client.speech_to_text.convert.return_value = mock_response + + result, error = provider.execute( + stt_config, + query_params, + temp_audio_file, + include_provider_raw_response=True, + ) + + assert error is None + assert result is not None + assert result.provider_raw_response is not None + assert 
result.provider_raw_response["text"] == "Test transcript" + assert result.provider_raw_response["transcription_id"] == "txn_stt_123" + + +class TestElevenlabsProviderTTS: + """Test cases for ElevenlabsAIProvider TTS functionality.""" + + @pytest.fixture + def mock_client(self): + """Create a mock ElevenLabs client.""" + client = MagicMock() + client.text_to_speech = MagicMock() + return client + + @pytest.fixture + def provider(self, mock_client): + """Create an ElevenlabsAIProvider instance with mock client.""" + return ElevenlabsAIProvider(client=mock_client) + + @pytest.fixture + def tts_config(self): + """Create a basic TTS completion config.""" + return NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={ + "model_id": "eleven_multilingual_v2", + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + "language_code": "hin", + "output_format": "mp3_44100_128", + }, + ) + + @pytest.fixture + def query_params(self): + """Create basic query parameters.""" + return QueryParams(input="Test text input") + + def test_tts_success_basic_conversion( + self, provider, mock_client, tts_config, query_params + ): + """Test successful TTS conversion returns base64 audio.""" + audio_bytes = b"fake mp3 audio binary data" + mock_client.text_to_speech.convert.return_value = iter([audio_bytes]) + + result, error = provider.execute(tts_config, query_params, "Namaste duniya") + + assert error is None + assert result is not None + expected_b64 = base64.b64encode(audio_bytes).decode("utf-8") + assert result.response.output.content.value == expected_b64 + assert result.response.output.content.format == "base64" + assert result.response.output.content.mime_type == "audio/mpeg" + assert result.response.model == "eleven_multilingual_v2" + assert result.response.provider == "elevenlabs-native" + + def test_tts_chunked_audio_response( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS correctly joins chunked/streamed audio bytes.""" + chunks = [b"chunk1", 
b"chunk2", b"chunk3"] + mock_client.text_to_speech.convert.return_value = iter(chunks) + + result, error = provider.execute(tts_config, query_params, "Hello") + + assert error is None + expected_b64 = base64.b64encode(b"chunk1chunk2chunk3").decode("utf-8") + assert result.response.output.content.value == expected_b64 + + def test_tts_wav_output_format(self, provider, mock_client, query_params): + """Test TTS with WAV output format sets correct mime type.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={ + "model_id": "eleven_multilingual_v2", + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + "output_format": "wav_24000", + }, + ) + mock_client.text_to_speech.convert.return_value = iter([b"wav data"]) + + result, error = provider.execute(config, query_params, "Test") + + assert error is None + assert result.response.output.content.mime_type == "audio/wav" + call_kwargs = mock_client.text_to_speech.convert.call_args.kwargs + assert call_kwargs["output_format"] == "wav_24000" + + def test_tts_opus_output_format(self, provider, mock_client, query_params): + """Test TTS with Opus output format sets correct mime type.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={ + "model_id": "eleven_multilingual_v2", + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + "output_format": "opus_48000_128", + }, + ) + mock_client.text_to_speech.convert.return_value = iter([b"opus data"]) + + result, error = provider.execute(config, query_params, "Test") + + assert error is None + assert result.response.output.content.mime_type == "audio/opus" + + def test_tts_default_output_format(self, provider, mock_client, query_params): + """Test TTS defaults to mp3_44100_128 when output_format is not specified.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={ + "model_id": "eleven_multilingual_v2", + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + }, + ) + 
mock_client.text_to_speech.convert.return_value = iter([b"audio"]) + + result, error = provider.execute(config, query_params, "Test") + + assert error is None + call_kwargs = mock_client.text_to_speech.convert.call_args.kwargs + assert call_kwargs["output_format"] == "mp3_44100_128" + assert result.response.output.content.mime_type == "audio/mpeg" + + def test_tts_passes_language_code( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS passes language_code as optional kwarg to SDK.""" + mock_client.text_to_speech.convert.return_value = iter([b"audio"]) + + provider.execute(tts_config, query_params, "Test") + + call_kwargs = mock_client.text_to_speech.convert.call_args.kwargs + assert call_kwargs["language_code"] == "hin" + + def test_tts_omits_language_code_when_absent( + self, provider, mock_client, query_params + ): + """Test TTS does not pass language_code when not in params.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={ + "model_id": "eleven_multilingual_v2", + "voice_id": "JBFqnCBsd6RMkjVDRZzb", + }, + ) + mock_client.text_to_speech.convert.return_value = iter([b"audio"]) + + provider.execute(config, query_params, "Test") + + call_kwargs = mock_client.text_to_speech.convert.call_args.kwargs + assert "language_code" not in call_kwargs + + def test_tts_uses_default_model_when_missing( + self, provider, mock_client, query_params + ): + """Test TTS uses default model (eleven_v3) when model_id is not provided.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={"voice_id": "JBFqnCBsd6RMkjVDRZzb"}, + ) + mock_client.text_to_speech.convert.return_value = iter([b"audio data"]) + + result, error = provider.execute(config, query_params, "Test text") + + assert error is None + assert result is not None + # Verify the default model was used + call_kwargs = mock_client.text_to_speech.convert.call_args.kwargs + assert call_kwargs["model_id"] ==
"eleven_v3" + + def test_tts_uses_default_voice_when_missing( + self, provider, mock_client, query_params + ): + """Test TTS uses default voice (Sarah) when voice_id is not provided.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="tts", + params={"model_id": "eleven_multilingual_v2"}, + ) + mock_client.text_to_speech.convert.return_value = iter([b"audio data"]) + + result, error = provider.execute(config, query_params, "Test text") + + assert error is None + assert result is not None + # Verify the default voice (Sarah) was used + call_kwargs = mock_client.text_to_speech.convert.call_args.kwargs + assert call_kwargs["voice_id"] == "EXAVITQu4vr4xnSDxMaL" # Sarah's ID + + def test_tts_empty_audio_response( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS when API returns empty audio iterator.""" + mock_client.text_to_speech.convert.return_value = iter([]) + + result, error = provider.execute(tts_config, query_params, "Test text") + + assert result is None + assert error is not None + assert "no audio data" in error.lower() + + def test_tts_api_exception(self, provider, mock_client, tts_config, query_params): + """Test TTS when ElevenLabs API raises an exception.""" + mock_client.text_to_speech.convert.side_effect = Exception("TTS quota exceeded") + + result, error = provider.execute(tts_config, query_params, "Test text") + + assert result is None + assert error is not None + assert "TTS quota exceeded" in error + + def test_tts_include_provider_raw_response( + self, provider, mock_client, tts_config, query_params + ): + """Test TTS with include_provider_raw_response flag.""" + audio_bytes = b"audio data for raw response test" + mock_client.text_to_speech.convert.return_value = iter([audio_bytes]) + + result, error = provider.execute( + tts_config, + query_params, + "Test text", + include_provider_raw_response=True, + ) + + assert error is None + assert result.provider_raw_response is not None + assert 
result.provider_raw_response["audio_bytes_length"] == len(audio_bytes) + assert result.provider_raw_response["output_format"] == "mp3_44100_128" + + def test_tts_usage_estimates(self, provider, mock_client, tts_config, query_params): + """Test that TTS properly estimates token usage based on input text.""" + mock_client.text_to_speech.convert.return_value = iter([b"audio"]) + + result, error = provider.execute( + tts_config, query_params, "Hello world how are you" + ) + + assert error is None + assert result.usage.input_tokens == 5 + assert result.usage.output_tokens == 0 + assert result.usage.total_tokens == 5 + + +class TestElevenlabsProviderClientCreation: + """Test cases for ElevenlabsAIProvider client creation.""" + + def test_create_client_with_valid_api_key(self): + """Test client creation with valid API key.""" + credentials = {"api_key": "test_api_key_123"} + + with patch( + "app.services.llm.providers.eai.ElevenLabs" + ) as mock_elevenlabs_class: + client = ElevenlabsAIProvider.create_client(credentials) + + mock_elevenlabs_class.assert_called_once_with(api_key="test_api_key_123") + + def test_create_client_missing_api_key(self): + """Test client creation with missing API key.""" + credentials = {} + + with pytest.raises(ValueError) as exc_info: + ElevenlabsAIProvider.create_client(credentials) + + assert "API Key for Elevenlabs Not Set" in str(exc_info.value) + + def test_create_client_wrong_credential_key(self): + """Test client creation with wrong credential key name.""" + credentials = {"secret_key": "value"} + + with pytest.raises(ValueError) as exc_info: + ElevenlabsAIProvider.create_client(credentials) + + assert "API Key for Elevenlabs Not Set" in str(exc_info.value) + + +class TestElevenlabsProviderExecute: + """Test cases for ElevenlabsAIProvider execute routing.""" + + @pytest.fixture + def mock_client(self): + """Create a mock ElevenLabs client.""" + return MagicMock() + + @pytest.fixture + def provider(self, mock_client): + """Create an 
ElevenlabsAIProvider instance.""" + return ElevenlabsAIProvider(client=mock_client) + + @pytest.fixture + def query_params(self): + """Create basic query parameters.""" + return QueryParams(input="Test input") + + def test_execute_unsupported_completion_type(self, provider, query_params): + """Test execute with unsupported completion type returns error.""" + config = NativeCompletionConfig( + provider="elevenlabs-native", + type="text", + params={"model_id": "test-model"}, + ) + + result, error = provider.execute(config, query_params, "input") + + assert result is None + assert error is not None + assert "Unsupported completion type" in error diff --git a/backend/app/tests/services/llm/test_mappers.py b/backend/app/tests/services/llm/test_mappers.py index 67e60cf3c..38e987f0b 100644 --- a/backend/app/tests/services/llm/test_mappers.py +++ b/backend/app/tests/services/llm/test_mappers.py @@ -2,6 +2,7 @@ Unit tests for LLM parameter mapping functions. Tests the transformation of Kaapi-abstracted parameters to provider-native formats. +Covers real-world scenarios, edge cases, and provider-specific requirements. """ import pytest @@ -17,6 +18,9 @@ map_kaapi_to_openai_params, map_kaapi_to_google_params, map_kaapi_to_sarvam_params, + map_kaapi_to_elevenlabs_params, + bcp47_to_elevenlabs_lang, + voice_to_id, transform_kaapi_config_to_native, ) @@ -36,50 +40,6 @@ def test_basic_model_mapping(self): assert result == {"model": "gpt-4o", "temperature": 0.1} assert warnings == [] - def test_instructions_mapping(self): - """Test instructions parameter mapping.""" - kaapi_params = TextLLMParams( - model="gpt-4", - instructions="You are a helpful assistant.", - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - assert result["model"] == "gpt-4" - assert result["instructions"] == "You are a helpful assistant." 
- assert warnings == [] - - def test_temperature_mapping(self): - """Test temperature parameter mapping for non-reasoning models.""" - kaapi_params = TextLLMParams( - model="gpt-4", - temperature=0.7, - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - assert result["model"] == "gpt-4" - assert result["temperature"] == 0.7 - assert warnings == [] - - def test_temperature_zero_mapping(self): - """Test that temperature=0 is correctly mapped (edge case).""" - kaapi_params = TextLLMParams( - model="gpt-4", - temperature=0.0, - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - assert result["temperature"] == 0.0 - assert warnings == [] - def test_reasoning_mapping_for_reasoning_models(self): """Test reasoning parameter mapping to OpenAI format for reasoning-capable models.""" kaapi_params = TextLLMParams( @@ -103,6 +63,7 @@ def test_knowledge_base_ids_mapping(self): kaapi_params = TextLLMParams( model="gpt-4", knowledge_base_ids=["vs_abc123", "vs_def456"], + max_num_results=50, ) result, warnings = map_kaapi_to_openai_params( @@ -114,63 +75,9 @@ def test_knowledge_base_ids_mapping(self): assert len(result["tools"]) == 1 assert result["tools"][0]["type"] == "file_search" assert result["tools"][0]["vector_store_ids"] == ["vs_abc123", "vs_def456"] - assert result["tools"][0]["max_num_results"] == 20 # default - assert warnings == [] - - def test_knowledge_base_with_max_num_results(self): - """Test knowledge_base_ids with custom max_num_results.""" - kaapi_params = TextLLMParams( - model="gpt-4", - knowledge_base_ids=["vs_abc123"], - max_num_results=50, - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - assert result["tools"][0]["max_num_results"] == 50 assert warnings == [] - def test_complete_parameter_mapping(self): - """Test mapping all compatible parameters together.""" - kaapi_params = TextLLMParams( - 
model="gpt-4o", - instructions="You are an expert assistant.", - temperature=0.8, - knowledge_base_ids=["vs_123"], - max_num_results=30, - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - assert result["model"] == "gpt-4o" - assert result["instructions"] == "You are an expert assistant." - assert result["temperature"] == 0.8 - assert result["tools"][0]["type"] == "file_search" - assert result["tools"][0]["vector_store_ids"] == ["vs_123"] - assert result["tools"][0]["max_num_results"] == 30 - assert warnings == [] - - def test_reasoning_suppressed_for_non_reasoning_models(self): - """Test that reasoning is suppressed with warning for non-reasoning models.""" - kaapi_params = TextLLMParams( - model="gpt-4", - reasoning="high", - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - assert result["model"] == "gpt-4" - assert "reasoning" not in result - assert len(warnings) == 1 - assert "reasoning" in warnings[0].lower() - assert "does not support reasoning" in warnings[0] - def test_temperature_suppressed_for_reasoning_models(self): """Test that temperature is suppressed with warning for reasoning models when reasoning is set.""" kaapi_params = TextLLMParams( @@ -190,41 +97,11 @@ def test_temperature_suppressed_for_reasoning_models(self): assert "temperature" in warnings[0].lower() assert "suppressed" in warnings[0] - def test_temperature_without_reasoning_for_reasoning_models(self): - """Test that temperature is suppressed for reasoning models even without explicit reasoning parameter.""" - kaapi_params = TextLLMParams( - model="o1", - temperature=0.7, - ) - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - assert result["model"] == "o1" - assert "temperature" not in result - assert "reasoning" not in result - assert len(warnings) == 1 - assert "temperature" in warnings[0].lower() - assert "suppressed" in 
warnings[0] - - def test_minimal_params(self): - """Test mapping with minimal parameters (only model).""" - kaapi_params = TextLLMParams(model="gpt-4") - - result, warnings = map_kaapi_to_openai_params( - kaapi_params.model_dump(exclude_none=True) - ) - - # TextLLMParams has default temperature=0.1 - assert result == {"model": "gpt-4", "temperature": 0.1} - assert warnings == [] - - def test_only_knowledge_base_ids(self): - """Test mapping with only knowledge_base_ids and model.""" + def test_reasoning_suppressed_for_non_reasoning_models(self): + """Test that reasoning is suppressed with warning for non-reasoning models.""" kaapi_params = TextLLMParams( model="gpt-4", - knowledge_base_ids=["vs_xyz"], + reasoning="high", ) result, warnings = map_kaapi_to_openai_params( @@ -232,65 +109,49 @@ def test_only_knowledge_base_ids(self): ) assert result["model"] == "gpt-4" - assert "tools" in result - assert result["tools"][0]["vector_store_ids"] == ["vs_xyz"] - assert warnings == [] + assert "reasoning" not in result + assert len(warnings) == 1 + assert "reasoning" in warnings[0].lower() + assert "does not support reasoning" in warnings[0] class TestMapKaapiToGoogleParams: - """Test cases for map_kaapi_to_google_params function.""" - - def test_basic_model_mapping(self): - """Test basic model parameter mapping.""" - kaapi_params = TextLLMParams(model="gemini-2.5-pro") - - result, warnings = map_kaapi_to_google_params( - kaapi_params.model_dump(exclude_none=True) - ) - - # TextLLMParams has default temperature=0.1 - assert result == {"model": "gemini-2.5-pro", "temperature": 0.1} - assert warnings == [] + """Test cases for map_kaapi_to_google_params function with completion_type.""" - def test_instructions_mapping(self): - """Test instructions parameter mapping.""" - kaapi_params = STTLLMParams( - model="gemini-2.5-pro", - instructions="Transcribe this audio accurately.", - ) + def test_text_completion_basic(self): + """Test basic text completion parameter mapping.""" + 
kaapi_params = TextLLMParams(model="gemini-2.5-pro", temperature=0.7) result, warnings = map_kaapi_to_google_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="text" ) - assert result["model"] == "gemini-2.5-pro" - assert result["instructions"] == "Transcribe this audio accurately." + assert result == {"model": "gemini-2.5-pro", "temperature": 0.7} assert warnings == [] - def test_temperature_mapping(self): - """Test temperature parameter mapping.""" + def test_text_completion_with_reasoning(self): + """Test text completion with reasoning parameter.""" kaapi_params = TextLLMParams( - model="gemini-2.5-pro", - temperature=0.7, + model="gemini-2.5-pro", reasoning="high", temperature=0.5 ) result, warnings = map_kaapi_to_google_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="text" ) assert result["model"] == "gemini-2.5-pro" - assert result["temperature"] == 0.7 + assert result["reasoning"] == "high" + assert result["temperature"] == 0.5 assert warnings == [] - def test_knowledge_base_ids_warning(self): - """Test that knowledge_base_ids are not supported and generate warning.""" + def test_text_completion_knowledge_base_unsupported(self): + """Test that knowledge_base_ids generate warning for Google AI.""" kaapi_params = TextLLMParams( - model="gemini-2.5-pro", - knowledge_base_ids=["vs_abc123"], + model="gemini-2.5-pro", knowledge_base_ids=["vs_abc123"] ) result, warnings = map_kaapi_to_google_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="text" ) assert result["model"] == "gemini-2.5-pro" @@ -299,456 +160,635 @@ def test_knowledge_base_ids_warning(self): assert "knowledge_base_ids" in warnings[0].lower() assert "not supported" in warnings[0] - def test_reasoning_passed_through(self): - kaapi_params = TextLLMParams( - model="gemini-2.5-pro", - reasoning="high", + def 
test_stt_completion_with_instructions(self): + """Test STT completion with instructions parameter.""" + kaapi_params = STTLLMParams( + model="gemini-2.5-pro", instructions="Transcribe accurately" ) result, warnings = map_kaapi_to_google_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) assert result["model"] == "gemini-2.5-pro" - assert result["reasoning"] == "high" - assert len(warnings) == 0 + assert result["instructions"] == "Transcribe accurately" + assert warnings == [] - def test_knowledge_base_ids_unsupported(self): - kaapi_params = TextLLMParams( - model="gemini-2.5-pro", - reasoning="medium", - knowledge_base_ids=["vs_123"], + def test_tts_completion_with_voice(self): + """Test TTS completion with voice and language parameters.""" + kaapi_params = TTSLLMParams( + model="gemini-2.5-pro", voice="en-US-Journey-D", language="en-US" ) result, warnings = map_kaapi_to_google_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="tts" ) assert result["model"] == "gemini-2.5-pro" - assert result["reasoning"] == "medium" - assert "knowledge_base_ids" not in result + assert result["voice"] == "en-US-Journey-D" + assert result["language"] == "en-US" + assert warnings == [] + + def test_unsupported_completion_type(self): + """Test that unsupported completion types return error.""" + kaapi_params = {"model": "gemini-2.5-pro"} + + result, warnings = map_kaapi_to_google_params( + kaapi_params, completion_type="invalid" + ) + + assert result == {} assert len(warnings) == 1 - assert "knowledge_base_ids" in warnings[0].lower() + assert "Unsupported completion type" in warnings[0] class TestMapKaapiToSarvamParams: - """Test cases for map_kaapi_to_sarvam_params function.""" + """Test cases for map_kaapi_to_sarvam_params function with real-world scenarios.""" - def test_stt_basic_mapping(self): - """Test basic STT parameter mapping.""" - 
kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", - ) + # STT Tests + def test_stt_basic_with_saarika_model(self): + """Test STT with saarika model (mode should NOT be set).""" + kaapi_params = STTLLMParams(model="saarika:v2.5", input_language="hi-IN") result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) - assert result["model"] == "saarika:v1" + assert result["model"] == "saarika:v2.5" assert result["language_code"] == "hi-IN" - assert result["mode"] == "transcribe" + # mode should NOT be set for saarika models + assert "mode" not in result assert warnings == [] - def test_stt_auto_language_detection(self): - """Test STT with auto language detection.""" - kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="auto", - ) + def test_stt_with_saaras_model_transcribe_mode(self): + """Test STT with saaras:v3 model (mode SHOULD be set).""" + kaapi_params = STTLLMParams(model="saaras:v3", input_language="hi-IN") result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) - assert result["model"] == "saarika:v1" - assert result["language_code"] == "unknown" + assert result["model"] == "saaras:v3" + assert result["language_code"] == "hi-IN" + # mode should be set for saaras:v3 assert result["mode"] == "transcribe" assert warnings == [] - def test_stt_translate_mode(self): - """Test STT with translation to English.""" + def test_stt_with_saaras_model_translate_mode(self): + """Test STT with saaras:v3 model in translate mode.""" kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", - output_language="en-IN", + model="saaras:v3", input_language="hi-IN", output_language="en-IN" ) result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + 
kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) - assert result["model"] == "saarika:v1" + assert result["model"] == "saaras:v3" assert result["language_code"] == "hi-IN" assert result["mode"] == "translate" assert warnings == [] - def test_stt_same_input_output_language(self): - """Test STT when input and output languages are the same.""" - kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", - output_language="hi-IN", - ) + def test_stt_auto_language_detection(self): + """Test STT with auto language detection.""" + kaapi_params = STTLLMParams(model="saarika:v2.5", input_language="auto") result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) - assert result["mode"] == "transcribe" + assert result["model"] == "saarika:v2.5" + assert result["language_code"] == "unknown" assert warnings == [] - def test_stt_unsupported_instructions_warning(self): - """Test that instructions parameter generates warning for STT.""" - kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", - instructions="Please transcribe accurately", - ) + def test_stt_missing_input_language_defaults_to_unknown(self): + """Test STT without input_language defaults to 'unknown' for auto-detection.""" + kaapi_params = {"model": "saarika:v2.5"} result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params, completion_type="stt" ) - assert result["model"] == "saarika:v1" - assert "instructions" not in result - assert len(warnings) == 1 - assert "instructions" in warnings[0].lower() - assert "not supported" in warnings[0] + assert result["model"] == "saarika:v2.5" + # Should default to unknown for auto-detection + assert result["language_code"] == "unknown" + assert warnings == [] - def test_stt_unsupported_temperature_warning(self): - """Test that temperature parameter generates warning for 
STT.""" + def test_stt_unsupported_params_generate_warnings(self): + """Test that unsupported STT parameters generate warnings.""" kaapi_params = STTLLMParams( - model="saarika:v1", + model="saarika:v2.5", input_language="hi-IN", + instructions="Transcribe carefully", temperature=0.5, + response_format="text", ) result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) - assert result["model"] == "saarika:v1" + assert result["model"] == "saarika:v2.5" + assert "instructions" not in result assert "temperature" not in result - assert len(warnings) == 1 - assert "temperature" in warnings[0].lower() - assert "not supported" in warnings[0] + assert "response_format" not in result + assert len(warnings) == 3 + assert any("instructions" in w.lower() for w in warnings) + assert any("temperature" in w.lower() for w in warnings) + assert any("response_format" in w.lower() for w in warnings) - def test_stt_unsupported_response_format_warning(self): - """Test that response_format parameter generates warning for STT.""" - kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", - response_format="text", - ) + # TTS Tests + def test_tts_basic_with_all_required_params(self): + """Test TTS with all required parameters.""" + kaapi_params = TTSLLMParams(model="bulbul:v3", voice="Shubh", language="hi-IN") result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="tts" ) - assert result["model"] == "saarika:v1" - assert "response_format" not in result - assert len(warnings) == 1 - assert "response_format" in warnings[0].lower() + assert result["model"] == "bulbul:v3" + assert result["speaker"] == "Shubh" + assert result["target_language_code"] == "hi-IN" + assert warnings == [] - def test_stt_multiple_unsupported_params(self): - """Test STT with multiple unsupported 
parameters.""" - kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", - instructions="Transcribe", - temperature=0.5, - response_format="text", - ) + def test_tts_missing_language_returns_error(self): + """Test that missing language parameter returns error.""" + kaapi_params = {"model": "bulbul:v3", "voice": "Shubh"} result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params, completion_type="tts" ) - assert result["model"] == "saarika:v1" - assert "instructions" not in result - assert "temperature" not in result - assert "response_format" not in result - assert len(warnings) == 3 + assert result == {} + assert len(warnings) == 1 + assert "language" in warnings[0].lower() - def test_tts_basic_mapping(self): - """Test basic TTS parameter mapping.""" - kaapi_params = TTSLLMParams( - model="bulbul:v1", - voice="meera", - language="hi-IN", - ) + def test_tts_optional_voice_parameter(self): + """Test TTS without voice parameter (should use API default).""" + kaapi_params = {"model": "bulbul:v3", "language": "hi-IN"} result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params, completion_type="tts" ) - assert result["model"] == "bulbul:v1" - assert result["speaker"] == "meera" + assert result["model"] == "bulbul:v3" assert result["target_language_code"] == "hi-IN" + # speaker should not be set if not provided (API will use default) + assert "speaker" not in result assert warnings == [] - def test_tts_with_audio_format(self): - """Test TTS with custom audio format.""" + def test_tts_audio_format_mp3(self): + """Test TTS with MP3 audio format.""" kaapi_params = TTSLLMParams( - model="bulbul:v1", - voice="meera", - language="hi-IN", - response_format="mp3", + model="bulbul:v3", voice="Anushka", language="hi-IN", response_format="mp3" ) result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + 
kaapi_params.model_dump(exclude_none=True), completion_type="tts" ) - assert result["model"] == "bulbul:v1" - assert result["speaker"] == "meera" - assert result["target_language_code"] == "hi-IN" assert result["output_audio_codec"] == "mp3" assert warnings == [] - def test_tts_default_wav_format(self): - """Test TTS with default WAV format.""" + def test_tts_audio_format_ogg_maps_to_opus(self): + """Test TTS with OGG format maps to OPUS (closest supported).""" kaapi_params = TTSLLMParams( - model="bulbul:v1", - voice="arvind", - language="en-IN", - response_format="wav", + model="bulbul:v3", voice="Shubh", language="hi-IN", response_format="ogg" ) result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="tts" ) - assert result["output_audio_codec"] == "wav" + # OGG should map to OPUS (closest match) + assert result["output_audio_codec"] == "opus" assert warnings == [] - def test_tts_ogg_format(self): - """Test TTS with OGG format.""" + def test_tts_audio_format_wav(self): + """Test TTS with WAV audio format.""" kaapi_params = TTSLLMParams( - model="bulbul:v1", - voice="meera", - language="hi-IN", - response_format="ogg", + model="bulbul:v3", voice="Shubh", language="hi-IN", response_format="wav" ) result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + kaapi_params.model_dump(exclude_none=True), completion_type="tts" ) - assert result["output_audio_codec"] == "ogg" + assert result["output_audio_codec"] == "wav" assert warnings == [] - def test_tts_missing_language(self): - """Test that missing language returns error for TTS.""" - kaapi_params = {"model": "bulbul:v1", "voice": "meera"} + # Error Cases + def test_missing_model_returns_error(self): + """Test that missing model parameter returns error.""" + kaapi_params = {"voice": "Shubh", "language": "hi-IN"} - result, warnings = map_kaapi_to_sarvam_params(kaapi_params) + result, 
warnings = map_kaapi_to_sarvam_params( + kaapi_params, completion_type="tts" + ) assert result == {} assert len(warnings) == 1 - assert "language" in warnings[0].lower() + assert "model" in warnings[0].lower() - def test_missing_model(self): - """Test that missing model returns error.""" - kaapi_params = {"voice": "meera", "language": "hi-IN"} + def test_unsupported_completion_type(self): + """Test that unsupported completion types return error.""" + kaapi_params = {"model": "saarika:v2.5"} - result, warnings = map_kaapi_to_sarvam_params(kaapi_params) + result, warnings = map_kaapi_to_sarvam_params( + kaapi_params, completion_type="invalid" + ) assert result == {} assert len(warnings) == 1 - assert "model" in warnings[0].lower() + assert "Unsupported completion type" in warnings[0] + - def test_stt_output_language_defaults_to_input(self): - """Test that output_language defaults to input_language when not provided.""" +class TestMapKaapiToElevenlabsParams: + """Test cases for map_kaapi_to_elevenlabs_params function.""" + + # STT Tests + def test_stt_basic_with_language(self): + """Test STT with language code.""" kaapi_params = STTLLMParams( - model="saarika:v1", - input_language="hi-IN", + model="scribe_v2", input_language="hi-IN", temperature=0.3 ) - result, warnings = map_kaapi_to_sarvam_params( - kaapi_params.model_dump(exclude_none=True) + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="stt" ) - assert result["mode"] == "transcribe" + assert result["model_id"] == "scribe_v2" + assert result["language_code"] == "hi" # BCP-47 conversion + assert result["temperature"] == 0.3 assert warnings == [] + def test_stt_auto_language_detection(self): + """Test STT with auto language detection.""" + kaapi_params = STTLLMParams(model="scribe_v2", input_language="auto") -class TestTransformKaapiConfigToNative: - """Test cases for transform_kaapi_config_to_native function.""" + result, warnings = 
map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="stt" + ) - def test_transform_openai_config(self): - """Test transformation of Kaapi OpenAI config to native format.""" - kaapi_config = KaapiCompletionConfig( - provider="openai", - type="text", - params={ - "model": "gpt-4", - "temperature": 0.7, - }, + assert result["model_id"] == "scribe_v2" + assert result["language_code"] is None + assert warnings == [] + + def test_stt_missing_language_omits_language_code(self): + """Test STT without language omits language_code for auto-detection.""" + kaapi_params = {"model": "scribe_v2"} + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params, completion_type="stt" + ) - result, warnings = transform_kaapi_config_to_native(kaapi_config) + assert result["model_id"] == "scribe_v2" + # No language_code should be set when not provided + assert "language_code" not in result + assert warnings == [] - assert isinstance(result, NativeCompletionConfig) - assert result.provider == "openai-native" - assert result.params["model"] == "gpt-4" - assert result.params["temperature"] == 0.7 + def test_stt_unsupported_language_generates_warning(self): + """Test STT with unsupported language generates warning.""" + kaapi_params = STTLLMParams(model="scribe_v2", input_language="fr-FR") + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="stt" + ) + + assert result["model_id"] == "scribe_v2" + assert len(warnings) == 1 + assert "Unsupported language" in warnings[0] + assert "auto-detect" in warnings[0] + + def test_stt_output_language_translation_warning(self): + """Test STT with different output language generates warning.""" + kaapi_params = STTLLMParams( + model="scribe_v2", input_language="hi-IN", output_language="en-IN" + ) + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="stt" + ) + + assert
result["model_id"] == "scribe_v2" + assert len(warnings) == 1 + assert "output_language" in warnings[0].lower() + assert "translation" in warnings[0].lower() + + def test_stt_unsupported_instructions_warning(self): + """Test STT with instructions generates warning.""" + kaapi_params = STTLLMParams( + model="scribe_v2", + input_language="hi-IN", + instructions="Transcribe accurately", + ) + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="stt" + ) + + assert result["model_id"] == "scribe_v2" + assert "instructions" not in result + assert len(warnings) == 1 + assert "instructions" in warnings[0].lower() + + # TTS Tests + def test_tts_basic_with_voice_and_language(self): + """Test TTS with voice and language.""" + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", voice="Sarah", language="en-IN" + ) + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) + + assert result["model_id"] == "eleven_turbo_v2" + assert result["voice_id"] == "EXAVITQu4vr4xnSDxMaL" # Sarah's ID + assert result["language_code"] == "en" assert warnings == [] - def test_transform_with_all_params(self): - """Test transformation with all Kaapi parameters.""" - kaapi_config = KaapiCompletionConfig( - provider="openai", - type="text", - params={ - "model": "gpt-4o", - "instructions": "System prompt here", - "temperature": 0.5, - "knowledge_base_ids": ["vs_abc"], - "max_num_results": 25, - }, + def test_tts_missing_voice_returns_error(self): + """Test that missing voice parameter returns error.""" + kaapi_params = {"model": "eleven_turbo_v2", "language": "en-IN"} + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params, completion_type="tts" ) - result, warnings = transform_kaapi_config_to_native(kaapi_config) + assert result == {} + assert len(warnings) == 1 + assert "voice" in warnings[0].lower() - assert result.provider == "openai-native" - 
assert result.params["model"] == "gpt-4o" - assert result.params["instructions"] == "System prompt here" - assert result.params["temperature"] == 0.5 - assert result.params["tools"][0]["type"] == "file_search" - assert result.params["tools"][0]["max_num_results"] == 25 + def test_tts_unsupported_voice_returns_error(self): + """Test that unsupported voice returns error.""" + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", voice="InvalidVoice", language="en-IN" + ) + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) + + assert result == {} + assert len(warnings) == 1 + assert "Unsupported voice" in warnings[0] + + def test_tts_optional_language_parameter(self): + """Test TTS without language (should be optional).""" + kaapi_params = {"model": "eleven_turbo_v2", "voice": "Sarah"} + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params, completion_type="tts" + ) + + assert result["model_id"] == "eleven_turbo_v2" + assert result["voice_id"] == "EXAVITQu4vr4xnSDxMaL" + # language_code should not be set if language not provided + assert "language_code" not in result assert warnings == [] - def test_transform_with_reasoning(self): - """Test transformation with reasoning parameter for reasoning-capable models.""" - kaapi_config = KaapiCompletionConfig( - provider="openai", - type="text", - params={ - "model": "o1", - "reasoning": "medium", - }, + def test_tts_unsupported_language_generates_warning(self): + """Test TTS with unsupported language generates warning.""" + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", voice="Sarah", language="fr-FR" ) - result, warnings = transform_kaapi_config_to_native(kaapi_config) + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) - assert result.provider == "openai-native" - assert result.params["model"] == "o1" - assert result.params["reasoning"] == {"effort": 
"medium"} - # Temperature is suppressed for reasoning models (even default value) - assert "temperature" not in result.params + assert result["model_id"] == "eleven_turbo_v2" + assert result["voice_id"] == "EXAVITQu4vr4xnSDxMaL" + assert "language_code" not in result assert len(warnings) == 1 - assert "temperature" in warnings[0].lower() + assert "Unsupported language" in warnings[0] - def test_transform_with_both_temperature_and_reasoning(self): - """Test that transformation handles temperature + reasoning intelligently for reasoning models.""" - kaapi_config = KaapiCompletionConfig( - provider="openai", - type="text", - params={ - "model": "o1", - "temperature": 0.7, - "reasoning": "high", - }, + def test_tts_audio_format_mp3(self): + """Test TTS with MP3 format.""" + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", + voice="George", + language="en-IN", + response_format="mp3", ) - result, warnings = transform_kaapi_config_to_native(kaapi_config) + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) + + assert result["output_format"] == "mp3_44100_128" + assert warnings == [] + + def test_tts_audio_format_wav(self): + """Test TTS with WAV format.""" + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", + voice="Callum", + language="en-IN", + response_format="wav", + ) + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) + + assert result["output_format"] == "wav_24000" + assert warnings == [] + + def test_tts_audio_format_ogg_maps_to_opus(self): + """Test TTS with OGG format maps to OPUS.""" + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", + voice="Liam", + language="en-IN", + response_format="ogg", + ) - assert result.provider == "openai-native" - assert result.params["model"] == "o1" - assert result.params["reasoning"] == {"effort": "high"} - assert "temperature" not in result.params + result, 
warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) + + # OGG maps to OPUS for ElevenLabs + assert result["output_format"] == "opus_48000_128" + assert warnings == [] + + def test_tts_all_supported_voices(self): + """Test TTS with all supported voices map correctly.""" + voices = { + "Sarah": "EXAVITQu4vr4xnSDxMaL", + "George": "JBFqnCBsd6RMkjVDRZzb", + "Callum": "N2lVS1w4EtoT3dr4eOWO", + "Liam": "TX3LPaxmHKxFdv7VOQHJ", + } + + for voice_name, expected_id in voices.items(): + kaapi_params = TTSLLMParams( + model="eleven_turbo_v2", voice=voice_name, language="en-IN" + ) + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params.model_dump(exclude_none=True), completion_type="tts" + ) + + assert result["voice_id"] == expected_id + assert warnings == [] + + # Error Cases + def test_missing_model_returns_error(self): + """Test that missing model returns error.""" + kaapi_params = {"voice": "Sarah", "language": "en-IN"} + + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params, completion_type="tts" + ) + + assert result == {} assert len(warnings) == 1 - assert "temperature" in warnings[0].lower() - assert "suppressed" in warnings[0] + assert "model" in warnings[0].lower() + + def test_unsupported_completion_type(self): + """Test that unsupported completion types return error.""" + kaapi_params = {"model": "eleven_turbo_v2"} - def test_unsupported_provider_raises_error(self): - """Test that unsupported providers raise ValueError.""" - # Note: This would require modifying KaapiCompletionConfig to accept other providers - # For now, this tests the error handling in the mapper - # We'll create a mock config that bypasses validation - from unittest.mock import MagicMock + result, warnings = map_kaapi_to_elevenlabs_params( + kaapi_params, completion_type="invalid" + ) - mock_config = MagicMock() - mock_config.provider = "unsupported-provider" - mock_config.params = {"model": "some-model"} + 
assert result == {} + assert len(warnings) == 1 + assert "Unsupported completion type" in warnings[0] + + +class TestBCP47ToElevenlabsLang: + """Test BCP-47 language code conversion for ElevenLabs.""" + + def test_supported_indian_languages(self): + """Test conversion of supported Indian languages.""" + test_cases = { + "en-IN": "en", + "hi-IN": "hi", + "bn-IN": "bn", + "ta-IN": "ta", + "te-IN": "te", + "mr-IN": "mr", + "gu-IN": "gu", + "kn-IN": "kn", + "ml-IN": "ml", + "pa-IN": "pa", + } + + for bcp47, expected in test_cases.items(): + result = bcp47_to_elevenlabs_lang(bcp47) + assert result == expected + + def test_unsupported_language_returns_none(self): + """Test that unsupported languages return None.""" + assert bcp47_to_elevenlabs_lang("fr-FR") is None + assert bcp47_to_elevenlabs_lang("de-DE") is None + assert bcp47_to_elevenlabs_lang("invalid") is None + + +class TestVoiceToId: + """Test voice name to ID conversion for ElevenLabs.""" + + def test_supported_voices(self): + """Test conversion of supported voice names.""" + test_cases = { + "Sarah": "EXAVITQu4vr4xnSDxMaL", + "George": "JBFqnCBsd6RMkjVDRZzb", + "Callum": "N2lVS1w4EtoT3dr4eOWO", + "Liam": "TX3LPaxmHKxFdv7VOQHJ", + } + + for voice_name, expected_id in test_cases.items(): + result = voice_to_id(voice_name) + assert result == expected_id + + def test_unsupported_voice_returns_none(self): + """Test that unsupported voices return None.""" + assert voice_to_id("InvalidVoice") is None + assert voice_to_id("UnknownSpeaker") is None - with pytest.raises(ValueError) as exc_info: - transform_kaapi_config_to_native(mock_config) - assert "Unsupported provider" in str(exc_info.value) +class TestTransformKaapiConfigToNative: + """Test end-to-end transformation with completion_type parameter.""" - def test_transform_preserves_param_structure(self): - """Test that transformation correctly structures nested parameters.""" + def test_transform_elevenlabs_tts_config(self): + """Test transformation of ElevenLabs 
TTS config.""" kaapi_config = KaapiCompletionConfig( - provider="openai", - type="text", + provider="elevenlabs", + type="tts", params={ - "model": "gpt-4", - "knowledge_base_ids": ["vs_1", "vs_2", "vs_3"], - "max_num_results": 15, + "model": "eleven_turbo_v2", + "voice": "Sarah", + "language": "en-IN", + "response_format": "mp3", }, ) result, warnings = transform_kaapi_config_to_native(kaapi_config) - # Verify the nested structure is correct - assert isinstance(result.params["tools"], list) - assert isinstance(result.params["tools"][0], dict) - assert isinstance(result.params["tools"][0]["vector_store_ids"], list) - assert len(result.params["tools"][0]["vector_store_ids"]) == 3 + assert isinstance(result, NativeCompletionConfig) + assert result.provider == "elevenlabs-native" + assert result.type == "tts" + assert result.params["model_id"] == "eleven_turbo_v2" + assert result.params["voice_id"] == "EXAVITQu4vr4xnSDxMaL" + assert result.params["language_code"] == "en" + assert result.params["output_format"] == "mp3_44100_128" assert warnings == [] - def test_transform_google_config(self): - """Test transformation of Kaapi Google AI config to native format.""" + def test_transform_elevenlabs_stt_config(self): + """Test transformation of ElevenLabs STT config.""" kaapi_config = KaapiCompletionConfig( - provider="google", + provider="elevenlabs", type="stt", params={ - "model": "gemini-2.5-pro", - "instructions": "Transcribe accurately", - "temperature": 0.2, + "model": "scribe_v2", + "input_language": "hi-IN", + "temperature": 0.3, }, ) result, warnings = transform_kaapi_config_to_native(kaapi_config) assert isinstance(result, NativeCompletionConfig) - assert result.provider == "google-native" - assert result.params["model"] == "gemini-2.5-pro" - assert result.params["instructions"] == "Transcribe accurately" - assert result.params["temperature"] == 0.2 + assert result.provider == "elevenlabs-native" + assert result.type == "stt" + assert result.params["model_id"] == 
"scribe_v2" + assert result.params["language_code"] == "hi" + assert result.params["temperature"] == 0.3 assert warnings == [] - def test_transform_google_with_unsupported_params(self): + def test_transform_sarvamai_stt_with_saaras_model(self): + """Test transformation of SarvamAI STT with saaras:v3 model.""" kaapi_config = KaapiCompletionConfig( - provider="google", - type="text", + provider="sarvamai", + type="stt", params={ - "model": "gemini-2.5-pro", - "knowledge_base_ids": ["vs_123"], - "reasoning": "high", + "model": "saaras:v3", + "input_language": "hi-IN", + "output_language": "en-IN", }, ) result, warnings = transform_kaapi_config_to_native(kaapi_config) - assert result.provider == "google-native" - assert result.params["model"] == "gemini-2.5-pro" - assert result.params["reasoning"] == "high" - assert "knowledge_base_ids" not in result.params - assert len(warnings) == 1 + assert isinstance(result, NativeCompletionConfig) + assert result.provider == "sarvamai-native" + assert result.type == "stt" + assert result.params["model"] == "saaras:v3" + assert result.params["language_code"] == "hi-IN" + # mode should be set for saaras:v3 + assert result.params["mode"] == "translate" + assert warnings == [] - def test_transform_sarvamai_stt_config(self): - """Test transformation of Kaapi SarvamAI STT config to native format.""" + def test_transform_sarvamai_stt_with_saarika_model(self): + """Test transformation of SarvamAI STT with saarika model (no mode).""" kaapi_config = KaapiCompletionConfig( provider="sarvamai", type="stt", - params={ - "model": "saarika:v1", - "input_language": "hi-IN", - }, + params={"model": "saarika:v2.5", "input_language": "hi-IN"}, ) result, warnings = transform_kaapi_config_to_native(kaapi_config) @@ -756,22 +796,18 @@ def test_transform_sarvamai_stt_config(self): assert isinstance(result, NativeCompletionConfig) assert result.provider == "sarvamai-native" assert result.type == "stt" - assert result.params["model"] == "saarika:v1" + 
assert result.params["model"] == "saarika:v2.5" assert result.params["language_code"] == "hi-IN" - assert result.params["mode"] == "transcribe" + # mode should NOT be set for saarika models + assert "mode" not in result.params assert warnings == [] - def test_transform_sarvamai_tts_config(self): - """Test transformation of Kaapi SarvamAI TTS config to native format.""" + def test_transform_sarvamai_tts_with_optional_voice(self): + """Test transformation of SarvamAI TTS without voice (using API default).""" kaapi_config = KaapiCompletionConfig( provider="sarvamai", type="tts", - params={ - "model": "bulbul:v1", - "voice": "meera", - "language": "hi-IN", - "response_format": "mp3", - }, + params={"model": "bulbul:v3", "language": "hi-IN"}, ) result, warnings = transform_kaapi_config_to_native(kaapi_config) @@ -779,31 +815,69 @@ def test_transform_sarvamai_tts_config(self): assert isinstance(result, NativeCompletionConfig) assert result.provider == "sarvamai-native" assert result.type == "tts" - assert result.params["model"] == "bulbul:v1" - assert result.params["speaker"] == "meera" + assert result.params["model"] == "bulbul:v3" assert result.params["target_language_code"] == "hi-IN" - assert result.params["output_audio_codec"] == "mp3" + # speaker should not be set (will use API default) + assert "speaker" not in result.params assert warnings == [] - def test_transform_sarvamai_stt_with_unsupported_params(self): - """Test SarvamAI STT transformation with unsupported parameters.""" + def test_transform_google_text_completion(self): + """Test transformation of Google text completion.""" kaapi_config = KaapiCompletionConfig( - provider="sarvamai", + provider="google", + type="text", + params={ + "model": "gemini-2.5-pro", + "temperature": 0.7, + "reasoning": "high", + }, + ) + + result, warnings = transform_kaapi_config_to_native(kaapi_config) + + assert isinstance(result, NativeCompletionConfig) + assert result.provider == "google-native" + assert result.type == 
"text" + assert result.params["model"] == "gemini-2.5-pro" + assert result.params["temperature"] == 0.7 + assert result.params["reasoning"] == "high" + assert warnings == [] + + def test_transform_google_stt_completion(self): + """Test transformation of Google STT completion.""" + kaapi_config = KaapiCompletionConfig( + provider="google", type="stt", + params={"model": "gemini-2.5-pro", "instructions": "Transcribe accurately"}, + ) + + result, warnings = transform_kaapi_config_to_native(kaapi_config) + + assert isinstance(result, NativeCompletionConfig) + assert result.provider == "google-native" + assert result.type == "stt" + assert result.params["model"] == "gemini-2.5-pro" + assert result.params["instructions"] == "Transcribe accurately" + assert warnings == [] + + def test_transform_google_tts_completion(self): + """Test transformation of Google TTS completion.""" + kaapi_config = KaapiCompletionConfig( + provider="google", + type="tts", params={ - "model": "saarika:v1", - "input_language": "hi-IN", - "instructions": "Transcribe carefully", - "temperature": 0.5, + "model": "gemini-2.5-pro", + "voice": "en-US-Journey-D", + "language": "en-US", }, ) result, warnings = transform_kaapi_config_to_native(kaapi_config) - assert result.provider == "sarvamai-native" - assert result.params["model"] == "saarika:v1" - assert "instructions" not in result.params - assert "temperature" not in result.params - assert len(warnings) == 2 - assert any("instructions" in w.lower() for w in warnings) - assert any("temperature" in w.lower() for w in warnings) + assert isinstance(result, NativeCompletionConfig) + assert result.provider == "google-native" + assert result.type == "tts" + assert result.params["model"] == "gemini-2.5-pro" + assert result.params["voice"] == "en-US-Journey-D" + assert result.params["language"] == "en-US" + assert warnings == [] diff --git a/backend/app/utils.py b/backend/app/utils.py index 9a6659abe..d576468f6 100644 --- a/backend/app/utils.py +++ 
b/backend/app/utils.py @@ -417,7 +417,7 @@ def send_callback(callback_url: str, data: dict[str, Any]) -> bool: response.raise_for_status() - logger.info("[send_callback] Callback sent successfully") + logger.info(f"[send_callback] Callback sent successfully to {callback_url}") return True except requests.RequestException as e: diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 7e527f891..504caac43 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -42,6 +42,7 @@ dependencies = [ "jiwer>=3.1.0", "indic-nlp-library>=0.92", "whisper-normalizer>=0.1.12", + "elevenlabs>=2.38.1", ] [tool.uv] diff --git a/backend/uv.lock b/backend/uv.lock index 322eebfd4..f3586f64b 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12, <4.0" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", @@ -219,6 +219,7 @@ dependencies = [ { name = "bcrypt" }, { name = "boto3" }, { name = "celery" }, + { name = "elevenlabs" }, { name = "email-validator" }, { name = "emails" }, { name = "fastapi", extra = ["standard"] }, @@ -271,6 +272,7 @@ requires-dist = [ { name = "bcrypt", specifier = "==4.0.1" }, { name = "boto3", specifier = ">=1.37.20" }, { name = "celery", specifier = ">=5.3.0,<6.0.0" }, + { name = "elevenlabs", specifier = ">=2.38.1" }, { name = "email-validator", specifier = ">=2.1.0.post1,<3.0.0.0" }, { name = "emails", specifier = ">=0.6,<1.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.116.0" }, @@ -824,6 +826,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, ] +[[package]] +name = "elevenlabs" +version = "2.38.1" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "requests" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/e2/f160788e20adaa5b8c9d61e17381131ef8b91003c0c84c982442fae32c2b/elevenlabs-2.38.1.tar.gz", hash = "sha256:4dba9e4b09639d1c2fb703792f1d9696cf2e36f4ff8800744839690f1173c0b2", size = 523201, upload-time = "2026-03-06T10:09:15.079Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/20/9a22e8fafafcf0a25a87065bf078ed73c98934414694950c0920d21a1a48/elevenlabs-2.38.1-py3-none-any.whl", hash = "sha256:a726347a38ab1fbe6d74094f327d0baf6b5eb1cc06bab21318b12a9d9f0d7f24", size = 1412352, upload-time = "2026-03-06T10:09:13.113Z" }, +] + [[package]] name = "email-validator" version = "2.3.0" @@ -1201,7 +1220,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/c8/9d76a66421d1ae24340dfae7e79c313957f6e3195c144d2c73333b5bfe34/greenlet-3.3.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:7e806ca53acf6d15a888405880766ec84721aa4181261cd11a457dfe9a7a4975", size = 276443, upload-time = "2026-01-23T15:30:10.066Z" }, { url = "https://files.pythonhosted.org/packages/81/99/401ff34bb3c032d1f10477d199724f5e5f6fbfb59816ad1455c79c1eb8e7/greenlet-3.3.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d842c94b9155f1c9b3058036c24ffb8ff78b428414a19792b2380be9cecf4f36", size = 597359, upload-time = "2026-01-23T16:00:57.394Z" }, { url = "https://files.pythonhosted.org/packages/2b/bc/4dcc0871ed557792d304f50be0f7487a14e017952ec689effe2180a6ff35/greenlet-3.3.1-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:20fedaadd422fa02695f82093f9a98bad3dab5fcda793c658b945fcde2ab27ba", size = 607805, upload-time = "2026-01-23T16:05:28.068Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/cd/7a7ca57588dac3389e97f7c9521cb6641fd8b6602faf1eaa4188384757df/greenlet-3.3.1-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c620051669fd04ac6b60ebc70478210119c56e2d5d5df848baec4312e260e4ca", size = 622363, upload-time = "2026-01-23T16:15:54.754Z" }, { url = "https://files.pythonhosted.org/packages/cf/05/821587cf19e2ce1f2b24945d890b164401e5085f9d09cbd969b0c193cd20/greenlet-3.3.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14194f5f4305800ff329cbf02c5fcc88f01886cadd29941b807668a45f0d2336", size = 609947, upload-time = "2026-01-23T15:32:51.004Z" }, { url = "https://files.pythonhosted.org/packages/a4/52/ee8c46ed9f8babaa93a19e577f26e3d28a519feac6350ed6f25f1afee7e9/greenlet-3.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7b2fe4150a0cf59f847a67db8c155ac36aed89080a6a639e9f16df5d6c6096f1", size = 1567487, upload-time = "2026-01-23T16:04:22.125Z" }, { url = "https://files.pythonhosted.org/packages/8f/7c/456a74f07029597626f3a6db71b273a3632aecb9afafeeca452cfa633197/greenlet-3.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:49f4ad195d45f4a66a0eb9c1ba4832bb380570d361912fa3554746830d332149", size = 1636087, upload-time = "2026-01-23T15:33:47.486Z" }, @@ -1210,7 +1228,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/ab/d26750f2b7242c2b90ea2ad71de70cfcd73a948a49513188a0fc0d6fc15a/greenlet-3.3.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:7ab327905cabb0622adca5971e488064e35115430cec2c35a50fd36e72a315b3", size = 275205, upload-time = "2026-01-23T15:30:24.556Z" }, { url = "https://files.pythonhosted.org/packages/10/d3/be7d19e8fad7c5a78eeefb2d896a08cd4643e1e90c605c4be3b46264998f/greenlet-3.3.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:65be2f026ca6a176f88fb935ee23c18333ccea97048076aef4db1ef5bc0713ac", size = 599284, upload-time = "2026-01-23T16:00:58.584Z" }, { url = 
"https://files.pythonhosted.org/packages/ae/21/fe703aaa056fdb0f17e5afd4b5c80195bbdab701208918938bd15b00d39b/greenlet-3.3.1-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7a3ae05b3d225b4155bda56b072ceb09d05e974bc74be6c3fc15463cf69f33fd", size = 610274, upload-time = "2026-01-23T16:05:29.312Z" }, - { url = "https://files.pythonhosted.org/packages/06/00/95df0b6a935103c0452dad2203f5be8377e551b8466a29650c4c5a5af6cc/greenlet-3.3.1-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:12184c61e5d64268a160226fb4818af4df02cfead8379d7f8b99a56c3a54ff3e", size = 624375, upload-time = "2026-01-23T16:15:55.915Z" }, { url = "https://files.pythonhosted.org/packages/cb/86/5c6ab23bb3c28c21ed6bebad006515cfe08b04613eb105ca0041fecca852/greenlet-3.3.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6423481193bbbe871313de5fd06a082f2649e7ce6e08015d2a76c1e9186ca5b3", size = 612904, upload-time = "2026-01-23T15:32:52.317Z" }, { url = "https://files.pythonhosted.org/packages/c2/f3/7949994264e22639e40718c2daf6f6df5169bf48fb038c008a489ec53a50/greenlet-3.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:33a956fe78bbbda82bfc95e128d61129b32d66bcf0a20a1f0c08aa4839ffa951", size = 1567316, upload-time = "2026-01-23T16:04:23.316Z" }, { url = "https://files.pythonhosted.org/packages/8d/6e/d73c94d13b6465e9f7cd6231c68abde838bb22408596c05d9059830b7872/greenlet-3.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b065d3284be43728dd280f6f9a13990b56470b81be20375a207cdc814a983f2", size = 1636549, upload-time = "2026-01-23T15:33:48.643Z" }, @@ -1219,7 +1236,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/fb/011c7c717213182caf78084a9bea51c8590b0afda98001f69d9f853a495b/greenlet-3.3.1-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:bd59acd8529b372775cd0fcbc5f420ae20681c5b045ce25bd453ed8455ab99b5", size = 275737, upload-time = "2026-01-23T15:32:16.889Z" }, { url = 
"https://files.pythonhosted.org/packages/41/2e/a3a417d620363fdbb08a48b1dd582956a46a61bf8fd27ee8164f9dfe87c2/greenlet-3.3.1-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b31c05dd84ef6871dd47120386aed35323c944d86c3d91a17c4b8d23df62f15b", size = 646422, upload-time = "2026-01-23T16:01:00.354Z" }, { url = "https://files.pythonhosted.org/packages/b4/09/c6c4a0db47defafd2d6bab8ddfe47ad19963b4e30f5bed84d75328059f8c/greenlet-3.3.1-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:02925a0bfffc41e542c70aa14c7eda3593e4d7e274bfcccca1827e6c0875902e", size = 658219, upload-time = "2026-01-23T16:05:30.956Z" }, - { url = "https://files.pythonhosted.org/packages/e2/89/b95f2ddcc5f3c2bc09c8ee8d77be312df7f9e7175703ab780f2014a0e781/greenlet-3.3.1-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3e0f3878ca3a3ff63ab4ea478585942b53df66ddde327b59ecb191b19dbbd62d", size = 671455, upload-time = "2026-01-23T16:15:57.232Z" }, { url = "https://files.pythonhosted.org/packages/80/38/9d42d60dffb04b45f03dbab9430898352dba277758640751dc5cc316c521/greenlet-3.3.1-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34a729e2e4e4ffe9ae2408d5ecaf12f944853f40ad724929b7585bca808a9d6f", size = 660237, upload-time = "2026-01-23T15:32:53.967Z" }, { url = "https://files.pythonhosted.org/packages/96/61/373c30b7197f9e756e4c81ae90a8d55dc3598c17673f91f4d31c3c689c3f/greenlet-3.3.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:aec9ab04e82918e623415947921dea15851b152b822661cce3f8e4393c3df683", size = 1615261, upload-time = "2026-01-23T16:04:25.066Z" }, { url = "https://files.pythonhosted.org/packages/fd/d3/ca534310343f5945316f9451e953dcd89b36fe7a19de652a1dc5a0eeef3f/greenlet-3.3.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:71c767cf281a80d02b6c1bdc41c9468e1f5a494fb11bc8688c360524e273d7b1", size = 1683719, upload-time = "2026-01-23T15:33:50.61Z" }, @@ -1228,7 +1244,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/28/24/cbbec49bacdcc9ec652a81d3efef7b59f326697e7edf6ed775a5e08e54c2/greenlet-3.3.1-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:3e63252943c921b90abb035ebe9de832c436401d9c45f262d80e2d06cc659242", size = 282706, upload-time = "2026-01-23T15:33:05.525Z" }, { url = "https://files.pythonhosted.org/packages/86/2e/4f2b9323c144c4fe8842a4e0d92121465485c3c2c5b9e9b30a52e80f523f/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:76e39058e68eb125de10c92524573924e827927df5d3891fbc97bd55764a8774", size = 651209, upload-time = "2026-01-23T16:01:01.517Z" }, { url = "https://files.pythonhosted.org/packages/d9/87/50ca60e515f5bb55a2fbc5f0c9b5b156de7d2fc51a0a69abc9d23914a237/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9f9d5e7a9310b7a2f416dd13d2e3fd8b42d803968ea580b7c0f322ccb389b97", size = 654300, upload-time = "2026-01-23T16:05:32.199Z" }, - { url = "https://files.pythonhosted.org/packages/7c/25/c51a63f3f463171e09cb586eb64db0861eb06667ab01a7968371a24c4f3b/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b9721549a95db96689458a1e0ae32412ca18776ed004463df3a9299c1b257ab", size = 662574, upload-time = "2026-01-23T16:15:58.364Z" }, { url = "https://files.pythonhosted.org/packages/1d/94/74310866dfa2b73dd08659a3d18762f83985ad3281901ba0ee9a815194fb/greenlet-3.3.1-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92497c78adf3ac703b57f1e3813c2d874f27f71a178f9ea5887855da413cd6d2", size = 653842, upload-time = "2026-01-23T15:32:55.671Z" }, { url = "https://files.pythonhosted.org/packages/97/43/8bf0ffa3d498eeee4c58c212a3905dd6146c01c8dc0b0a046481ca29b18c/greenlet-3.3.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ed6b402bc74d6557a705e197d47f9063733091ed6357b3de33619d8a8d93ac53", size = 1614917, upload-time = "2026-01-23T16:04:26.276Z" }, { url = 
"https://files.pythonhosted.org/packages/89/90/a3be7a5f378fc6e84abe4dcfb2ba32b07786861172e502388b4c90000d1b/greenlet-3.3.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:59913f1e5ada20fde795ba906916aea25d442abcc0593fba7e26c92b7ad76249", size = 1676092, upload-time = "2026-01-23T15:33:52.176Z" },