# Source code for promapp.ai_utils.sarvamai_tts

'''
Text-to-Speech utility using Sarvam AI

This module provides the utility function for converting text to speech
using the Sarvam AI SDK.
'''

import os
import base64


# Every listed language currently maps to the same voice; the table is kept so
# that a per-language voice can be changed in one place.
_DEFAULT_SPEAKER = 'ritu'
_SPEAKER_BY_LANGUAGE = {
    'en-IN': 'ritu',
    'hi-IN': 'ritu',
    'ta-IN': 'ritu',
    'te-IN': 'ritu',
    'kn-IN': 'ritu',
    'ml-IN': 'ritu',
    'mr-IN': 'ritu',
    'gu-IN': 'ritu',
    'bn-IN': 'ritu',
    'pa-IN': 'ritu',
}


def sarvam_tts(text, language_code='en-IN', config=None):
    """
    Convert text to speech using Sarvam AI SDK.

    Args:
        text (str): The text to convert to speech
        language_code (str): Language code (e.g., 'en-IN', 'hi-IN', 'ta-IN')
        config (AIAPIConfiguration, optional): The configuration object
            containing API details. Only its
            ``api_key_environment_variable_name`` attribute is read here. When
            omitted, the key is taken from the ``SARVAM_API_KEY`` environment
            variable.

    Returns:
        dict: Dictionary containing:
            - 'audio_data': bytes - Base64-decoded audio from the first entry
              of the response's ``audios`` list
            - 'request_id': str - Request ID from Sarvam API (``None`` if the
              response has no such attribute)
            - 'format': str - Audio format (always 'wav')

    Raises:
        ValueError: If text is empty/whitespace-only, or the API key cannot be
            resolved from the configuration or environment
        ImportError: If the Sarvam AI SDK is not installed
        Exception: If the API call fails, the response carries no audio, or
            the audio cannot be decoded. The original error is attached as
            ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    # Import lazily so the module can be imported without the SDK installed.
    try:
        from sarvamai import SarvamAI
    except ImportError as err:
        raise ImportError(
            "Sarvam AI SDK not installed. Please install it with: pip install sarvamai"
        ) from err

    # Get API key from configuration or environment
    if config:
        if not config.api_key_environment_variable_name:
            raise ValueError("API key environment variable name is required")
        api_key = os.environ.get(config.api_key_environment_variable_name)
        if not api_key:
            raise ValueError(
                f"API key not found in environment variable: {config.api_key_environment_variable_name}"
            )
    else:
        # Fallback to environment variables
        api_key = os.environ.get('SARVAM_API_KEY')
        if not api_key:
            raise ValueError("SARVAM_API_KEY environment variable not set")

    client = SarvamAI(api_subscription_key=api_key)
    speaker = _SPEAKER_BY_LANGUAGE.get(language_code, _DEFAULT_SPEAKER)

    try:
        response = client.text_to_speech.convert(
            text=text,
            target_language_code=language_code,
            speaker=speaker,
            pace=0.9,
            enable_preprocessing=True,
            model="bulbul:v3"
        )

        # The SDK response contains base64 encoded audio in 'audios' field
        if hasattr(response, 'audios') and response.audios:
            # Only the first audio in the list is used.
            audio_bytes = base64.b64decode(response.audios[0])
            return {
                'audio_data': audio_bytes,
                'request_id': getattr(response, 'request_id', None),
                'format': 'wav'
            }
        else:
            # Raised inside the try on purpose: callers have always received
            # this wrapped in the generic "Sarvam AI TTS failed" Exception.
            raise ValueError("No audio data in response from Sarvam AI")
    except Exception as e:
        # Chain explicitly so the underlying SDK/decoding error stays
        # reachable through __cause__.
        raise Exception(f"Sarvam AI TTS failed: {str(e)}") from e