| 1 | import asyncio |
| 2 | import base64 |
| 3 | from sarvamai import AsyncSarvamAI, AudioOutput |
| 4 | import websockets |
| 5 | |
async def tts_stream() -> None:
    """Stream a fixed Hindi passage through Sarvam TTS into ``output.mp3``.

    Opens a streaming text-to-speech connection, configures the voice,
    sends the text, flushes the request buffer, and then appends every
    ``AudioOutput`` chunk received on the socket to ``output.mp3`` in the
    current working directory. Progress is reported with ``print``.
    """
    client = AsyncSarvamAI(api_subscription_key="YOUR_SARVAM_API_KEY")

    async with client.text_to_speech_streaming.connect(model="bulbul:v3") as ws:
        await ws.configure(
            target_language_code="hi-IN",
            speaker="shubh",
            max_chunk_length=200,
        )
        print("Sent configuration")

        # Adjacent string literals are joined verbatim into one message.
        # NOTE(review): no space follows the first sentence's danda, so the
        # first two sentences are sent back to back - confirm this is intended.
        text = (
            "भारत की संस्कृति विश्व की सबसे प्राचीन और समृद्ध संस्कृतियों में से एक है।"
            "यह विविधता, सहिष्णुता और परंपराओं का अद्भुत संगम है, "
            "जिसमें विभिन्न धर्म, भाषाएं, त्योहार, संगीत, नृत्य, वास्तुकला और जीवनशैली शामिल हैं।"
        )

        await ws.convert(text)
        print("Sent text message")

        await ws.flush()
        print("Flushed buffer")

        chunk_count = 0
        with open("output.mp3", "wb") as f:
            async for message in ws:
                # Anything that is not an audio payload is ignored.
                if isinstance(message, AudioOutput):
                    chunk_count += 1
                    # The audio field is base64 text; decode it to raw bytes.
                    audio_chunk = base64.b64decode(message.data.audio)
                    f.write(audio_chunk)
                    # Push each chunk out of Python's buffer as it arrives.
                    f.flush()

        print(f"All {chunk_count} chunks saved to output.mp3")
        print("Audio generation complete")

        # `_websocket` is a private attribute of the SDK socket client, and
        # the wrapped connection may not expose `closed`. Probe both
        # defensively so cleanup cannot raise AttributeError.
        raw_socket = getattr(ws, "_websocket", None)
        if raw_socket is not None and not getattr(raw_socket, "closed", False):
            await raw_socket.close()
            print("WebSocket connection closed.")
| 45 | |
| 46 | |
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo_coroutine = tts_stream()
    asyncio.run(demo_coroutine)
| 49 | |
# --- Notebook/Colab usage ---
# A notebook already runs its own event loop, so instead of asyncio.run()
# await the coroutine directly in a cell:
# await tts_stream()