> For clean Markdown of any page, append `.md` to the page URL.
> For a complete documentation index, see https://docs.sarvam.ai/llms.txt.
> For full documentation content in one file, see https://docs.sarvam.ai/llms-full.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.sarvam.ai/_mcp/server.

# REST Stream

POST https://api.sarvam.ai/text-to-speech/stream
Content-Type: application/json

Converts the input text into a streamed spoken audio response.

This endpoint supports streaming audio using the specified output codec (e.g., `audio/mpeg` for MP3). The response is returned as a binary audio stream, which can be played or saved directly by the client.

Supports the `dict_id` parameter to apply a [pronunciation dictionary](https://docs.sarvam.ai/api-reference-docs/pronunciation-dictionary/create) during synthesis.

Reference: https://docs.sarvam.ai/api-reference-docs/text-to-speech/convert-stream

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: ''
  version: 1.0.0
paths:
  /text-to-speech/stream:
    post:
      operationId: convert-stream
      summary: Text-to-Speech Stream Audio
      description: >-
        Converts the input text into a streamed spoken audio response.


        This endpoint supports streaming audio using the specified output codec
        (e.g., `audio/mpeg` for MP3). The response is returned as a binary audio
        stream, which can be played or saved directly by the client.


        Supports the `dict_id` parameter to apply a [pronunciation
        dictionary](https://docs.sarvam.ai/api-reference-docs/pronunciation-dictionary/create)
        during synthesis.
      tags:
        - subpackage_textToSpeech
      parameters:
        - name: api-subscription-key
          in: header
          required: true
          schema:
            type: string
      responses:
        '200':
          description: >-
            Success. Returns a streamed audio response in the requested format
            (e.g., `audio/mpeg` for MP3, `audio/wav` for WAV).
          content:
            application/octet-stream:
              schema:
                type: string
                format: binary
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Sarvam_Model_API_ErrorMessage'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Sarvam_Model_API_ErrorMessage'
        '422':
          description: Unprocessable Entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Sarvam_Model_API_ErrorMessage'
        '429':
          description: Quota Exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Sarvam_Model_API_ErrorMessage'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Sarvam_Model_API_ErrorMessage'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Sarvam_Model_API_TextToSpeechStreamRequest'
servers:
  - url: https://api.sarvam.ai
components:
  schemas:
    Sarvam_Model_API_TextToSpeechLanguage:
      type: string
      enum:
        - bn-IN
        - en-IN
        - gu-IN
        - hi-IN
        - kn-IN
        - ml-IN
        - mr-IN
        - od-IN
        - pa-IN
        - ta-IN
        - te-IN
      title: Sarvam_Model_API_TextToSpeechLanguage
    Sarvam_Model_API_TextToSpeechSpeaker:
      type: string
      enum:
        - anushka
        - abhilash
        - manisha
        - vidya
        - arya
        - karun
        - hitesh
        - aditya
        - ritu
        - priya
        - neha
        - rahul
        - pooja
        - rohan
        - simran
        - kavya
        - amit
        - dev
        - ishita
        - shreya
        - ratan
        - varun
        - manan
        - sumit
        - roopa
        - kabir
        - aayan
        - shubh
        - ashutosh
        - advait
        - anand
        - tanya
        - tarun
        - sunny
        - mani
        - gokul
        - vijay
        - shruti
        - suhani
        - mohit
        - kavitha
        - rehan
        - soham
        - rupali
      title: Sarvam_Model_API_TextToSpeechSpeaker
    Sarvam_Model_API_SpeechSampleRate:
      type: string
      enum:
        - '8000'
        - '16000'
        - '22050'
        - '24000'
        - '32000'
        - '44100'
        - '48000'
      title: Sarvam_Model_API_SpeechSampleRate
    Sarvam_Model_API_TextToSpeechModel:
      type: string
      enum:
        - bulbul:v2
        - bulbul:v3
      title: Sarvam_Model_API_TextToSpeechModel
    Sarvam_Model_API_SpeechStreamCodec:
      type: string
      enum:
        - mp3
        - linear16
        - mulaw
        - alaw
        - opus
        - flac
        - aac
        - wav
      title: Sarvam_Model_API_SpeechStreamCodec
    Sarvam_Model_API_SpeechStreamBitrate:
      type: string
      enum:
        - 32k
        - 64k
        - 96k
        - 128k
        - 192k
      title: Sarvam_Model_API_SpeechStreamBitrate
    Sarvam_Model_API_TextToSpeechStreamRequest:
      type: object
      properties:
        text:
          type: string
          description: >-
            The text to be converted into streamed speech.


            **Features:**

            - Max 3500 characters

            - Supports code-mixed text (English and Indic languages)


            **Important Note:**

            - For numbers larger than 4 digits, use commas (e.g., '10,000'
            instead of '10000')

            - This ensures proper pronunciation as a whole number
        target_language_code:
          $ref: '#/components/schemas/Sarvam_Model_API_TextToSpeechLanguage'
          description: The language code in BCP-47 format.
        speaker:
          oneOf:
            - $ref: '#/components/schemas/Sarvam_Model_API_TextToSpeechSpeaker'
            - type: 'null'
          description: >-
            The speaker voice to be used for the output audio.


            **Default:** shubh (for bulbul:v3), anushka (for bulbul:v2)


            **Note:** Speaker selection must match the chosen model version.


            **Important:** Speaker names are case-sensitive and must be
            lowercase (e.g., `ritu` not `Ritu`).
        pitch:
          type:
            - number
            - 'null'
          format: double
          description: >-
            Controls the pitch of the audio. Range: -0.75 to 0.75. Default is
            0.0.


            **Note:** Only supported for bulbul:v2.
        pace:
          type:
            - number
            - 'null'
          format: double
          default: 1
          description: |-
            Controls the speed of the audio. Default is 1.0.

            **Model-specific ranges:**
            - **bulbul:v3:** 0.5 to 2.0
            - **bulbul:v2:** 0.3 to 3.0
        loudness:
          type:
            - number
            - 'null'
          format: double
          description: >-
            Controls the loudness of the audio. Range: 0.3 to 3.0. Default is
            1.0.


            **Note:** Only supported for bulbul:v2.
        speech_sample_rate:
          oneOf:
            - $ref: '#/components/schemas/Sarvam_Model_API_SpeechSampleRate'
            - type: 'null'
          default: 22050
          description: >-
            Specifies the sample rate of the output audio. Default is 22050 Hz.


            **Note:** OPUS codec only supports 8000, 12000, 16000, 24000, 48000
            Hz.
        enable_preprocessing:
          type: boolean
          default: false
          description: >-
            Controls whether normalization of English words and numeric entities
            is performed. Default is false.
        model:
          $ref: '#/components/schemas/Sarvam_Model_API_TextToSpeechModel'
          description: >-
            Specifies the model to use for text-to-speech conversion. Default is
            bulbul:v2.
        temperature:
          type:
            - number
            - 'null'
          format: double
          default: 0.6
          description: >-
            Controls the randomness of the output. Range: 0.01 to 1.0. Default
            is 0.6.


            **Note:** Only supported for bulbul:v3.
        enable_cached_responses:
          type: boolean
          default: false
          description: Enable caching for the request. Default is false. Currently in beta.
        dict_id:
          type:
            - string
            - 'null'
          description: >-
            The ID of a pronunciation dictionary to apply during synthesis. When
            provided, matching words in the input text will be replaced with
            their custom pronunciations before generating speech.


            Create and manage dictionaries via the [Pronunciation Dictionary
            API](https://docs.sarvam.ai/api-reference-docs/pronunciation-dictionary/create).
            Only supported by **bulbul:v3**.
        output_audio_codec:
          $ref: '#/components/schemas/Sarvam_Model_API_SpeechStreamCodec'
          description: Specifies the codec for the streamed output audio (e.g., 'mp3').
        output_audio_bitrate:
          $ref: '#/components/schemas/Sarvam_Model_API_SpeechStreamBitrate'
          description: Bitrate for the streamed output audio. Default is '128k'.
      required:
        - text
      title: Sarvam_Model_API_TextToSpeechStreamRequest
    Sarvam_Model_API_ErrorCode:
      type: string
      enum:
        - invalid_request_error
        - internal_server_error
        - unprocessable_entity_error
        - insufficient_quota_error
        - invalid_api_key_error
        - authentication_error
        - not_found_error
        - rate_limit_exceeded_error
      title: Sarvam_Model_API_ErrorCode
    Sarvam_Model_API_ErrorDetails:
      type: object
      properties:
        request_id:
          type:
            - string
            - 'null'
        message:
          type: string
          description: Message describing the error
        code:
          $ref: '#/components/schemas/Sarvam_Model_API_ErrorCode'
          description: >-
            Error code for the specific error that has occurred. Refer to the
            error code documentation for more details.
      required:
        - request_id
        - message
        - code
      title: Sarvam_Model_API_ErrorDetails
    Sarvam_Model_API_ErrorMessage:
      type: object
      properties:
        error:
          $ref: '#/components/schemas/Sarvam_Model_API_ErrorDetails'
          description: Error details
      required:
        - error
      title: Sarvam_Model_API_ErrorMessage
  securitySchemes:
    ApiKeyAuth:
      type: apiKey
      in: header
      name: api-subscription-key

```

## SDK Code Examples

```typescript
import { SarvamAIClient } from "sarvamai";

/**
 * Issues a single streaming text-to-speech request through the Sarvam AI SDK.
 *
 * The client is authenticated with the subscription key passed to its
 * constructor; replace the placeholder with a real key before running.
 */
const main = async (): Promise<void> => {
    const sarvam = new SarvamAIClient({ apiSubscriptionKey: "YOUR_API_KEY_HERE" });

    // The request literal is passed inline so its string values keep their
    // literal types when checked against the SDK's request type.
    await sarvam.textToSpeech.convertStream({
        text: "Hello, welcome to Sarvam AI's text-to-speech service!",
        target_language_code: "en-IN",
        speaker: "shubh",
        pitch: 0,
        pace: 1.2,
        loudness: 1,
        speech_sample_rate: 22050,
        enable_preprocessing: false,
        model: "bulbul:v3",
        temperature: 0.6,
        enable_cached_responses: false,
        dict_id: "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
        output_audio_codec: "mp3",
        output_audio_bitrate: "128k",
    });
};

main();

```

```python
from sarvamai import SarvamAI

# Authenticated SDK client; substitute a real subscription key for the placeholder.
client = SarvamAI(api_subscription_key="YOUR_API_KEY_HERE")

# Keyword arguments for the streaming text-to-speech call, gathered in one
# mapping so every request option can be read in a single place.
tts_options = {
    "text": "Hello, welcome to Sarvam AI's text-to-speech service!",
    "target_language_code": "en-IN",
    "speaker": "shubh",
    "pitch": 0,
    "pace": 1.2,
    "loudness": 1,
    "speech_sample_rate": 22050,
    "enable_preprocessing": False,
    "model": "bulbul:v3",
    "temperature": 0.6,
    "enable_cached_responses": False,
    "dict_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
    "output_audio_codec": "mp3",
    "output_audio_bitrate": "128k",
}

# Unpacking the mapping produces the same keyword call as spelling each option out.
client.text_to_speech.convert_stream(**tts_options)

```

```go
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a JSON text-to-speech request to the streaming endpoint and
// reports the outcome.
//
// Every fallible step is checked: if the request cannot be built or sent, the
// error is printed and the program returns instead of dereferencing a nil
// response. A 200 response carries binary audio, so only its size is printed;
// any other status carries a JSON error document, which is printed as text.
func main() {

	const endpoint = "https://api.sarvam.ai/text-to-speech/stream"

	// Raw string literal: the bytes are exactly the JSON document shown,
	// including the two-space indentation inside the braces.
	payload := strings.NewReader(`{
  "text": "Hello, welcome to Sarvam AI's text-to-speech service!",
  "target_language_code": "en-IN",
  "speaker": "shubh",
  "pitch": 0,
  "pace": 1.2,
  "loudness": 1,
  "speech_sample_rate": 22050,
  "enable_preprocessing": false,
  "model": "bulbul:v3",
  "temperature": 0.6,
  "enable_cached_responses": false,
  "dict_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
  "output_audio_codec": "mp3",
  "output_audio_bitrate": "128k"
}`)

	req, err := http.NewRequest(http.MethodPost, endpoint, payload)
	if err != nil {
		fmt.Println("building request failed:", err)
		return
	}

	req.Header.Add("api-subscription-key", "<apiSubscriptionKey>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil here, so it must not be touched (no deferred Close yet).
		fmt.Println("sending request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response failed:", err)
		return
	}

	fmt.Println(res.Status)
	if res.StatusCode != http.StatusOK {
		// Non-200 responses are JSON error messages, safe to show as text.
		fmt.Println(string(body))
		return
	}

	// The success body is binary audio; printing it as a string would only
	// emit unreadable bytes, so report how much was received instead.
	fmt.Printf("received %d bytes of audio\n", len(body))

}
```

```ruby
require 'uri'
require 'net/http'

# Streaming text-to-speech endpoint.
endpoint = URI("https://api.sarvam.ai/text-to-speech/stream")

# JSON request body, sent exactly as written.
json_payload = "{\n  \"text\": \"Hello, welcome to Sarvam AI's text-to-speech service!\",\n  \"target_language_code\": \"en-IN\",\n  \"speaker\": \"shubh\",\n  \"pitch\": 0,\n  \"pace\": 1.2,\n  \"loudness\": 1,\n  \"speech_sample_rate\": 22050,\n  \"enable_preprocessing\": false,\n  \"model\": \"bulbul:v3\",\n  \"temperature\": 0.6,\n  \"enable_cached_responses\": false,\n  \"dict_id\": \"a1b2c3d4-e5f6-7890-abcd-ef1234567890\",\n  \"output_audio_codec\": \"mp3\",\n  \"output_audio_bitrate\": \"128k\"\n}"

# Build the POST first: subscription-key header, JSON content type, then body.
post = Net::HTTP::Post.new(endpoint)
post["api-subscription-key"] = '<apiSubscriptionKey>'
post["Content-Type"] = 'application/json'
post.body = json_payload

# The endpoint is https, so TLS is switched on for the connection.
connection = Net::HTTP.new(endpoint.host, endpoint.port)
connection.use_ssl = true

reply = connection.request(post)
puts reply.read_body
```

```java
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Streaming text-to-speech endpoint.
String endpoint = "https://api.sarvam.ai/text-to-speech/stream";

// JSON request body, split one field per line; the concatenation yields the
// same single string as writing it on one line.
String requestJson =
    "{\n"
  + "  \"text\": \"Hello, welcome to Sarvam AI's text-to-speech service!\",\n"
  + "  \"target_language_code\": \"en-IN\",\n"
  + "  \"speaker\": \"shubh\",\n"
  + "  \"pitch\": 0,\n"
  + "  \"pace\": 1.2,\n"
  + "  \"loudness\": 1,\n"
  + "  \"speech_sample_rate\": 22050,\n"
  + "  \"enable_preprocessing\": false,\n"
  + "  \"model\": \"bulbul:v3\",\n"
  + "  \"temperature\": 0.6,\n"
  + "  \"enable_cached_responses\": false,\n"
  + "  \"dict_id\": \"a1b2c3d4-e5f6-7890-abcd-ef1234567890\",\n"
  + "  \"output_audio_codec\": \"mp3\",\n"
  + "  \"output_audio_bitrate\": \"128k\"\n"
  + "}";

// POST the body with the subscription-key and JSON content-type headers.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("api-subscription-key", "<apiSubscriptionKey>")
  .header("Content-Type", "application/json")
  .body(requestJson)
  .asString();
```

```php
<?php
require_once('vendor/autoload.php');

// JSON request body. A nowdoc keeps the text literal (no escaping or
// interpolation) and excludes the newline before the closing marker, so the
// payload ends at the closing brace.
$payload = <<<'JSON'
{
  "text": "Hello, welcome to Sarvam AI's text-to-speech service!",
  "target_language_code": "en-IN",
  "speaker": "shubh",
  "pitch": 0,
  "pace": 1.2,
  "loudness": 1,
  "speech_sample_rate": 22050,
  "enable_preprocessing": false,
  "model": "bulbul:v3",
  "temperature": 0.6,
  "enable_cached_responses": false,
  "dict_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
  "output_audio_codec": "mp3",
  "output_audio_bitrate": "128k"
}
JSON;

$httpClient = new \GuzzleHttp\Client();

// POST the payload with the JSON content type and the subscription key.
$requestOptions = [
  'headers' => [
    'Content-Type' => 'application/json',
    'api-subscription-key' => '<apiSubscriptionKey>',
  ],
  'body' => $payload,
];

$response = $httpClient->request(
  'POST',
  'https://api.sarvam.ai/text-to-speech/stream',
  $requestOptions
);

echo $response->getBody();
```

```csharp
using RestSharp;

// JSON request body, split one field per line; the concatenation yields the
// same single string as writing it on one line.
string requestJson =
    "{\n"
  + "  \"text\": \"Hello, welcome to Sarvam AI's text-to-speech service!\",\n"
  + "  \"target_language_code\": \"en-IN\",\n"
  + "  \"speaker\": \"shubh\",\n"
  + "  \"pitch\": 0,\n"
  + "  \"pace\": 1.2,\n"
  + "  \"loudness\": 1,\n"
  + "  \"speech_sample_rate\": 22050,\n"
  + "  \"enable_preprocessing\": false,\n"
  + "  \"model\": \"bulbul:v3\",\n"
  + "  \"temperature\": 0.6,\n"
  + "  \"enable_cached_responses\": false,\n"
  + "  \"dict_id\": \"a1b2c3d4-e5f6-7890-abcd-ef1234567890\",\n"
  + "  \"output_audio_codec\": \"mp3\",\n"
  + "  \"output_audio_bitrate\": \"128k\"\n"
  + "}";

// Build the POST: subscription-key header, JSON content type, then the body.
var request = new RestRequest(Method.POST);
request.AddHeader("api-subscription-key", "<apiSubscriptionKey>");
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", requestJson, ParameterType.RequestBody);

// Execute the request against the streaming text-to-speech endpoint.
var client = new RestClient("https://api.sarvam.ai/text-to-speech/stream");
IRestResponse response = client.Execute(request);
```

```swift
import Foundation

// Request headers: the subscription key authenticates the call and the body is JSON.
let headers = [
  "api-subscription-key": "<apiSubscriptionKey>",
  "Content-Type": "application/json"
]

// Request body fields for the streaming text-to-speech endpoint.
let parameters: [String: Any] = [
  "text": "Hello, welcome to Sarvam AI's text-to-speech service!",
  "target_language_code": "en-IN",
  "speaker": "shubh",
  "pitch": 0,
  "pace": 1.2,
  "loudness": 1,
  "speech_sample_rate": 22050,
  "enable_preprocessing": false,
  "model": "bulbul:v3",
  "temperature": 0.6,
  "enable_cached_responses": false,
  "dict_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
  "output_audio_codec": "mp3",
  "output_audio_bitrate": "128k"
]

// The endpoint is a hard-coded literal, so failing to parse it is a programmer error.
guard let url = URL(string: "https://api.sarvam.ai/text-to-speech/stream") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers

do {
  // JSONSerialization.data(withJSONObject:options:) is a throwing call and
  // must be invoked with `try`.
  request.httpBody = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Could not encode the request body: \(error)")
}

let session = URLSession.shared

// The completion handler runs asynchronously once the request finishes.
// NOTE(review): when run as a command-line script, the process must be kept
// alive until the handler fires — confirm for your run environment.
let dataTask = session.dataTask(with: request) { data, response, error in
  if let error = error {
    print(error)
    return
  }

  guard let httpResponse = response as? HTTPURLResponse else {
    print("Response is not an HTTP response")
    return
  }
  print("HTTP status: \(httpResponse.statusCode)")

  let body = data ?? Data()
  if httpResponse.statusCode == 200 {
    // A 200 body is binary audio, so report its size instead of printing raw bytes.
    print("Received \(body.count) bytes of audio")
  } else {
    // Error responses are JSON documents; show them as text.
    print(String(data: body, encoding: .utf8) ?? "")
  }
}

dataTask.resume()
```