> For clean Markdown of any page, append `.md` to the page URL.
> For a complete documentation index, see https://docs.sarvam.ai/llms.txt.
> For full documentation content in one file, see https://docs.sarvam.ai/llms-full.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.sarvam.ai/_mcp/server.

# Chat Completion

POST https://api.sarvam.ai/v1/chat/completions
Content-Type: application/json

Reference: https://docs.sarvam.ai/api-reference-docs/chat/chat-completions

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: ''
  version: 1.0.0
paths:
  /v1/chat/completions:
    post:
      operationId: completions
      summary: Chat Completions
      tags:
        - subpackage_chat
      parameters:
        - name: Authorization
          in: header
          description: Bearer authentication
          required: true
          schema:
            type: string
        - name: api-subscription-key
          in: header
          description: |-
            API subscription key in sk_xxx format.
            [Steps to get your key](https://docs.sarvam.ai/api-reference-docs/authentication)
          required: false
          schema:
            type:
              - string
              - 'null'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CreateChatCompletionResponse'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorMessage'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorMessage'
        '422':
          description: Unprocessable Entity
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorMessage'
        '429':
          description: Quota Exceeded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorMessage'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorMessage'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateChatCompletionRequest'
servers:
  - url: https://api.sarvam.ai
components:
  schemas:
    FunctionCall:
      type: object
      properties:
        name:
          type: string
          description: The name of the function to call.
        arguments:
          type: string
          description: The arguments to call the function with, as a JSON string.
      required:
        - name
        - arguments
      title: FunctionCall
    ChatCompletionMessageToolCall:
      type: object
      properties:
        id:
          type: string
          description: The ID of the tool call.
        type:
          type: string
          enum:
            - function
          description: The type of the tool call, currently only `function`.
        function:
          $ref: '#/components/schemas/FunctionCall'
      required:
        - id
        - type
        - function
      title: ChatCompletionMessageToolCall
    ChatCompletionRequestMessage:
      oneOf:
        - type: object
          properties:
            role:
              type: string
              enum:
                - assistant
              description: 'Discriminator value: assistant'
            content:
              type:
                - string
                - 'null'
              description: |
                The contents of the assistant message
            tool_calls:
              type:
                - array
                - 'null'
              items:
                $ref: '#/components/schemas/ChatCompletionMessageToolCall'
              description: The tool calls generated by the model.
          required:
            - role
          description: ChatCompletionRequestAssistantMessage variant
        - type: object
          properties:
            role:
              type: string
              enum:
                - system
              description: The role of the messages author, in this case `system`.
            content:
              type: string
              description: The contents of the system message.
          required:
            - role
            - content
          description: ChatCompletionRequestSystemMessage variant
        - type: object
          properties:
            role:
              type: string
              enum:
                - tool
              description: The role of the messages author, in this case `tool`.
            content:
              type: string
              description: The contents of the tool message.
            tool_call_id:
              type: string
              description: Tool call that this message is responding to.
          required:
            - role
            - content
            - tool_call_id
          description: ChatCompletionRequestToolMessage variant
        - type: object
          properties:
            role:
              type: string
              enum:
                - user
              description: The role of the messages author, in this case `user`.
            content:
              type: string
              description: |
                The contents of the user message.
          required:
            - role
            - content
          description: ChatCompletionRequestUserMessage variant
      discriminator:
        propertyName: role
      title: ChatCompletionRequestMessage
    SarvamModelIds:
      type: string
      enum:
        - sarvam-105b
        - sarvam-30b
        - sarvam-m
      description: >-
        Supported chat completion model IDs. Primary models: `sarvam-30b`,
        `sarvam-105b`. Legacy: `sarvam-m` (24B); prefer Sarvam-30B or
        Sarvam-105B for new integrations.
      title: SarvamModelIds
    ReasoningEffort:
      type: string
      enum:
        - low
        - medium
        - high
      title: ReasoningEffort
    StopConfiguration:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
      description: |
        Up to 4 sequences where the API will stop generating further tokens. The
        returned text will not contain the stop sequence.
      title: StopConfiguration
    FunctionDefinition:
      type: object
      properties:
        name:
          type: string
          description: The name of the function to be called.
        description:
          type:
            - string
            - 'null'
          description: A description of what the function does.
        parameters:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
          description: >-
            The parameters the function accepts, described as a JSON Schema
            object.
      required:
        - name
      title: FunctionDefinition
    ChatCompletionTool:
      type: object
      properties:
        type:
          type: string
          enum:
            - function
          description: The type of the tool. Currently, only `function` is supported.
        function:
          $ref: '#/components/schemas/FunctionDefinition'
      required:
        - type
        - function
      title: ChatCompletionTool
    ToolChoiceOption0:
      type: string
      enum:
        - none
        - auto
        - required
      title: ToolChoiceOption0
    ChatCompletionNamedToolChoiceFunction:
      type: object
      properties:
        name:
          type: string
          description: The name of the function to call.
      required:
        - name
      title: ChatCompletionNamedToolChoiceFunction
    ChatCompletionNamedToolChoice:
      type: object
      properties:
        type:
          type: string
          enum:
            - function
          description: The type of the tool. Currently, only `function` is supported.
        function:
          $ref: '#/components/schemas/ChatCompletionNamedToolChoiceFunction'
      required:
        - type
        - function
      title: ChatCompletionNamedToolChoice
    ToolChoiceOption:
      oneOf:
        - $ref: '#/components/schemas/ToolChoiceOption0'
        - $ref: '#/components/schemas/ChatCompletionNamedToolChoice'
      title: ToolChoiceOption
    CreateChatCompletionRequest:
      type: object
      properties:
        messages:
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionRequestMessage'
          description: A list of messages comprising the conversation so far.
        model:
          $ref: '#/components/schemas/SarvamModelIds'
          description: >-
            Chat model ID. Prefer `sarvam-30b` (64K context) or `sarvam-105b`
            (128K context) for new workloads. The value `sarvam-m` is a legacy
            24B model that remains accepted; migrate to Sarvam-30B or
            Sarvam-105B for better performance.
        temperature:
          type:
            - number
            - 'null'
          format: double
          default: 0.2
          description: >
            What sampling temperature to use, between 0 and 2. Higher values
            like 0.8 will make the output more random, while lower values like
            0.2 will make it more focused and deterministic.

            We generally recommend altering this or `top_p` but not both.
        top_p:
          type:
            - number
            - 'null'
          format: double
          default: 1
          description: >
            An alternative to sampling with temperature, called nucleus
            sampling,

            where the model considers the results of the tokens with top_p
            probability

            mass. So 0.1 means only the tokens comprising the top 10%
            probability mass

            are considered.


            We generally recommend altering this or `temperature` but not both.
        reasoning_effort:
          oneOf:
            - $ref: '#/components/schemas/ReasoningEffort'
            - type: 'null'
          default: medium
          description: >-
            The effort to use for reasoning. Can be disabled by explicitly
            setting it to `null` (`None` in the Python SDK). Default is
            'medium'.
        max_tokens:
          type:
            - integer
            - 'null'
          default: 2048
          description: >-
            The maximum number of tokens that can be generated in the chat
            completion.
        stream:
          type:
            - boolean
            - 'null'
          default: false
          description: >-
            If set to true, the model response data will be streamed to the
            client

            as it is generated using [server-sent
            events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
        stop:
          oneOf:
            - $ref: '#/components/schemas/StopConfiguration'
            - type: 'null'
        'n':
          type:
            - integer
            - 'null'
          default: 1
          description: >-
            How many chat completion choices to generate for each input message.
            Note that you will be charged based on the number of generated
            tokens across all of the choices. Keep `n` as `1` to minimize costs.
        seed:
          type:
            - integer
            - 'null'
          description: >
            This feature is in Beta.

            If specified, our system will make a best effort to sample
            deterministically, such that repeated requests with the same `seed`
            and parameters should return the same result.

            Determinism is not guaranteed, and you should refer to the
            `system_fingerprint` response parameter to monitor changes in the
            backend.
        frequency_penalty:
          type:
            - number
            - 'null'
          format: double
          default: 0
          description: >
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on

            their existing frequency in the text so far, decreasing the model's

            likelihood to repeat the same line verbatim.
        presence_penalty:
          type:
            - number
            - 'null'
          format: double
          default: 0
          description: >
            Number between -2.0 and 2.0. Positive values penalize new tokens
            based on

            whether they appear in the text so far, increasing the model's
            likelihood

            to talk about new topics.
        wiki_grounding:
          type:
            - boolean
            - 'null'
          default: false
          description: If set to true, the model response will be wiki grounded.
        tools:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/ChatCompletionTool'
          description: >-
            A list of tools the model may call. Currently, only functions are
            supported as a tool.
        tool_choice:
          oneOf:
            - $ref: '#/components/schemas/ToolChoiceOption'
            - type: 'null'
          description: Controls which (if any) tool is called by the model.
      required:
        - messages
        - model
      title: CreateChatCompletionRequest
    FinishReason:
      type: string
      enum:
        - stop
        - length
        - tool_calls
        - content_filter
        - function_call
      title: FinishReason
    Role:
      type: string
      enum:
        - assistant
      title: Role
    ChatCompletionResponseMessage:
      type: object
      properties:
        content:
          type:
            - string
            - 'null'
          description: The contents of the message.
        refusal:
          type:
            - string
            - 'null'
        reasoning_content:
          type:
            - string
            - 'null'
          description: The contents of the reasoning message.
        role:
          $ref: '#/components/schemas/Role'
          description: The role of the author of this message.
        tool_calls:
          type:
            - array
            - 'null'
          items:
            $ref: '#/components/schemas/ChatCompletionMessageToolCall'
          description: The tool calls generated by the model.
      required:
        - content
        - role
      title: ChatCompletionResponseMessage
    Choice:
      type: object
      properties:
        finish_reason:
          $ref: '#/components/schemas/FinishReason'
          description: >
            The reason the model stopped generating tokens. This will be `stop`
            if the model hit a natural stop point or a provided stop sequence,

            `length` if the maximum number of tokens specified in the request
            was reached,

            `content_filter` if content was omitted due to a flag from our
            content filters,

            `tool_calls` if the model called a tool, or `function_call`
            (deprecated) if the model called a function.
        index:
          type: integer
          description: The index of the choice in the list of choices.
        logprobs:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
        message:
          $ref: '#/components/schemas/ChatCompletionResponseMessage'
      required:
        - finish_reason
        - index
        - message
      title: Choice
    CompletionUsage:
      type: object
      properties:
        completion_tokens:
          type: integer
          description: Number of tokens in the generated completion.
        prompt_tokens:
          type: integer
          description: Number of tokens in the prompt.
        total_tokens:
          type: integer
          description: Total number of tokens used in the request (prompt + completion).
        completion_tokens_details:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
        prompt_tokens_details:
          type:
            - object
            - 'null'
          additionalProperties:
            description: Any type
      required:
        - completion_tokens
        - prompt_tokens
        - total_tokens
      title: CompletionUsage
    CreateChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
          description: A unique identifier for the chat completion.
        choices:
          type: array
          items:
            $ref: '#/components/schemas/Choice'
          description: >-
            A list of chat completion choices. Can be more than one if `n` is
            greater than 1.
        created:
          type: integer
          description: >-
            The Unix timestamp (in seconds) of when the chat completion was
            created.
        model:
          type: string
          description: The model used for the chat completion.
        object:
          type: string
          enum:
            - chat.completion
          description: The object type, which is always `chat.completion`.
        service_tier:
          type:
            - string
            - 'null'
        system_fingerprint:
          type:
            - string
            - 'null'
        usage:
          oneOf:
            - $ref: '#/components/schemas/CompletionUsage'
            - type: 'null'
      required:
        - id
        - choices
        - created
        - model
        - object
      title: CreateChatCompletionResponse
    ErrorCode:
      type: string
      enum:
        - invalid_request_error
        - internal_server_error
        - unprocessable_entity_error
        - insufficient_quota_error
        - invalid_api_key_error
        - authentication_error
        - rate_limit_exceeded_error
        - not_found_error
      title: ErrorCode
    ErrorDetails:
      type: object
      properties:
        message:
          type: string
          description: Message describing the error
        code:
          $ref: '#/components/schemas/ErrorCode'
          description: >-
            Error code for the specific error that has occurred. Refer to the
            error code documentation for more details.
        request_id:
          type: string
          default: ''
          description: 'Unique identifier for the request. Format: date_UUID4'
      required:
        - message
        - code
      title: ErrorDetails
    ErrorMessage:
      type: object
      properties:
        error:
          $ref: '#/components/schemas/ErrorDetails'
          description: Error details
      required:
        - error
      title: ErrorMessage
  securitySchemes:
    HTTPBearer:
      type: http
      scheme: bearer
    ApiKeyAuth:
      type: apiKey
      in: header
      name: api-subscription-key

```

## SDK Code Examples

```typescript
import { SarvamAIClient } from "sarvamai";

/**
 * Sends a single user message to the chat completions endpoint and prints
 * the returned completion.
 */
async function main(): Promise<void> {
    const client = new SarvamAIClient();
    const response = await client.chat.completions({
        messages: [
            {
                role: "user",
                content: "Hello, can you explain the theory of relativity?",
            },
        ],
        model: "sarvam-105b",
    });
    // Print the completion so the example produces visible output.
    console.log(JSON.stringify(response, null, 2));
}

// Report request failures instead of leaving an unhandled promise rejection.
main().catch((error) => {
    console.error("Chat completion request failed:", error);
});

```

```python
from sarvamai import SarvamAI, ChatCompletionRequestMessage_User

# Construct the SDK client with no explicit arguments.
client = SarvamAI()

# The conversation consists of a single user turn.
user_message = ChatCompletionRequestMessage_User(
    content="Hello, can you explain the theory of relativity?",
)

# Request a completion from the sarvam-105b model; the result is not used here.
client.chat.completions(messages=[user_message], model="sarvam-105b")

```

```go
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a single-turn chat completion request to the Sarvam API and
// prints the HTTP response followed by the raw response body. Any failure
// is printed and ends the program early.
func main() {

	url := "https://api.sarvam.ai/v1/chat/completions"

	payload := strings.NewReader("{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Hello, can you explain the theory of relativity?\"\n    }\n  ],\n  \"model\": \"sarvam-105b\"\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("failed to build request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when the request fails, so it must not be dereferenced.
		fmt.Println("request failed:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("failed to read response body:", err)
		return
	}

	fmt.Println(res)
	fmt.Println(string(body))

}
```

```ruby
require 'uri'
require 'net/http'

# Endpoint for chat completions.
endpoint = URI("https://api.sarvam.ai/v1/chat/completions")

# JSON request body: one user message addressed to the sarvam-105b model.
payload = "{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Hello, can you explain the theory of relativity?\"\n    }\n  ],\n  \"model\": \"sarvam-105b\"\n}"

# Build the POST request; replace <token> with a real bearer token.
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = payload

# Send the request over TLS and print the response body.
connection = Net::HTTP.new(endpoint.host, endpoint.port)
connection.use_ssl = true

puts connection.request(post).read_body
```

```java
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Build and send a POST request to the chat completions endpoint with
// Unirest; asString() yields the response with its body typed as a String.
HttpResponse<String> response = Unirest.post("https://api.sarvam.ai/v1/chat/completions")
  // Replace <token> with a real bearer token before running.
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  // JSON body: one user message addressed to the sarvam-105b model.
  .body("{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Hello, can you explain the theory of relativity?\"\n    }\n  ],\n  \"model\": \"sarvam-105b\"\n}")
  .asString();
```

```php
<?php
require_once('vendor/autoload.php');

// JSON request body: one user message addressed to the sarvam-105b model.
$payload = '{
  "messages": [
    {
      "role": "user",
      "content": "Hello, can you explain the theory of relativity?"
    }
  ],
  "model": "sarvam-105b"
}';

// Headers sent with the request; replace <token> with a real bearer token.
$headers = [
  'Authorization' => 'Bearer <token>',
  'Content-Type' => 'application/json',
];

$httpClient = new \GuzzleHttp\Client();

// POST the payload to the chat completions endpoint.
$response = $httpClient->request('POST', 'https://api.sarvam.ai/v1/chat/completions', [
  'body' => $payload,
  'headers' => $headers,
]);

// Print the response body.
echo $response->getBody();
```

```csharp
using RestSharp;

// Endpoint and JSON payload for a single-turn chat completion.
const string endpoint = "https://api.sarvam.ai/v1/chat/completions";
const string payload = "{\n  \"messages\": [\n    {\n      \"role\": \"user\",\n      \"content\": \"Hello, can you explain the theory of relativity?\"\n    }\n  ],\n  \"model\": \"sarvam-105b\"\n}";

RestClient client = new RestClient(endpoint);

// Build the POST request; replace <token> with a real bearer token.
RestRequest request = new RestRequest(Method.POST);
request.AddHeader("Authorization", "Bearer <token>");
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", payload, ParameterType.RequestBody);

// Execute the request and keep the response.
IRestResponse response = client.Execute(request);
```

```swift
import Foundation

// Sends a single-turn chat completion request to the Sarvam API and prints
// the HTTP response and its body. Replace <token> with a real bearer token.
let headers = [
  "Authorization": "Bearer <token>",
  "Content-Type": "application/json"
]
let parameters: [String: Any] = [
  "messages": [
    [
      "role": "user",
      "content": "Hello, can you explain the theory of relativity?"
    ]
  ],
  "model": "sarvam-105b"
]

// JSONSerialization.data(withJSONObject:options:) throws, so the call must be
// marked with `try` and its failure handled explicitly.
let postData: Data
do {
  postData = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Could not encode request body: \(error)")
}

// The endpoint is a fixed literal, so failing to parse it is a programmer error.
guard let url = URL(string: "https://api.sarvam.ai/v1/chat/completions") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers
request.httpBody = postData

let session = URLSession.shared
let dataTask = session.dataTask(with: request) { data, response, error in
  if let error = error {
    print(error)
    return
  }
  // Unwrap before printing so the output is not wrapped in `Optional(...)`.
  if let httpResponse = response as? HTTPURLResponse {
    print(httpResponse)
  }
  // Print the response body as well, since it carries the completion.
  if let data = data, let body = String(data: data, encoding: .utf8) {
    print(body)
  }
}

// The task runs asynchronously; in a command-line script the process must stay
// alive until the completion handler has been called.
dataTask.resume()
```