> For clean Markdown of any page, append `.md` to the page URL.
> For a complete documentation index, see https://docs.sarvam.ai/llms.txt.
> For full documentation content in one file, see https://docs.sarvam.ai/llms-full.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://docs.sarvam.ai/_mcp/server.

# Start Document Intelligence Job

POST https://api.sarvam.ai/doc-digitization/job/v1/{job_id}/start

Validates the uploaded file and starts processing.

**Validation Checks:**
- File must be uploaded before starting
- File size must not exceed 200 MB
- PDF must be parseable by the PDF parser
- ZIP must contain only JPEG/PNG images
- ZIP must be flat (no nested folders beyond one level)
- ZIP must contain at least one valid image
- Page/image count must not exceed 10 (returns `422` with `max_page_limit_exceeded` if exceeded)
- User must have sufficient credits

**Processing:**
The job runs asynchronously. Poll the status endpoint or use a webhook callback to be notified when it completes.

Reference: https://docs.sarvam.ai/api-reference-docs/document-intelligence/start

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: ''
  version: 1.0.0
paths:
  /doc-digitization/job/v1/{job_id}/start:
    post:
      operationId: start
      summary: Start Document Intelligence Job
      description: >-
        Validates the uploaded file and starts processing.


        **Validation Checks:**

        - File must be uploaded before starting

        - File size must not exceed 200 MB

        - PDF must be parseable by the PDF parser

        - ZIP must contain only JPEG/PNG images

        - ZIP must be flat (no nested folders beyond one level)

        - ZIP must contain at least one valid image

        - Page/image count must not exceed 10 (returns `422` with
        `max_page_limit_exceeded` if exceeded)

        - User must have sufficient credits


        **Processing:**

        Job runs asynchronously. Poll the status endpoint or use webhook
        callback for completion notification.
      tags:
        - subpackage_documentIntelligence
      parameters:
        - name: job_id
          in: path
          description: The unique identifier of the job
          required: true
          schema:
            type: string
            format: uuid
        - name: api-subscription-key
          in: header
          required: true
          schema:
            type: string
      responses:
        '202':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocDigitizationJobStatusResponse'
        '400':
          description: Bad Request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocDigitizationErrorMessage'
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocDigitizationErrorMessage'
        '429':
          description: Quota Exceeded / Rate Limited
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocDigitizationErrorMessage'
        '500':
          description: Internal Server Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocDigitizationErrorMessage'
        '503':
          description: Service Unavailable
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DocDigitizationErrorMessage'
servers:
  - url: https://api.sarvam.ai
components:
  schemas:
    DocDigitizationJobState:
      type: string
      enum:
        - Accepted
        - Pending
        - Running
        - Completed
        - PartiallyCompleted
        - Failed
      description: Current state of the document intelligence job
      title: DocDigitizationJobState
    StorageContainerType:
      type: string
      enum:
        - Azure
        - Local
        - Google
        - Azure_V1
      title: StorageContainerType
    TaskFileDetails:
      type: object
      properties:
        file_name:
          type: string
        file_id:
          type: string
      required:
        - file_name
        - file_id
      title: TaskFileDetails
    DocDigitizationJobDetailState:
      type: string
      enum:
        - Pending
        - Running
        - Success
        - PartialSuccess
        - Failed
      description: Processing state for individual file
      title: DocDigitizationJobDetailState
    DocDigitizationPageError:
      type: object
      properties:
        page_number:
          type: integer
          description: Page number that failed
        error_code:
          type: string
          description: Standardized error code
        error_message:
          type: string
          description: Human-readable error description
      required:
        - page_number
        - error_code
        - error_message
      description: Error details for a specific page.
      title: DocDigitizationPageError
    DocDigitizationJobDetail:
      type: object
      properties:
        inputs:
          type: array
          items:
            $ref: '#/components/schemas/TaskFileDetails'
          description: Input file(s) for this task
        outputs:
          type: array
          items:
            $ref: '#/components/schemas/TaskFileDetails'
          description: Output file(s) produced
        state:
          $ref: '#/components/schemas/DocDigitizationJobDetailState'
          description: Processing state for this file
        total_pages:
          type: integer
          default: 0
          description: Total pages/images in the input file
        pages_processed:
          type: integer
          default: 0
          description: Number of pages processed so far
        pages_succeeded:
          type: integer
          default: 0
          description: Number of pages successfully processed
        pages_failed:
          type: integer
          default: 0
          description: Number of pages that failed processing
        error_message:
          type: string
          default: ''
          description: Error message if processing failed
        error_code:
          type:
            - string
            - 'null'
          description: Standardized error code if failed
        page_errors:
          type: array
          items:
            $ref: '#/components/schemas/DocDigitizationPageError'
          description: Detailed errors for each failed page
      required:
        - inputs
        - outputs
        - state
      description: Processing details for a single input file with page-level metrics.
      title: DocDigitizationJobDetail
    DocDigitizationJobStatusResponse:
      type: object
      properties:
        job_id:
          type: string
          format: uuid
          description: Job identifier (UUID)
        job_state:
          $ref: '#/components/schemas/DocDigitizationJobState'
          description: Current job state
        created_at:
          type: string
          format: date-time
          description: Job creation timestamp (ISO 8601)
        updated_at:
          type: string
          format: date-time
          description: Last update timestamp (ISO 8601)
        storage_container_type:
          $ref: '#/components/schemas/StorageContainerType'
          description: Storage backend type
        total_files:
          type: integer
          default: 0
          description: Total input files (always 1)
        successful_files_count:
          type: integer
          default: 0
          description: Files that completed successfully
        failed_files_count:
          type: integer
          default: 0
          description: Files that failed
        error_message:
          type: string
          default: ''
          description: Job-level error message
        job_details:
          type: array
          items:
            $ref: '#/components/schemas/DocDigitizationJobDetail'
          description: Per-file processing details with page metrics
      required:
        - job_id
        - job_state
        - created_at
        - updated_at
        - storage_container_type
      description: Response model for job status endpoint.
      title: DocDigitizationJobStatusResponse
    DocDigitizationErrorCode:
      type: string
      enum:
        - invalid_request_error
        - internal_server_error
        - insufficient_quota_error
        - invalid_api_key_error
        - rate_limit_exceeded_error
        - high_load_error
      title: DocDigitizationErrorCode
    DocDigitizationErrorDetails:
      type: object
      properties:
        message:
          type: string
          description: Message describing the error
        code:
          $ref: '#/components/schemas/DocDigitizationErrorCode'
          description: Error code for the specific error that has occurred.
        request_id:
          type: string
          default: ''
          description: 'Unique identifier for the request. Format: date_UUID4'
      required:
        - message
        - code
      title: DocDigitizationErrorDetails
    DocDigitizationErrorMessage:
      type: object
      properties:
        error:
          $ref: '#/components/schemas/DocDigitizationErrorDetails'
          description: Error details
      required:
        - error
      title: DocDigitizationErrorMessage
  securitySchemes:
    ApiKeyAuth:
      type: apiKey
      in: header
      name: api-subscription-key

```

## SDK Code Examples

```typescript
import { SarvamAIClient } from "sarvamai";

async function main() {
    const client = new SarvamAIClient({
        apiSubscriptionKey: "YOUR_API_KEY_HERE",
    });
    await client.documentIntelligence.start("job_id");
}
main();

```

```python
from sarvamai import SarvamAI

client = SarvamAI(
    api_subscription_key="YOUR_API_KEY_HERE",
)

client.document_intelligence.start(
    job_id="job_id",
)

```

```go
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main starts a Document Intelligence job by POSTing an empty JSON object to
// the job's start endpoint, then prints the HTTP response and its body.
// Any failure is reported on stdout and the program returns early.
func main() {

	url := "https://api.sarvam.ai/doc-digitization/job/v1/job_id/start"

	payload := strings.NewReader("{}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("api-subscription-key", "<apiSubscriptionKey>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails; touching res.Body here would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	fmt.Println(res)
	fmt.Println(string(body))

}
```

```ruby
require 'uri'
require 'net/http'

url = URI("https://api.sarvam.ai/doc-digitization/job/v1/job_id/start")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["api-subscription-key"] = '<apiSubscriptionKey>'
request["Content-Type"] = 'application/json'
request.body = "{}"

response = http.request(request)
puts response.read_body
```

```java
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

HttpResponse<String> response = Unirest.post("https://api.sarvam.ai/doc-digitization/job/v1/job_id/start")
  .header("api-subscription-key", "<apiSubscriptionKey>")
  .header("Content-Type", "application/json")
  .body("{}")
  .asString();
```

```php
<?php
require_once('vendor/autoload.php');

$client = new \GuzzleHttp\Client();

$response = $client->request('POST', 'https://api.sarvam.ai/doc-digitization/job/v1/job_id/start', [
  'body' => '{}',
  'headers' => [
    'Content-Type' => 'application/json',
    'api-subscription-key' => '<apiSubscriptionKey>',
  ],
]);

echo $response->getBody();
```

```csharp
using RestSharp;

var client = new RestClient("https://api.sarvam.ai/doc-digitization/job/v1/job_id/start");
var request = new RestRequest(Method.POST);
request.AddHeader("api-subscription-key", "<apiSubscriptionKey>");
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", "{}", ParameterType.RequestBody);
IRestResponse response = client.Execute(request);
```

```swift
import Foundation

// Starts a Document Intelligence job by POSTing an empty JSON object to the
// job's `start` endpoint, then prints the HTTP response or the transport error.

let headers = [
  "api-subscription-key": "<apiSubscriptionKey>",
  "Content-Type": "application/json"
]

// No body fields are sent; an empty dictionary serializes to `{}`.
// (`[]` is an array literal and cannot be cast to `[String: Any]`.)
let parameters: [String: Any] = [:]

guard let url = URL(string: "https://api.sarvam.ai/doc-digitization/job/v1/job_id/start") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers

do {
  // `data(withJSONObject:options:)` throws, so it must be called with `try`.
  request.httpBody = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Could not encode request body: \(error)")
}

let dataTask = URLSession.shared.dataTask(with: request) { _, response, error in
  if let error = error {
    print(error)
    return
  }
  guard let httpResponse = response as? HTTPURLResponse else {
    print("Response is not an HTTP response")
    return
  }
  print(httpResponse)
}

// The completion handler runs asynchronously after `resume()` returns.
dataTask.resume()
```