Text-to-Speech REST API

Before using the REST API, obtain an API key from Prosa Console.

Synchronous Request

A synchronous Text-to-Speech API request consists of a speech synthesis configuration as well as the text to generate speech from. The text in each synchronous request is limited to 280 characters. The wait parameter is set to true to indicate that it is a synchronous request.

Example request

`POST https://api.prosa.ai/v2/speech/tts`

{
    "config": {
      "model": "tts-dimas-formal",
      "wait": true,
      "audio_format": "opus"
    },
    "request": {
        "text": "Hasil akhir dari pekerjaan ini cukup memuaskan"
    }
}

Example result

{
  "job_id": "2fec34e1efb146f7a7431cb35b64550d",
  "status": "complete",
  "created_at": "2019-08-24T14:15:22Z",
  "modified_at": "2019-08-24T14:15:22Z",
  "request": {
    "label": "string",
    "text": "Hasil akhir dari pekerjaan ini cukup memuaskan"
  },
  "result": {
    "data": "<base64-encoded audio data>"
  },
  "job_config": {
    "model": "tts-dimas-formal",
    "wait": true,
    "pitch": 0,
    "tempo": 1,
    "audio_format": "opus"
  },
  "model": {
    "name": "tts-dimas-formal",
    "label": "TTS Dimas Formal",
    "language": "Bahasa Indonesia",
    "domain": "formal",
    "voice": "Dimas",
    "gender": "male",
    "channels": 1,
    "samplerate": 48000
  }
}

Info

See Submit a TTS Request for more information on the request.

Here are some example codes to help you get started quickly.

Python | Node.js

import base64

import requests

url = "https://api.prosa.ai/v2/speech/tts"
api_key = "..."


def main():
    """Synthesize one sample sentence and save the returned audio to disk."""
    sentence = "Hasil akhir dari pekerjaan ini cukup memuaskan"
    output_path = "audio_file.webm"

    # "opus" is forwarded to tts() as the requested audio format.
    audio_bytes = tts(sentence, "opus")

    with open(output_path, "wb") as audio_file:
        audio_file.write(audio_bytes)


def tts(text: str, audio_format: str) -> bytes:
    """Synthesize ``text`` with a synchronous request and return the audio.

    Args:
        text: Text to generate speech from.
        audio_format: Audio format forwarded to ``submit_tts_request``
            (for example ``"opus"``).

    Returns:
        The audio bytes decoded from the base64 string in
        ``job["result"]["data"]``.

    Raises:
        RuntimeError: If the returned job status is not ``"complete"``. The
            message carries the job id so the result can be retrieved later.
    """
    job = submit_tts_request(text, audio_format)
    if job["status"] == "complete":
        return base64.b64decode(job["result"]["data"])

    # Job was not completed within the timeframe. Fail loudly instead of
    # implicitly returning None, which contradicts the declared return type
    # and would only surface later as a confusing TypeError in the caller.
    raise RuntimeError(
        f"TTS job {job.get('job_id')} was not completed within the timeframe "
        f"(status: {job['status']!r}); retrieve the result using its job_id."
    )

def submit_tts_request(text: str, audio_format: str) -> dict:
    """Submit a synchronous TTS job and return the decoded JSON response.

    Args:
        text: Text to generate speech from.
        audio_format: Value sent as ``config.audio_format``.

    Returns:
        The response body parsed from JSON.
    """
    config = {
        "model": "tts-dimas-formal",
        "wait": True,  # Hold the HTTP call open until synthesis has finished
        "audio_format": audio_format,
    }
    body = {"config": config, "request": {"text": text}}

    # The API key authenticates the call through the x-api-key header.
    auth_headers = {"x-api-key": api_key}

    return requests.post(url, json=body, headers=auth_headers).json()


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()

const https = require('https');
const fs = require('fs');

// Setup
const url = 'https://api.prosa.ai/v2/speech/tts';
const apiKey = '...';

// Entry point: synthesize one sample sentence and save the audio to disk.
(async () => {
  const sentence = "Hasil akhir dari pekerjaan ini cukup memuaskan";
  const outputPath = 'generated_audio.webm';

  // "opus" is passed to tts() as the requested audio format.
  const audioBuffer = await tts(sentence, "opus");
  fs.writeFileSync(outputPath, audioBuffer);
})();

/**
 * Synthesize `text` with a synchronous request and return the decoded audio.
 *
 * @param {string} text - Text to generate speech from.
 * @param {string} audio_format - Audio format forwarded to `submitTtsRequest`
 *   (for example "opus"). Previously this argument was silently dropped.
 * @returns {Promise<Buffer>} Audio decoded from the base64 string in
 *   `job.result.data`.
 * @throws {Error} When the job status is not "complete"; the error carries the
 *   job id in `jobId` so the result can be retrieved later.
 */
async function tts(text, audio_format) {
  const job = await submitTtsRequest(text, audio_format);

  if (job["status"] === "complete") {
    const base64AudioData = job["result"]["data"];
    return Buffer.from(base64AudioData, 'base64');
  }

  // Job was not completed within the timeframe. Throw instead of returning
  // undefined, which would only fail later inside fs.writeFileSync.
  const error = new Error(
    `TTS job ${job["job_id"]} was not completed within the timeframe (status: ${job["status"]})`
  );
  error.jobId = job["job_id"];
  throw error;
}

/**
 * Submit a synchronous TTS job (`wait: true`) and resolve with the response.
 *
 * @param {string} text - Text to generate speech from.
 * @param {string} audio_format - Value sent as `config.audio_format`.
 * @returns {Promise<*>} Whatever `request` resolves with for the POST call.
 */
async function submitTtsRequest(text, audio_format) {
  const config = {
    "model": "tts-dimas-formal",
    "wait": true,  // Hold the HTTP call open until synthesis has finished
    "audio_format": audio_format,
  };
  const body = { "config": config, "request": { "text": text } };

  // The API key authenticates the call through the x-api-key header.
  const authHeaders = { "x-api-key": apiKey };

  const response = await request(url, "POST", { json: body, headers: authHeaders });
  return response;
}

/**
 * Minimal promise wrapper around the built-in `https.request`.
 *
 * Sends `json` (when given) as the JSON-encoded request body and resolves with
 * the parsed JSON body of a 200 response.
 *
 * @param {string} url - Absolute URL to call.
 * @param {string} method - HTTP method, e.g. "GET" or "POST".
 * @param {{headers?: ?Object, json?: *}} [options] - Extra request headers and
 *   an optional JSON-serializable body. May be omitted entirely.
 * @returns {Promise<*>} Parsed JSON response body.
 * @throws {Error} Rejects with an Error carrying `statusCode` when the status
 *   is not 200, with the `JSON.parse` error when the body is not valid JSON,
 *   or with the underlying error on a transport failure.
 */
function request(url, method, {headers = null, json = null} = {}) {
  return new Promise((resolve, reject) => {
    const req = https.request(url, {
      method: method,
      headers: {
        "Accept": "application/json",
        "Content-Type": "application/json; charset=UTF-8",
        ...headers
      }
    }, (res) => {
      if (res.statusCode !== 200) {
        // Discard the body so the socket is released, and fail with a real
        // Error (not a bare number) so callers get a message and a stack.
        res.resume();
        const error = new Error(`Request failed with status code ${res.statusCode}`);
        error.statusCode = res.statusCode;
        reject(error);
        return;
      }

      // Decode as UTF-8 at the stream level so a multi-byte character that
      // straddles a chunk boundary is not corrupted by string concatenation.
      res.setEncoding('utf8');

      let data = "";
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        // A throw inside this event handler could not reject the promise, so
        // route parse failures through reject explicitly.
        try {
          resolve(JSON.parse(data));
        } catch (err) {
          reject(err);
        }
      });
    });

    req.on('error', reject);

    if (json != null) {
      req.write(JSON.stringify(json));
    }
    req.end();
  });
}

Note

The Node.js example contains a simple promise wrapper for the built-in https module.

Warning

If the job could not be completed within a specified timeframe, it is treated as an Asynchronous Request. See Retrieving Result on how to retrieve the result of asynchronous requests.

Configure request

Configure the model to use. In this example, the model being used is tts-dimas-formal.

Python | Node.js

def submit_tts_request(text: str, audio_format: str) -> dict:
    """Walkthrough step: build only the ``config`` part of the payload.

    This fragment stops after constructing ``payload``; nothing is sent or
    returned at this step.
    """
    payload = {
        "config": {
            "model": "tts-dimas-formal",
            "wait": True,  # Blocks the request until the execution is finished
            "audio_format": audio_format
        }
    }

/**
 * Walkthrough step: build only the `config` part of the payload.
 *
 * This fragment stops after constructing `payload`; nothing is sent or
 * returned at this step.
 */
async function submitTtsRequest(text, audio_format) {
  const payload = {
    "config": {
      "model": "tts-dimas-formal",
      "wait": true,  // Blocks the request until the execution is finished
      "audio_format": audio_format
    }
  }

}

Sending text

The text is sent under the request object in the request payload.

{
  "request": {
    "text": "text to generate speech from"
  }
}

Python | Node.js

def submit_tts_request(text: str, audio_format: str) -> dict:
    """Walkthrough step: add the ``request`` object carrying the text.

    This fragment stops after constructing ``payload``; nothing is sent or
    returned at this step.
    """
    payload = {
        "config": {
            "model": "tts-dimas-formal",
            "wait": True,  # Blocks the request until the execution is finished
            "audio_format": audio_format
        },
        # The text to synthesize goes under "request", next to "config".
        "request": {
            "text": text
        }
    }

/**
 * Walkthrough step: add the `request` object carrying the text.
 *
 * This fragment stops after constructing `payload`; nothing is sent or
 * returned at this step.
 */
async function submitTtsRequest(text, audio_format) {
  const payload = {
    "config": {
      "model": "tts-dimas-formal",
      "wait": true,  // Blocks the request until the execution is finished
      "audio_format": audio_format
    },
    // The text to synthesize goes under "request", next to "config".
    "request": {
      "text": text
    }
  }
}

Sending the request

Authenticate the request by including API Key in the HTTP request header.

Python | Node.js

url = "https://api.prosa.ai/v2/speech/tts"

def submit_tts_request(text: str, audio_format: str) -> dict:
    """Submit a synchronous TTS job and return the decoded JSON response.

    Args:
        text: Text to generate speech from.
        audio_format: Value sent as ``config.audio_format``.

    Returns:
        The response body parsed from JSON.
    """
    config = {
        "model": "tts-dimas-formal",
        "wait": True,  # Hold the HTTP call open until synthesis has finished
        "audio_format": audio_format,
    }
    body = {"config": config, "request": {"text": text}}

    # The API key authenticates the call through the x-api-key header.
    auth_headers = {"x-api-key": api_key}

    return requests.post(url, json=body, headers=auth_headers).json()

const url = 'https://api.prosa.ai/v2/speech/tts';

/**
 * Submit a synchronous TTS job (`wait: true`) and resolve with the response.
 *
 * @param {string} text - Text to generate speech from.
 * @param {string} audio_format - Value sent as `config.audio_format`.
 * @returns {Promise<*>} Whatever `request` resolves with for the POST call.
 */
async function submitTtsRequest(text, audio_format) {
  const config = {
    "model": "tts-dimas-formal",
    "wait": true,  // Hold the HTTP call open until synthesis has finished
    "audio_format": audio_format,
  };
  const body = { "config": config, "request": { "text": text } };

  // The API key authenticates the call through the x-api-key header.
  const authHeaders = { "x-api-key": apiKey };

  const response = await request(url, "POST", { json: body, headers: authHeaders });
  return response;
}

Receiving response

For synchronous requests, the audio data is returned directly under the object result as base64-encoded data.

If the job could not be completed within a specified timeframe, it is treated as an Asynchronous Request instead. In that case, you need to poll and retrieve the result using job_id. See Retrieving Result on how to retrieve the result of asynchronous requests.

Python | Node.js

def tts(text: str, audio_format: str) -> bytes:
    """Synthesize ``text`` with a synchronous request and return the audio.

    Args:
        text: Text to generate speech from.
        audio_format: Audio format forwarded to ``submit_tts_request``.

    Returns:
        The audio bytes decoded from the base64 string in
        ``job["result"]["data"]``.

    Raises:
        RuntimeError: If the returned job status is not ``"complete"``. The
            message carries the job id so the result can be retrieved later.
    """
    job = submit_tts_request(text, audio_format)
    if job["status"] == "complete":
        return base64.b64decode(job["result"]["data"])

    # Job was not completed within the timeframe. Surface the job_id through
    # an explicit error instead of implicitly returning None, which would
    # contradict the declared return type.
    job_id = job["job_id"]  # Retrieve with job_id instead
    raise RuntimeError(
        f"TTS job {job_id} was not completed within the timeframe "
        f"(status: {job['status']!r}); retrieve the result using its job_id."
    )

/**
 * Synthesize `text` with a synchronous request and return the decoded audio.
 *
 * @param {string} text - Text to generate speech from.
 * @param {string} audio_format - Audio format forwarded to `submitTtsRequest`.
 * @returns {Promise<Buffer>} Audio decoded from the base64 string in
 *   `job.result.data`.
 * @throws {Error} When the job status is not "complete"; the error carries the
 *   job id in `jobId` so the result can be retrieved later.
 */
async function tts(text, audio_format) {
  const job = await submitTtsRequest(text, audio_format);

  if (job["status"] === "complete") {
    const base64AudioData = job["result"]["data"];
    return Buffer.from(base64AudioData, 'base64');
  }

  // Job was not completed within the timeframe. Throw instead of returning
  // undefined so the caller learns the job id it has to poll with.
  const error = new Error(
    `TTS job ${job["job_id"]} was not completed within the timeframe (status: ${job["status"]})`
  );
  error.jobId = job["job_id"];
  throw error;
}

Info

See TtsResponse for more information regarding the response.

Asynchronous Request

An asynchronous Text-to-Speech API request is fairly similar to a synchronous Text-to-Speech API request. However, instead of immediately returning the result, the request will initiate a Long Running Operation and return a response without a result. Each asynchronous request can process up to 5000 characters.

Here are some example codes to help you get started quickly.

Python | Node.js

import base64
import time
from typing import Optional

import requests

url = "https://api.prosa.ai/v2/speech/tts"
api_key = "..."


def main():
    """Synthesize a short greeting and save the returned audio to disk."""
    sentence = "halo, dunia"
    output_path = "audio_file.webm"

    # "opus" is forwarded to tts() as the requested audio format.
    audio_bytes = tts(sentence, "opus")

    with open(output_path, "wb") as audio_file:
        audio_file.write(audio_bytes)


def tts(text: str, audio_format: str, poll_interval: float = 5.0) -> bytes:
    """Submit an asynchronous TTS job and poll until its audio is available.

    Args:
        text: Text to generate speech from.
        audio_format: Audio format forwarded to ``submit_tts_request``.
        poll_interval: Seconds to sleep between two result queries.

    Returns:
        The audio bytes decoded from the base64 string in ``result["data"]``.
    """
    job_id = submit_tts_request(text, audio_format)["job_id"]

    # Query once up front, then sleep and retry for as long as no result exists.
    result = query_tts_result(job_id)
    while result is None:
        time.sleep(poll_interval)
        result = query_tts_result(job_id)

    return base64.b64decode(result["data"])


def submit_tts_request(text: str, audio_format: str) -> dict:
    """Submit an asynchronous TTS job and return the decoded JSON response.

    Args:
        text: Text to generate speech from.
        audio_format: Value sent as ``config.audio_format``.

    Returns:
        The response body parsed from JSON.
    """
    config = {
        "model": "tts-dimas-formal",
        "wait": False,  # Return right away instead of waiting for completion
        "audio_format": audio_format,
    }
    body = {"config": config, "request": {"text": text}}

    # The API key authenticates the call through the x-api-key header.
    auth_headers = {"x-api-key": api_key}

    return requests.post(url, json=body, headers=auth_headers).json()


def query_tts_result(job_id: str) -> Optional[dict]:
    """Fetch a TTS job and return its ``result`` object once it is complete.

    Args:
        job_id: Identifier appended to the endpoint URL.

    Returns:
        ``job["result"]`` when the HTTP status is 200 and the job status is
        ``"complete"``; otherwise ``None``.
    """
    job_url = "/".join((url, job_id))
    response = requests.get(job_url, headers={"x-api-key": api_key})

    # Any non-200 answer is treated the same as "no result yet".
    if response.status_code != 200:
        return None

    job = response.json()
    if job["status"] != "complete":
        return None

    return job["result"]


# Run the example only when this file is executed as a script.
if __name__ == "__main__":
    main()

const https = require('https');
const fs = require('fs');

// Setup
const url = 'https://api.prosa.ai/v2/speech/tts';
const apiKey = '...';

// Entry point: submit an asynchronous job, poll until audio is available,
// then save it to disk.
(async () => {
  const outputPath = 'generated_audio.webm';
  const sentence = "Hasil akhir dari pekerjaan ini cukup memuaskan";
  const pollIntervalMs = 5.0 * 1000;

  // Promise-based delay used between two polls.
  const sleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

  const job = await submitTtsRequest(sentence, "opus");
  const jobId = job["job_id"];

  // Query once up front, then wait and retry for as long as nothing comes back.
  let audio = await queryTtsResult(jobId);
  while (audio == null) {
    await sleep(pollIntervalMs);
    audio = await queryTtsResult(jobId);
  }

  fs.writeFileSync(outputPath, audio);
})();


/**
 * Submit an asynchronous TTS job (`wait: false`) and resolve with the response.
 *
 * @param {string} text - Text to generate speech from.
 * @param {string} audio_format - Value sent as `config.audio_format`.
 * @returns {Promise<*>} Whatever `request` resolves with for the POST call.
 */
async function submitTtsRequest(text, audio_format) {
  const config = {
    "model": "tts-dimas-formal",
    "wait": false,  // Return right away instead of waiting for completion
    "audio_format": audio_format,
  };
  const body = { "config": config, "request": { "text": text } };

  // The API key authenticates the call through the x-api-key header.
  const authHeaders = { "x-api-key": apiKey };

  const response = await request(url, "POST", { json: body, headers: authHeaders });
  return response;
}

/**
 * Fetch a TTS job and return its decoded audio once the job is complete.
 *
 * @param {string} jobId - Identifier appended to the endpoint URL.
 * @returns {Promise<?Buffer>} Audio decoded from the base64 string in
 *   `result.data` when the status is "complete"; otherwise `null`.
 */
async function queryTtsResult(jobId) {
  const jobUrl = `${url}/${jobId}`;
  const job = await request(jobUrl, "GET", {
    headers: { "x-api-key": apiKey },
  });

  // Anything other than a finished job yields no audio yet.
  if (job["status"] !== "complete") {
    return null;
  }

  return Buffer.from(job["result"]["data"], 'base64');
}

/**
 * Minimal promise wrapper around the built-in `https.request`.
 *
 * Sends `json` (when given) as the JSON-encoded request body and resolves with
 * the parsed JSON body of a 200 response.
 *
 * @param {string} url - Absolute URL to call.
 * @param {string} method - HTTP method, e.g. "GET" or "POST".
 * @param {{headers?: ?Object, json?: *}} [options] - Extra request headers and
 *   an optional JSON-serializable body. May be omitted entirely.
 * @returns {Promise<*>} Parsed JSON response body.
 * @throws {Error} Rejects with an Error carrying `statusCode` when the status
 *   is not 200, with the `JSON.parse` error when the body is not valid JSON,
 *   or with the underlying error on a transport failure.
 */
function request(url, method, {headers = null, json = null} = {}) {
  return new Promise((resolve, reject) => {
    const req = https.request(url, {
      method: method,
      headers: {
        "Accept": "application/json",
        "Content-Type": "application/json; charset=UTF-8",
        ...headers
      }
    }, (res) => {
      if (res.statusCode !== 200) {
        // Discard the body so the socket is released, and fail with a real
        // Error (not a bare number) so callers get a message and a stack.
        res.resume();
        const error = new Error(`Request failed with status code ${res.statusCode}`);
        error.statusCode = res.statusCode;
        reject(error);
        return;
      }

      // Decode as UTF-8 at the stream level so a multi-byte character that
      // straddles a chunk boundary is not corrupted by string concatenation.
      res.setEncoding('utf8');

      let data = "";
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        // A throw inside this event handler could not reject the promise, so
        // route parse failures through reject explicitly.
        try {
          resolve(JSON.parse(data));
        } catch (err) {
          reject(err);
        }
      });
    });

    req.on('error', reject);

    if (json != null) {
      req.write(JSON.stringify(json));
    }
    req.end();
  });
}

Info

See Submit a TTS Request for more information on the request.

Submitting request

The request is fairly similar to synchronous requests except the wait parameter is set to false to indicate that this is an asynchronous request.

Python | Node.js

url = "https://api.prosa.ai/v2/speech/tts"

def submit_tts_request(text: str, audio_format: str) -> dict:
    """Submit an asynchronous TTS job and return the decoded JSON response.

    Args:
        text: Text to generate speech from.
        audio_format: Value sent as ``config.audio_format``.

    Returns:
        The response body parsed from JSON.
    """
    config = {
        "model": "tts-dimas-formal",
        "wait": False,  # Return right away instead of waiting for completion
        "audio_format": audio_format,
    }
    body = {"config": config, "request": {"text": text}}

    # The API key authenticates the call through the x-api-key header.
    auth_headers = {"x-api-key": api_key}

    return requests.post(url, json=body, headers=auth_headers).json()

const url = 'https://api.prosa.ai/v2/speech/tts';

/**
 * Submit an asynchronous TTS job (`wait: false`) and resolve with the response.
 *
 * @param {string} text - Text to generate speech from.
 * @param {string} audio_format - Value sent as `config.audio_format`.
 * @returns {Promise<*>} Whatever `request` resolves with for the POST call.
 */
async function submitTtsRequest(text, audio_format) {
  const config = {
    "model": "tts-dimas-formal",
    "wait": false,  // Return right away instead of waiting for completion
    "audio_format": audio_format,
  };
  const body = { "config": config, "request": { "text": text } };

  // The API key authenticates the call through the x-api-key header.
  const authHeaders = { "x-api-key": apiKey };

  const response = await request(url, "POST", { json: body, headers: authHeaders });
  return response;
}

Note

Note that the value returned is a TtsResponse object whose job_id property will be used to retrieve the result.

Retrieving result

Using the job_id from TtsResponse object we previously received when submitting requests, we can retrieve the status and the result by querying GET https://api.prosa.ai/v2/speech/tts/{job_id}.

Example request

`GET https://api.prosa.ai/v2/speech/tts/2fec34e1efb146f7a7431cb35b64550d`

Example result

{
  "job_id": "2fec34e1efb146f7a7431cb35b64550d",
  "status": "complete",
  "created_at": "2019-08-24T14:15:22Z",
  "modified_at": "2019-08-24T14:15:22Z",
  "request": {
    "label": "string",
    "text": "Hasil akhir dari pekerjaan ini cukup memuaskan"
  },
  "result": {
    "data": "<base64-encoded audio data>"
  },
  "job_config": {
    "model": "tts-dimas-formal",
    "wait": false,
    "pitch": 0,
    "tempo": 1,
    "audio_format": "opus"
  },
  "model": {
    "name": "tts-dimas-formal",
    "label": "TTS Dimas Formal",
    "language": "Bahasa Indonesia",
    "domain": "formal",
    "voice": "Dimas",
    "gender": "male",
    "channels": 1,
    "samplerate": 48000
  }
}

Info

See Retrieve a TTS Job for more information on the request.

The status describes the progress of the TTS request. We check whether the status is complete before returning the result.

Python | Node.js

url = "https://api.prosa.ai/v2/speech/tts"


def main():
    """Submit an asynchronous TTS job, query it once and save the audio.

    ``query_tts_result`` returns the job's ``result`` object (a dict whose
    ``data`` entry is base64-encoded audio) or ``None`` while the job is not
    complete, so the result is decoded before it is written and the
    not-ready case is reported instead of crashing in ``f.write``.
    """
    # Imported locally because this snippet does not show file-level imports.
    import base64

    filename = "audio_file.webm"
    text = "halo, dunia"

    job = submit_tts_request(text, "opus")

    job_id = job["job_id"]

    result = query_tts_result(job_id)
    if result is None:
        # A single query right after submitting may well be too early.
        print(f"TTS job {job_id} is not complete yet; query it again later.")
        return

    audio_data = base64.b64decode(result["data"])

    with open(filename, "wb") as f:
        f.write(audio_data)


def query_tts_result(job_id: str) -> Optional[dict]:
    """Fetch a TTS job and return its ``result`` object once it is complete.

    Args:
        job_id: Identifier appended to the endpoint URL.

    Returns:
        ``job["result"]`` when the HTTP status is 200 and the job status is
        ``"complete"``; otherwise ``None``.
    """
    job_url = "/".join((url, job_id))
    response = requests.get(job_url, headers={"x-api-key": api_key})

    # Any non-200 answer is treated the same as "no result yet".
    if response.status_code != 200:
        return None

    job = response.json()
    if job["status"] != "complete":
        return None

    return job["result"]

const url = 'https://api.prosa.ai/v2/speech/tts';

// Entry point: submit an asynchronous job, query it once, and save the audio
// if it is already available.
(async () => {
  const filename = 'generated_audio.webm';
  const text = "halo, dunia";

  const res = await submitTtsRequest(text, "opus");

  const jobId = res["job_id"];

  const audioData = await queryTtsResult(jobId);

  // queryTtsResult yields null until the job status is "complete", and
  // fs.writeFileSync throws on null data, so report the not-ready case.
  if (audioData == null) {
    console.log(`TTS job ${jobId} is not complete yet; query it again later.`);
    return;
  }

  fs.writeFileSync(filename, audioData);
})();

/**
 * Fetch a TTS job and return its decoded audio once the job is complete.
 *
 * @param {string} jobId - Identifier appended to the endpoint URL.
 * @returns {Promise<?Buffer>} Audio decoded from the base64 string in
 *   `result.data` when the status is "complete"; otherwise `null`.
 */
async function queryTtsResult(jobId) {
  const jobUrl = `${url}/${jobId}`;
  const job = await request(jobUrl, "GET", {
    headers: { "x-api-key": apiKey },
  });

  // Anything other than a finished job yields no audio yet.
  if (job["status"] !== "complete") {
    return null;
  }

  return Buffer.from(job["result"]["data"], 'base64');
}