@@ -22,8 +22,8 @@ export class Audio extends APIResource {
2222 * ```ts
2323 * const audio = await client.audio.create({
2424 * input: 'input',
25- * model: 'cartesia/sonic ',
26- * voice: 'laidback woman ',
25+ * model: 'canopylabs/orpheus-3b-0.1-ft ',
26+ * voice: 'voice ',
2727 * });
2828 *
2929 * const content = await audio.blob();
@@ -88,17 +88,26 @@ export interface AudioCreateParamsBase {
8888 * The name of the model to query.
8989 *
9090 * [See all of Together AI's audio models](https://docs.together.ai/docs/serverless-models#audio-models)
91+ * The currently supported TTS models are cartesia/sonic, hexgrad/Kokoro-82M,
92+ * and canopylabs/orpheus-3b-0.1-ft.
9193 */
92- model : 'cartesia/sonic' | ( string & { } ) ;
94+ model : 'cartesia/sonic' | 'hexgrad/Kokoro-82M' | 'canopylabs/orpheus-3b-0.1-ft' | ( string & { } ) ;
9395
9496 /**
95- * The voice to use for generating the audio.
97+ * The voice to use for generating the audio. The supported voices differ from
98+ * model to model. For example, canopylabs/orpheus-3b-0.1-ft supports the voice
99+ * tara, hexgrad/Kokoro-82M supports the voice af_alloy, and cartesia/sonic
100+ * supports the voice "friendly
101+ * sidekick".
102+ *
103+ * You can list the voices supported by a model by calling the /v1/voices
104+ * endpoint with the model name as a query parameter.
96105 * [View all supported voices here](https://docs.together.ai/docs/text-to-speech#voices-available).
97106 */
98- voice : 'laidback woman' | 'polite man' | 'storyteller lady' | 'friendly sidekick' | ( string & { } ) ;
107+ voice : string ;
99108
100109 /**
101- * Language of input text
110+ * Language of input text.
102111 */
103112 language ?:
104113 | 'en'
@@ -123,12 +132,15 @@ export interface AudioCreateParamsBase {
123132 response_encoding ?: 'pcm_f32le' | 'pcm_s16le' | 'pcm_mulaw' | 'pcm_alaw' ;
124133
125134 /**
126- * The format of audio output
135+ * The format of audio output. When streaming is false, the supported formats are
136+ * mp3, wav, and raw. When streaming is true, the only supported format is raw.
127137 */
128138 response_format ?: 'mp3' | 'wav' | 'raw' ;
129139
130140 /**
131- * Sampling rate to use for the output audio
141+ * Sampling rate to use for the output audio. The default sampling rate for
142+ * canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for
143+ * cartesia/sonic is 44100.
132144 */
133145 sample_rate ?: number ;
134146
0 commit comments