diff --git a/openapi.yaml b/openapi.yaml index e83d5966..d9e8ac98 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -1338,6 +1338,19 @@ paths: [vision](/docs/guides/vision), and [audio](/docs/guides/audio) guides. + + + Parameter support can differ depending on the model used to generate the + + response, particularly for newer reasoning models. Parameters that are + only + + supported for reasoning models are noted below. For the current state + of + + unsupported parameters in reasoning models, + + [refer to the reasoning guide](/docs/guides/reasoning). requestBody: required: true content: @@ -1371,7 +1384,7 @@ paths: "model": "VAR_chat_model_id", "messages": [ { - "role": "system", + "role": "developer", "content": "You are a helpful assistant." }, { @@ -1389,7 +1402,7 @@ paths: completion = client.chat.completions.create( model="VAR_chat_model_id", messages=[ - {"role": "system", "content": "You are a helpful assistant."}, + {"role": "developer", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"} ] ) @@ -1405,7 +1418,7 @@ paths: async function main() { const completion = await openai.chat.completions.create({ - messages: [{ role: "system", content: "You are a helpful assistant." }], + messages: [{ role: "developer", content: "You are a helpful assistant." }], model: "VAR_chat_model_id", }); @@ -1562,7 +1575,7 @@ paths: "model": "VAR_chat_model_id", "messages": [ { - "role": "system", + "role": "developer", "content": "You are a helpful assistant." 
}, { @@ -1581,7 +1594,7 @@ paths: completion = client.chat.completions.create( model="VAR_chat_model_id", messages=[ - {"role": "system", "content": "You are a helpful assistant."}, + {"role": "developer", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"} ], stream=True @@ -1601,7 +1614,7 @@ paths: const completion = await openai.chat.completions.create({ model: "VAR_chat_model_id", messages: [ - {"role": "system", "content": "You are a helpful assistant."}, + {"role": "developer", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"} ], stream: true, @@ -2670,6 +2683,16 @@ paths: "status": "queued", "validation_file": null, "training_file": "file-abc123", + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + } + } } - title: Epochs request: @@ -2680,8 +2703,13 @@ paths: -d '{ "training_file": "file-abc123", "model": "gpt-4o-mini", - "hyperparameters": { - "n_epochs": 2 + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 2 + } + } } }' python: | @@ -2691,8 +2719,13 @@ paths: client.fine_tuning.jobs.create( training_file="file-abc123", model="gpt-4o-mini", - hyperparameters={ - "n_epochs":2 + method={ + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 2 + } + } } ) node.js: | @@ -2704,7 +2737,14 @@ paths: const fineTune = await openai.fineTuning.jobs.create({ training_file: "file-abc123", model: "gpt-4o-mini", - hyperparameters: { n_epochs: 2 } + method: { + type: "supervised", + supervised: { + hyperparameters: { + n_epochs: 2 + } + } + } }); console.log(fineTune); @@ -2724,6 +2764,16 @@ paths: "validation_file": null, "training_file": "file-abc123", "hyperparameters": {"n_epochs": 2}, + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + 
"n_epochs": 2, + } + } + } } - title: Validation file request: @@ -2772,6 +2822,59 @@ paths: "status": "queued", "validation_file": "file-abc123", "training_file": "file-abc123", + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + } + } + } + - title: DPO + request: + curl: | + curl https://api.openai.com/v1/fine_tuning/jobs \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "training_file": "file-abc123", + "validation_file": "file-abc123", + "model": "gpt-4o-mini", + "method": { + "type": "dpo", + "dpo": { + "hyperparameters": { + "beta": 0.1, + } + } + } + }' + response: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "gpt-4o-mini-2024-07-18", + "created_at": 1721764800, + "fine_tuned_model": null, + "organization_id": "org-123", + "result_files": [], + "status": "queued", + "validation_file": "file-abc123", + "training_file": "file-abc123", + "method": { + "type": "dpo", + "dpo": { + "hyperparameters": { + "beta": 0.1, + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + } + } } - title: W&B Integration request: @@ -2817,7 +2920,17 @@ paths: "run_id": "ftjob-abc123" } } - ] + ], + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } + } + } } get: operationId: listPaginatedFineTuningJobs @@ -2972,7 +3085,17 @@ paths: "trained_tokens": 5768, "integrations": [], "seed": 0, - "estimated_finish": 0 + "estimated_finish": 0, + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + } + } + } } /fine_tuning/jobs/{fine_tuning_job_id}/cancel: post: @@ -3036,9 +3159,6 @@ paths: "fine_tuned_model": null, "organization_id": "org-123", "result_files": 
[], - "hyperparameters": { - "n_epochs": "auto" - }, "status": "cancelled", "validation_file": "file-abc123", "training_file": "file-abc123" @@ -6771,6 +6891,79 @@ paths: "id": "user_abc", "deleted": true } + /realtime/sessions: + post: + summary: > + Create an ephemeral API token for use in client-side applications with + the + + Realtime API. Can be configured with the same session parameters as the + + `session.update` client event. + + + It responds with a session object, plus a `client_secret` key which + contains + + a usable ephemeral API token that can be used to authenticate browser + clients + + for the Realtime API. + operationId: create-realtime-session + tags: + - Realtime + requestBody: + description: Create an ephemeral API key with the given session configuration. + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/RealtimeSessionCreateRequest" + responses: + "200": + description: Session created successfully. + content: + application/json: + schema: + $ref: "#/components/schemas/RealtimeSessionCreateResponse" + x-oaiMeta: + name: Create session + group: realtime + returns: The created Realtime session object, plus an ephemeral key + examples: + request: + curl: | + curl -X POST https://api.openai.com/v1/realtime/sessions \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-realtime-preview-2024-12-17", + "modalities": ["audio", "text"], + "instructions": "You are a friendly assistant." 
+ }' + response: | + { + "id": "sess_001", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-17", + "modalities": ["audio", "text"], + "instructions": "You are a friendly assistant.", + "voice": "alloy", + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": { + "model": "whisper-1" + }, + "turn_detection": null, + "tools": [], + "tool_choice": "none", + "temperature": 0.7, + "max_response_output_tokens": 200, + "client_secret": { + "value": "ek_abc123", + "expires_at": 1234567890 + } + } /threads: post: operationId: createThread @@ -12274,6 +12467,8 @@ components: ChatCompletionRequestAssistantMessage: type: object title: Assistant message + description: | + Messages sent by the model in response to user messages. properties: content: x-oaiExpandable: true @@ -12348,6 +12543,43 @@ components: - $ref: "#/components/schemas/ChatCompletionRequestMessageContentPartText" - $ref: "#/components/schemas/ChatCompletionRequestMessageContentPartRefusal" x-oaiExpandable: true + ChatCompletionRequestDeveloperMessage: + type: object + title: Developer message + description: > + Developer-provided instructions that the model should follow, regardless + of + + messages sent by the user. With o1 models and newer, `developer` + messages + + replace the previous `system` messages. + properties: + content: + description: The contents of the developer message. + oneOf: + - type: string + description: The contents of the developer message. + title: Text content + - type: array + description: An array of content parts with a defined type. For developer + messages, only type `text` is supported. + title: Array of content parts + items: + $ref: "#/components/schemas/ChatCompletionRequestMessageContentPartText" + minItems: 1 + role: + type: string + enum: + - developer + description: The role of the messages author, in this case `developer`. + name: + type: string + description: An optional name for the participant. 
Provides the model + information to differentiate between participants of the same role. + required: + - content + - role ChatCompletionRequestFunctionMessage: type: object title: Function message @@ -12371,6 +12603,7 @@ components: - name ChatCompletionRequestMessage: oneOf: + - $ref: "#/components/schemas/ChatCompletionRequestDeveloperMessage" - $ref: "#/components/schemas/ChatCompletionRequestSystemMessage" - $ref: "#/components/schemas/ChatCompletionRequestUserMessage" - $ref: "#/components/schemas/ChatCompletionRequestAssistantMessage" @@ -12475,6 +12708,14 @@ components: ChatCompletionRequestSystemMessage: type: object title: System message + description: > + Developer-provided instructions that the model should follow, regardless + of + + messages sent by the user. With o1 models and newer, use `developer` + messages + + for this purpose instead. properties: content: description: The contents of the system message. @@ -12541,6 +12782,9 @@ components: ChatCompletionRequestUserMessage: type: object title: User message + description: | + Messages sent by an end user, containing prompts or additional context + information. 
properties: content: description: | @@ -13335,6 +13579,8 @@ components: - type: string - type: string enum: + - o1 + - o1-2024-12-17 - o1-preview - o1-preview-2024-09-12 - o1-mini @@ -13343,10 +13589,11 @@ components: - gpt-4o-2024-11-20 - gpt-4o-2024-08-06 - gpt-4o-2024-05-13 - - gpt-4o-realtime-preview - - gpt-4o-realtime-preview-2024-10-01 - gpt-4o-audio-preview - gpt-4o-audio-preview-2024-10-01 + - gpt-4o-audio-preview-2024-12-17 + - gpt-4o-mini-audio-preview + - gpt-4o-mini-audio-preview-2024-12-17 - chatgpt-4o-latest - gpt-4o-mini - gpt-4o-mini-2024-07-18 @@ -13376,9 +13623,33 @@ components: nullable: true description: > Whether or not to store the output of this chat completion request + for + + use in our [model distillation](/docs/guides/distillation) or - for use in our [model distillation](/docs/guides/distillation) or [evals](/docs/guides/evals) products. + reasoning_effort: + type: string + enum: + - low + - medium + - high + default: medium + description: > + **o1 models only** + + + Constrains effort on reasoning for + + [reasoning + models](https://platform.openai.com/docs/guides/reasoning). + + Currently supported values are `low`, `medium`, and `high`. Reducing + + reasoning effort can result in faster responses and fewer tokens + used + + on reasoning in a response. metadata: type: object nullable: true @@ -13395,12 +13666,11 @@ components: nullable: true description: > Number between -2.0 and 2.0. Positive values penalize new tokens - based on their existing frequency in the text so far, decreasing the - model's likelihood to repeat the same line verbatim. + based on + their existing frequency in the text so far, decreasing the model's - [See more information about frequency and presence - penalties.](/docs/guides/text-generation) + likelihood to repeat the same line verbatim. 
logit_bias: type: object x-oaiTypeLabel: map @@ -13414,24 +13684,41 @@ components: Accepts a JSON object that maps tokens (specified by their token ID - in the tokenizer) to an associated bias value from -100 to 100. - Mathematically, the bias is added to the logits generated by the - model prior to sampling. The exact effect will vary per model, but - values between -1 and 1 should decrease or increase likelihood of - selection; values like -100 or 100 should result in a ban or - exclusive selection of the relevant token. + in the + + tokenizer) to an associated bias value from -100 to 100. + Mathematically, + + the bias is added to the logits generated by the model prior to + sampling. + + The exact effect will vary per model, but values between -1 and 1 + should + + decrease or increase likelihood of selection; values like -100 or + 100 + + should result in a ban or exclusive selection of the relevant token. logprobs: - description: Whether to return log probabilities of the output tokens or not. If - true, returns the log probabilities of each output token returned in - the `content` of `message`. + description: > + Whether to return log probabilities of the output tokens or not. If + true, + + returns the log probabilities of each output token returned in the + + `content` of `message`. type: boolean default: false nullable: true top_logprobs: - description: An integer between 0 and 20 specifying the number of most likely - tokens to return at each token position, each with an associated log - probability. `logprobs` must be set to `true` if this parameter is - used. + description: > + An integer between 0 and 20 specifying the number of most likely + tokens to + + return at each token position, each with an associated log + probability. + + `logprobs` must be set to `true` if this parameter is used. 
type: integer minimum: 0 maximum: 20 @@ -13439,13 +13726,17 @@ components: max_tokens: description: > The maximum number of [tokens](/tokenizer) that can be generated in - the chat completion. This value can be used to control + the + + chat completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. This value is now deprecated in favor of `max_completion_tokens`, - and is not compatible with [o1 series - models](/docs/guides/reasoning). + and is + + not compatible with [o1 series models](/docs/guides/reasoning). type: integer nullable: true deprecated: true @@ -13534,39 +13825,51 @@ components: nullable: true description: > Number between -2.0 and 2.0. Positive values penalize new tokens - based on whether they appear in the text so far, increasing the - model's likelihood to talk about new topics. + based on + whether they appear in the text so far, increasing the model's + likelihood - [See more information about frequency and presence - penalties.](/docs/guides/text-generation) + to talk about new topics. response_format: description: > An object specifying the format that the model must output. - Compatible with [GPT-4o](/docs/models#gpt-4o), [GPT-4o - mini](/docs/models#gpt-4o-mini), [GPT-4 - Turbo](/docs/models#gpt-4-turbo-and-gpt-4) and all GPT-3.5 Turbo - models newer than `gpt-3.5-turbo-1106`. Setting to `{ "type": "json_schema", "json_schema": {...} }` enables + Structured Outputs which ensures the model will match your supplied - JSON schema. Learn more in the [Structured Outputs + JSON + + schema. Learn more in the [Structured Outputs + guide](/docs/guides/structured-outputs). Setting to `{ "type": "json_object" }` enables JSON mode, which - ensures the message the model generates is valid JSON. + ensures + + the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the - model to produce JSON yourself via a system or user message. 
Without - this, the model may generate an unending stream of whitespace until - the generation reaches the token limit, resulting in a long-running - and seemingly "stuck" request. Also note that the message content - may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation - exceeded the max context length. + model + + to produce JSON yourself via a system or user message. Without this, + the + + model may generate an unending stream of whitespace until the + generation + + reaches the token limit, resulting in a long-running and seemingly + "stuck" + + request. Also note that the message content may be partially cut off + if + + `finish_reason="length"`, which indicates the generation exceeded + + `max_tokens` or the conversation exceeded the max context length. oneOf: - $ref: "#/components/schemas/ResponseFormatText" - $ref: "#/components/schemas/ResponseFormatJsonObject" @@ -13643,7 +13946,6 @@ components: like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. - We generally recommend altering this or `top_p` but not both. top_p: type: number @@ -13654,9 +13956,15 @@ components: nullable: true description: > An alternative to sampling with temperature, called nucleus - sampling, where the model considers the results of the tokens with - top_p probability mass. So 0.1 means only the tokens comprising the - top 10% probability mass are considered. + sampling, + + where the model considers the results of the tokens with top_p + probability + + mass. So 0.1 means only the tokens comprising the top 10% + probability mass + + are considered. We generally recommend altering this or `temperature` but not both. @@ -13688,18 +13996,29 @@ components: Controls which (if any) function is called by the model. + `none` means the model will not call a function and instead - generates a message. + generates a + + message. 
+ `auto` means the model can pick between generating a message or - calling a function. + calling a + + function. + Specifying a particular function via `{"name": "my_function"}` - forces the model to call that function. + forces the + + model to call that function. `none` is the default when no functions are present. `auto` is the - default if functions are present. + default + + if functions are present. oneOf: - type: string description: > @@ -14583,8 +14902,10 @@ components: The contents of the file should differ depending on if the model - uses the [chat](/docs/api-reference/fine-tuning/chat-input) or + uses the [chat](/docs/api-reference/fine-tuning/chat-input), [completions](/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](/docs/api-reference/fine-tuning/preference-input) format. @@ -14594,7 +14915,11 @@ components: example: file-abc123 hyperparameters: type: object - description: The hyperparameters used for the fine-tuning job. + description: > + The hyperparameters used for the fine-tuning job. + + This value is now deprecated in favor of `method`, and should be + passed in under the `method` parameter. properties: batch_size: description: > @@ -14638,6 +14963,7 @@ components: minimum: 1 maximum: 50 default: auto + deprecated: true suffix: description: > A string of up to 64 characters that will be added to your @@ -14752,6 +15078,8 @@ components: minimum: 0 maximum: 2147483647 example: 42 + method: + $ref: "#/components/schemas/FineTuneMethod" required: - model - training_file @@ -16589,301 +16917,612 @@ components: - $ref: "#/components/schemas/ChatCompletionRequestAssistantMessage" required: - role - FineTuningIntegration: + FineTuneChatRequestInput: type: object - title: Fine-Tuning Job Integration - required: - - type - - wandb + description: The per-line training example of a fine-tuning input file for chat + models using the supervised method. 
properties: - type: - type: string - description: The type of the integration being enabled for the fine-tuning job - enum: - - wandb - wandb: - type: object - description: > - The settings for your integration with Weights and Biases. This - payload specifies the project that - - metrics will be sent to. Optionally, you can set an explicit display - name for your run, add tags - - to your run, and set a default entity (team, username, etc) to be - associated with your run. - required: - - project - properties: - project: - description: | - The name of the project that the new run will be created under. - type: string - example: my-wandb-project - name: - description: > - A display name to set for the run. If not set, we will use the - Job ID as the name. - nullable: true - type: string - entity: - description: > - The entity to use for the run. This allows you to set the team - or username of the WandB user that you would - - like associated with the run. If not set, the default entity for - the registered WandB API key is used. - nullable: true - type: string - tags: - description: > - A list of tags to be attached to the newly created run. These - tags are passed through directly to WandB. Some - - default tags are generated by OpenAI: "openai/finetune", - "openai/{base-model}", "openai/{ftjob-abcdef}". - type: array - items: - type: string - example: custom-tag - FineTuningJob: - type: object - title: FineTuningJob - description: > - The `fine_tuning.job` object represents a fine-tuning job that has been - created through the API. 
+ messages: + type: array + minItems: 1 + items: + oneOf: + - $ref: "#/components/schemas/ChatCompletionRequestSystemMessage" + - $ref: "#/components/schemas/ChatCompletionRequestUserMessage" + - $ref: "#/components/schemas/FineTuneChatCompletionRequestAssistantMessage" + - $ref: "#/components/schemas/ChatCompletionRequestToolMessage" + - $ref: "#/components/schemas/ChatCompletionRequestFunctionMessage" + x-oaiExpandable: true + tools: + type: array + description: A list of tools the model may generate JSON inputs for. + items: + $ref: "#/components/schemas/ChatCompletionTool" + parallel_tool_calls: + $ref: "#/components/schemas/ParallelToolCalls" + functions: + deprecated: true + description: A list of functions the model may generate JSON inputs for. + type: array + minItems: 1 + maxItems: 128 + items: + $ref: "#/components/schemas/ChatCompletionFunctions" + x-oaiMeta: + name: Training format for chat models using the supervised method + example: > + { + "messages": [ + { "role": "user", "content": "What is the weather in San Francisco?" }, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_id", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"San Francisco, USA\", \"format\": \"celsius\"}" + } + } + ] + } + ], + "parallel_tool_calls": false, + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and country, eg. 
San Francisco, USA" + }, + "format": { "type": "string", "enum": ["celsius", "fahrenheit"] } + }, + "required": ["location", "format"] + } + } + } + ] + } + FineTuneCompletionRequestInput: + type: object + description: The per-line training example of a fine-tuning input file for + completions models properties: - id: + prompt: type: string - description: The object identifier, which can be referenced in the API endpoints. - created_at: - type: integer - description: The Unix timestamp (in seconds) for when the fine-tuning job was - created. - error: - type: object - nullable: true - description: For fine-tuning jobs that have `failed`, this will contain more - information on the cause of the failure. - properties: - code: - type: string - description: A machine-readable error code. - message: - type: string - description: A human-readable error message. - param: - type: string - description: The parameter that was invalid, usually `training_file` or - `validation_file`. This field will be null if the failure was - not parameter-specific. - nullable: true - required: - - code - - message - - param - fine_tuned_model: + description: The input prompt for this training example. + completion: type: string - nullable: true - description: The name of the fine-tuned model that is being created. The value - will be null if the fine-tuning job is still running. - finished_at: - type: integer - nullable: true - description: The Unix timestamp (in seconds) for when the fine-tuning job was - finished. The value will be null if the fine-tuning job is still - running. + description: The desired completion for this training example. + x-oaiMeta: + name: Training format for completions models + example: | + { + "prompt": "What is the answer to 2+2", + "completion": "4" + } + FineTuneDPOMethod: + type: object + description: Configuration for the DPO fine-tuning method. + properties: hyperparameters: type: object - description: The hyperparameters used for the fine-tuning job. 
See the - [fine-tuning guide](/docs/guides/fine-tuning) for more details. + description: The hyperparameters used for the fine-tuning job. properties: - n_epochs: + beta: + description: > + The beta value for the DPO method. A higher beta value will + increase the weight of the penalty between the policy and + reference model. + oneOf: + - type: string + enum: + - auto + - type: number + minimum: 0 + maximum: 2 + exclusiveMinimum: true + default: auto + batch_size: + description: > + Number of examples in each batch. A larger batch size means that + model parameters are updated less frequently, but with lower + variance. oneOf: - type: string enum: - auto - type: integer minimum: 1 - maximum: 50 + maximum: 256 + default: auto + learning_rate_multiplier: + description: > + Scaling factor for the learning rate. A smaller learning rate + may be useful to avoid overfitting. + oneOf: + - type: string + enum: + - auto + - type: number + minimum: 0 + exclusiveMinimum: true default: auto - description: >- + n_epochs: + description: > The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset. - - "auto" decides the optimal number of epochs based on the size of - the dataset. If setting the number manually, we support any - number between 1 and 50 epochs. - required: - - n_epochs - model: - type: string - description: The base model that is being fine-tuned. - object: + oneOf: + - type: string + enum: + - auto + - type: integer + minimum: 1 + maximum: 50 + default: auto + FineTuneMethod: + type: object + description: The method used for fine-tuning. + properties: + type: type: string - description: The object type, which is always "fine_tuning.job". + description: The type of method. Is either `supervised` or `dpo`. enum: - - fine_tuning.job - organization_id: - type: string - description: The organization that owns the fine-tuning job. 
- result_files: + - supervised + - dpo + supervised: + type: object + $ref: "#/components/schemas/FineTuneSupervisedMethod" + dpo: + type: object + $ref: "#/components/schemas/FineTuneDPOMethod" + FineTunePreferenceRequestInput: + type: object + description: The per-line training example of a fine-tuning input file for chat + models using the dpo method. + properties: + input: + type: object + properties: + messages: + type: array + minItems: 1 + items: + oneOf: + - $ref: "#/components/schemas/ChatCompletionRequestSystemMessage" + - $ref: "#/components/schemas/ChatCompletionRequestUserMessage" + - $ref: "#/components/schemas/FineTuneChatCompletionRequestAssistantMessage" + - $ref: "#/components/schemas/ChatCompletionRequestToolMessage" + - $ref: "#/components/schemas/ChatCompletionRequestFunctionMessage" + x-oaiExpandable: true + tools: + type: array + description: A list of tools the model may generate JSON inputs for. + items: + $ref: "#/components/schemas/ChatCompletionTool" + parallel_tool_calls: + $ref: "#/components/schemas/ParallelToolCalls" + preferred_completion: type: array - description: The compiled results file ID(s) for the fine-tuning job. You can - retrieve the results with the [Files - API](/docs/api-reference/files/retrieve-contents). + description: The preferred completion message for the output. + maxItems: 1 items: - type: string - example: file-abc123 - status: - type: string - description: The current status of the fine-tuning job, which can be either - `validating_files`, `queued`, `running`, `succeeded`, `failed`, or - `cancelled`. - enum: - - validating_files - - queued - - running - - succeeded - - failed - - cancelled - trained_tokens: - type: integer - nullable: true - description: The total number of billable tokens processed by this fine-tuning - job. The value will be null if the fine-tuning job is still running. - training_file: - type: string - description: The file ID used for training. 
You can retrieve the training data - with the [Files API](/docs/api-reference/files/retrieve-contents). - validation_file: - type: string - nullable: true - description: The file ID used for validation. You can retrieve the validation - results with the [Files - API](/docs/api-reference/files/retrieve-contents). - integrations: + oneOf: + - $ref: "#/components/schemas/ChatCompletionRequestAssistantMessage" + x-oaiExpandable: true + non_preferred_completion: type: array - nullable: true - description: A list of integrations to enable for this fine-tuning job. - maxItems: 5 + description: The non-preferred completion message for the output. + maxItems: 1 items: oneOf: - - $ref: "#/components/schemas/FineTuningIntegration" + - $ref: "#/components/schemas/ChatCompletionRequestAssistantMessage" x-oaiExpandable: true - seed: - type: integer - description: The seed used for the fine-tuning job. - estimated_finish: - type: integer - nullable: true - description: The Unix timestamp (in seconds) for when the fine-tuning job is - estimated to finish. The value will be null if the fine-tuning job - is not running. 
- required: - - created_at - - error - - finished_at - - fine_tuned_model - - hyperparameters - - id - - model - - object - - organization_id - - result_files - - status - - trained_tokens - - training_file - - validation_file - - seed x-oaiMeta: - name: The fine-tuning job object - example: | + name: Training format for chat models using the preference method + example: > { - "object": "fine_tuning.job", - "id": "ftjob-abc123", - "model": "davinci-002", - "created_at": 1692661014, - "finished_at": 1692661190, - "fine_tuned_model": "ft:davinci-002:my-org:custom_suffix:7q8mpxmy", - "organization_id": "org-123", - "result_files": [ - "file-abc123" - ], - "status": "succeeded", - "validation_file": null, - "training_file": "file-abc123", - "hyperparameters": { - "n_epochs": 4, - "batch_size": 1, - "learning_rate_multiplier": 1.0 + "input": { + "messages": [ + { "role": "user", "content": "What is the weather in San Francisco?" } + ] }, - "trained_tokens": 5768, - "integrations": [], - "seed": 0, - "estimated_finish": 0 + "preferred_completion": [ + { + "role": "assistant", + "content": "The weather in San Francisco is 70 degrees Fahrenheit." + } + ], + "non_preferred_completion": [ + { + "role": "assistant", + "content": "The weather in San Francisco is 21 degrees Celsius." + } + ] } - FineTuningJobCheckpoint: + FineTuneSupervisedMethod: type: object - title: FineTuningJobCheckpoint + description: Configuration for the supervised fine-tuning method. + properties: + hyperparameters: + type: object + description: The hyperparameters used for the fine-tuning job. + properties: + batch_size: + description: > + Number of examples in each batch. A larger batch size means that + model parameters are updated less frequently, but with lower + variance. + oneOf: + - type: string + enum: + - auto + - type: integer + minimum: 1 + maximum: 256 + default: auto + learning_rate_multiplier: + description: > + Scaling factor for the learning rate. 
A smaller learning rate + may be useful to avoid overfitting. + oneOf: + - type: string + enum: + - auto + - type: number + minimum: 0 + exclusiveMinimum: true + default: auto + n_epochs: + description: > + The number of epochs to train the model for. An epoch refers to + one full cycle through the training dataset. + oneOf: + - type: string + enum: + - auto + - type: integer + minimum: 1 + maximum: 50 + default: auto + FineTuningIntegration: + type: object + title: Fine-Tuning Job Integration + required: + - type + - wandb + properties: + type: + type: string + description: The type of the integration being enabled for the fine-tuning job + enum: + - wandb + wandb: + type: object + description: > + The settings for your integration with Weights and Biases. This + payload specifies the project that + + metrics will be sent to. Optionally, you can set an explicit display + name for your run, add tags + + to your run, and set a default entity (team, username, etc) to be + associated with your run. + required: + - project + properties: + project: + description: | + The name of the project that the new run will be created under. + type: string + example: my-wandb-project + name: + description: > + A display name to set for the run. If not set, we will use the + Job ID as the name. + nullable: true + type: string + entity: + description: > + The entity to use for the run. This allows you to set the team + or username of the WandB user that you would + + like associated with the run. If not set, the default entity for + the registered WandB API key is used. + nullable: true + type: string + tags: + description: > + A list of tags to be attached to the newly created run. These + tags are passed through directly to WandB. Some + + default tags are generated by OpenAI: "openai/finetune", + "openai/{base-model}", "openai/{ftjob-abcdef}". 
+ type: array + items: + type: string + example: custom-tag + FineTuningJob: + type: object + title: FineTuningJob description: > - The `fine_tuning.job.checkpoint` object represents a model checkpoint - for a fine-tuning job that is ready to use. + The `fine_tuning.job` object represents a fine-tuning job that has been + created through the API. properties: id: type: string - description: The checkpoint identifier, which can be referenced in the API - endpoints. + description: The object identifier, which can be referenced in the API endpoints. created_at: type: integer - description: The Unix timestamp (in seconds) for when the checkpoint was created. - fine_tuned_model_checkpoint: + description: The Unix timestamp (in seconds) for when the fine-tuning job was + created. + error: + type: object + nullable: true + description: For fine-tuning jobs that have `failed`, this will contain more + information on the cause of the failure. + properties: + code: + type: string + description: A machine-readable error code. + message: + type: string + description: A human-readable error message. + param: + type: string + description: The parameter that was invalid, usually `training_file` or + `validation_file`. This field will be null if the failure was + not parameter-specific. + nullable: true + required: + - code + - message + - param + fine_tuned_model: type: string - description: The name of the fine-tuned checkpoint model that is created. - step_number: + nullable: true + description: The name of the fine-tuned model that is being created. The value + will be null if the fine-tuning job is still running. + finished_at: type: integer - description: The step number that the checkpoint was created at. - metrics: + nullable: true + description: The Unix timestamp (in seconds) for when the fine-tuning job was + finished. The value will be null if the fine-tuning job is still + running. 
+ hyperparameters: type: object - description: Metrics at the step number during the fine-tuning job. + description: The hyperparameters used for the fine-tuning job. This value will + only be returned when running `supervised` jobs. properties: - step: - type: number - train_loss: - type: number - train_mean_token_accuracy: - type: number - valid_loss: - type: number - valid_mean_token_accuracy: - type: number - full_valid_loss: - type: number - full_valid_mean_token_accuracy: - type: number - fine_tuning_job_id: + batch_size: + description: > + Number of examples in each batch. A larger batch size means that + model parameters + + are updated less frequently, but with lower variance. + oneOf: + - type: string + enum: + - auto + - type: integer + minimum: 1 + maximum: 256 + default: auto + learning_rate_multiplier: + description: > + Scaling factor for the learning rate. A smaller learning rate + may be useful to avoid + + overfitting. + oneOf: + - type: string + enum: + - auto + - type: number + minimum: 0 + exclusiveMinimum: true + default: auto + n_epochs: + description: > + The number of epochs to train the model for. An epoch refers to + one full cycle + + through the training dataset. + oneOf: + - type: string + enum: + - auto + - type: integer + minimum: 1 + maximum: 50 + default: auto + model: type: string - description: The name of the fine-tuning job that this checkpoint was created - from. + description: The base model that is being fine-tuned. object: type: string - description: The object type, which is always "fine_tuning.job.checkpoint". + description: The object type, which is always "fine_tuning.job". 
enum: - - fine_tuning.job.checkpoint - required: - - created_at - - fine_tuning_job_id - - fine_tuned_model_checkpoint - - id - - metrics - - object - - step_number - x-oaiMeta: - name: The fine-tuning job checkpoint object - example: > - { - "object": "fine_tuning.job.checkpoint", - "id": "ftckpt_qtZ5Gyk4BLq1SfLFWp3RtO3P", + - fine_tuning.job + organization_id: + type: string + description: The organization that owns the fine-tuning job. + result_files: + type: array + description: The compiled results file ID(s) for the fine-tuning job. You can + retrieve the results with the [Files + API](/docs/api-reference/files/retrieve-contents). + items: + type: string + example: file-abc123 + status: + type: string + description: The current status of the fine-tuning job, which can be either + `validating_files`, `queued`, `running`, `succeeded`, `failed`, or + `cancelled`. + enum: + - validating_files + - queued + - running + - succeeded + - failed + - cancelled + trained_tokens: + type: integer + nullable: true + description: The total number of billable tokens processed by this fine-tuning + job. The value will be null if the fine-tuning job is still running. + training_file: + type: string + description: The file ID used for training. You can retrieve the training data + with the [Files API](/docs/api-reference/files/retrieve-contents). + validation_file: + type: string + nullable: true + description: The file ID used for validation. You can retrieve the validation + results with the [Files + API](/docs/api-reference/files/retrieve-contents). + integrations: + type: array + nullable: true + description: A list of integrations to enable for this fine-tuning job. + maxItems: 5 + items: + oneOf: + - $ref: "#/components/schemas/FineTuningIntegration" + x-oaiExpandable: true + seed: + type: integer + description: The seed used for the fine-tuning job. 
+ estimated_finish: + type: integer + nullable: true + description: The Unix timestamp (in seconds) for when the fine-tuning job is + estimated to finish. The value will be null if the fine-tuning job + is not running. + method: + $ref: "#/components/schemas/FineTuneMethod" + required: + - created_at + - error + - finished_at + - fine_tuned_model + - hyperparameters + - id + - model + - object + - organization_id + - result_files + - status + - trained_tokens + - training_file + - validation_file + - seed + x-oaiMeta: + name: The fine-tuning job object + example: | + { + "object": "fine_tuning.job", + "id": "ftjob-abc123", + "model": "davinci-002", + "created_at": 1692661014, + "finished_at": 1692661190, + "fine_tuned_model": "ft:davinci-002:my-org:custom_suffix:7q8mpxmy", + "organization_id": "org-123", + "result_files": [ + "file-abc123" + ], + "status": "succeeded", + "validation_file": null, + "training_file": "file-abc123", + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + }, + "trained_tokens": 5768, + "integrations": [], + "seed": 0, + "estimated_finish": 0, + "method": { + "type": "supervised", + "supervised": { + "hyperparameters": { + "n_epochs": 4, + "batch_size": 1, + "learning_rate_multiplier": 1.0 + } + } + } + } + FineTuningJobCheckpoint: + type: object + title: FineTuningJobCheckpoint + description: > + The `fine_tuning.job.checkpoint` object represents a model checkpoint + for a fine-tuning job that is ready to use. + properties: + id: + type: string + description: The checkpoint identifier, which can be referenced in the API + endpoints. + created_at: + type: integer + description: The Unix timestamp (in seconds) for when the checkpoint was created. + fine_tuned_model_checkpoint: + type: string + description: The name of the fine-tuned checkpoint model that is created. + step_number: + type: integer + description: The step number that the checkpoint was created at. 
+ metrics: + type: object + description: Metrics at the step number during the fine-tuning job. + properties: + step: + type: number + train_loss: + type: number + train_mean_token_accuracy: + type: number + valid_loss: + type: number + valid_mean_token_accuracy: + type: number + full_valid_loss: + type: number + full_valid_mean_token_accuracy: + type: number + fine_tuning_job_id: + type: string + description: The name of the fine-tuning job that this checkpoint was created + from. + object: + type: string + description: The object type, which is always "fine_tuning.job.checkpoint". + enum: + - fine_tuning.job.checkpoint + required: + - created_at + - fine_tuning_job_id + - fine_tuned_model_checkpoint + - id + - metrics + - object + - step_number + x-oaiMeta: + name: The fine-tuning job checkpoint object + example: > + { + "object": "fine_tuning.job.checkpoint", + "id": "ftckpt_qtZ5Gyk4BLq1SfLFWp3RtO3P", "created_at": 1712211699, "fine_tuned_model_checkpoint": "ft:gpt-4o-mini-2024-07-18:my-org:custom_suffix:9ABel2dg:ckpt-step-88", "fine_tuning_job_id": "ftjob-fpbNQ3H1GrMehXRf8cO97xTN", @@ -16902,22 +17541,37 @@ components: type: object description: Fine-tuning job event object properties: + object: + type: string + description: The object type, which is always "fine_tuning.job.event". + enum: + - fine_tuning.job.event id: type: string + description: The object identifier. created_at: type: integer + description: The Unix timestamp (in seconds) for when the fine-tuning job event was + created. level: type: string + description: The log level of the event. enum: - info - warn - error message: type: string - object: + description: The message of the event. + type: type: string + description: The type of event. enum: - - fine_tuning.job.event + - message + - metrics + data: + type: object + description: The data associated with the event.
required: - id - object @@ -16932,164 +17586,76 @@ components: "id": "ftevent-abc123" "created_at": 1677610602, "level": "info", - "message": "Created fine-tuning job" + "message": "Created fine-tuning job", + "data": {}, + "type": "message" } - FinetuneChatRequestInput: + FunctionObject: type: object - description: The per-line training example of a fine-tuning input file for chat models properties: - messages: - type: array - minItems: 1 - items: - oneOf: - - $ref: "#/components/schemas/ChatCompletionRequestSystemMessage" - - $ref: "#/components/schemas/ChatCompletionRequestUserMessage" - - $ref: "#/components/schemas/FineTuneChatCompletionRequestAssistantMessage" - - $ref: "#/components/schemas/ChatCompletionRequestToolMessage" - - $ref: "#/components/schemas/ChatCompletionRequestFunctionMessage" - x-oaiExpandable: true - tools: - type: array - description: A list of tools the model may generate JSON inputs for. - items: - $ref: "#/components/schemas/ChatCompletionTool" - parallel_tool_calls: - $ref: "#/components/schemas/ParallelToolCalls" - functions: - deprecated: true - description: A list of functions the model may generate JSON inputs for. - type: array - minItems: 1 - maxItems: 128 - items: - $ref: "#/components/schemas/ChatCompletionFunctions" - x-oaiMeta: - name: Training format for chat models - example: > - { - "messages": [ - { "role": "user", "content": "What is the weather in San Francisco?" }, - { - "role": "assistant", - "tool_calls": [ - { - "id": "call_id", - "type": "function", - "function": { - "name": "get_current_weather", - "arguments": "{\"location\": \"San Francisco, USA\", \"format\": \"celsius\"}" - } - } - ] - } - ], - "parallel_tool_calls": false, - "tools": [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and country, eg. 
San Francisco, USA" - }, - "format": { "type": "string", "enum": ["celsius", "fahrenheit"] } - }, - "required": ["location", "format"] - } - } - } - ] - } - FinetuneCompletionRequestInput: - type: object - description: The per-line training example of a fine-tuning input file for - completions models - properties: - prompt: - type: string - description: The input prompt for this training example. - completion: - type: string - description: The desired completion for this training example. - x-oaiMeta: - name: Training format for completions models - example: | - { - "prompt": "What is the answer to 2+2", - "completion": "4" - } - FunctionObject: - type: object - properties: - description: - type: string - description: A description of what the function does, used by the model to - choose when and how to call the function. - name: - type: string - description: The name of the function to be called. Must be a-z, A-Z, 0-9, or - contain underscores and dashes, with a maximum length of 64. - parameters: - $ref: "#/components/schemas/FunctionParameters" - strict: - type: boolean - nullable: true - default: false - description: Whether to enable strict schema adherence when generating the - function call. If set to true, the model will follow the exact - schema defined in the `parameters` field. Only a subset of JSON - Schema is supported when `strict` is `true`. Learn more about - Structured Outputs in the [function calling - guide](docs/guides/function-calling). - required: - - name - FunctionParameters: - type: object - description: >- - The parameters the functions accepts, described as a JSON Schema object. - See the [guide](/docs/guides/function-calling) for examples, and the - [JSON Schema - reference](https://json-schema.org/understanding-json-schema/) for - documentation about the format. - - - Omitting `parameters` defines a function with an empty parameter list. 
- additionalProperties: true - Image: - type: object - description: Represents the url or the content of an image generated by the - OpenAI API. - properties: - b64_json: - type: string - description: The base64-encoded JSON of the generated image, if - `response_format` is `b64_json`. - url: - type: string - description: The URL of the generated image, if `response_format` is `url` - (default). - revised_prompt: - type: string - description: The prompt that was used to generate the image, if there was any - revision to the prompt. - x-oaiMeta: - name: The image object - example: | - { - "url": "...", - "revised_prompt": "..." - } - ImagesResponse: - properties: - created: - type: integer - data: + description: + type: string + description: A description of what the function does, used by the model to + choose when and how to call the function. + name: + type: string + description: The name of the function to be called. Must be a-z, A-Z, 0-9, or + contain underscores and dashes, with a maximum length of 64. + parameters: + $ref: "#/components/schemas/FunctionParameters" + strict: + type: boolean + nullable: true + default: false + description: Whether to enable strict schema adherence when generating the + function call. If set to true, the model will follow the exact + schema defined in the `parameters` field. Only a subset of JSON + Schema is supported when `strict` is `true`. Learn more about + Structured Outputs in the [function calling + guide](docs/guides/function-calling). + required: + - name + FunctionParameters: + type: object + description: >- + The parameters the functions accepts, described as a JSON Schema object. + See the [guide](/docs/guides/function-calling) for examples, and the + [JSON Schema + reference](https://json-schema.org/understanding-json-schema/) for + documentation about the format. + + + Omitting `parameters` defines a function with an empty parameter list. 
+ additionalProperties: true + Image: + type: object + description: Represents the url or the content of an image generated by the + OpenAI API. + properties: + b64_json: + type: string + description: The base64-encoded JSON of the generated image, if + `response_format` is `b64_json`. + url: + type: string + description: The URL of the generated image, if `response_format` is `url` + (default). + revised_prompt: + type: string + description: The prompt that was used to generate the image, if there was any + revision to the prompt. + x-oaiMeta: + name: The image object + example: | + { + "url": "...", + "revised_prompt": "..." + } + ImagesResponse: + properties: + created: + type: integer + data: type: array items: $ref: "#/components/schemas/Image" @@ -19446,6 +20012,11 @@ components: enum: - response.cancel description: The event type, must be `response.cancel`. + response_id: + type: string + description: | + A specific response ID to cancel - if not provided, will cancel an + in-progress response in the default conversation. required: - type x-oaiMeta: @@ -19501,10 +20072,9 @@ components: - response.create description: The event type, must be `response.create`. response: - $ref: "#/components/schemas/RealtimeSession" + $ref: "#/components/schemas/RealtimeResponseCreateParams" required: - type - - response x-oaiMeta: name: response.create group: realtime @@ -19533,8 +20103,8 @@ components: } ], "tool_choice": "auto", - "temperature": 0.7, - "max_output_tokens": 150 + "temperature": 0.8, + "max_output_tokens": 1024 } } RealtimeClientEventSessionUpdate: @@ -19566,7 +20136,7 @@ components: - session.update description: The event type, must be `session.update`. 
session: - $ref: "#/components/schemas/RealtimeSession" + $ref: "#/components/schemas/RealtimeSessionCreateRequest" required: - type - session @@ -19590,7 +20160,8 @@ components: "type": "server_vad", "threshold": 0.5, "prefix_padding_ms": 300, - "silence_duration_ms": 500 + "silence_duration_ms": 500, + "create_response": true }, "tools": [ { @@ -19659,7 +20230,7 @@ components: enum: - user - assistant - - systems + - system description: > The role of the message sender (`user`, `assistant`, `system`), only @@ -19686,13 +20257,26 @@ components: enum: - input_audio - input_text + - item_reference - text - description: The content type (`input_text`, `input_audio`, `text`). + description: > + The content type (`input_text`, `input_audio`, + `item_reference`, `text`). text: type: string description: > The text content, used for `input_text` and `text` content types. + id: + type: string + description: > + ID of a previous conversation item to reference (for + `item_reference` + + content types in `response.create` events). These can + reference both + + client and server created items. audio: type: string description: > @@ -19766,7 +20350,10 @@ components: The type of error that caused the response to fail, corresponding - with the `status` field (`cancelled`, `incomplete`, `failed`). + with the `status` field (`completed`, `cancelled`, + `incomplete`, + + `failed`). reason: type: string enum: @@ -19805,6 +20392,13 @@ components: description: The list of output items generated by the response. items: $ref: "#/components/schemas/RealtimeConversationItem" + metadata: + description: > + Developer-provided string key-value pairs associated with this + response. + type: object + x-oaiTypeLabel: map + nullable: true usage: type: object description: > @@ -19863,69 +20457,245 @@ components: audio_tokens: type: integer description: The number of audio tokens used in the Response. 
- RealtimeServerEventConversationCreated: + RealtimeResponseCreateParams: type: object - description: > - Returned when a conversation is created. Emitted right after session - creation. + description: Create a new Realtime response with these parameters properties: - event_id: - type: string - description: The unique ID of the server event. - type: + modalities: + type: array + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + instructions: type: string - enum: - - conversation.created - description: The event type, must be `conversation.created`. - conversation: - type: object - description: The conversation resource. - properties: - id: - type: string - description: The unique ID of the conversation. - object: - type: string - description: The object type, must be `realtime.conversation`. - required: - - event_id - - type - - conversation - x-oaiMeta: - name: conversation.created - group: realtime - example: | - { - "event_id": "event_9101", - "type": "conversation.created", - "conversation": { - "id": "conv_001", - "object": "realtime.conversation" - } - } - RealtimeServerEventConversationItemCreated: - type: object - description: > - Returned when a conversation item is created. There are several - scenarios that + description: > + The default system instructions (i.e. system message) prepended to + model - produce this event: - - The server is generating a Response, which if successful will produce - either one or two Items, which will be of type `message` - (role `assistant`) or type `function_call`. - - The input audio buffer has been committed, either by the client or the - server (in `server_vad` mode). The server will take the content of the - input audio buffer and add it to a new user message Item. - - The client has sent a `conversation.item.create` event to add a new Item - to the Conversation. 
- properties: - event_id: - type: string - description: The unique ID of the server event. - type: + calls. This field allows the client to guide the model on desired + + responses. The model can be instructed on response content and + format, + + (e.g. "be extremely succinct", "act friendly", "here are examples of + good + + responses") and on audio behavior (e.g. "talk quickly", "inject + emotion + + into your voice", "laugh frequently"). The instructions are not + guaranteed + + to be followed by the model, but they provide guidance to the model + on the + + desired behavior. + + + Note that the server sets default instructions which will be used if + this + + field is not set and are visible in the `session.created` event at + the + + start of the session. + voice: type: string enum: - - conversation.item.created + - alloy + - ash + - ballad + - coral + - echo + - sage + - shimmer + - verse + description: > + The voice the model uses to respond. Voice cannot be changed during + the + + session once the model has responded with audio at least once. + Current + + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo` `sage`, + + `shimmer` and `verse`. + output_audio_format: + type: string + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: > + The format of output audio. Options are `pcm16`, `g711_ulaw`, or + `g711_alaw`. + tools: + type: array + description: Tools (functions) available to the model. + items: + type: object + properties: + type: + type: string + enum: + - function + description: The type of the tool, i.e. `function`. + name: + type: string + description: The name of the function. + description: + type: string + description: > + The description of the function, including guidance on when + and how + + to call it, and guidance about what to tell the user when + calling + + (if anything). + parameters: + type: object + description: Parameters of the function in JSON Schema. 
+ tool_choice: + type: string + description: > + How the model chooses tools. Options are `auto`, `none`, `required`, + or + + specify a function, like `{"type": "function", "function": {"name": + "my_function"}}`. + temperature: + type: number + description: > + Sampling temperature for the model, limited to [0.6, 1.2]. Defaults + to 0.8. + max_response_output_tokens: + oneOf: + - type: integer + - type: string + enum: + - inf + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + conversation: + description: > + Controls which conversation the response is added to. Currently + supports + + `auto` and `none`, with `auto` as the default value. The `auto` + value + + means that the contents of the response will be added to the default + + conversation. Set this to `none` to create an out-of-band response + which + + will not add items to default conversation. + oneOf: + - type: string + - type: string + default: auto + enum: + - auto + - none + metadata: + description: > + Set of 16 key-value pairs that can be attached to an object. This + can be + + useful for storing additional information about the object in a + structured + + format. Keys can be a maximum of 64 characters long and values can + be a + + maximum of 512 characters long. + type: object + x-oaiTypeLabel: map + nullable: true + input: + type: array + description: > + Input items to include in the prompt for the model. Creates a new + context + + for this response, without including the default conversation. Can + include + + references to items from the default conversation. + items: + $ref: "#/components/schemas/RealtimeConversationItem" + RealtimeServerEventConversationCreated: + type: object + description: > + Returned when a conversation is created. Emitted right after session + creation. 
+ properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - conversation.created + description: The event type, must be `conversation.created`. + conversation: + type: object + description: The conversation resource. + properties: + id: + type: string + description: The unique ID of the conversation. + object: + type: string + description: The object type, must be `realtime.conversation`. + required: + - event_id + - type + - conversation + x-oaiMeta: + name: conversation.created + group: realtime + example: | + { + "event_id": "event_9101", + "type": "conversation.created", + "conversation": { + "id": "conv_001", + "object": "realtime.conversation" + } + } + RealtimeServerEventConversationItemCreated: + type: object + description: > + Returned when a conversation item is created. There are several + scenarios that + + produce this event: + - The server is generating a Response, which if successful will produce + either one or two Items, which will be of type `message` + (role `assistant`) or type `function_call`. + - The input audio buffer has been committed, either by the client or the + server (in `server_vad` mode). The server will take the content of the + input audio buffer and add it to a new user message Item. + - The client has sent a `conversation.item.create` event to add a new Item + to the Conversation. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - conversation.item.created description: The event type, must be `conversation.item.created`. previous_item_id: type: string @@ -20201,6 +20971,9 @@ components: error: type: object description: Details of the error. + required: + - type + - message properties: type: type: string @@ -20210,17 +20983,20 @@ components: code: type: string description: Error code, if any. + nullable: true message: type: string description: A human-readable error message. 
param: type: string description: Parameter related to the error, if any. + nullable: true event_id: type: string description: > The event_id of the client event that caused the error, if applicable. + nullable: true required: - event_id - type @@ -20455,7 +21231,11 @@ components: properties: name: type: string - description: The name of the rate limit (`requests`, `tokens`). + enum: + - requests + - tokens + description: | + The name of the rate limit (`requests`, `tokens`). limit: type: integer description: The maximum allowed value for the rate limit. @@ -20792,6 +21572,9 @@ components: properties: type: type: string + enum: + - audio + - text description: The content type ("text", "audio"). text: type: string @@ -21242,106 +22025,566 @@ components: description: > Returned when a Session is created. Emitted automatically when a new - connection is established as the first server event. This event will - contain + connection is established as the first server event. This event will + contain + + the default Session configuration. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - session.created + description: The event type, must be `session.created`. 
+ session: + $ref: "#/components/schemas/RealtimeSession" + required: + - event_id + - type + - session + x-oaiMeta: + name: session.created + group: realtime + example: | + { + "event_id": "event_1234", + "type": "session.created", + "session": { + "id": "sess_001", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-17", + "modalities": ["text", "audio"], + "instructions": "...model instructions here...", + "voice": "sage", + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": null, + "turn_detection": { + "type": "server_vad", + "threshold": 0.5, + "prefix_padding_ms": 300, + "silence_duration_ms": 200 + }, + "tools": [], + "tool_choice": "auto", + "temperature": 0.8, + "max_response_output_tokens": "inf" + } + } + RealtimeServerEventSessionUpdated: + type: object + description: > + Returned when a session is updated with a `session.update` event, + unless + + there is an error. + properties: + event_id: + type: string + description: The unique ID of the server event. + type: + type: string + enum: + - session.updated + description: The event type, must be `session.updated`. + session: + $ref: "#/components/schemas/RealtimeSession" + required: + - event_id + - type + - session + x-oaiMeta: + name: session.updated + group: realtime + example: | + { + "event_id": "event_5678", + "type": "session.updated", + "session": { + "id": "sess_001", + "object": "realtime.session", + "model": "gpt-4o-realtime-preview-2024-12-17", + "modalities": ["text"], + "instructions": "New instructions", + "voice": "sage", + "input_audio_format": "pcm16", + "output_audio_format": "pcm16", + "input_audio_transcription": { + "model": "whisper-1" + }, + "turn_detection": null, + "tools": [], + "tool_choice": "none", + "temperature": 0.7, + "max_response_output_tokens": 200 + } + } + RealtimeSession: + type: object + description: Realtime session object configuration. 
+ properties: + id: + type: string + description: | + Unique identifier for the session object. + modalities: + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + model: + description: | + The Realtime model used for this session. + anyOf: + - type: string + - type: string + enum: + - gpt-4o-realtime-preview + - gpt-4o-realtime-preview-2024-10-01 + - gpt-4o-realtime-preview-2024-12-17 + - gpt-4o-mini-realtime-preview + - gpt-4o-mini-realtime-preview-2024-12-17 + instructions: + type: string + description: > + The default system instructions (i.e. system message) prepended to + model + + calls. This field allows the client to guide the model on desired + + responses. The model can be instructed on response content and + format, + + (e.g. "be extremely succinct", "act friendly", "here are examples of + good + + responses") and on audio behavior (e.g. "talk quickly", "inject + emotion + + into your voice", "laugh frequently"). The instructions are not + guaranteed + + to be followed by the model, but they provide guidance to the model + on the + + desired behavior. + + + Note that the server sets default instructions which will be used if + this + + field is not set and are visible in the `session.created` event at + the + + start of the session. + voice: + type: string + enum: + - alloy + - ash + - ballad + - coral + - echo + - sage + - shimmer + - verse + description: > + The voice the model uses to respond. Voice cannot be changed during + the + + session once the model has responded with audio at least once. + Current + + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo` `sage`, + + `shimmer` and `verse`. + input_audio_format: + type: string + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: > + The format of input audio. Options are `pcm16`, `g711_ulaw`, or + `g711_alaw`. 
+ output_audio_format: + type: string + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: > + The format of output audio. Options are `pcm16`, `g711_ulaw`, or + `g711_alaw`. + input_audio_transcription: + type: object + description: > + Configuration for input audio transcription, defaults to off and can + be + + set to `null` to turn off once on. Input audio transcription is not + native + + to the model, since the model consumes audio directly. Transcription + runs + + asynchronously through Whisper and should be treated as rough + guidance + + rather than the representation understood by the model. + properties: + model: + type: string + description: > + The model to use for transcription, `whisper-1` is the only + currently + + supported model. + turn_detection: + type: object + nullable: true + description: > + Configuration for turn detection. Can be set to `null` to turn off. + Server + + VAD means that the model will detect the start and end of speech + based on + + audio volume and respond at the end of user speech. + properties: + type: + type: string + enum: + - server_vad + description: > + Type of turn detection, only `server_vad` is currently supported. + threshold: + type: number + description: > + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + A + + higher threshold will require louder audio to activate the + model, and + + thus might perform better in noisy environments. + prefix_padding_ms: + type: integer + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). Defaults to 300ms. + silence_duration_ms: + type: integer + description: > + Duration of silence to detect speech stop (in milliseconds). + Defaults + + to 500ms. With shorter values the model will respond more + quickly, + + but may jump in on short pauses from the user. + tools: + type: array + description: Tools (functions) available to the model. 
+ items: + type: object + properties: + type: + type: string + enum: + - function + description: The type of the tool, i.e. `function`. + name: + type: string + description: The name of the function. + description: + type: string + description: > + The description of the function, including guidance on when + and how + + to call it, and guidance about what to tell the user when + calling + + (if anything). + parameters: + type: object + description: Parameters of the function in JSON Schema. + tool_choice: + type: string + description: > + How the model chooses tools. Options are `auto`, `none`, `required`, + or + + specify a function. + temperature: + type: number + description: > + Sampling temperature for the model, limited to [0.6, 1.2]. Defaults + to 0.8. + max_response_output_tokens: + oneOf: + - type: integer + - type: string + enum: + - inf + description: | + Maximum number of output tokens for a single assistant response, + inclusive of tool calls. Provide an integer between 1 and 4096 to + limit output tokens, or `inf` for the maximum available tokens for a + given model. Defaults to `inf`. + RealtimeSessionCreateRequest: + type: object + description: Realtime session object configuration. + required: + - model + properties: + modalities: + description: | + The set of modalities the model can respond with. To disable audio, + set this to ["text"]. + items: + type: string + enum: + - text + - audio + model: + type: string + description: | + The Realtime model used for this session. + enum: + - gpt-4o-realtime-preview + - gpt-4o-realtime-preview-2024-10-01 + - gpt-4o-realtime-preview-2024-12-17 + - gpt-4o-mini-realtime-preview + - gpt-4o-mini-realtime-preview-2024-12-17 + instructions: + type: string + description: > + The default system instructions (i.e. system message) prepended to + model + + calls. This field allows the client to guide the model on desired + + responses. The model can be instructed on response content and + format, + + (e.g. 
"be extremely succinct", "act friendly", "here are examples of + good + + responses") and on audio behavior (e.g. "talk quickly", "inject + emotion + + into your voice", "laugh frequently"). The instructions are not + guaranteed + + to be followed by the model, but they provide guidance to the model + on the + + desired behavior. + + + Note that the server sets default instructions which will be used if + this + + field is not set and are visible in the `session.created` event at + the + + start of the session. + voice: + type: string + enum: + - alloy + - ash + - ballad + - coral + - echo + - sage + - shimmer + - verse + description: > + The voice the model uses to respond. Voice cannot be changed during + the + + session once the model has responded with audio at least once. + Current - the default Session configuration. - properties: - event_id: + voice options are `alloy`, `ash`, `ballad`, `coral`, `echo` `sage`, + + `shimmer` and `verse`. + input_audio_format: type: string - description: The unique ID of the server event. - type: + enum: + - pcm16 + - g711_ulaw + - g711_alaw + description: > + The format of input audio. Options are `pcm16`, `g711_ulaw`, or + `g711_alaw`. + output_audio_format: type: string enum: - - session.created - description: The event type, must be `session.created`. 
- session: - $ref: "#/components/schemas/RealtimeSession" - required: - - event_id - - type - - session - x-oaiMeta: - name: session.created - group: realtime - example: | - { - "event_id": "event_1234", - "type": "session.created", - "session": { - "id": "sess_001", - "object": "realtime.session", - "model": "gpt-4o-realtime-preview-2024-10-01", - "modalities": ["text", "audio"], - "instructions": "...model instructions here...", - "voice": "sage", - "input_audio_format": "pcm16", - "output_audio_format": "pcm16", - "input_audio_transcription": null, - "turn_detection": { - "type": "server_vad", - "threshold": 0.5, - "prefix_padding_ms": 300, - "silence_duration_ms": 200 - }, - "tools": [], - "tool_choice": "auto", - "temperature": 0.8, - "max_response_output_tokens": "inf" - } - } - RealtimeServerEventSessionUpdated: - type: object - description: > - Returned when a session is updated with a `session.update` event, - unless + - pcm16 + - g711_ulaw + - g711_alaw + description: > + The format of output audio. Options are `pcm16`, `g711_ulaw`, or + `g711_alaw`. + input_audio_transcription: + type: object + description: > + Configuration for input audio transcription, defaults to off and can + be - there is an error. - properties: - event_id: - type: string - description: The unique ID of the server event. - type: + set to `null` to turn off once on. Input audio transcription is not + native + + to the model, since the model consumes audio directly. Transcription + runs + + asynchronously through Whisper and should be treated as rough + guidance + + rather than the representation understood by the model. + properties: + model: + type: string + description: > + The model to use for transcription, `whisper-1` is the only + currently + + supported model. + turn_detection: + type: object + description: > + Configuration for turn detection. Can be set to `null` to turn off. 
+ Server + + VAD means that the model will detect the start and end of speech + based on + + audio volume and respond at the end of user speech. + properties: + type: + type: string + description: > + Type of turn detection, only `server_vad` is currently supported. + threshold: + type: number + description: > + Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + A + + higher threshold will require louder audio to activate the + model, and + + thus might perform better in noisy environments. + prefix_padding_ms: + type: integer + description: | + Amount of audio to include before the VAD detected speech (in + milliseconds). Defaults to 300ms. + silence_duration_ms: + type: integer + description: > + Duration of silence to detect speech stop (in milliseconds). + Defaults + + to 500ms. With shorter values the model will respond more + quickly, + + but may jump in on short pauses from the user. + create_response: + type: boolean + default: true + description: | + Whether or not to automatically generate a response when VAD is + enabled. `true` by default. + tools: + type: array + description: Tools (functions) available to the model. + items: + type: object + properties: + type: + type: string + enum: + - function + description: The type of the tool, i.e. `function`. + name: + type: string + description: The name of the function. + description: + type: string + description: > + The description of the function, including guidance on when + and how + + to call it, and guidance about what to tell the user when + calling + + (if anything). + parameters: + type: object + description: Parameters of the function in JSON Schema. + tool_choice: type: string - enum: - - session.updated - description: The event type, must be `session.updated`. 
- session:
- $ref: "#/components/schemas/RealtimeSession"
- required:
- - event_id
- - type
- - session
- x-oaiMeta:
- name: session.updated
- group: realtime
- example: |
- {
- "event_id": "event_5678",
- "type": "session.updated",
- "session": {
- "id": "sess_001",
- "object": "realtime.session",
- "model": "gpt-4o-realtime-preview-2024-10-01",
- "modalities": ["text"],
- "instructions": "New instructions",
- "voice": "sage",
- "input_audio_format": "pcm16",
- "output_audio_format": "pcm16",
- "input_audio_transcription": {
- "model": "whisper-1"
- },
- "turn_detection": null,
- "tools": [],
- "tool_choice": "none",
- "temperature": 0.7,
- "max_response_output_tokens": 200
- }
- }
- RealtimeSession:
+ description: >
+ How the model chooses tools. Options are `auto`, `none`, `required`,
+ or
+
+ specify a function.
+ temperature:
+ type: number
+ description: >
+ Sampling temperature for the model, limited to [0.6, 1.2]. Defaults
+ to 0.8.
+ max_response_output_tokens:
+ oneOf:
+ - type: integer
+ - type: string
+ enum:
+ - inf
+ description: |
+ Maximum number of output tokens for a single assistant response,
+ inclusive of tool calls. Provide an integer between 1 and 4096 to
+ limit output tokens, or `inf` for the maximum available tokens for a
+ given model. Defaults to `inf`.
+ RealtimeSessionCreateResponse:
 type: object
- description: Realtime session object configuration.
+ description: >
+ A new Realtime session configuration, with an ephemeral key. Default
+ TTL
+
+ for keys is one minute.
 properties:
+ client_secret:
+ type: object
+ description: Ephemeral key returned by the API.
+ properties:
+ value:
+ type: string
+ description: >
+ Ephemeral key usable in client environments to authenticate
+ connections
+
+ to the Realtime API. Use this in client-side environments rather
+ than
+
+ a standard API token, which should only be used server-side.
+ expires_at:
+ type: integer
+ description: >
+ Timestamp for when the token expires. 
Currently, all tokens
+ expire
+
+ after one minute.
 modalities:
 description: |
 The set of modalities the model can respond with. To disable audio,
@@ -21396,21 +22639,15 @@ components:
 - shimmer
 - verse
 description: >
- The voice the model uses to respond. Current voice options are
- `ash`,
-
- `ballad`, `coral`, `sage`, and `verse`.
-
-
- Also supported but not recommended are `alloy`, `echo`, and
- `shimmer`.
-
- These older voices are less expressive.
+ The voice the model uses to respond. Voice cannot be changed during
+ the
+ session once the model has responded with audio at least once.
+ Current
- Voice cannot be changed during the session once the model has
+ voice options are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`,
- responded with audio at least once.
+ `shimmer` and `verse`.
 input_audio_format:
 type: string
 description: >
@@ -21535,6 +22772,32 @@ components:
 inclusive of tool calls. Provide an integer between 1 and 4096 to
 limit output tokens, or `inf` for the maximum available tokens for a
 given model. Defaults to `inf`. 
+ x-oaiMeta:
+ name: The session object
+ group: realtime
+ example: |
+ {
+ "id": "sess_001",
+ "object": "realtime.session",
+ "model": "gpt-4o-realtime-preview-2024-12-17",
+ "modalities": ["audio", "text"],
+ "instructions": "You are a friendly assistant.",
+ "voice": "alloy",
+ "input_audio_format": "pcm16",
+ "output_audio_format": "pcm16",
+ "input_audio_transcription": {
+ "model": "whisper-1"
+ },
+ "turn_detection": null,
+ "tools": [],
+ "tool_choice": "none",
+ "temperature": 0.7,
+ "max_response_output_tokens": 200,
+ "client_secret": {
+ "value": "ek_abc123",
+ "expires_at": 1234567890
+ }
+ }
 ResponseFormatJsonObject:
 type: object
 properties:
@@ -24140,10 +25403,13 @@ x-oaiMeta:
 key: cancelFineTuningJob
 path: cancel
 - type: object
- key: FinetuneChatRequestInput
+ key: FineTuneChatRequestInput
 path: chat-input
 - type: object
- key: FinetuneCompletionRequestInput
+ key: FineTunePreferenceRequestInput
+ path: preference-input
+ - type: object
+ key: FineTuneCompletionRequestInput
 path: completions-input
 - type: object
 key: FineTuningJob
@@ -24801,14 +26067,27 @@ x-oaiMeta:
 - id: realtime
 title: Realtime
 beta: true
+ description: |
+ Communicate with a GPT-4o class model in real time using WebRTC or
+ WebSockets. Supports text and audio inputs and outputs, along with audio
+ transcriptions.
+ [Learn more about the Realtime API](/docs/guides/realtime).
+ navigationGroup: realtime
+ - id: realtime-sessions
+ title: Session tokens
 description: >
- Communicate with a GPT-4o class model live, in real time, over
- WebSocket.
+ REST API endpoint to generate ephemeral session tokens for use in
+ client-side

- Produces both audio and text transcriptions.
-
- [Learn more about the Realtime API](/docs/guides/realtime).
+ applications. 
navigationGroup: realtime + sections: + - type: endpoint + key: create-realtime-session + path: create + - type: object + key: RealtimeSessionCreateResponse + path: session_object - id: realtime-client-events title: Client events description: >