diff --git a/openapi.yaml b/openapi.yaml index 97e273d0..a087ad9b 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -4268,6 +4268,7 @@ paths: "last_error": null, "model": "gpt-4-turbo", "instructions": null, + "incomplete_details": null, "tools": [ { "type": "code_interpreter" @@ -4283,7 +4284,15 @@ paths: "completion_tokens": 456, "total_tokens": 579 }, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" }, { "id": "run_abc456", @@ -4300,6 +4309,7 @@ paths: "last_error": null, "model": "gpt-4-turbo", "instructions": null, + "incomplete_details": null, "tools": [ { "type": "code_interpreter" @@ -4315,7 +4325,15 @@ paths: "completion_tokens": 456, "total_tokens": 579 }, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" } ], "first_id": "run_abc123", @@ -4404,6 +4422,7 @@ paths: "last_error": null, "model": "gpt-4-turbo", "instructions": null, + "incomplete_details": null, "tools": [ { "type": "code_interpreter" @@ -4415,7 +4434,15 @@ paths: ], "metadata": {}, "usage": null, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" } - title: Streaming request: @@ -4736,6 +4763,7 @@ paths: "last_error": null, "model": "gpt-4-turbo", "instructions": null, + "incomplete_details": null, "tools": [ { "type": "code_interpreter" @@ -4751,7 +4779,15 @@ paths: "completion_tokens": 456, "total_tokens": 579 }, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + 
"last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" } post: operationId: modifyRun @@ -4848,6 +4884,7 @@ paths: "last_error": null, "model": "gpt-4-turbo", "instructions": null, + "incomplete_details": null, "tools": [ { "type": "code_interpreter" @@ -4865,7 +4902,15 @@ paths: "completion_tokens": 456, "total_tokens": 579 }, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" } /threads/{thread_id}/runs/{run_id}/submit_tool_outputs: @@ -4977,6 +5022,7 @@ paths: "last_error": null, "model": "gpt-4-turbo", "instructions": null, + "incomplete_details": null, "tools": [ { "type": "function", @@ -5003,7 +5049,15 @@ paths: "file_ids": [], "metadata": {}, "usage": null, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" } - title: Streaming @@ -8779,6 +8833,84 @@ components: - type - function + TruncationObject: + type: object + title: Thread Truncation Controls + properties: + type: + type: string + description: The truncation strategy to use for the thread. The default is `auto`. If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread. When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`. + enum: ["auto", "last_messages"] + last_messages: + type: integer + description: The number of most recent messages from the thread when constructing the context for the run. + minimum: 1 + nullable: true + required: + - type + + AssistantsApiToolChoiceOption: + description: | + Controls which (if any) tool is called by the model. 
+ `none` means the model will not call any tools and instead generates a message. + `auto` is the default value and means the model can pick between generating a message or calling a tool. + Specifying a particular tool like `{"type": "TOOL_TYPE"}` or `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool. + + oneOf: + - type: string + description: > + `none` means the model will not call a function and instead generates a message. + `auto` means the model can pick between generating a message or calling a function. + enum: [none, auto] + - $ref: "#/components/schemas/AssistantsApiNamedToolChoice" + x-oaiExpandable: true + + AssistantsApiNamedToolChoice: + type: object + description: Specifies a tool the model should use. Use to force the model to call a specific tool. + properties: + type: + type: string + enum: ["function", "code_interpreter", "retrieval"] + description: The type of the tool. If type is `function`, the function name must be set + function: + type: object + properties: + name: + type: string + description: The name of the function to call. + required: + - name + required: + - type + + AssistantsApiResponseFormatOption: + description: | + Specifies the format that the model must output. Compatible with [GPT-4 Turbo](/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. 
Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length. + oneOf: + - type: string + description: > + `auto` is the default value + enum: [none, auto] + - $ref: "#/components/schemas/AssistantsApiResponseFormat" + x-oaiExpandable: true + + AssistantsApiResponseFormat: + type: object + description: | + An object describing the expected output of the model. If `json_object` only `function` type `tools` are allowed to be passed to the Run. If `text` the model can return text or any value needed. + properties: + type: + type: string + enum: ["text", "json_object"] + example: "json_object" + default: "text" + description: Must be one of `text` or `json_object`. + RunObject: type: object title: A run on a thread @@ -8872,6 +9004,15 @@ components: description: The Unix timestamp (in seconds) for when the run was completed. type: integer nullable: true + incomplete_details: + description: Details on why the run is incomplete. Will be `null` if the run is not incomplete. + type: object + nullable: true + properties: + reason: + description: The reason why the run is incomplete. This will point to which specific token limit was reached over the course of the run. + type: string + enum: ["max_completion_tokens", "max_prompt_tokens"] model: description: The model that the [assistant](/docs/api-reference/assistants) used for this run. type: string @@ -8906,6 +9047,27 @@ components: description: The sampling temperature used for this run. If not set, defaults to 1. type: number nullable: true + max_prompt_tokens: + type: integer + nullable: true + description: | + The maximum number of prompt tokens specified to have been used over the course of the run. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: | + The maximum number of completion tokens specified to have been used over the course of the run. 
+ minimum: 256 + truncation_strategy: + $ref: "#/components/schemas/TruncationObject" + nullable: true + tool_choice: + $ref: "#/components/schemas/AssistantsApiToolChoiceOption" + nullable: true + response_format: + $ref: "#/components/schemas/AssistantsApiResponseFormatOption" + nullable: true required: - id - object @@ -8926,6 +9088,12 @@ components: - file_ids - metadata - usage + - incomplete_details + - max_prompt_tokens + - max_completion_tokens + - truncation_strategy + - tool_choice + - response_format x-oaiMeta: name: The run object beta: true @@ -8948,12 +9116,21 @@ components: "tools": [{"type": "retrieval"}, {"type": "code_interpreter"}], "file_ids": [], "metadata": {}, + "incomplete_details": null, "usage": { "prompt_tokens": 123, "completion_tokens": 456, "total_tokens": 579 }, - "temperature": 1 + "temperature": 1, + "max_prompt_tokens": 1000, + "max_completion_tokens": 1000, + "truncation_strategy": { + "type": "auto", + "last_messages": null + }, + "response_format": "auto", + "tool_choice": "auto" } CreateRunRequest: type: object @@ -9035,6 +9212,27 @@ components: nullable: true description: | If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + max_prompt_tokens: + type: integer + nullable: true + description: | + The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: | + The maximum number of completion tokens that may be used over the course of the run. 
The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + truncation_strategy: + $ref: "#/components/schemas/TruncationObject" + nullable: true + tool_choice: + $ref: "#/components/schemas/AssistantsApiToolChoiceOption" + nullable: true + response_format: + $ref: "#/components/schemas/AssistantsApiResponseFormatOption" + nullable: true required: - thread_id - assistant_id @@ -9196,6 +9394,27 @@ components: nullable: true description: | If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + max_prompt_tokens: + type: integer + nullable: true + description: | + The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info. + minimum: 256 + max_completion_tokens: + type: integer + nullable: true + description: | + The maximum number of completion tokens that may be used over the course of the run. 
+ minimum: 256 + truncation_strategy: + $ref: "#/components/schemas/TruncationObject" + nullable: true + tool_choice: + $ref: "#/components/schemas/AssistantsApiToolChoiceOption" + nullable: true + response_format: + $ref: "#/components/schemas/AssistantsApiResponseFormatOption" + nullable: true required: - thread_id - assistant_id