schema/schema.json

{
  "title": "llm-sdk",
  "$schema": "http://json-schema.org/draft-07/schema#",
  "definitions": {
    "TextPart": {
      "type": "object",
      "description": "A part of the message that contains text.",
      "properties": {
        "type": {
          "type": "string",
          "const": "text"
        },
        "text": {
          "type": "string"
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable."
        }
      },
      "required": ["type", "text"],
      "additionalProperties": false
    },
    "ImagePart": {
      "type": "object",
      "description": "A part of the message that contains an image.",
      "properties": {
        "type": {
          "type": "string",
          "const": "image"
        },
        "mimeType": {
          "type": "string",
          "description": "The MIME type of the image. E.g. \"image/jpeg\", \"image/png\"."
        },
        "imageData": {
          "type": "string",
          "description": "The base64-encoded image data."
        },
        "width": {
          "type": "integer",
          "description": "The width of the image in pixels."
        },
        "height": {
          "type": "integer",
          "description": "The height of the image in pixels."
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable."
        }
      },
      "required": ["type", "mimeType", "imageData"],
      "additionalProperties": false
    },
    "AudioEncoding": {
      "type": "string",
      "enum": ["linear16", "flac", "mulaw", "alaw", "aac", "mp3", "opus"],
      "description": "The encoding of the audio."
    },
    "AudioContainer": {
      "type": "string",
      "enum": ["wav", "ogg", "flac", "webm"],
      "description": "The container format of the audio."
    },
    "AudioPart": {
      "type": "object",
      "description": "A part of the message that contains an audio.",
      "properties": {
        "type": {
          "type": "string",
          "const": "audio"
        },
        "container": {
          "$ref": "#/definitions/AudioContainer"
        },
        "audioData": {
          "type": "string",
          "description": "The base64-encoded audio data."
        },
        "encoding": {
          "$ref": "#/definitions/AudioEncoding"
        },
        "sampleRate": {
          "type": "integer",
          "description": "The sample rate of the audio. E.g. 44100, 48000."
        },
        "channels": {
          "type": "integer",
          "description": "The number of channels of the audio. E.g. 1, 2."
        },
        "transcript": {
          "type": "string",
          "description": "The transcript of the audio."
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable."
        }
      },
      "required": ["type", "audioData"],
      "additionalProperties": false
    },
    "ToolCallPart": {
      "type": "object",
      "description": "A part of the message that represents a call to a tool the model wants to use.",
      "properties": {
        "type": {
          "type": "string",
          "const": "tool-call"
        },
        "toolCallId": {
          "type": "string",
          "description": "The ID of the tool call, used to match the tool result with the tool call."
        },
        "toolName": {
          "type": "string",
          "description": "The name of the tool to call."
        },
        "args": {
          "oneOf": [
            {
              "type": "object",
              "additionalProperties": true
            },
            {
              "type": "null"
            }
          ],
          "description": "The arguments to pass to the tool."
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable. This might not be the same as the toolCallId."
        }
      },
      "required": ["type", "toolCallId", "toolName", "args"],
      "additionalProperties": false
    },
    "ToolResultPart": {
      "type": "object",
      "description": "A part of the message that represents the result of a tool call.",
      "properties": {
        "type": {
          "type": "string",
          "const": "tool-result"
        },
        "toolCallId": {
          "type": "string",
          "description": "The ID of the tool call from previous assistant message."
        },
        "toolName": {
          "type": "string",
          "description": "The name of the tool that was called."
        },
        "result": {
          "anyOf": [
            {
              "type": "object"
            },
            {
              "type": "array"
            }
          ],
          "description": "The result of the tool call."
        },
        "isError": {
          "type": "boolean",
          "description": "Marks the tool result as an error."
        }
      },
      "required": ["type", "toolCallId", "toolName", "result"],
      "additionalProperties": false
    },
    "Part": {
      "oneOf": [
        {
          "$ref": "#/definitions/TextPart"
        },
        {
          "$ref": "#/definitions/ImagePart"
        },
        {
          "$ref": "#/definitions/AudioPart"
        },
        {
          "$ref": "#/definitions/ToolCallPart"
        },
        {
          "$ref": "#/definitions/ToolResultPart"
        }
      ]
    },
    "UserMessage": {
      "type": "object",
      "description": "Represents a message sent by the user.",
      "properties": {
        "role": {
          "type": "string",
          "const": "user"
        },
        "content": {
          "type": "array",
          "items": {
            "oneOf": [
              {
                "$ref": "#/definitions/TextPart"
              },
              {
                "$ref": "#/definitions/ImagePart"
              },
              {
                "$ref": "#/definitions/AudioPart"
              }
            ]
          }
        }
      },
      "required": ["role", "content"],
      "additionalProperties": false
    },
    "AssistantMessage": {
      "type": "object",
      "description": "Represents a message generated by the model.",
      "properties": {
        "role": {
          "type": "string",
          "const": "assistant"
        },
        "content": {
          "type": "array",
          "items": {
            "oneOf": [
              {
                "$ref": "#/definitions/TextPart"
              },
              {
                "$ref": "#/definitions/ToolCallPart"
              },
              {
                "$ref": "#/definitions/AudioPart"
              }
            ]
          }
        }
      },
      "required": ["role", "content"],
      "additionalProperties": false
    },
    "TextPartDelta": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "text"
        },
        "text": {
          "type": "string"
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable."
        }
      },
      "required": ["type", "text"],
      "additionalProperties": false
    },
    "ToolCallPartDelta": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "tool-call"
        },
        "toolCallId": {
          "type": "string",
          "description": "The ID of the tool call, used to match the tool result with the tool call."
        },
        "toolName": {
          "type": "string",
          "description": "The name of the tool to call."
        },
        "args": {
          "type": "string",
          "description": "The partial JSON string of the arguments to pass to the tool."
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable. This might not be the same as the toolCallId."
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "AudioPartDelta": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "audio"
        },
        "audioData": {
          "type": "string",
          "description": "The base64-encoded audio data."
        },
        "container": {
          "$ref": "#/definitions/AudioContainer"
        },
        "encoding": {
          "$ref": "#/definitions/AudioEncoding"
        },
        "sampleRate": {
          "type": "integer",
          "description": "The sample rate of the audio. E.g. 44100, 48000."
        },
        "channels": {
          "type": "integer",
          "description": "The number of channels of the audio. E.g. 1, 2."
        },
        "transcript": {
          "type": "string",
          "description": "The transcript of the audio."
        },
        "id": {
          "type": "string",
          "description": "The ID of the part, if applicable."
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "ContentDelta": {
      "type": "object",
      "properties": {
        "index": {
          "type": "integer"
        },
        "part": {
          "oneOf": [
            {
              "$ref": "#/definitions/TextPartDelta"
            },
            {
              "$ref": "#/definitions/ToolCallPartDelta"
            },
            {
              "$ref": "#/definitions/AudioPartDelta"
            }
          ]
        }
      },
      "required": ["index", "part"],
      "additionalProperties": false
    },
    "JSONSchema": {
      "type": "object",
      "description": "Represents a JSON schema.",
      "additionalProperties": true
    },
    "Tool": {
      "type": "object",
      "description": "Represents a tool that can be used by the model.",
      "properties": {
        "name": {
          "type": "string",
          "description": "The name of the tool."
        },
        "description": {
          "type": "string",
          "description": "A description of the tool."
        },
        "parameters": {
          "description": "The JSON schema of the parameters that the tool accepts. The type must be \"object\".",
          "oneOf": [
            {
              "$ref": "#/definitions/JSONSchema"
            },
            {
              "type": "null"
            }
          ]
        }
      },
      "required": ["name", "description", "parameters"],
      "additionalProperties": false
    },
    "ToolMessage": {
      "type": "object",
      "description": "Represents tool result in the message history.",
      "properties": {
        "role": {
          "type": "string",
          "const": "tool"
        },
        "content": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/ToolResultPart"
          }
        }
      },
      "required": ["role", "content"],
      "additionalProperties": false
    },
    "Message": {
      "oneOf": [
        {
          "$ref": "#/definitions/UserMessage"
        },
        {
          "$ref": "#/definitions/AssistantMessage"
        },
        {
          "$ref": "#/definitions/ToolMessage"
        }
      ]
    },
    "ModelTokensDetail": {
      "type": "object",
      "description": "Represents the token usage of the model.",
      "properties": {
        "textTokens": {
          "type": "integer"
        },
        "cachedTextTokens": {
          "type": "integer"
        },
        "audioTokens": {
          "type": "integer"
        },
        "cachedAudioTokens": {
          "type": "integer"
        },
        "imageTokens": {
          "type": "integer"
        },
        "cachedImageTokens": {
          "type": "integer"
        }
      },
      "additionalProperties": false
    },
    "ModelUsage": {
      "type": "object",
      "description": "Represents the token usage of the model.",
      "properties": {
        "inputTokens": {
          "type": "integer"
        },
        "outputTokens": {
          "type": "integer"
        },
        "inputTokensDetail": {
          "$ref": "#/definitions/ModelTokensDetail"
        },
        "outputTokensDetail": {
          "$ref": "#/definitions/ModelTokensDetail"
        }
      },
      "required": ["inputTokens", "outputTokens"],
      "additionalProperties": false
    },
    "ModelResponse": {
      "type": "object",
      "description": "Represents the response generated by the model.",
      "properties": {
        "content": {
          "type": "array",
          "items": {
            "oneOf": [
              {
                "$ref": "#/definitions/TextPart"
              },
              {
                "$ref": "#/definitions/ToolCallPart"
              },
              {
                "$ref": "#/definitions/AudioPart"
              }
            ]
          }
        },
        "usage": {
          "$ref": "#/definitions/ModelUsage"
        },
        "cost": {
          "type": "number",
          "description": "The cost of the response."
        }
      },
      "required": ["content"],
      "additionalProperties": false
    },
    "PartialModelResponse": {
      "type": "object",
      "properties": {
        "delta": {
          "$ref": "#/definitions/ContentDelta"
        }
      },
      "required": ["delta"],
      "additionalProperties": false
    },
    "ToolChoiceAuto": {
      "type": "object",
      "description": "The model will automatically choose the tool to use or not use any tools.",
      "properties": {
        "type": {
          "type": "string",
          "const": "auto"
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "ToolChoiceNone": {
      "type": "object",
      "description": "The model will not use any tools.",
      "properties": {
        "type": {
          "type": "string",
          "const": "none"
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "ToolChoiceRequired": {
      "type": "object",
      "description": "The model will be forced to use a tool.",
      "properties": {
        "type": {
          "type": "string",
          "const": "required"
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "ToolChoiceTool": {
      "type": "object",
      "description": "The model will use the specified tool.",
      "properties": {
        "type": {
          "type": "string",
          "const": "tool"
        },
        "toolName": {
          "type": "string"
        }
      },
      "required": ["type", "toolName"],
      "additionalProperties": false
    },
    "ResponseFormatText": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "text"
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "ResponseFormatJson": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "json"
        },
        "schema": {
          "$ref": "#/definitions/JSONSchema"
        }
      },
      "required": ["type"],
      "additionalProperties": false
    },
    "Modality": {
      "type": "string",
      "enum": ["text", "audio"]
    },
    "LanguageModelInput": {
      "type": "object",
      "properties": {
        "systemPrompt": {
          "type": "string",
          "description": "A system prompt is a way of providing context and instructions to the model"
        },
        "messages": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/Message"
          },
          "description": "A list of messages comprising the conversation so far."
        },
        "tools": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/Tool"
          },
          "description": "Definitions of tools that the model may use."
        },
        "toolChoice": {
          "oneOf": [
            {
              "$ref": "#/definitions/ToolChoiceAuto"
            },
            {
              "$ref": "#/definitions/ToolChoiceNone"
            },
            {
              "$ref": "#/definitions/ToolChoiceRequired"
            },
            {
              "$ref": "#/definitions/ToolChoiceTool"
            }
          ],
          "description": "Determines how the model should choose which tool to use. \"auto\" - The model will automatically choose the tool to use or not use any tools. \"none\" - The model will not use any tools. \"required\" - The model will be forced to use a tool. { type: \"tool\", toolName: \"toolName\" } - The model will use the specified tool."
        },
        "responseFormat": {
          "oneOf": [
            {
              "$ref": "#/definitions/ResponseFormatJson"
            },
            {
              "$ref": "#/definitions/ResponseFormatText"
            }
          ],
          "description": "The format that the model must output"
        },
        "maxTokens": {
          "type": "integer",
          "description": "The maximum number of tokens that can be generated in the chat completion."
        },
        "temperature": {
          "type": "number",
          "description": "Amount of randomness injected into the response. Ranges from 0.0 to 1.0"
        },
        "topP": {
          "type": "number",
          "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass Ranges from 0.0 to 1.0"
        },
        "topK": {
          "type": "number",
          "description": "Only sample from the top K options for each subsequent token. Used to remove \"long tail\" low probability responses. Ranges from 0.0 to 1.0"
        },
        "presencePenalty": {
          "type": "number",
          "description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
        },
        "frequencyPenalty": {
          "type": "number",
          "description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
        },
        "seed": {
          "type": "integer",
          "description": "The seed (integer), if set and supported by the model, to enable deterministic results."
        },
        "modalities": {
          "type": "array",
          "items": {
            "$ref": "#/definitions/Modality"
          },
          "description": "The modalities that the model should support."
        },
        "extra": {
          "type": "object",
          "description": "Extra options that the model may support.",
          "additionalProperties": true
        }
      },
      "required": ["messages"],
      "additionalProperties": false
    },
    "LanguageModelCapability": {
      "type": "string",
      "description": "A metadata property that describes the capability of the model.",
      "enum": [
        "structured-output",
        "function-calling",
        "structured-output-strict",
        "audio-input",
        "audio-output",
        "image-input",
        "image-output"
      ]
    },
    "LanguageModelPricing": {
      "type": "object",
      "description": "A metadata property that describes the pricing of the model.",
      "properties": {
        "inputCostPerTextToken": {
          "type": "number",
          "description": "The cost in USD per single text token for input."
        },
        "inputCostPerCachedTextToken": {
          "type": "number",
          "description": "The cost in USD per single cached text token for input."
        },
        "outputCostPerTextToken": {
          "type": "number",
          "description": "The cost in USD per single text token for output."
        },
        "inputCostPerAudioToken": {
          "type": "number",
          "description": "The cost in USD per single audio token for input."
        },
        "inputCostPerCachedAudioToken": {
          "type": "number",
          "description": "The cost in USD per single cached audio token for input."
        },
        "outputCostPerAudioToken": {
          "type": "number",
          "description": "The cost in USD per single audio token for output."
        },
        "inputCostPerImageToken": {
          "type": "number",
          "description": "The cost in USD per single image token for input."
        },
        "inputCostPerCachedImageToken": {
          "type": "number",
          "description": "The cost in USD per single cached image token for input."
        },
        "outputCostPerImageToken": {
          "type": "number",
          "description": "The cost in USD per single image token for output."
        }
      },
      "additionalProperties": false
    }
  }
}