Skip to content

Commit be4eb4a

Browse files
feat(api): Add support for gpt-4o-transcribe-diarize on audio/transcriptions endpoint
1 parent 2224817 commit be4eb4a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+1301
-90
lines changed

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 135
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-11d308a9ef78ad01aa11c880a084a3982276800d7994db3f454aa515474977d7.yml
3-
openapi_spec_hash: 0a4bbb5aa0ae532a072bd6b3854e70b1
4-
config_hash: f0940d0906846178759ef7128e4cb98e
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-104cced8f4c7436a76eea02e26307828166405ccfb296faffb008b72772c11a7.yml
3+
openapi_spec_hash: fdc03ed84a65a31b80da909255e53924
4+
config_hash: 03b48e9b8c7231a902403210dbd7dfa0

lib/openai.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,14 @@
7979
require_relative "openai/models/audio/transcription"
8080
require_relative "openai/models/audio/transcription_create_params"
8181
require_relative "openai/models/audio/transcription_create_response"
82+
require_relative "openai/models/audio/transcription_diarized"
83+
require_relative "openai/models/audio/transcription_diarized_segment"
8284
require_relative "openai/models/audio/transcription_include"
8385
require_relative "openai/models/audio/transcription_segment"
8486
require_relative "openai/models/audio/transcription_stream_event"
8587
require_relative "openai/models/audio/transcription_text_delta_event"
8688
require_relative "openai/models/audio/transcription_text_done_event"
89+
require_relative "openai/models/audio/transcription_text_segment_event"
8790
require_relative "openai/models/audio/transcription_verbose"
8891
require_relative "openai/models/audio/transcription_word"
8992
require_relative "openai/models/audio/translation"

lib/openai/models/audio/transcription_create_params.rb

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
1919

2020
# @!attribute model
2121
# ID of the model to use. The options are `gpt-4o-transcribe`,
22-
# `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
23-
# Whisper V2 model).
22+
# `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
23+
# Whisper V2 model), and `gpt-4o-transcribe-diarize`.
2424
#
2525
# @return [String, Symbol, OpenAI::Models::AudioModel]
2626
required :model, union: -> { OpenAI::Audio::TranscriptionCreateParams::Model }
@@ -30,6 +30,8 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
3030
# first normalizes loudness and then uses voice activity detection (VAD) to choose
3131
# boundaries. `server_vad` object can be provided to tweak VAD detection
3232
# parameters manually. If unset, the audio is transcribed as a single block.
33+
# Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
34+
# seconds.
3335
#
3436
# @return [Symbol, :auto, OpenAI::Models::Audio::TranscriptionCreateParams::ChunkingStrategy::VadConfig, nil]
3537
optional :chunking_strategy,
@@ -41,11 +43,30 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
4143
# return the log probabilities of the tokens in the response to understand the
4244
# model's confidence in the transcription. `logprobs` only works with
4345
# response_format set to `json` and only with the models `gpt-4o-transcribe` and
44-
# `gpt-4o-mini-transcribe`.
46+
# `gpt-4o-mini-transcribe`. This field is not supported when using
47+
# `gpt-4o-transcribe-diarize`.
4548
#
4649
# @return [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>, nil]
4750
optional :include, -> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Audio::TranscriptionInclude] }
4851

52+
# @!attribute known_speaker_names
53+
# Optional list of speaker names that correspond to the audio samples provided in
54+
# `known_speaker_references[]`. Each entry should be a short identifier (for
55+
# example `customer` or `agent`). Up to 4 speakers are supported.
56+
#
57+
# @return [Array<String>, nil]
58+
optional :known_speaker_names, OpenAI::Internal::Type::ArrayOf[String]
59+
60+
# @!attribute known_speaker_references
61+
# Optional list of audio samples (as
62+
# [data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs))
63+
# that contain known speaker references matching `known_speaker_names[]`. Each
64+
# sample must be between 2 and 10 seconds, and can use any of the same input audio
65+
# formats supported by `file`.
66+
#
67+
# @return [Array<String>, nil]
68+
optional :known_speaker_references, OpenAI::Internal::Type::ArrayOf[String]
69+
4970
# @!attribute language
5071
# The language of the input audio. Supplying the input language in
5172
# [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
@@ -58,15 +79,18 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
5879
# An optional text to guide the model's style or continue a previous audio
5980
# segment. The
6081
# [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
61-
# should match the audio language.
82+
# should match the audio language. This field is not supported when using
83+
# `gpt-4o-transcribe-diarize`.
6284
#
6385
# @return [String, nil]
6486
optional :prompt, String
6587

6688
# @!attribute response_format
6789
# The format of the output, in one of these options: `json`, `text`, `srt`,
68-
# `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
69-
# the only supported format is `json`.
90+
# `verbose_json`, `vtt`, or `diarized_json`. For `gpt-4o-transcribe` and
91+
# `gpt-4o-mini-transcribe`, the only supported format is `json`. For
92+
# `gpt-4o-transcribe-diarize`, the supported formats are `json`, `text`, and
93+
# `diarized_json`, with `diarized_json` required to receive speaker annotations.
7094
#
7195
# @return [Symbol, OpenAI::Models::AudioResponseFormat, nil]
7296
optional :response_format, enum: -> { OpenAI::AudioResponseFormat }
@@ -86,13 +110,14 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
86110
# `response_format` must be set `verbose_json` to use timestamp granularities.
87111
# Either or both of these options are supported: `word`, or `segment`. Note: There
88112
# is no additional latency for segment timestamps, but generating word timestamps
89-
# incurs additional latency.
113+
# incurs additional latency. This option is not available for
114+
# `gpt-4o-transcribe-diarize`.
90115
#
91116
# @return [Array<Symbol, OpenAI::Models::Audio::TranscriptionCreateParams::TimestampGranularity>, nil]
92117
optional :timestamp_granularities,
93118
-> { OpenAI::Internal::Type::ArrayOf[enum: OpenAI::Audio::TranscriptionCreateParams::TimestampGranularity] }
94119

95-
# @!method initialize(file:, model:, chunking_strategy: nil, include: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
120+
# @!method initialize(file:, model:, chunking_strategy: nil, include: nil, known_speaker_names: nil, known_speaker_references: nil, language: nil, prompt: nil, response_format: nil, temperature: nil, timestamp_granularities: nil, request_options: {})
96121
# Some parameter documentations has been truncated, see
97122
# {OpenAI::Models::Audio::TranscriptionCreateParams} for more details.
98123
#
@@ -104,6 +129,10 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
104129
#
105130
# @param include [Array<Symbol, OpenAI::Models::Audio::TranscriptionInclude>] Additional information to include in the transcription response.
106131
#
132+
# @param known_speaker_names [Array<String>] Optional list of speaker names that correspond to the audio samples provided in
133+
#
134+
# @param known_speaker_references [Array<String>] Optional list of audio samples (as [data URLs](https://developer.mozilla.org/en-
135+
#
107136
# @param language [String] The language of the input audio. Supplying the input language in [ISO-639-1](htt
108137
#
109138
# @param prompt [String] An optional text to guide the model's style or continue a previous audio segment
@@ -117,14 +146,14 @@ class TranscriptionCreateParams < OpenAI::Internal::Type::BaseModel
117146
# @param request_options [OpenAI::RequestOptions, Hash{Symbol=>Object}]
118147

119148
# ID of the model to use. The options are `gpt-4o-transcribe`,
120-
# `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
121-
# Whisper V2 model).
149+
# `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source
150+
# Whisper V2 model), and `gpt-4o-transcribe-diarize`.
122151
module Model
123152
extend OpenAI::Internal::Type::Union
124153

125154
variant String
126155

127-
# ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source Whisper V2 model).
156+
# ID of the model to use. The options are `gpt-4o-transcribe`, `gpt-4o-mini-transcribe`, `whisper-1` (which is powered by our open source Whisper V2 model), and `gpt-4o-transcribe-diarize`.
128157
variant enum: -> { OpenAI::AudioModel }
129158

130159
# @!method self.variants
@@ -135,6 +164,8 @@ module Model
135164
# first normalizes loudness and then uses voice activity detection (VAD) to choose
136165
# boundaries. `server_vad` object can be provided to tweak VAD detection
137166
# parameters manually. If unset, the audio is transcribed as a single block.
167+
# Required when using `gpt-4o-transcribe-diarize` for inputs longer than 30
168+
# seconds.
138169
module ChunkingStrategy
139170
extend OpenAI::Internal::Type::Union
140171

lib/openai/models/audio/transcription_create_response.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@ module TranscriptionCreateResponse
1515
# Represents a transcription response returned by model, based on the provided input.
1616
variant -> { OpenAI::Audio::Transcription }
1717

18+
# Represents a diarized transcription response returned by the model, including the combined transcript and speaker-segment annotations.
19+
variant -> { OpenAI::Audio::TranscriptionDiarized }
20+
1821
# Represents a verbose json transcription response returned by model, based on the provided input.
1922
variant -> { OpenAI::Audio::TranscriptionVerbose }
2023

2124
# @!method self.variants
22-
# @return [Array(OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionVerbose)]
25+
# @return [Array(OpenAI::Models::Audio::Transcription, OpenAI::Models::Audio::TranscriptionDiarized, OpenAI::Models::Audio::TranscriptionVerbose)]
2326
end
2427
end
2528
end
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# frozen_string_literal: true

module OpenAI
  module Models
    module Audio
      # Diarized transcription response model: the combined transcript plus
      # per-speaker segment annotations returned by `gpt-4o-transcribe-diarize`.
      class TranscriptionDiarized < OpenAI::Internal::Type::BaseModel
        # @!attribute duration
        #   Duration of the input audio in seconds.
        #
        #   @return [Float]
        required :duration, Float

        # @!attribute segments
        #   Segments of the transcript annotated with timestamps and speaker labels.
        #
        #   @return [Array<OpenAI::Models::Audio::TranscriptionDiarizedSegment>]
        required :segments, -> { OpenAI::Internal::Type::ArrayOf[OpenAI::Audio::TranscriptionDiarizedSegment] }

        # @!attribute task
        #   The type of task that was run. Always `transcribe`.
        #
        #   @return [Symbol, :transcribe]
        required :task, const: :transcribe

        # @!attribute text
        #   The concatenated transcript text for the entire audio input.
        #
        #   @return [String]
        required :text, String

        # @!attribute usage
        #   Token or duration usage statistics for the request.
        #
        #   @return [OpenAI::Models::Audio::TranscriptionDiarized::Usage::Tokens, OpenAI::Models::Audio::TranscriptionDiarized::Usage::Duration, nil]
        optional :usage, union: -> { OpenAI::Audio::TranscriptionDiarized::Usage }

        # @!method initialize(duration:, segments:, text:, usage: nil, task: :transcribe)
        #   Represents a diarized transcription response returned by the model, including
        #   the combined transcript and speaker-segment annotations.
        #
        #   @param duration [Float] Duration of the input audio in seconds.
        #
        #   @param segments [Array<OpenAI::Models::Audio::TranscriptionDiarizedSegment>] Segments of the transcript annotated with timestamps and speaker labels.
        #
        #   @param text [String] The concatenated transcript text for the entire audio input.
        #
        #   @param usage [OpenAI::Models::Audio::TranscriptionDiarized::Usage::Tokens, OpenAI::Models::Audio::TranscriptionDiarized::Usage::Duration] Token or duration usage statistics for the request.
        #
        #   @param task [Symbol, :transcribe] The type of task that was run. Always `transcribe`.

        # Token or duration usage statistics for the request.
        #
        # @see OpenAI::Models::Audio::TranscriptionDiarized#usage
        module Usage
          extend OpenAI::Internal::Type::Union

          # Variants are dispatched on the `type` field of the payload.
          discriminator :type

          # Usage statistics for models billed by token usage.
          variant :tokens, -> { OpenAI::Audio::TranscriptionDiarized::Usage::Tokens }

          # Usage statistics for models billed by audio input duration.
          variant :duration, -> { OpenAI::Audio::TranscriptionDiarized::Usage::Duration }

          class Tokens < OpenAI::Internal::Type::BaseModel
            # @!attribute input_tokens
            #   Number of input tokens billed for this request.
            #
            #   @return [Integer]
            required :input_tokens, Integer

            # @!attribute output_tokens
            #   Number of output tokens generated.
            #
            #   @return [Integer]
            required :output_tokens, Integer

            # @!attribute total_tokens
            #   Total number of tokens used (input + output).
            #
            #   @return [Integer]
            required :total_tokens, Integer

            # @!attribute type
            #   The type of the usage object. Always `tokens` for this variant.
            #
            #   @return [Symbol, :tokens]
            required :type, const: :tokens

            # @!attribute input_token_details
            #   Details about the input tokens billed for this request.
            #
            #   @return [OpenAI::Models::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails, nil]
            optional :input_token_details,
                     -> { OpenAI::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails }

            # @!method initialize(input_tokens:, output_tokens:, total_tokens:, input_token_details: nil, type: :tokens)
            #   Usage statistics for models billed by token usage.
            #
            #   @param input_tokens [Integer] Number of input tokens billed for this request.
            #
            #   @param output_tokens [Integer] Number of output tokens generated.
            #
            #   @param total_tokens [Integer] Total number of tokens used (input + output).
            #
            #   @param input_token_details [OpenAI::Models::Audio::TranscriptionDiarized::Usage::Tokens::InputTokenDetails] Details about the input tokens billed for this request.
            #
            #   @param type [Symbol, :tokens] The type of the usage object. Always `tokens` for this variant.

            # @see OpenAI::Models::Audio::TranscriptionDiarized::Usage::Tokens#input_token_details
            class InputTokenDetails < OpenAI::Internal::Type::BaseModel
              # @!attribute audio_tokens
              #   Number of audio tokens billed for this request.
              #
              #   @return [Integer, nil]
              optional :audio_tokens, Integer

              # @!attribute text_tokens
              #   Number of text tokens billed for this request.
              #
              #   @return [Integer, nil]
              optional :text_tokens, Integer

              # @!method initialize(audio_tokens: nil, text_tokens: nil)
              #   Details about the input tokens billed for this request.
              #
              #   @param audio_tokens [Integer] Number of audio tokens billed for this request.
              #
              #   @param text_tokens [Integer] Number of text tokens billed for this request.
            end
          end

          class Duration < OpenAI::Internal::Type::BaseModel
            # @!attribute seconds
            #   Duration of the input audio in seconds.
            #
            #   @return [Float]
            required :seconds, Float

            # @!attribute type
            #   The type of the usage object. Always `duration` for this variant.
            #
            #   @return [Symbol, :duration]
            required :type, const: :duration

            # @!method initialize(seconds:, type: :duration)
            #   Usage statistics for models billed by audio input duration.
            #
            #   @param seconds [Float] Duration of the input audio in seconds.
            #
            #   @param type [Symbol, :duration] The type of the usage object. Always `duration` for this variant.
          end

          # @!method self.variants
          #   @return [Array(OpenAI::Models::Audio::TranscriptionDiarized::Usage::Tokens, OpenAI::Models::Audio::TranscriptionDiarized::Usage::Duration)]
        end
      end
    end
  end
end
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# frozen_string_literal: true

module OpenAI
  module Models
    module Audio
      # A single segment of diarized transcript text with speaker metadata,
      # as emitted inside a diarized transcription response.
      class TranscriptionDiarizedSegment < OpenAI::Internal::Type::BaseModel
        # @!attribute id
        #   Unique identifier for the segment.
        #
        #   @return [String]
        required :id, String

        # @!attribute end_
        #   End timestamp of the segment in seconds.
        #
        #   @return [Float]
        required :end_, Float, api_name: :end

        # @!attribute speaker
        #   Speaker label for this segment. When known speakers are provided, the label
        #   matches `known_speaker_names[]`. Otherwise speakers are labeled sequentially
        #   using capital letters (`A`, `B`, ...).
        #
        #   @return [String]
        required :speaker, String

        # @!attribute start
        #   Start timestamp of the segment in seconds.
        #
        #   @return [Float]
        required :start, Float

        # @!attribute text
        #   Transcript text for this segment.
        #
        #   @return [String]
        required :text, String

        # @!attribute type
        #   The type of the segment. Always `transcript.text.segment`.
        #
        #   @return [Symbol, :"transcript.text.segment"]
        required :type, const: :"transcript.text.segment"

        # @!method initialize(id:, end_:, speaker:, start:, text:, type: :"transcript.text.segment")
        #   Some parameter documentations has been truncated, see
        #   {OpenAI::Models::Audio::TranscriptionDiarizedSegment} for more details.
        #
        #   A segment of diarized transcript text with speaker metadata.
        #
        #   @param id [String] Unique identifier for the segment.
        #
        #   @param end_ [Float] End timestamp of the segment in seconds.
        #
        #   @param speaker [String] Speaker label for this segment. When known speakers are provided, the label matc
        #
        #   @param start [Float] Start timestamp of the segment in seconds.
        #
        #   @param text [String] Transcript text for this segment.
        #
        #   @param type [Symbol, :"transcript.text.segment"] The type of the segment. Always `transcript.text.segment`.
      end
    end
  end
end

0 commit comments

Comments
 (0)