Skip to content

Commit 4e867d8

Browse files
committed
Add support for new fields
1 parent 3a7e7e1 commit 4e867d8

File tree

1 file changed

+41
-13
lines changed

1 file changed

+41
-13
lines changed

src/Models/Session.swift

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,42 +22,68 @@ public struct Session: Codable, Equatable, Sendable {
2222
}
2323

2424
/// Configuration for transcribing input audio.
public struct InputAudioTranscription: Codable, Equatable, Sendable {
    /// The model to use for transcription.
    public var model: String
    /// The language of the input audio. Supplying the input language in ISO-639-1 (e.g. `en`) format will improve accuracy and latency.
    public var language: String?
    /// An optional text to guide the model's style or continue a previous audio segment.
    public var prompt: String?

    /// Creates a transcription configuration.
    ///
    /// `language` and `prompt` default to `nil`, so existing call sites that pass
    /// only `model` (or nothing) keep working unchanged.
    ///
    /// - Parameters:
    ///   - model: The model to use for transcription.
    ///   - language: The language of the input audio, in ISO-639-1 format (e.g. `en`).
    ///   - prompt: Optional text to guide the model's style or continue a previous audio segment.
    public init(model: String = "whisper-1", language: String? = nil, prompt: String? = nil) {
        self.model = model
        self.language = language
        self.prompt = prompt
    }
}
3136

37+
/// Configuration for input audio noise reduction.
public struct InputAudioNoiseReduction: Codable, Equatable, Sendable {
    /// Type of noise reduction.
    public enum NoiseReductionType: String, CaseIterable, Codable, Sendable {
        /// For close-talking microphones such as headphones
        case nearField = "near_field"
        /// For far-field microphones such as laptop or conference room microphones
        case farField = "far_field"
    }

    /// Type of noise reduction.
    public var type: NoiseReductionType?

    /// Creates a noise reduction configuration.
    ///
    /// An explicit `public` initializer is required: the memberwise initializer
    /// Swift synthesizes for a `public` struct is only `internal`, so without
    /// this clients outside the module could not construct a value.
    ///
    /// - Parameter type: Type of noise reduction. Defaults to `nil`.
    public init(type: NoiseReductionType? = nil) {
        self.type = type
    }
}
49+
3250
/// Configuration for turn detection.
public struct TurnDetection: Codable, Equatable, Sendable {
    /// The available turn detection modes.
    public enum TurnDetectionType: String, Codable, Sendable {
        case serverVad = "server_vad"
        case semanticVad = "semantic_vad"
        case none
    }

    /// The eagerness of the model to respond in `semantic_vad` mode.
    public enum TurnDetectionEagerness: String, Codable, Sendable {
        /// Waits longer for the user to continue speaking.
        case low
        /// Responds more quickly.
        case high
        /// The default; equivalent to `medium`.
        case auto
        case medium
    }

    /// The type of turn detection.
    public var type: TurnDetectionType
    /// Used only for `server_vad` mode. Activation threshold for VAD (0.0 to 1.0).
    public var threshold: Double?
    /// Whether or not to automatically interrupt any ongoing response with output to the default conversation (i.e. `conversation` of `auto`) when a VAD start event occurs.
    public var interruptResponse: Bool?
    /// Used only for `server_vad` mode. Amount of audio to include before speech starts (in milliseconds).
    public var prefixPaddingMs: Int?
    /// Used only for `server_vad` mode. Duration of silence to detect speech stop (in milliseconds).
    public var silenceDurationMs: Int?
    /// Whether or not to automatically generate a response when VAD is enabled.
    public var createResponse: Bool
    /// Used only for `semantic_vad` mode. The eagerness of the model to respond. `low` will wait longer for the user to continue speaking, `high` will respond more quickly. `auto` is the default and is equivalent to `medium`.
    public var eagerness: TurnDetectionEagerness?

    /// Creates a turn detection configuration.
    ///
    /// Every mode-specific setting defaults to `nil`.
    ///
    /// - Parameters:
    ///   - type: The type of turn detection.
    ///   - threshold: `server_vad` only. Activation threshold for VAD (0.0 to 1.0).
    ///   - interruptResponse: Whether to automatically interrupt any ongoing response when a VAD start event occurs.
    ///   - prefixPaddingMs: `server_vad` only. Amount of audio to include before speech starts (in milliseconds).
    ///   - silenceDurationMs: `server_vad` only. Duration of silence to detect speech stop (in milliseconds).
    ///   - createResponse: Whether to automatically generate a response when VAD is enabled.
    ///   - eagerness: `semantic_vad` only. The eagerness of the model to respond.
    public init(
        type: TurnDetectionType = .serverVad,
        threshold: Double? = nil,
        interruptResponse: Bool? = nil,
        prefixPaddingMs: Int? = nil,
        silenceDurationMs: Int? = nil,
        createResponse: Bool = true,
        eagerness: TurnDetectionEagerness? = nil
    ) {
        // Assigned in declaration order so a missed field is easy to spot.
        self.type = type
        self.threshold = threshold
        self.interruptResponse = interruptResponse
        self.prefixPaddingMs = prefixPaddingMs
        self.silenceDurationMs = silenceDurationMs
        self.createResponse = createResponse
        self.eagerness = eagerness
    }
}
6389

@@ -239,6 +265,8 @@ public struct Session: Codable, Equatable, Sendable {
239265
public var outputAudioFormat: AudioFormat
240266
/// Configuration for input audio transcription.
241267
public var inputAudioTranscription: InputAudioTranscription?
268+
/// Configuration for input audio noise reduction.
269+
public var inputAudioNoiseReduction: InputAudioNoiseReduction?
242270
/// Configuration for turn detection.
243271
public var turnDetection: TurnDetection?
244272
/// Tools (functions) available to the model.

0 commit comments

Comments
 (0)