Skip to content

Commit

Permalink
Merge pull request #71 from finnvoor/main
Browse files Browse the repository at this point in the history
Add `Progress` to `WhisperKit`
  • Loading branch information
ZachNagengast committed Mar 14, 2024
2 parents eca4a2e + e109be3 commit 0b78c52
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
10 changes: 10 additions & 0 deletions Examples/WhisperAX/WhisperAX/Views/ContentView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,16 @@ struct ContentView: View {
.frame(maxWidth: .infinity)
.defaultScrollAnchor(.bottom)
.padding()
// Show a linear, label-less progress bar driven by `whisperKit.progress`, but only
// when a WhisperKit instance exists, the view is neither recording nor flagged as
// transcribing, and the progress fraction is strictly between 0 and 1
// (i.e. work has started and has not yet finished).
// NOTE(review): the bar is hidden whenever `isTranscribing` is true — confirm that
// this flag is not set during the file transcription this bar is meant to track.
if let whisperKit,
!isRecording,
!isTranscribing,
whisperKit.progress.fractionCompleted > 0,
whisperKit.progress.fractionCompleted < 1 {
ProgressView(whisperKit.progress)
.progressViewStyle(.linear)
.labelsHidden()
.padding(.horizontal)
}
}
}

Expand Down
11 changes: 11 additions & 0 deletions Sources/WhisperKit/Core/WhisperKit.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public class WhisperKit: Transcriber {
public var decoderInputs: DecodingInputs?
public var currentTimings: TranscriptionTimings?

/// Progress of the current transcription.
///
/// `completedUnitCount` is reset to 0 when a transcription starts, `totalUnitCount` is set to
/// the summed length of all seek clips, and `completedUnitCount` advances after each decoded
/// window until it is forced equal to `totalUnitCount` when the decode scope exits.
public let progress = Progress()

public init(
model: String? = nil,
downloadBase: URL? = nil,
Expand Down Expand Up @@ -362,6 +364,7 @@ public class WhisperKit: Transcriber {
decodeOptions: DecodingOptions? = nil,
callback: TranscriptionCallback = nil) async throws -> TranscriptionResult?
{
progress.completedUnitCount = 0
if currentTimings == nil {
currentTimings = TranscriptionTimings()
}
Expand Down Expand Up @@ -451,11 +454,16 @@ public class WhisperKit: Transcriber {

let startDecodeLoopTime = CFAbsoluteTimeGetCurrent()

// Total amount of audio to process: the sum of (end - start) over every seek clip.
// Units match the seek positions (presumably audio samples — TODO confirm).
let totalSeekDuration = seekClips.reduce(0, { return $0 + ($1.end - $1.start) })
progress.totalUnitCount = Int64(totalSeekDuration)
// On any exit from the enclosing scope, mark progress as fully complete so that
// `completedUnitCount` always ends equal to `totalUnitCount`.
defer { progress.completedUnitCount = progress.totalUnitCount }
for (seekClipStart, seekClipEnd) in seekClips {
// Loop through the current clip until we reach the end
// Typically this will be the full audio file, unless seek points are explicitly provided
var seek: Int = seekClipStart

let previousSeekProgress = progress.completedUnitCount

let windowPadding = 16000 // prevent hallucinations at the end of the clip by stopping up to 1.0s early
while seek < seekClipEnd - windowPadding {
// calculate new encoder segment features
Expand Down Expand Up @@ -579,6 +587,9 @@ public class WhisperKit: Transcriber {

// Reset cache and move on to the next window
resetDecoderInputs()

// Progress within the current clip: distance from the clip start to the current
// seek position, clamped to the clip end so it never exceeds the clip's length.
// Added on top of `previousSeekProgress`, the count captured before this clip began.
let clipProgress = min(seek, seekClipEnd) - seekClipStart
progress.completedUnitCount = previousSeekProgress + Int64(clipProgress)
}
}

Expand Down

0 comments on commit 0b78c52

Please sign in to comment.