@@ -4,34 +4,9 @@ import RateLimiter from '@/common/utils/RateLimiter';
4
4
import StrUtil from '@/common/utils/str-util' ;
5
5
import { Cancelable } from '@/common/interfaces' ;
6
6
import OpenAI from 'openai' ;
7
-
8
- import { z } from 'zod' ;
9
7
import dpLog from '@/backend/ioc/logger' ;
10
-
11
- const WhisperResponseVerifySchema = z . object ( {
12
- language : z . string ( ) ,
13
- duration : z . union ( [ z . number ( ) , z . string ( ) ] ) ,
14
- text : z . string ( ) ,
15
- segments : z . array ( z . object ( {
16
- seek : z . number ( ) ,
17
- start : z . number ( ) ,
18
- end : z . number ( ) ,
19
- text : z . string ( )
20
- } ) )
21
- } ) ;
22
-
23
- export interface WhisperResponse {
24
- language : string ;
25
- duration : number ;
26
- text : string ;
27
- offset : number ;
28
- segments : {
29
- seek : number ;
30
- start : number ;
31
- end : number ;
32
- text : string ;
33
- } [ ] ;
34
- }
8
+ import { WhisperResponseFormatError } from '@/backend/errors/errors' ;
9
+ import { WhisperResponse , WhisperResponseVerifySchema } from '@/common/types/video-info' ;
35
10
36
11
class OpenAiWhisperRequest implements Cancelable {
37
12
private readonly file : string ;
@@ -55,33 +30,41 @@ class OpenAiWhisperRequest implements Cancelable {
55
30
/**
 * Runs one Whisper transcription for this request's file and returns it
 * in the project's `WhisperResponse` shape.
 *
 * Steps, in order:
 *  1. `this.cancel()` is called first (its body is outside this view;
 *     presumably it aborts a previous in-flight request — confirm).
 *  2. Waits on the `'whisper'` rate-limiter slot.
 *  3. Performs the API call via `doTranscription()`.
 *  4. Validates the raw reply against `WhisperResponseVerifySchema`.
 *
 * @returns language, duration, full text and the per-segment
 *          seek/start/end/text entries (an empty list when the reply
 *          carries no segments).
 * @throws WhisperResponseFormatError when the reply fails schema validation.
 *         Errors raised by `doTranscription()` propagate unchanged.
 */
public async invoke(): Promise<WhisperResponse> {
    this.cancel();
    await RateLimiter.wait('whisper');
    const raw = await this.doTranscription();

    // Use zod to check that the reply really has the verbose-transcription shape.
    const verdict = WhisperResponseVerifySchema.safeParse(raw);
    if (verdict.success) {
        const { language, duration, text } = raw;
        // Copy only the fields we expose; a missing segment list becomes [].
        const segments = (raw.segments ?? []).map((piece) => ({
            seek: piece.seek,
            start: piece.start,
            end: piece.end,
            text: piece.text
        }));
        return { language, duration, text, segments };
    }

    // Record why the reply did not match the schema before failing.
    dpLog.error('Invalid response from OpenAI', verdict.error.errors);
    throw new WhisperResponseFormatError();
}
84
54
55
/**
 * Sends the audio file to the OpenAI transcription endpoint.
 *
 * A fresh `AbortController` is stored on `this.abortController` and its
 * signal is passed to the SDK call (presumably so `cancel()` can abort the
 * request — `cancel()`'s body is not visible here, confirm).
 *
 * The request asks for model `whisper-1`, `verbose_json` output and
 * segment-level timestamps.
 *
 * @returns whatever the SDK resolves with for this request.
 * @throws rethrows any error from the call after logging it via `dpLog`.
 */
private async doTranscription() {
    const controller = new AbortController();
    this.abortController = controller;
    try {
        // Keep the literal inline: the SDK picks its return type from the
        // literal `response_format` value.
        const reply = await this.openAi.audio.transcriptions.create(
            {
                file: fs.createReadStream(this.file),
                model: 'whisper-1',
                response_format: 'verbose_json',
                timestamp_granularities: ['segment']
            },
            { signal: controller.signal }
        );
        return reply;
    } catch (failure) {
        // Log for diagnostics, then let the caller decide how to react.
        dpLog.error(failure);
        throw failure;
    }
}
86
69
87
70
public cancel ( ) : void {
0 commit comments