1
1
import { storeGet } from '@/backend/store' ;
2
2
import fs from 'fs' ;
3
3
import RateLimiter from '@/common/utils/RateLimiter' ;
4
- import FormData from 'form-data' ;
5
- import axios , { CancelTokenSource } from 'axios' ;
6
- import UrlUtil from '@/common/utils/UrlUtil' ;
7
- import dpLog from '@/backend/ioc/logger' ;
8
4
import StrUtil from '@/common/utils/str-util' ;
9
5
import { Cancelable } from '@/common/interfaces' ;
10
- import CancelByUserError from '@/backend/errors/CancelByUserError' ;
6
+ import OpenAI from 'openai' ;
7
+
8
+ import { z } from 'zod' ;
9
+ import dpLog from '@/backend/ioc/logger' ;
10
+
11
/**
 * Runtime shape of one transcript segment in a `verbose_json` Whisper reply.
 * Only the fields this module reads are checked.
 */
const WhisperSegmentVerifySchema = z.object({
    seek: z.number(),
    start: z.number(),
    end: z.number(),
    text: z.string()
});

/**
 * Runtime validator for the transcription payload returned by the OpenAI SDK.
 * `duration` is accepted as either a number or a string; consumers convert it
 * to a number themselves.
 */
const WhisperResponseVerifySchema = z.object({
    language: z.string(),
    duration: z.number().or(z.string()),
    text: z.string(),
    segments: z.array(WhisperSegmentVerifySchema)
});
11
22
12
23
export interface WhisperResponse {
13
24
language : string ;
@@ -23,73 +34,60 @@ export interface WhisperResponse {
23
34
}
24
35
25
36
/**
 * A single, cancelable Whisper transcription request for one audio file,
 * issued through an injected OpenAI SDK client.
 */
class OpenAiWhisperRequest implements Cancelable {
    /** Path of the audio file to transcribe. */
    private readonly file: string;
    /** Controller of the invocation currently in progress, or null when idle. */
    private abortController: AbortController | null = null;
    /** SDK client used to perform the request. */
    public readonly openAi: OpenAI;

    /**
     * @param openai Configured OpenAI SDK client.
     * @param file Path of the audio file to transcribe.
     */
    constructor(openai: OpenAI, file: string) {
        this.file = file;
        this.openAi = openai;
    }

    /**
     * Creates a request, or returns null when the file path or the stored
     * OpenAI key/endpoint settings are rejected by `StrUtil.hasBlank`.
     *
     * The stored key and endpoint are only checked here; the request itself
     * goes through the supplied client.
     *
     * @param openai Configured OpenAI SDK client.
     * @param file Path of the audio file to transcribe.
     * @returns A new request, or null if any required value is blank.
     */
    public static build(openai: OpenAI, file: string): OpenAiWhisperRequest | null {
        const apiKey = storeGet('apiKeys.openAi.key');
        const endpoint = storeGet('apiKeys.openAi.endpoint');
        if (StrUtil.hasBlank(file, apiKey, endpoint)) {
            return null;
        }
        return new OpenAiWhisperRequest(openai, file);
    }

    /**
     * Transcribes the file with the `whisper-1` model and returns the verified
     * result. Any invocation still in progress on this instance is canceled
     * first.
     *
     * @returns Language, duration (as a number), full text and segments.
     * @throws Error with message 'Invalid response from OpenAI' when the reply
     *         does not match the expected schema; errors raised by the SDK
     *         (including its abort error after `cancel()`) propagate unchanged.
     */
    public async invoke(): Promise<WhisperResponse> {
        this.cancel();

        // Register the controller BEFORE waiting on the rate limiter so that a
        // cancel() issued during the wait is not lost. A local reference is kept
        // because cancel() resets the field to null.
        const controller = new AbortController();
        this.abortController = controller;
        let audio: fs.ReadStream | null = null;
        try {
            await RateLimiter.wait('whisper');

            // NOTE(review): when the signal is already aborted at this point the
            // SDK is expected to reject with its user-abort error without sending
            // the request — confirm against the installed openai version.
            // NOTE(review): the earlier axios-based code mapped cancellation to
            // CancelByUserError; here the SDK error propagates — confirm callers.
            audio = fs.createReadStream(this.file);
            const transcription = await this.openAi.audio.transcriptions.create({
                file: audio,
                model: 'whisper-1',
                response_format: 'verbose_json',
                timestamp_granularities: ['segment']
            }, { signal: controller.signal });

            // Verify with zod that the reply has the verbose transcription shape.
            const parseRes = WhisperResponseVerifySchema.safeParse(transcription);
            if (!parseRes.success) {
                // Log the individual schema issues to show why it did not match.
                dpLog.error('Invalid response from OpenAI', parseRes.error.errors);
                throw new Error('Invalid response from OpenAI');
            }

            // Build the result from the validated data rather than the raw reply.
            const verified = parseRes.data;
            return {
                language: verified.language,
                duration: Number(verified.duration),
                text: verified.text,
                offset: 0,
                segments: verified.segments.map((seg) => ({
                    seek: seg.seek,
                    start: seg.start,
                    end: seg.end,
                    text: seg.text
                }))
            };
        } finally {
            // Release the file handle on every path (no-op if already closed).
            audio?.destroy();
            // Clear only our own controller; a newer invoke() may have replaced it.
            if (this.abortController === controller) {
                this.abortController = null;
            }
        }
    }

    /**
     * Aborts the invocation in progress, if any. Safe to call when idle.
     */
    public cancel(): void {
        if (this.abortController) {
            this.abortController.abort('Operation canceled by the user');
            this.abortController = null;
        }
    }
}
95
93
0 commit comments