Skip to content

Commit 2f3b07e

Browse files
authored
Merge pull request #113 from solidSpoon/whisper-issue
优化生成字幕的逻辑
2 parents afe656a + d5e502e commit 2f3b07e

File tree

17 files changed

+466
-147
lines changed

17 files changed

+466
-147
lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "dash-player",
33
"productName": "DashPlayer",
4-
"version": "5.1.3",
4+
"version": "5.1.4",
55
"description": "My Electron application description",
66
"main": ".vite/build/main.js",
77
"scripts": {
@@ -139,6 +139,7 @@
139139
"tailwind-merge": "^2.2.1",
140140
"tailwindcss-animate": "^1.0.7",
141141
"tencentcloud-sdk-nodejs": "^4.0.764",
142+
"ts-error": "^1.0.6",
142143
"vaul": "^0.9.0",
143144
"zod": "^3.23.8",
144145
"zod-to-json-schema": "^3.22.5",

src/backend/errors/CancelByUserError.ts

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/backend/errors/errors.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import { ExtendableError } from 'ts-error';
2+
3+
/**
 * Error raised when a Whisper response does not have the expected format.
 */
export class WhisperResponseFormatError extends ExtendableError {}
8+
9+
/**
 * Error raised when a task is canceled by the user.
 */
export class CancelByUserError extends ExtendableError {}

src/backend/objs/OpenAiWhisperRequest.ts

Lines changed: 22 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,9 @@ import RateLimiter from '@/common/utils/RateLimiter';
44
import StrUtil from '@/common/utils/str-util';
55
import { Cancelable } from '@/common/interfaces';
66
import OpenAI from 'openai';
7-
8-
import { z } from 'zod';
97
import dpLog from '@/backend/ioc/logger';
10-
11-
const WhisperResponseVerifySchema = z.object({
12-
language: z.string(),
13-
duration: z.union([z.number(), z.string()]),
14-
text: z.string(),
15-
segments: z.array(z.object({
16-
seek: z.number(),
17-
start: z.number(),
18-
end: z.number(),
19-
text: z.string()
20-
}))
21-
});
22-
23-
export interface WhisperResponse {
24-
language: string;
25-
duration: number;
26-
text: string;
27-
offset: number;
28-
segments: {
29-
seek: number;
30-
start: number;
31-
end: number;
32-
text: string;
33-
}[];
34-
}
8+
import { WhisperResponseFormatError } from '@/backend/errors/errors';
9+
import { WhisperResponse, WhisperResponseVerifySchema } from '@/common/types/video-info';
3510

3611
class OpenAiWhisperRequest implements Cancelable {
3712
private readonly file: string;
@@ -55,33 +30,41 @@ class OpenAiWhisperRequest implements Cancelable {
5530
/**
 * Transcribes the audio file and returns the result in the `WhisperResponse` shape.
 *
 * `cancel()` is called first, then the call waits on the 'whisper' rate limiter
 * before the transcription request is issued.
 *
 * @returns Language, duration, full text and per-segment timings; `segments`
 *          falls back to an empty array when the response has none.
 * @throws WhisperResponseFormatError when the response fails schema validation.
 */
public async invoke(): Promise<WhisperResponse> {
    this.cancel();
    await RateLimiter.wait('whisper');
    const transcription = await this.doTranscription();
    // Use zod to verify that the response has the expected verbose transcription shape.
    const parseRes = WhisperResponseVerifySchema.safeParse(transcription);
    if (!parseRes.success) {
        // Log the individual schema mismatches so the reason can be diagnosed.
        dpLog.error('Invalid response from OpenAI', parseRes.error.errors);
        // Keep a descriptive message on the typed error; a message-less error
        // is unhelpful wherever `error.message` is logged or displayed.
        throw new WhisperResponseFormatError('Invalid response from OpenAI');
    }
    return {
        language: transcription.language,
        duration: transcription.duration,
        text: transcription.text,
        segments: transcription.segments?.map((seg) => ({
            seek: seg.seek,
            start: seg.start,
            end: seg.end,
            text: seg.text
        })) ?? []
    };
}
8454

55+
/**
 * Sends the audio file to the OpenAI transcription endpoint
 * (model `whisper-1`, `verbose_json` output, segment-level timestamps).
 *
 * A fresh AbortController is stored on the instance and its signal is passed
 * to the request. Any failure is logged and then rethrown unchanged.
 */
private async doTranscription() {
    const controller = new AbortController();
    this.abortController = controller;
    try {
        const verboseTranscription = await this.openAi.audio.transcriptions.create(
            {
                file: fs.createReadStream(this.file),
                model: 'whisper-1',
                response_format: 'verbose_json',
                timestamp_granularities: ['segment']
            },
            { signal: controller.signal }
        );
        return verboseTranscription;
    } catch (failure) {
        dpLog.error(failure);
        throw failure;
    }
}
8669

8770
public cancel(): void {

src/backend/objs/config-tender.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import { z } from 'zod';
2+
import fs from 'fs';
3+
import path from 'path';
4+
5+
/**
 * Manages a JSON configuration file whose content is validated by a Zod schema.
 *
 * @template T Configuration type
 * @template S Zod schema type
 */
export class ConfigTender<T, S extends z.ZodType<T>> {
    private readonly configPath: string;
    private readonly schema: S;

    /**
     * @param configPath Path of the JSON config file; its parent directory is created when missing.
     * @param schema Schema every config value is parsed with on read and on write.
     * @param defaultValue Written to `configPath` when the file does not exist yet.
     * @throws Error when the default value fails validation or cannot be written.
     */
    constructor(configPath: string, schema: S, defaultValue?: T) {
        this.configPath = configPath;
        this.schema = schema;

        // Make sure the directory exists.
        const dir = path.dirname(configPath);
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
        }

        // Create the file when it is missing and a default was supplied.
        // Compare against `undefined` instead of relying on truthiness, so that
        // valid falsy defaults (0, '', false) are persisted as well.
        if (defaultValue !== undefined && !fs.existsSync(configPath)) {
            this.save(defaultValue);
        }
    }

    /**
     * Reads the whole configuration.
     *
     * @returns The file content after JSON parsing and schema validation.
     * @throws Error when the file cannot be read, is not valid JSON, or fails validation.
     */
    get(): T {
        try {
            const content = fs.readFileSync(this.configPath, 'utf-8');
            const parsed: unknown = JSON.parse(content);
            return this.schema.parse(parsed);
        } catch (error) {
            throw new Error(`Failed to read config: ${error}`);
        }
    }

    /**
     * Saves the whole configuration.
     *
     * The value is validated first; the schema's parse result is what gets
     * written, as JSON indented by two spaces.
     *
     * @param config Configuration to persist.
     * @throws Error when validation or writing fails.
     */
    save(config: T): void {
        try {
            const validated = this.schema.parse(config);
            fs.writeFileSync(this.configPath, JSON.stringify(validated, null, 2));
        } catch (error) {
            throw new Error(`Failed to save config: ${error}`);
        }
    }
}

src/backend/services/FfmpegService.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { VideoInfo } from '@/common/types/video-info';
12

23

34
export default interface FfmpegService {
@@ -82,5 +83,10 @@ export default interface FfmpegService {
8283
}): Promise<string>;
8384

8485
trimVideo(inputPath: string, startTime: number, endTime: number, outputPath: string): Promise<void>;
86+
87+
/**
88+
* Get video information
89+
*/
90+
getVideoInfo(filePath: string): Promise<VideoInfo>;
8591
}
8692

src/backend/services/impl/DpTaskServiceImpl.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@ import { DpTask, dpTask, DpTaskState, InsertDpTask } from '@/backend/db/tables/d
44

55
import LRUCache from 'lru-cache';
66
import TimeUtil from '@/common/utils/TimeUtil';
7-
import ErrorConstants from '@/common/constants/error-constants';
87
import { injectable, postConstruct } from 'inversify';
98
import DpTaskService from '@/backend/services/DpTaskService';
109
import dpLog from '@/backend/ioc/logger';
1110
import { Cancelable } from '@/common/interfaces';
12-
import CancelByUserError from '@/backend/errors/CancelByUserError';
11+
12+
import { CancelByUserError } from '@/backend/errors/errors';
1313

1414
@injectable()
1515
export default class DpTaskServiceImpl implements DpTaskService {
@@ -188,6 +188,9 @@ export default class DpTaskServiceImpl implements DpTaskService {
188188
/**
 * Adds a cancelable process to the list kept for the given task.
 *
 * When the task id is already present in `cancelQueue`, the newly registered
 * process is cancelled immediately.
 *
 * @param taskId Id of the task the process belongs to.
 * @param process Cancelable unit of work to associate with the task.
 */
public registerTask(taskId: number, process: Cancelable) {
    const known = this.taskMapping.get(taskId) ?? [];
    const updated = [...known, process];
    this.taskMapping.set(taskId, updated);

    const cancelPending = this.cancelQueue.has(taskId);
    if (cancelPending) {
        process.cancel();
    }
}
192195

193196
}

src/backend/services/impl/FfmpegServiceImpl.ts

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@ import TYPES from '@/backend/ioc/types';
99
import FfmpegService from '@/backend/services/FfmpegService';
1010
import FfmpegTask from '@/backend/objs/FfmpegTask';
1111
import DpTaskService from '@/backend/services/DpTaskService';
12-
import CancelByUserError from '@/backend/errors/CancelByUserError';
1312
import dpLog from '@/backend/ioc/logger';
1413
import ffmpeg from 'fluent-ffmpeg';
1514
import LocationService, { ProgramType } from '@/backend/services/LocationService';
15+
import { VideoInfo } from '@/common/types/video-info';
16+
import { CancelByUserError } from '@/backend/errors/errors';
1617

1718
@injectable()
1819
export default class FfmpegServiceImpl implements FfmpegService {
@@ -100,6 +101,34 @@ export default class FfmpegServiceImpl implements FfmpegService {
100101
});
101102
}
102103

104+
/**
 * Collects file-system and ffprobe metadata for a video file.
 *
 * @param filePath Path of the video file to inspect.
 * @returns File name, duration, size, modification/creation timestamps,
 *          bitrate and the codec names of the first video and audio streams.
 * @throws When the file cannot be stat'ed or ffprobe reports an error.
 */
@WaitLock('ffprobe')
@logParams()
public async getVideoInfo(filePath: string): Promise<VideoInfo> {
    // Basic file information.
    const stats = await fs.promises.stat(filePath);

    // ffprobe information.
    const probeData = await new Promise<any>((resolve, reject) => {
        ffmpeg.ffprobe(filePath, (err, metadata) => {
            if (err) reject(err);
            else resolve(metadata);
        });
    });

    // Guard against a probe result that lacks `format` or `streams`.
    const format = probeData?.format ?? {};
    const streams: any[] = probeData?.streams ?? [];
    const codecOf = (codecType: string): string | undefined =>
        streams.find((s: any) => s.codec_type === codecType)?.codec_name;

    // Non-numeric values such as "N/A" must not leak into numeric fields.
    const parsedBitrate = format.bit_rate ? Number.parseInt(String(format.bit_rate), 10) : undefined;
    const bitrate = parsedBitrate === undefined || Number.isNaN(parsedBitrate) ? undefined : parsedBitrate;

    return {
        filename: path.basename(filePath),
        duration: Number(format.duration) || 0,
        size: stats.size,
        modifiedTime: stats.mtimeMs,
        // `ctimeMs` is the last status-change time, not the creation time.
        // Use the birth time and fall back to ctime where it is reported as 0.
        createdTime: stats.birthtimeMs || stats.ctimeMs,
        bitrate,
        videoCodec: codecOf('video'),
        audioCodec: codecOf('audio')
    };
}
103132

104133
/**
105134
* 截取视频的缩略图

0 commit comments

Comments
 (0)