Skip to content

Commit e025ba3

Browse files
committed
feat: produce subtitles that follow industry standard
1 parent 9f19029 commit e025ba3

File tree

1 file changed

+110
-3
lines changed

1 file changed

+110
-3
lines changed

src/TranscribeService/TranscribeService.ts

+110-3
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,112 @@ export class TranscribeService {
195195
.join('\n');
196196
}
197197

198+
/**
 * Reshapes raw transcription segments into subtitle cues: lines of limited
 * width, at most two lines per cue, and display times capped by a
 * reading-speed estimate.
 *
 * Processing per input segment:
 *  1. The text is word-wrapped via `redistributeLines`.
 *  2. The wrapped lines are grouped into cues of at most two lines. A segment
 *     that needs more than two lines is split into several consecutive cues
 *     (instead of dropping the overflow), and the segment's time span is
 *     shared between them in proportion to their text length.
 *  3. Each cue's end time is capped at `start + clamp(chars / 20, 1s, 7s)`,
 *     never later than the end of its share of the original span.
 *
 * The resulting cues are finally passed through `mergeShortSegments`.
 *
 * Only `start`, `end` and `text` are copied to the output cues; the input
 * array and its elements are not mutated.
 *
 * @param segments - Segments in playback order; `start`/`end` are in seconds.
 * @returns The optimized cues.
 */
private optimizeSegments(segments: TSegment[]): TSegment[] {
  const MIN_DURATION = 1.0; // seconds
  const MAX_DURATION = 7.0; // seconds
  const MAX_LINES_PER_CUE = 2;
  const CHARS_PER_SECOND = 20; // reading-speed estimate
  const MAX_GAP_TO_CLOSE = 0.5; // seconds

  const optimized: TSegment[] = [];

  for (const segment of segments) {
    // Word-wrap once with the shared helper rather than duplicating its loop.
    const lines = this.redistributeLines([segment.text]);

    // Group the wrapped lines into cue-sized chunks so no text is discarded.
    const cueTexts: string[] = [];
    for (let i = 0; i < lines.length; i += MAX_LINES_PER_CUE) {
      cueTexts.push(lines.slice(i, i + MAX_LINES_PER_CUE).join('\n'));
    }
    // A segment without any text still yields one (empty) cue, as before.
    if (cueTexts.length === 0) cueTexts.push('');

    // Share the segment's time span between its cues by text length.
    // The weight is floored at 1 so the total can never be zero.
    const weights = cueTexts.map((cueText) => Math.max(cueText.length, 1));
    const totalWeight = weights.reduce((sum, weight) => sum + weight, 0);
    const span = segment.end - segment.start;

    let weightBefore = 0;
    cueTexts.forEach((text, index) => {
      const cueStart = segment.start + (span * weightBefore) / totalWeight;
      weightBefore += weights[index] ?? 1;
      // The last cue ends exactly at the segment end to avoid float drift.
      const cueEnd =
        index === cueTexts.length - 1
          ? segment.end
          : segment.start + (span * weightBefore) / totalWeight;

      const duration = cueEnd - cueStart;

      // Display time suggested by the text length, clamped to [MIN, MAX].
      const requiredDuration = text.length / CHARS_PER_SECOND;
      const newDuration = Math.min(
        MAX_DURATION,
        Math.max(MIN_DURATION, requiredDuration)
      );

      // When this cue is shorter than its suggested duration and starts
      // right after the previous cue, close the small gap by moving the
      // previous cue's end up to this cue's start (bounded by MAX_DURATION).
      const prev = optimized[optimized.length - 1];
      if (prev !== undefined && duration < newDuration) {
        const gap = cueStart - prev.end;
        if (gap < MAX_GAP_TO_CLOSE) {
          prev.end = Math.min(cueStart, prev.start + MAX_DURATION);
        }
      }

      // NOTE(review): this can only shorten a cue, never extend it, so
      // MIN_DURATION is not enforced here; short cues rely on the merge pass.
      optimized.push({
        start: cueStart,
        end: Math.min(cueStart + newDuration, cueEnd),
        text
      });
    });
  }

  return this.mergeShortSegments(optimized);
}
256+
257+
/**
 * Re-wraps text into lines of at most `maxCharsPerLine` characters, breaking
 * only between words.
 *
 * The input lines are joined and split again on any run of whitespace, so
 * leading, trailing and repeated spaces (as well as embedded newlines or
 * tabs) never produce empty "words" or stray spaces in the output.
 * A single word longer than the limit is kept intact on a line of its own.
 * Lines are filled greedily; no attempt is made to balance their lengths.
 *
 * @param lines - Text fragments to re-wrap; they are treated as one
 *   continuous run of words.
 * @param maxCharsPerLine - Maximum characters per output line (default 42).
 * @returns The wrapped lines, or an empty array when the input has no words.
 */
private redistributeLines(lines: string[], maxCharsPerLine = 42): string[] {
  const words = lines
    .join(' ')
    .split(/\s+/)
    .filter((word) => word.length > 0);
  const newLines: string[] = [];
  let currentLine = '';

  for (const word of words) {
    if (currentLine === '') {
      // First word of a line is always accepted, even if it is over-long.
      currentLine = word;
    } else if (currentLine.length + 1 + word.length > maxCharsPerLine) {
      // Adding " word" would overflow: close this line and start a new one.
      newLines.push(currentLine);
      currentLine = word;
    } else {
      currentLine += ' ' + word;
    }
  }

  if (currentLine !== '') newLines.push(currentLine);
  return newLines;
}
274+
275+
/**
 * Folds cues that are too short to read into the cue that follows them.
 *
 * Walking the cues in order, the cue being accumulated absorbs the next one
 * when it lasts less than one second and the next cue starts less than 0.3
 * seconds after it ends. The merged cue keeps the accumulated start, ends at
 * the later of the two end times, and carries both texts separated by a
 * newline. Empty texts are skipped when joining, so a merged cue never
 * contains a blank line.
 *
 * The input cues are shallow-copied; the input array and its elements are
 * not mutated.
 *
 * NOTE(review): merging can yield a cue with more lines than either input
 * had; confirm this is acceptable for the downstream formatters.
 *
 * @param segments - Cues in playback order; `start`/`end` are in seconds.
 * @returns A new array with short cues merged into their successors.
 */
private mergeShortSegments(segments: TSegment[]): TSegment[] {
  const MIN_DURATION = 1.0; // seconds
  const MAX_MERGE_GAP = 0.3; // seconds
  const result: TSegment[] = [];
  let current: TSegment | null = null;

  for (const segment of segments) {
    if (current === null) {
      current = { ...segment };
      continue;
    }

    const currentDuration = current.end - current.start;
    const gap = segment.start - current.end;

    if (currentDuration < MIN_DURATION && gap < MAX_MERGE_GAP) {
      // Absorb the next cue. Math.max keeps the end from moving backwards
      // when the two cues overlap.
      current.end = Math.max(current.end, segment.end);
      current.text = [current.text, segment.text]
        .filter((part) => part.length > 0)
        .join('\n');
    } else {
      result.push(current);
      current = { ...segment };
    }
  }

  if (current !== null) result.push(current);
  return result;
}
303+
198304
private async transcribe({
199305
source,
200306
language,
@@ -224,12 +330,13 @@ export class TranscribeService {
224330
console.log(`Deleted chunk ${filePath}`);
225331
currentTime += actualDuration;
226332
}
333+
const optimizedSegments = this.optimizeSegments(allSegments);
227334
if (format === 'vtt') {
228-
return this.formatSegmentsToVTT(allSegments);
335+
return this.formatSegmentsToVTT(optimizedSegments);
229336
} else if (format === 'srt') {
230-
return this.formatSegmentsToSRT(allSegments);
337+
return this.formatSegmentsToSRT(optimizedSegments);
231338
} else if (format === 'json') {
232-
return JSON.stringify(allSegments);
339+
return JSON.stringify(optimizedSegments);
233340
} else {
234341
return allSegments.map((segment) => segment.text).join('\n');
235342
}

0 commit comments

Comments
 (0)