@@ -195,6 +195,112 @@ export class TranscribeService {
195
195
. join ( '\n' ) ;
196
196
}
197
197
198
/**
 * Reshapes raw transcription segments into subtitle-sized cues.
 *
 * For every input segment the text is greedily wrapped at
 * MAX_CHARS_PER_LINE characters. If the wrapped text needs more than
 * MAX_LINES_PER_CUE lines, the segment is split into several consecutive
 * cues of at most MAX_LINES_PER_CUE lines each, so that no transcript text
 * is discarded. The segment's time span is shared between the resulting
 * cues in proportion to their character count.
 *
 * Each cue's end time is then limited to a reading-speed based duration
 * (clamped to [MIN_DURATION, MAX_DURATION]) but never runs past the span
 * allotted to it. The result is finally passed through
 * `mergeShortSegments`.
 *
 * @param segments Segments to optimize; the input objects are not mutated.
 * @returns A new array of cues containing only `start`, `end` and `text`.
 */
private optimizeSegments(segments: TSegment[]): TSegment[] {
  const optimized: TSegment[] = [];
  const MIN_DURATION = 1.0; // seconds
  const MAX_DURATION = 7.0; // seconds
  const MAX_CHARS_PER_LINE = 42;
  const MAX_LINES_PER_CUE = 2;
  const CHARS_PER_SECOND = 20;

  for (const segment of segments) {
    const words = segment.text.split(' ');
    let currentLine = '';
    let lines: string[] = [];

    // Greedy wrap: start a new line when adding the next word would exceed
    // the per-line character budget.
    for (const word of words) {
      if ((currentLine + word).length > MAX_CHARS_PER_LINE) {
        if (currentLine) lines.push(currentLine.trim());
        currentLine = word + ' ';
      } else {
        currentLine += word + ' ';
      }
    }
    if (currentLine) lines.push(currentLine.trim());

    if (lines.length > MAX_LINES_PER_CUE) {
      lines = this.redistributeLines(lines);
    }

    // Group the wrapped lines into cues of at most MAX_LINES_PER_CUE lines.
    // Previously everything after the second line was dropped, which lost
    // transcript text for long segments.
    const cueTexts: string[] = [];
    for (let i = 0; i < lines.length; i += MAX_LINES_PER_CUE) {
      cueTexts.push(lines.slice(i, i + MAX_LINES_PER_CUE).join('\n'));
    }

    const totalChars = cueTexts.reduce((sum, cue) => sum + cue.length, 0);
    const segmentDuration = segment.end - segment.start;
    let cueStart = segment.start;
    let charsSoFar = 0;

    for (let index = 0; index < cueTexts.length; index++) {
      const text = cueTexts[index];
      charsSoFar += text.length;

      // The last cue always ends exactly at the segment end, so a segment
      // that fits in one cue keeps its original [start, end] span.
      const isLast = index === cueTexts.length - 1;
      const cueEnd =
        isLast || totalChars === 0
          ? segment.end
          : segment.start + segmentDuration * (charsSoFar / totalChars);
      const duration = cueEnd - cueStart;

      // Adjust timing based on text length and reading speed.
      const requiredDuration = text.length / CHARS_PER_SECOND;
      const newDuration = Math.min(
        MAX_DURATION,
        Math.max(MIN_DURATION, requiredDuration)
      );

      if (duration < newDuration && optimized.length > 0) {
        // This cue is shorter than its reading time: if the previous cue is
        // close enough, move the previous cue's end to this cue's start
        // (capped at MAX_DURATION after the previous cue's start).
        const prev = optimized[optimized.length - 1];
        const gap = cueStart - prev.end;
        if (gap < 0.5) {
          prev.end = Math.min(cueStart, prev.start + MAX_DURATION);
        }
      }

      optimized.push({
        start: cueStart,
        // Never extend past the time span allotted to this cue.
        end: Math.min(cueStart + newDuration, cueEnd),
        text
      });

      cueStart = cueEnd;
    }
  }

  return this.mergeShortSegments(optimized);
}
256
+
257
/**
 * Re-wraps the given lines into lines of at most 42 characters.
 *
 * The lines are joined with single spaces, split back into space-separated
 * tokens and packed greedily: a token that would push the current line past
 * the limit starts a new line. A single token longer than the limit is kept
 * whole on its own line.
 *
 * @param lines Lines of text to re-wrap.
 * @returns The re-wrapped lines, each trimmed of surrounding whitespace.
 */
private redistributeLines(lines: string[]): string[] {
  const LINE_WIDTH = 42;
  const tokens = lines.join(' ').split(' ');
  const wrapped: string[] = [];
  let buffer = '';

  tokens.forEach((token) => {
    const fits = (buffer + token).length <= LINE_WIDTH;
    if (fits) {
      buffer = `${buffer}${token} `;
      return;
    }
    // Flush the pending line (if any) and start a new one with this token.
    if (buffer) wrapped.push(buffer.trim());
    buffer = `${token} `;
  });

  if (buffer) wrapped.push(buffer.trim());
  return wrapped;
}
274
+
275
/**
 * Folds too-short cues into the cue that follows them.
 *
 * Walks the segments in order while keeping one "current" cue. If the
 * current cue lasts less than MIN_DURATION seconds and the next segment
 * starts less than 0.3 seconds after it ends, the next segment is absorbed:
 * the current cue's end becomes the next segment's end and the texts are
 * joined with a newline. Otherwise the current cue is emitted and the next
 * segment becomes the new current cue.
 *
 * Note: merging concatenates texts with '\n', so a merged cue can contain
 * more lines than either of its inputs.
 *
 * @param segments Cues in the order they should be processed.
 * @returns A new array of shallow-copied cues; the input objects are not mutated.
 */
private mergeShortSegments(segments: TSegment[]): TSegment[] {
  const MIN_DURATION = 1.0; // seconds
  const result: TSegment[] = [];
  let current: TSegment | null = null;

  for (const segment of segments) {
    if (!current) {
      // Copy so that later merges do not mutate the caller's object.
      current = { ...segment };
      continue;
    }

    const currentDuration = current.end - current.start;
    const gap = segment.start - current.end;

    if (currentDuration < MIN_DURATION && gap < 0.3) {
      // Merge with next segment
      current.end = segment.end;
      current.text += '\n' + segment.text;
    } else {
      result.push(current);
      current = { ...segment };
    }
  }

  if (current) result.push(current);
  return result;
}
303
+
198
304
private async transcribe ( {
199
305
source,
200
306
language,
@@ -224,12 +330,13 @@ export class TranscribeService {
224
330
console . log ( `Deleted chunk ${ filePath } ` ) ;
225
331
currentTime += actualDuration ;
226
332
}
333
+ const optimizedSegments = this . optimizeSegments ( allSegments ) ;
227
334
if ( format === 'vtt' ) {
228
- return this . formatSegmentsToVTT ( allSegments ) ;
335
+ return this . formatSegmentsToVTT ( optimizedSegments ) ;
229
336
} else if ( format === 'srt' ) {
230
- return this . formatSegmentsToSRT ( allSegments ) ;
337
+ return this . formatSegmentsToSRT ( optimizedSegments ) ;
231
338
} else if ( format === 'json' ) {
232
- return JSON . stringify ( allSegments ) ;
339
+ return JSON . stringify ( optimizedSegments ) ;
233
340
} else {
234
341
return allSegments . map ( ( segment ) => segment . text ) . join ( '\n' ) ;
235
342
}
0 commit comments