Skip to content
This repository has been archived by the owner on Feb 26, 2022. It is now read-only.

Commit

Permalink
Fix issues with straight-through SRT
Browse files Browse the repository at this point in the history
  • Loading branch information
rossng committed Dec 23, 2020
1 parent 548f46c commit 4ef4122
Show file tree
Hide file tree
Showing 14 changed files with 523 additions and 41 deletions.
2 changes: 1 addition & 1 deletion demo/select-stt-json-type.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ const SttTypeSelect = props => {
<option value="ibm">IBM Watson STT</option>
<option value="assemblyai" disabled>AssemblyAI</option>
<option value="rev" disabled>Rev</option>
<option value="srt" disabled>Srt</option>
<option value="srt">Srt</option>
<option value="vtt" disabled>VTT</option>
<option value="vtt-youtube" disabled>Youtube VTT</option>
<option value="amazontranscribe">Amazon Transcribe</option>
Expand Down
57 changes: 20 additions & 37 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"@fortawesome/free-brands-svg-icons": "^5.11.2",
"@fortawesome/free-solid-svg-icons": "^5.11.2",
"@fortawesome/react-fontawesome": "^0.1.5",
"@types/strip-bom": "^4.0.1",
"babel-loader": "^8.0.6",
"babel-plugin-transform-object-rest-spread": "^6.26.0",
"babel-polyfill": "^6.26.0",
Expand All @@ -72,6 +73,7 @@
"sass-loader": "^10.1.0",
"sbd": "^1.0.15",
"smpte-timecode": "^1.2.3",
"strip-bom": "^3.0.0",
"style-loader": "^0.23.1",
"stylelint-config-standard": "^18.3.0",
"webpack": "^4.41.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/components/transcript-editor/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class TranscriptEditor extends React.Component {
rollBackValueInSeconds: 15,
timecodeOffset: 0,
showTimecodes: true,
showSpeakers: true,
showSpeakers: false,
previewIsDisplayed: true,
mediaDuration: "00:00:00:00",
gridDisplay: null,
Expand Down
2 changes: 1 addition & 1 deletion packages/export-adapters/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const exportAdapter = (blockData, exportFormat, transcriptTitle) => {
return { data: draftToDigitalPaperEdit(blockData), ext: 'json' };
case 'srt':
var { words } = draftToDigitalPaperEdit(blockData);
const srtContent = subtitlesGenerator({ words, type: 'srt', numberOfCharPerLine: 35 });
const srtContent = subtitlesGenerator({ words, type: 'srt', numberOfCharPerLine: 30 });

return { data: srtContent, ext: 'srt' };

Expand Down
5 changes: 5 additions & 0 deletions packages/stt-adapters/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import ibmToDraft from './ibm/index';
import digitalPaperEditToDraft from './digital-paper-edit/index';
import createEntityMap from './create-entity-map/index';
import gcpSttToDraft from './google-stt/index';
import srtToDraft from "./srt/index";

/**
* Adapters for STT conversion
Expand Down Expand Up @@ -43,6 +44,10 @@ const sttJsonAdapter = (transcriptData, sttJsonType) => {

return { blocks, entityMap: createEntityMap(blocks) };

case 'srt':
blocks = srtToDraft(transcriptData);

return { blocks, entityMap: createEntityMap(blocks) };
case 'google-stt':
blocks = gcpSttToDraft(transcriptData);

Expand Down
194 changes: 194 additions & 0 deletions packages/stt-adapters/srt/Parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/**
* The MIT License (MIT)
* Copyright (c) 2015 Guilherme Santiago
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

import stripBom from 'strip-bom'
import { RE_TIMESTAMP, parseTimestamps } from './parseTimestamps'
import type {Node} from './types'

/** Callback that receives each completed node emitted by the parser. */
export type Pusher = (node: Node) => void

/** Mutable bookkeeping the parser carries from one input line to the next. */
export interface ParseState {
/** Kind of line the next input line will be interpreted as. */
expect: 'header' | 'id' | 'timestamp' | 'text'
/** Line counter advanced by the parser; used to report the row in parse errors. */
row: number
/** False until the first non-blank line is seen; blank lines before it are skipped. */
hasContentStarted: boolean
/** True once a header line starting with "WEBVTT" has been seen. */
isWebVTT: boolean
/** Node currently being assembled; replaced with an empty object after each push. */
node: Partial<Node>
/** Raw lines collected for the current node; joined with "\n" when the node is pushed. */
buffer: string[]
}

/**
 * Incremental, line-by-line parser for SRT and WebVTT subtitle text.
 *
 * Feed the input one line at a time to `parseLine` and call `flush` once the
 * input is exhausted. Every completed node (an optional WebVTT header followed
 * by cues) is delivered to the `push` callback given to the constructor.
 */
export class Parser {
  private readonly push: Pusher
  private readonly state: ParseState

  /**
   * @param options - `push` is the callback invoked with every completed node.
   */
  constructor({ push }: { push: Pusher }) {
    this.push = push
    this.state = {
      expect: 'header',
      row: 0,
      hasContentStarted: false,
      isWebVTT: false,
      node: {},
      buffer: []
    }
  }

  /** Returns true when the trimmed line consists solely of digits (a cue index). */
  private isIndex(line: string): boolean {
    return /^\d+$/.test(line.trim())
  }

  /** Returns true when the line matches `RE_TIMESTAMP`. */
  private isTimestamp(line: string): boolean {
    return RE_TIMESTAMP.test(line)
  }

  /**
   * Builds the error raised for an unexpected line.
   *
   * @param expected - Name of the construct the parser was waiting for.
   * @param index - Zero-based row of the offending line (reported one-based).
   * @param row - Text of the offending line.
   */
  private getError(expected: string, index: number, row: string): Error {
    return new Error(
      `expected ${expected} at row ${index + 1}, but received: "${row}"`
    )
  }

  /**
   * Consumes one line of input and advances the state machine.
   *
   * Blank lines before the first content line are skipped. They are still
   * counted, so the row reported in a parse error matches the position of the
   * line in the input.
   *
   * @param line - A single line of subtitle text, without its line terminator.
   * @throws Error when a timestamp line is expected but the line is not one.
   */
  public parseLine(line: string): void {
    // Strip a byte-order mark from every line up to and including the first
    // content line; later lines are passed through untouched.
    const contents = this.state.hasContentStarted ? line : stripBom(line)

    if (!this.state.hasContentStarted) {
      if (!contents.trim()) {
        // Leading blank line: nothing to parse, but keep the row count honest.
        this.state.row++
        return
      }

      this.state.hasContentStarted = true
    }

    switch (this.state.expect) {
      case 'header':
        this.parseHeader(contents)
        break
      case 'id':
        this.parseId(contents)
        break
      case 'timestamp':
        this.parseTimestamp(contents)
        break
      case 'text':
        this.parseText(contents)
        break
    }

    this.state.row++
  }

  /** Emits the node still being assembled, if any lines were buffered for it. */
  public flush(): void {
    if (this.state.buffer.length > 0) {
      this.pushNode()
    }
  }

  /**
   * Handles lines while a header is expected. A line starting with "WEBVTT"
   * opens a header node whose lines are buffered until a blank line; any other
   * first line is handed straight to `parseId`.
   */
  private parseHeader(line: string): void {
    if (!this.state.isWebVTT) {
      this.state.isWebVTT = /^WEBVTT/.test(line)

      if (!this.state.isWebVTT) {
        // No WebVTT signature, so there is no header: treat this line as the
        // start of the first cue.
        this.parseId(line)
        return
      }

      this.state.node.type = 'header'
    }

    this.state.buffer.push(line)

    // An empty line terminates the header block.
    if (!line) {
      this.state.expect = 'id'
    }
  }

  /**
   * Handles the line that opens a cue. A pending header node is emitted first.
   * The cue index is optional: a line that is not an index is parsed as the
   * timestamp line instead.
   */
  private parseId(line: string): void {
    this.state.expect = 'timestamp'

    if (this.state.node.type === 'header') {
      this.pushNode()
    }

    if (!this.isIndex(line)) {
      this.parseTimestamp(line)
    }
  }

  /**
   * Starts a new cue node from a timestamp line.
   *
   * @throws Error when the line is not a timestamp line.
   */
  private parseTimestamp(line: string): void {
    if (!this.isTimestamp(line)) {
      throw this.getError('timestamp', this.state.row, line)
    }

    this.state.node = {
      type: 'cue',
      data: {
        ...parseTimestamps(line),
        text: ''
      }
    }

    this.state.expect = 'text'
  }

  /**
   * Collects the text lines of the current cue. A timestamp line seen after at
   * least one buffered line ends the cue and starts the next one.
   */
  private parseText(line: string): void {
    const startsNextCue =
      this.state.buffer.length > 0 && this.isTimestamp(line)

    if (!startsNextCue) {
      this.state.buffer.push(line)
      return
    }

    // The line just before the timestamp may be the next cue's index rather
    // than text of the current cue; drop it if so.
    const lastIndex = this.state.buffer.length - 1
    if (this.isIndex(this.state.buffer[lastIndex])) {
      this.state.buffer.pop()
    }

    this.pushNode()
    this.parseTimestamp(line)
  }

  /**
   * Finalises the node being assembled, hands it to `push`, and resets the
   * node and line buffer for the next one.
   */
  private pushNode(): void {
    if (this.state.node.type === 'cue') {
      const isBlank = (item: string | undefined): boolean =>
        item === '' || item === '\n'

      // Trim blank lines from both ends of the cue text; inner ones are kept.
      while (isBlank(this.state.buffer[this.state.buffer.length - 1])) {
        this.state.buffer.pop()
      }
      while (isBlank(this.state.buffer[0])) {
        this.state.buffer.shift()
      }

      this.state.node.data!.text = this.state.buffer.join('\n')
    }

    if (this.state.node.type === 'header') {
      this.state.node.data = this.state.buffer.join('\n').trim()
    }

    this.push(this.state.node as Node)

    this.state.node = {}
    this.state.buffer = []
  }
}
Loading

0 comments on commit 4ef4122

Please sign in to comment.