Skip to content

Commit

Permalink
Merge pull request #66 from glut23/voice_spans
Browse files Browse the repository at this point in the history
Add voice span support #55
  • Loading branch information
glut23 authored May 27, 2024
2 parents d215aab + 873c4d1 commit e2d1f6a
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 0 deletions.
44 changes: 44 additions & 0 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,50 @@ def test_malformed_start_timestamp(self):
'01:00'
)

def test_voice_span(self):
    """A plain voice tag is dropped from text and exposes the speaker."""
    tagged = '<v Homer Simpson>Hello there!</v>'
    cue = Caption(text=tagged)

    # text is expected without markup; raw_text keeps the input verbatim.
    self.assertEqual(cue.text, 'Hello there!')
    self.assertEqual(cue.raw_text, tagged)
    self.assertEqual(cue.voice, 'Homer Simpson')

def test_voice_span_with_classes(self):
    """Class suffixes on the voice tag must not leak into the speaker."""
    tagged = '<v.quiet.slow Lisa Simpson>I am Lisa</v>'
    cue = Caption(text=tagged)

    self.assertEqual(cue.text, 'I am Lisa')
    self.assertEqual(cue.raw_text, tagged)
    self.assertEqual(cue.voice, 'Lisa Simpson')

def test_voice_span_is_invalid(self):
    """An unterminated ``<v`` tag is kept as text and yields no voice."""
    broken = '<v Lets eat donuts'
    cue = Caption(text=broken)

    # Without a closing '>' there is no tag: text and raw_text are equal.
    self.assertEqual(cue.text, broken)
    self.assertEqual(cue.raw_text, broken)
    self.assertIsNone(cue.voice)

def test_voice_span_injected(self):
    """Assigning tagged text to a plain caption is expected to set voice."""
    plain = 'This is a test'
    tagged = '<v Homer Simpson>I like tests</v>'

    cue = Caption(text=plain)
    self.assertEqual(cue.text, plain)
    self.assertEqual(cue.raw_text, plain)
    self.assertIsNone(cue.voice)

    # Replace the payload through the text setter and re-check everything.
    cue.text = tagged
    self.assertEqual(cue.text, 'I like tests')
    self.assertEqual(cue.raw_text, tagged)
    self.assertEqual(cue.voice, 'Homer Simpson')

def test_voice_span_removed(self):
    """Assigning plain text to a voiced caption is expected to clear voice."""
    tagged = '<v Homer Simpson>I like tests</v>'
    plain = 'This is a test'

    cue = Caption(text=tagged)
    self.assertEqual(cue.text, 'I like tests')
    self.assertEqual(cue.raw_text, tagged)
    self.assertEqual(cue.voice, 'Homer Simpson')

    # Overwrite with untagged text; no speaker should remain afterwards.
    cue.text = plain
    self.assertEqual(cue.text, plain)
    self.assertEqual(cue.raw_text, plain)
    self.assertIsNone(cue.voice)


class TestStyle(unittest.TestCase):

Expand Down
55 changes: 55 additions & 0 deletions tests/test_webvtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,61 @@ def test_can_parse_youtube_dl_files(self):
vtt.captions[2].text
)

# Parse a WebVTT document with three cues whose payloads start with voice
# tags (two with class suffixes, one without) and check, per cue, both the
# string form of the caption and the speaker reported by ``voice``.
def test_parse_voice_spans(self):
# The literal is passed through textwrap.dedent() and .strip() before
# parsing, so whitespace inside it is significant to this test.
vtt = webvtt.from_string(textwrap.dedent("""
WEBVTT
00:00:00.000 --> 00:00:00.800
<v.quiet.slow Lisa Simpson>Knock knock</v>
00:00:02.100 --> 00:00:06.500
<v Homer Simpson>Who's there?</v>
00:00:10.530 --> 00:00:11.090
<v.loud Lisa Simpson>Atish</v>
""").strip()
)
# One caption is expected per cue.
self.assertEqual(len(vtt), 3)
# str(caption) is expected as "<start> <end> <text>" with the tag removed.
self.assertEqual(
str(vtt[0]),
'00:00:00.000 00:00:00.800 Knock knock'
)
# Classes (.quiet.slow) are expected not to be part of the voice.
self.assertEqual(
vtt[0].voice,
'Lisa Simpson'
)
self.assertEqual(
str(vtt[1]),
'00:00:02.100 00:00:06.500 Who\'s there?'
)
self.assertEqual(
vtt[1].voice,
'Homer Simpson'
)
self.assertEqual(
str(vtt[2]),
'00:00:10.530 00:00:11.090 Atish'
)
self.assertEqual(
vtt[2].voice,
'Lisa Simpson'
)

# Parse a single cue whose payload begins with "<v" but never closes the
# tag, and check that it is kept as plain text with no voice reported.
def test_parse_caption_not_a_voice_span(self):
# The literal is passed through textwrap.dedent() and .strip() before
# parsing, so whitespace inside it is significant to this test.
vtt = webvtt.from_string(textwrap.dedent("""
WEBVTT
00:00:00.000 --> 00:00:00.800
<v Not an actual voice span here
""").strip()
)
self.assertEqual(len(vtt), 1)
# The unterminated "<v ..." is expected to appear unchanged in str().
self.assertEqual(
str(vtt[0]),
'00:00:00.000 00:00:00.800 <v Not an actual voice span here'
)
self.assertIsNone(vtt[0].voice)


class TestParseSRT(unittest.TestCase):

Expand Down
11 changes: 11 additions & 0 deletions webvtt/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ class Caption:
"""Representation of a caption."""

# Matches any single markup tag of the form ``<...>`` (non-greedy).
CUE_TEXT_TAGS = re.compile('<.*?>')
# Matches an opening voice tag at the position it is applied to: ``<v``,
# zero or more ``.class`` segments, whitespace, then the annotation (the
# speaker name), captured as group 1 up to the closing ``>``.
# Class names accept every character WebVTT allows in a class (anything
# except whitespace, '.', '&', '<' and '>'), so tags such as
# ``<v.first-class Bob>`` are recognised, not only ``\w`` names.
VOICE_SPAN_PATTERN = re.compile(r'<v(?:\.[^\s.&<>]+)*\s+([^>]+)>')

def __init__(self,
start: typing.Optional[str] = None,
Expand Down Expand Up @@ -204,6 +205,16 @@ def text(self, value: str):

self.lines = value.splitlines()

@property
def voice(self) -> typing.Optional[str]:
    """Return the speaker named by a leading voice tag, or None.

    Only the first line of the caption is inspected, and the ``<v ...>``
    tag has to sit at the very start of that line. The value returned is
    the tag's annotation (group 1 of ``VOICE_SPAN_PATTERN``).
    """
    if not self.lines:
        return None

    first_line = self.lines[0]
    # Cheap prefix test before handing the line to the regex.
    if not first_line.startswith('<v'):
        return None

    found = re.match(self.VOICE_SPAN_PATTERN, first_line)
    return found.group(1) if found else None


class Style:
"""Representation of a style."""
Expand Down

0 comments on commit e2d1f6a

Please sign in to comment.