<!--
  Lexer definition for WebVTT (Web Video Text Tracks) files.
  Each state lists regex rules; a rule may emit tokens, push another state,
  or pop states off the stack.
-->
<lexer>
  <config>
    <name>WebVTT</name>
    <alias>vtt</alias>
    <filename>*.vtt</filename>
    <mime_type>text/vtt</mime_type>
  </config>
  <!--
    The WebVTT spec defines a WebVTT line terminator as either CRLF, CR or LF
    (https://www.w3.org/TR/webvtt1/#webvtt-line-terminator). With this
    definition it is unclear whether CRLF is one line terminator (CRLF) or two
    line terminators (CR and LF).

    To work around this ambiguity, only CRLF and LF are treated as line terminators.
    To my knowledge only classic Mac OS uses CR as a line terminator, so the lexer
    should still work for most files.
  -->
  <rules>
    <!-- https://www.w3.org/TR/webvtt1/#webvtt-file-body -->
    <state name="root">
      <!-- File signature: WEBVTT at the very start, optional header text, then at least two line terminators. -->
      <rule pattern="(\AWEBVTT)((?:[ \t][^\r\n]*)?(?:\r?\n){2,})">
        <bygroups>
          <token type="Keyword"/>
          <token type="Text"/>
        </bygroups>
      </rule>
      <!-- A REGION line opens a region definition block. -->
      <rule pattern="(^REGION)([ \t]*$)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Text"/>
        </bygroups>
        <push state="region-settings-list"/>
      </rule>
      <!-- A STYLE block: the third group (body up to the blank line) is delegated to the CSS lexer. -->
      <rule pattern="(^STYLE)([ \t]*$)((?:(?!--&gt;)[\s\S])*?)((?:\r?\n){2})">
        <bygroups>
          <token type="Keyword"/>
          <token type="Text"/>
          <using lexer="CSS"/>
          <token type="Text"/>
        </bygroups>
      </rule>
      <rule>
        <include state="comment"/>
      </rule>
      <!--
        Zero-width lookahead: an optional identifier line followed by a cue timing line
        switches to the "cues" state without consuming any input.
      -->
      <rule pattern="(?=((?![^\r\n]*--&gt;)[^\r\n]*\r?\n)?(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3}[ \t]+--&gt;[ \t]+(\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
        <push state="cues"/>
      </rule>
    </state>

    <!-- https://www.w3.org/TR/webvtt1/#webvtt-region-settings-list -->
    <state name="region-settings-list">
      <!-- Spaces, tabs and single line terminators (not followed by another one) separate settings. -->
      <rule pattern="(?: |\t|\r?\n(?!\r?\n))+">
        <token type="Text"/>
      </rule>
      <!-- Two consecutive line terminators end the region block. -->
      <rule pattern="(?:\r?\n){2}">
        <token type="Text"/>
        <pop depth="1"/>
      </rule>
      <rule pattern="(id)(:)(?!--&gt;)(\S+)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
        </bygroups>
      </rule>
      <rule pattern="(width)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
          <token type="KeywordType"/>
        </bygroups>
      </rule>
      <rule pattern="(lines)(:)(\d+)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
        </bygroups>
      </rule>
      <!-- Anchors take two comma-separated percentages. -->
      <rule pattern="(regionanchor|viewportanchor)(:)((?:[1-9]?\d|100)(?:\.\d+)?)(%)(,)((?:[1-9]?\d|100)(?:\.\d+)?)(%)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
          <token type="KeywordType"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
          <token type="KeywordType"/>
        </bygroups>
      </rule>
      <rule pattern="(scroll)(:)(up)">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="KeywordConstant"/>
        </bygroups>
      </rule>
    </state>

    <!-- https://www.w3.org/TR/webvtt1/#webvtt-comment-block -->
    <state name="comment">
      <!-- A NOTE block runs until a blank line or the end of input and must not contain the cue timing arrow. -->
      <rule pattern="^NOTE( |\t|\r?\n)((?!--&gt;)[\s\S])*?(?:(\r?\n){2}|\Z)">
        <token type="Comment"/>
      </rule>
    </state>

    <!--
      "Zero or more WebVTT cue blocks and WebVTT comment blocks separated from each other by one or more
      WebVTT line terminators." (https://www.w3.org/TR/webvtt1/#file-structure)
    -->
    <state name="cues">
      <!--
        Groups, in order: optional cue identifier, its line terminator, start timestamp,
        whitespace, arrow, whitespace, end timestamp, trailing whitespace.
      -->
      <rule pattern="(?:((?!--&gt;)[^\r\n]+)?(\r?\n))?((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]+)(--&gt;)([ \t]+)((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})([ \t]*)">
        <bygroups>
          <token type="Name"/>
          <token type="Text"/>
          <token type="LiteralDate"/>
          <token type="Text"/>
          <token type="Operator"/>
          <token type="Text"/>
          <token type="LiteralDate"/>
          <token type="Text"/>
        </bygroups>
        <push state="cue-settings-list"/>
      </rule>
      <rule>
        <include state="comment"/>
      </rule>
    </state>

    <!-- https://www.w3.org/TR/webvtt1/#webvtt-cue-settings-list -->
    <state name="cue-settings-list">
      <rule pattern="[ \t]+">
        <token type="Text"/>
      </rule>
      <!-- In the setting rules below the colon and the value are optional, so partially written settings still match. -->
      <rule pattern="(vertical)(:)?(rl|lr)?">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="KeywordConstant"/>
        </bygroups>
      </rule>
      <rule pattern="(line)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(start|center|end))?)?">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
          <token type="KeywordType"/>
          <token type="Literal"/>
          <token type="Punctuation"/>
          <token type="KeywordConstant"/>
        </bygroups>
      </rule>
      <rule pattern="(position)(:)?(?:(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%)|(-?\d+))(?:(,)(line-left|center|line-right))?)?">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
          <token type="KeywordType"/>
          <token type="Literal"/>
          <token type="Punctuation"/>
          <token type="KeywordConstant"/>
        </bygroups>
      </rule>
      <rule pattern="(size)(:)?(?:((?:[1-9]?\d|100)(?:\.\d+)?)(%))?">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
          <token type="KeywordType"/>
        </bygroups>
      </rule>
      <rule pattern="(align)(:)?(start|center|end|left|right)?">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="KeywordConstant"/>
        </bygroups>
      </rule>
      <rule pattern="(region)(:)?((?![^\r\n]*--&gt;(?=[ \t]+?))[^ \t\r\n]+)?">
        <bygroups>
          <token type="Keyword"/>
          <token type="Punctuation"/>
          <token type="Literal"/>
        </bygroups>
      </rule>
      <!-- The line terminator after the timing line starts the payload; it is left for the payload state to consume. -->
      <rule pattern="(?=\r?\n)">
        <push state="cue-payload"/>
      </rule>
    </state>

    <!-- https://www.w3.org/TR/webvtt1/#cue-payload -->
    <state name="cue-payload">
      <!-- A blank line ends the cue: pop cue-payload and cue-settings-list to return to "cues". -->
      <rule pattern="(\r?\n){2,}">
        <token type="Text"/>
        <pop depth="2"/>
      </rule>
      <!-- Plain text: anything except a tag opener or an ampersand. -->
      <rule pattern="[^&lt;&amp;]+?">
        <token type="Text"/>
      </rule>
      <!-- Character references: decimal, hexadecimal or named. -->
      <rule pattern="&amp;(#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);">
        <token type="Text"/>
      </rule>
      <!-- A tag opener hands over to the span tag state without consuming it. -->
      <rule pattern="(?=&lt;)">
        <token type="Text"/>
        <push state="cue-span-tag"/>
      </rule>
    </state>

    <!-- Start tags, end tags and timestamp tags inside a cue payload. -->
    <state name="cue-span-tag">
      <!-- Start tag opener, only when followed by a known tag name or a timestamp. -->
      <rule pattern="&lt;(?=c|i|b|u|ruby|rt|v|lang|(?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
        <token type="Punctuation"/>
        <push state="cue-span-start-tag-name"/>
      </rule>
      <!-- End tag opener together with its tag name. -->
      <rule pattern="(&lt;/)(c|i|b|u|ruby|rt|v|lang)">
        <bygroups>
          <token type="Punctuation"/>
          <token type="NameTag"/>
        </bygroups>
      </rule>
      <!-- The closing angle bracket finishes the tag and returns to the payload. -->
      <rule pattern="&gt;">
        <token type="Punctuation"/>
        <pop depth="1"/>
      </rule>
    </state>

    <state name="cue-span-start-tag-name">
      <!-- Tag names (or a timestamp) that may carry classes but no annotation. -->
      <rule pattern="(c|i|b|u|ruby|rt)|((?:\d{2}:)?(?:[0-5][0-9]):(?:[0-5][0-9])\.\d{3})">
        <bygroups>
          <token type="NameTag"/>
          <token type="LiteralDate"/>
        </bygroups>
        <push state="cue-span-classes-without-annotations"/>
      </rule>
      <!-- Tag names that may carry classes followed by an annotation. -->
      <rule pattern="v|lang">
        <token type="NameTag"/>
        <push state="cue-span-classes-with-annotations"/>
      </rule>
    </state>

    <state name="cue-span-classes-without-annotations">
      <rule>
        <include state="cue-span-classes"/>
      </rule>
      <!-- Pop this state and cue-span-start-tag-name so cue-span-tag can consume the closing bracket. -->
      <rule pattern="(?=&gt;)">
        <pop depth="2"/>
      </rule>
    </state>

    <state name="cue-span-classes-with-annotations">
      <rule>
        <include state="cue-span-classes"/>
      </rule>
      <!-- Whitespace introduces the annotation; this state has no rule for the closing bracket itself. -->
      <rule pattern="(?=[ \t])">
        <push state="cue-span-start-tag-annotations"/>
      </rule>
    </state>

    <!-- Shared rule for dot-prefixed class names on a start tag. -->
    <state name="cue-span-classes">
      <rule pattern="(\.)([^ \t\n\r&amp;&lt;&gt;\.]+)">
        <bygroups>
          <token type="Punctuation"/>
          <token type="NameTag"/>
        </bygroups>
      </rule>
    </state>

    <state name="cue-span-start-tag-annotations">
      <!-- Annotation text: one space or tab, then plain characters or character references. -->
      <rule pattern="[ \t](?:[^\n\r&amp;&gt;]|&amp;(?:#\d+|#x[0-9A-Fa-f]+|[a-zA-Z0-9]+);)+">
        <token type="Text"/>
      </rule>
      <!-- Pop the annotation, classes and tag-name states so cue-span-tag can consume the closing bracket. -->
      <rule pattern="(?=&gt;)">
        <token type="Text"/>
        <pop depth="3"/>
      </rule>
    </state>
  </rules>
</lexer>