-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvtparser.py
819 lines (643 loc) · 33.4 KB
/
vtparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
import logging
import sys
from enum import Enum
# Module-wide logger; everything in this file logs through it.
LOG = logging.getLogger('vtparser')
# Numeric level for the very verbose per-code trace output (passed to LOG.log).
# 5 is lower than logging.DEBUG (10).
LOG_TRACE = 5
class States(Enum):
    """
    Identifiers of the states of the VT500 parser state machine.

    The value of each member is the lower-case name of the state.
    """

    # Plain output
    GROUND = 'ground'

    # Escape sequences
    ESCAPE = 'escape'
    ESCAPE_INTERMEDIATE = 'escape_intermediate'

    # Control sequences (CSI)
    CSI_ENTRY = 'csi_entry'
    CSI_PARAM = 'csi_param'
    CSI_INTERMEDIATE = 'csi_intermediate'
    CSI_IGNORE = 'csi_ignore'

    # Device control strings (DCS)
    DCS_ENTRY = 'dcs_entry'
    DCS_PARAM = 'dcs_param'
    DCS_INTERMEDIATE = 'dcs_intermediate'
    DCS_PASSTHROUGH = 'dcs_passthrough'
    DCS_IGNORE = 'dcs_ignore'

    # Other control strings
    OSC_STRING = 'osc_string'
    SOS_PM_APC_STRING = 'sos_pm_apc_string'
class Actions(Enum):
    """
    Identifiers of the actions of the VT500 parser state machine.

    The value of each member is the lower-case name of the action.
    """

    # Actions on single codes
    IGNORE = 'ignore'
    PRINT = 'print'
    EXECUTE = 'execute'

    # Collecting and dispatching escape and control sequences
    CLEAR = 'clear'
    COLLECT = 'collect'
    PARAM = 'param'
    ESC_DISPATCH = 'esc_dispatch'
    CSI_DISPATCH = 'csi_dispatch'

    # Device control strings
    HOOK = 'hook'
    PUT = 'put'
    UNHOOK = 'unhook'

    # Operating system commands
    OSC_START = 'osc_start'
    OSC_PUT = 'osc_put'
    OSC_END = 'osc_end'
class Utf8StateMachine:
    """
    State machine parsing UTF-8 multibyte sequences and converting them to unicode code points.

    Feed the input one byte at a time to input(). As soon as input() returns State.ACCEPT
    the decoded code point can be fetched with get().

    The decoder is deliberately lenient: bytes that cannot start a multibyte sequence are
    passed through unchanged, and the decoded value is not checked for overlong encodings,
    surrogates or the upper limit of the unicode range.
    """

    class State(Enum):
        """
        UTF-8 state machine states
        """
        START = 'start'
        ACCEPT = 'accept'
        INVALID = 'invalid'
        EXPECT_3 = 3
        EXPECT_2 = 2
        EXPECT_1 = 1

    # For each state that waits for a continuation byte: how far the six payload bits
    # of that byte are shifted into the code point, and the state that follows.
    _CONTINUATION = {
        State.EXPECT_3: (12, State.EXPECT_2),
        State.EXPECT_2: (6, State.EXPECT_1),
        State.EXPECT_1: (0, State.ACCEPT),
    }

    def __init__(self):
        self.state = self.State.START
        # The code point collected so far
        self.uic = 0

    def reset(self):
        """Forget any partially decoded sequence and start over."""
        self.state = self.State.START
        self.uic = 0

    def input(self, code):
        """
        Process one input byte.

        :param code: the byte as an integer
        :return: the new state. ACCEPT means a complete code point is available from get(),
                 INVALID means that `code` was not the continuation byte that was required,
                 and one of the EXPECT_n states means that n more bytes are needed.
        :raises NotImplementedError: for the start bytes of five-byte and six-byte
                 sequences (0xf8-0xfd) and if the machine is in an unknown state.
        """
        states = self.State
        if self.state in (states.START, states.ACCEPT, states.INVALID):
            # No sequence is in progress, so this byte starts something new.
            if code < 0x80:
                # A normal 7-bit ASCII code is accepted as it is
                self.uic = code
                self.state = states.ACCEPT
            elif code < 0xc2 or code > 0xfd:
                # Anything but a multibyte start byte is accepted as it is
                # (logging formats lazily with %, so the placeholder must be %-style)
                LOG.debug("8-bit code seen in UTF-8 parser: 0x%02x. Accepted as normal code.", code)
                self.uic = code
                self.state = states.ACCEPT
            elif (code & 0xE0) == 0xC0:
                # Start byte of a two-byte sequence. Store and expect one more byte
                self.uic = (code & 0x1F) << 6
                self.state = states.EXPECT_1
            elif (code & 0xF0) == 0xE0:
                # Start byte of a three-byte sequence. Store and expect two more bytes
                self.uic = (code & 0x0F) << 12
                self.state = states.EXPECT_2
            elif (code & 0xF8) == 0xF0:
                # Start byte of a four-byte sequence. Store and expect three more bytes
                self.uic = (code & 0x07) << 18
                self.state = states.EXPECT_3
            elif 0xf8 <= code <= 0xfd:
                # These would be five-byte or six-byte sequences. Not implemented, so throw.
                raise NotImplementedError("UTF-8 sequence starting with 0x{:02x} is not supported.".format(code))
        elif self.state in self._CONTINUATION:
            if (code & 0xC0) == 0x80:
                # A continuation byte: merge its six payload bits into the code point
                shift, following = self._CONTINUATION[self.state]
                self.uic |= (code & 0x3F) << shift
                self.state = following
            else:
                # Nope, we need a continuation byte. This ain't one so reject the byte.
                self.state = states.INVALID
        else:
            raise NotImplementedError("Not implemented state '{}'".format(self.state))
        return self.state

    def get(self):
        """
        Return the decoded code point.

        :return: the code point if the machine is in state ACCEPT, otherwise the code point
                 of the unicode replacement character (U+FFFD).
        """
        if self.state == self.State.ACCEPT:
            return self.uic
        return ord('\N{REPLACEMENT CHARACTER}')
class State:
    """
    VT500Parser state machine state. It defines a mapping from an input code to an action and/or new state
    for each defined state of the state machine.

    State objects are created on demand by get() and cached in the class, so every state id
    is represented by exactly one shared object.
    """
    # Cache of the states generated so far, keyed by state id.
    states = {}

    def __init__(self, state_id: States):
        # State id. In case someone wants to find out who we are.
        self.id = state_id
        # Does this state support UTF-8 input? The default is no. Only the GROUND state accepts UTF-8
        # multibyte sequences. Other states use the bytes as they come in.
        self.accept_utf8 = False
        # The event map defines for each input code a (action,state) tuple.
        # The tuple has an action if the event, i.e. input code, results
        # in an action, and it has a state if the event results in a
        # transition to a new state.
        # The keys of the map can be single codes, or tuples that define a
        # start and an end of a code range.
        # The special keys 'entry' and 'exit' hold a one-element tuple with the
        # action to run when the state is entered or left.
        #
        # The following defaults are always the same for all states.
        self.event_map = {
            # The CANcel and SUBstitute control codes are immediately executed
            # and always transition to 'ground'
            0x18: (Actions.EXECUTE, States.GROUND),
            0x1A: (Actions.EXECUTE, States.GROUND),
            # The ESCape code always starts an escape control, i.e. transition to 'escape'
            0x1B: (None, States.ESCAPE),
            # The following C1 controls get executed immediately and transition to 'ground'
            (0x80, 0x8F): (Actions.EXECUTE, States.GROUND),
            (0x91, 0x97): (Actions.EXECUTE, States.GROUND),
            0x99: (Actions.EXECUTE, States.GROUND),
            0x9A: (Actions.EXECUTE, States.GROUND),
            # The String Terminator control function always transitions to ground
            0x9C: (None, States.GROUND),
            # The SOS, PM and APC control functions are ignored and immediately transition
            # to 'sos_pm_apc_string' for that.
            0x98: (None, States.SOS_PM_APC_STRING),
            0x9E: (None, States.SOS_PM_APC_STRING),
            0x9F: (None, States.SOS_PM_APC_STRING),
            # The Device Control String control function always starts a new device control string
            0x90: (None, States.DCS_ENTRY),
            # The Control Sequence Introducer control function always starts a new control sequence
            0x9B: (None, States.CSI_ENTRY),
            # The Operating System Command control function always starts a new OSC string
            0x9D: (None, States.OSC_STRING)
        }

    @classmethod
    def get(cls, state_id):
        """
        Return the state object for a state id. The object is generated on first use
        and served from the class-level cache afterwards.

        :param state_id: one of the States ids
        :return: the State, or None if generate_state() does not know the id
        """
        if state_id not in cls.states:
            cls.states[state_id] = cls.generate_state(state_id)
        return cls.states[state_id]

    def event(self, code):
        """
        Look up what has to happen for an input code in this state.

        :param code: the input code. A code point if the state accepts UTF-8, else a byte.
        :return: a tuple (action, new_state). Each element is None if the code causes
                 no action or no state transition, respectively.
        :raises NotImplementedError: if the event map has no entry for the code
        """
        if not self.accept_utf8 and 0xA0 <= code <= 0xff:
            # All codes A0-ff (GR area) are treated identically
            # to codes 20-7F (GL area). So for these codes search the
            # mapping table for the GL counterpart
            s_code = code - 0x80
        else:
            s_code = code

        # First check if the code is in the map as a single key
        entry = self.event_map.get(s_code)
        if entry is None:
            # Otherwise find a range in the keys in which the code does fit
            for key, candidate in self.event_map.items():
                if isinstance(key, tuple) and key[0] <= s_code <= key[1]:
                    entry = candidate
                    break
        if entry is None:
            raise NotImplementedError("The input 0x{:02x}(0x{:02x}) has no mapping defined.".format(s_code, code))

        action, state_id = entry
        return (action, None if state_id is None else self.get(state_id))

    def entry(self):
        """Return the action to run when this state is entered, or None if there is none."""
        registered = self.event_map.get('entry')
        return None if registered is None else registered[0]

    def exit(self):
        """Return the action to run when this state is left, or None if there is none."""
        registered = self.event_map.get('exit')
        return None if registered is None else registered[0]

    @staticmethod
    def _map_c0(state, action):
        """Map the C0 controls not covered by the defaults (00-17, 19 and 1C-1F) to `action`."""
        state.event_map[(0x00, 0x17)] = (action, None)
        state.event_map[0x19] = (action, None)
        state.event_map[(0x1C, 0x1F)] = (action, None)

    @staticmethod
    def generate_state(state_id):
        """
        Build the state object for a state id by adding the state specific events
        to the defaults every state starts with.

        :param state_id: one of the States ids
        :return: the new State, or None for an unknown id
        """
        state = State(state_id)
        events = state.event_map

        if state_id == States.GROUND:
            state.accept_utf8 = True
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x7F)] = (Actions.PRINT, None)
            events[(0xA0, 0x10FFFF)] = (Actions.PRINT, None)

        elif state_id == States.ESCAPE:
            events['entry'] = (Actions.CLEAR,)
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, States.ESCAPE_INTERMEDIATE)
            events[(0x30, 0x4F)] = (Actions.ESC_DISPATCH, States.GROUND)
            events[(0x51, 0x57)] = (Actions.ESC_DISPATCH, States.GROUND)
            events[0x59] = (Actions.ESC_DISPATCH, States.GROUND)
            events[0x5A] = (Actions.ESC_DISPATCH, States.GROUND)
            events[0x5C] = (Actions.ESC_DISPATCH, States.GROUND)
            events[(0x60, 0x7E)] = (Actions.ESC_DISPATCH, States.GROUND)
            events[0x58] = (None, States.SOS_PM_APC_STRING)
            events[0x5E] = (None, States.SOS_PM_APC_STRING)
            events[0x5F] = (None, States.SOS_PM_APC_STRING)
            events[0x50] = (None, States.DCS_ENTRY)
            events[0x5B] = (None, States.CSI_ENTRY)
            events[0x5D] = (None, States.OSC_STRING)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.ESCAPE_INTERMEDIATE:
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, None)
            events[(0x30, 0x7E)] = (Actions.ESC_DISPATCH, States.GROUND)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.CSI_ENTRY:
            events['entry'] = (Actions.CLEAR,)
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, States.CSI_INTERMEDIATE)
            events[(0x30, 0x39)] = (Actions.PARAM, States.CSI_PARAM)
            events[0x3B] = (Actions.PARAM, States.CSI_PARAM)
            events[(0x3C, 0x3F)] = (Actions.COLLECT, States.CSI_PARAM)
            events[0x3A] = (None, States.CSI_IGNORE)
            events[(0x40, 0x7E)] = (Actions.CSI_DISPATCH, States.GROUND)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.CSI_PARAM:
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, States.CSI_INTERMEDIATE)
            events[(0x30, 0x39)] = (Actions.PARAM, None)
            events[0x3B] = (Actions.PARAM, None)
            events[0x3A] = (None, States.CSI_IGNORE)
            events[(0x3C, 0x3F)] = (None, States.CSI_IGNORE)
            events[(0x40, 0x7E)] = (Actions.CSI_DISPATCH, States.GROUND)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.CSI_INTERMEDIATE:
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, None)
            events[(0x30, 0x3F)] = (None, States.CSI_IGNORE)
            events[(0x40, 0x7E)] = (Actions.CSI_DISPATCH, States.GROUND)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.CSI_IGNORE:
            State._map_c0(state, Actions.EXECUTE)
            events[(0x20, 0x3F)] = (Actions.IGNORE, None)
            events[(0x40, 0x7E)] = (None, States.GROUND)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.DCS_ENTRY:
            events['entry'] = (Actions.CLEAR,)
            State._map_c0(state, Actions.IGNORE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, States.DCS_INTERMEDIATE)
            events[(0x30, 0x39)] = (Actions.PARAM, States.DCS_PARAM)
            events[0x3B] = (Actions.PARAM, States.DCS_PARAM)
            events[(0x3C, 0x3F)] = (Actions.COLLECT, States.DCS_PARAM)
            events[0x3A] = (None, States.DCS_IGNORE)
            events[(0x40, 0x7E)] = (None, States.DCS_PASSTHROUGH)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.DCS_PARAM:
            State._map_c0(state, Actions.IGNORE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, States.DCS_INTERMEDIATE)
            events[(0x30, 0x39)] = (Actions.PARAM, None)
            events[0x3B] = (Actions.PARAM, None)
            events[0x3A] = (None, States.DCS_IGNORE)
            events[(0x3C, 0x3F)] = (None, States.DCS_IGNORE)
            events[(0x40, 0x7E)] = (None, States.DCS_PASSTHROUGH)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.DCS_INTERMEDIATE:
            State._map_c0(state, Actions.IGNORE)
            events[(0x20, 0x2F)] = (Actions.COLLECT, None)
            events[(0x30, 0x3F)] = (None, States.DCS_IGNORE)
            events[(0x40, 0x7E)] = (None, States.DCS_PASSTHROUGH)
            events[0x7F] = (Actions.IGNORE, None)

        elif state_id == States.DCS_PASSTHROUGH:
            events['entry'] = (Actions.HOOK,)
            State._map_c0(state, Actions.PUT)
            events[(0x20, 0x7E)] = (Actions.PUT, None)
            events[0x7F] = (Actions.IGNORE, None)
            events['exit'] = (Actions.UNHOOK,)

        elif state_id == States.DCS_IGNORE:
            # This state swallows the rest of a malformed device control string. It must
            # not run the hook action on entry: there is no handler to select for a
            # malformed string, and no unhook action would ever follow it.
            State._map_c0(state, Actions.IGNORE)
            events[(0x20, 0x7F)] = (Actions.IGNORE, None)

        elif state_id == States.OSC_STRING:
            events['entry'] = (Actions.OSC_START,)
            events[(0x00, 0x06)] = (Actions.IGNORE, None)
            events[(0x08, 0x17)] = (Actions.IGNORE, None)
            events[0x19] = (Actions.IGNORE, None)
            events[(0x1C, 0x1F)] = (Actions.IGNORE, None)
            events[(0x20, 0x7F)] = (Actions.OSC_PUT, None)
            # XTerm accepts either BEL or ST for terminating OSC sequences
            # This is different from the VT500 Parser diagram by Paul Flo Williams
            events[0x07] = (None, States.GROUND)
            events['exit'] = (Actions.OSC_END,)

        elif state_id == States.SOS_PM_APC_STRING:
            State._map_c0(state, Actions.IGNORE)
            events[(0x20, 0x7F)] = (Actions.IGNORE, None)

        else:
            # Unknown state id. Callers get None instead of a state.
            return None

        return state
class VT500Parser:
    """An implementation of a state machine for a parser for escape and control sequences,
    suitable for use in a VT emulator. Modeled after https://vt100.net/emu/dec_ansi_parser

    Input is fed code by code to input(). The parser performs the actions of the state
    machine, delegates the real work to the four handler objects, and keeps statistics
    about everything it has seen, which log_statistics() writes to the log."""

    # Default NOP implementation of a terminal driver concerned with how codes are to be displayed
    class DefaultTerminalOutputHandler:
        def print(self, code):
            pass

        def execute(self, code):
            pass

    # Default NOP implementation of an ESC and CSI handler
    class DefaultControlSequenceHandler:
        def esc_dispatch(self, intermediate, final_code):
            pass

        def csi_dispatch(self, private_marker, parameters, intermediate, final_code):
            pass

    # Default NOP implementation of a Device Control dispatcher and handler
    # We do not differentiate between a dispatcher and multiple handlers.
    # If the application requires that, it can pass different derived classes for that to the parser.
    class DefaultDcsHandler:
        def hook(self, private_marker, parameters, intermediate, final_code):
            return self

        def put(self, code):
            pass

        def end_of_data(self, code=None):
            pass

    # Default NOP implementation of an Operating System Command string handler
    class DefaultOscHandler:
        def start(self, code):
            pass

        def put(self, code):
            pass

        def end_of_data(self, code=None):
            pass

    def __init__(self):
        # Initialize handlers with default NOP implementations
        # These should be set by derived parsers with variants that do actual work
        self.terminal_output_handler = self.DefaultTerminalOutputHandler()
        self.control_sequence_handler = self.DefaultControlSequenceHandler()
        self.dc_control_handler = self.DefaultDcsHandler()
        self.osc_handler = self.DefaultOscHandler()
        # This is returned by the dc_control_handler.hook function
        self.dc_string_handler = self.dc_control_handler

        self.input_code = None
        self.private_flag = ''
        self.intermediate_char = ''
        self.final_char = ''
        self.parameter_string = ''
        self.state = State.get(States.GROUND)

        # Device control string. Buffered for statistics, because we can.
        self.device_control_string = ''
        # Operating system command. Buffered for statistics, because we can.
        self.operating_system_command = ''

        # Parse UTF-8 multibyte sequences into code points
        self.utf8_stm = Utf8StateMachine()

        # We keep some statistics to see what we are dealing with in a file.
        self.states_visited = {States.GROUND: 1}
        self.actions_performed = {}
        self.control_functions_seen = {}
        self.escape_sequences_seen = {}
        self.control_sequences_seen = {}
        self.device_control_functions_seen = {}
        self.device_control_strings = set()
        self.os_commands = set()

    def perform_action(self, action, code):
        """Run the method named after the action (if any) with the code and count the action.

        :param action: an Actions id, or None to do nothing
        :param code: the input code that triggered the action
        """
        if action is None:
            return
        LOG.log(LOG_TRACE, "%02x -> run action %s", code, action)
        # Actions are implemented by the methods named like the value of the action id
        method = getattr(self, action.value, self.default_action)
        method(code)
        self.stats_dict_inc(self.actions_performed, action)

    def default_action(self, code=None):
        """Fallback for actions that have no implementing method."""
        LOG.warning("ENCOUNTERED AN UNIMPLEMENTED ACTION")

    def transition_to(self, new_state):
        """Make new_state the current state and count the visit."""
        LOG.debug("Entering new state %s", new_state.id)
        self.state = new_state
        self.stats_dict_inc(self.states_visited, self.state.id)

    def input(self, code: int):
        """Feed one input code (a byte of the input stream) to the state machine.

        :param code: the input byte as an integer
        """
        if self.state.accept_utf8:
            # Parse possible multibyte sequences to code points. Only the
            # whole code points are then sent to the state as an event.
            stm_state = self.utf8_stm.input(code)
            if stm_state == Utf8StateMachine.State.INVALID:
                # An invalid sequence occurred, i.e. the current code is not the
                # continuation byte that was expected. We drop the incomplete sequence
                # and continue with the current code.
                LOG.warning("An invalid UTF-8 sequence occurred. Dropping sequence and continuing "
                            "with current code 0x%02x", code)
                # The current code may itself be the start of a new multibyte sequence,
                # so it is fed to the UTF-8 parser again, which now treats it as the
                # first byte of a sequence. Passing it on as a raw code instead would
                # let its continuation bytes be mistaken for C1 controls.
                stm_state = self.utf8_stm.input(code)
            if stm_state == Utf8StateMachine.State.ACCEPT:
                # Ready to get a code point
                code = self.utf8_stm.get()
            else:
                # Need more input
                return

        # Send event to state
        if LOG.isEnabledFor(LOG_TRACE):
            # chr() only accepts code points up to 0x10FFFF
            printable = 0x20 <= code <= 0x7E or 0xA0 < code <= 0x10FFFF
            LOG.log(LOG_TRACE, "> %02x %s", code, "(" + chr(code) + ")" if printable else '')
        action, new_state = self.state.event(code)

        # If a new state is returned,
        # - run exit action of state, if any
        # - run transition action, if any
        # - set current state to new state
        # - run entry action of new state, if any
        if isinstance(new_state, State):
            self.perform_action(self.state.exit(), code)
            self.perform_action(action, code)
            self.transition_to(new_state)
            self.perform_action(self.state.entry(), code)
        # If only an action was returned, execute the action
        elif action is not None:
            self.perform_action(action, code)

    # Implementation of the Actions

    def ignore(self, code=None):
        """The character or control is not processed.
        No observable difference in the terminal's state would occur if the character that caused this action
        was not present in the input stream."""
        pass

    def print(self, code):
        """The current code should be mapped to a glyph according to the character set mappings and shift states
        in effect, and that glyph should be displayed."""
        self.terminal_output_handler.print(code)

    def execute(self, code):
        """The C0 or C1 control function should be executed, which may have any one of a variety of effects,
        including changing the cursor position, suspending or resuming communications or changing the
        shift states in effect. There are no parameters to this action."""
        self.stats_dict_inc(self.control_functions_seen, code)
        self.terminal_output_handler.execute(code)

    def clear(self, _code=None):
        """This action causes the current private flag, intermediate characters, final character
        and parameters to be forgotten."""
        self.private_flag = ''
        self.intermediate_char = ''
        self.final_char = ''
        self.parameter_string = ''

    def collect(self, code):
        """The private marker or intermediate character should be stored for later use in selecting
        a control function to be executed when a final character arrives. """
        # We want to differentiate between private markers and
        # intermediate characters. Not sure why.
        if 0x3c <= code <= 0x3f:
            self.private_flag = chr(code)
        else:
            self.intermediate_char += chr(code)

    def param(self, code):
        """This action collects the characters of a parameter string for a control sequence or device control
        sequence. The characters processed by this action are the digits 0-9 (codes 30-39) and the semicolon
        (code 3B). The semicolon separates parameters. The string is passed on to the handlers unparsed."""
        self.parameter_string += chr(code)

    def esc_dispatch(self, code):
        """The final character of an escape sequence has arrived, so determine the control function to be executed
        from the intermediate character(s) and final character, and execute it. The intermediate characters are
        available because collect stored them as they arrived."""
        self.final_char += chr(code)
        self.stats_dict_inc(self.escape_sequences_seen, 'Esc' + self.private_flag + self.parameter_string
                            + self.intermediate_char + self.final_char)
        LOG.debug("execute escape sequence: %s_%s", self.intermediate_char, self.final_char)
        self.control_sequence_handler.esc_dispatch(self.intermediate_char, self.final_char)

    def csi_dispatch(self, code):
        """A final character has arrived, so determine the control function to be executed from private marker,
        intermediate character(s) and final character, and execute it, passing in the parameter list."""
        self.final_char += chr(code)
        self.stats_dict_inc(self.control_sequences_seen, 'Esc[' + self.private_flag + self.parameter_string
                            + self.intermediate_char + self.final_char)
        LOG.debug("determine control function from %s_%s_%s",
                  self.private_flag, self.intermediate_char, self.final_char)
        LOG.debug("execute with parameters: %s", self.parameter_string)
        self.control_sequence_handler.csi_dispatch(self.private_flag, self.parameter_string,
                                                   self.intermediate_char, self.final_char)

    def hook(self, code):
        """This action is invoked when a final character arrives in the first part of a device control string.
        It determines the control function from the private marker, intermediate character(s) and final character,
        and executes it, passing in the parameter list. It also selects a handler function for the rest of the
        characters in the control string. This handler function will be called by the put action for every character
        in the control string as it arrives."""
        self.final_char += chr(code)
        self.device_control_string = ''
        self.stats_dict_inc(self.device_control_functions_seen, 'EscP' + self.private_flag + self.parameter_string
                            + self.intermediate_char + self.final_char)
        LOG.debug("determine control function from %s_%s_%s",
                  self.private_flag, self.intermediate_char, self.final_char)
        LOG.debug("execute with parameters: %s", self.parameter_string)
        LOG.debug("Select handler function for following put actions")
        self.dc_string_handler = self.dc_control_handler.hook(self.private_flag, self.parameter_string,
                                                              self.intermediate_char, self.final_char)

    def put(self, code=None):
        """This action passes characters from the data string part of a device control string to a handler that
        has previously been selected by the hook action. C0 controls are also passed to the handler."""
        self.device_control_string += chr(code)
        self.dc_string_handler.put(code)

    def unhook(self, _code=None):
        """When a device control string is terminated by ST, CAN, SUB or ESC, this action calls the previously
        selected handler function with an "end of data" parameter. This allows the handler to finish neatly."""
        self.device_control_strings.add(self.device_control_string)
        LOG.debug("Signal EOD to handler function")
        self.dc_string_handler.end_of_data()

    def osc_start(self, _code=None):
        """When the control function OSC (Operating System Command) is recognised, this action initializes
        an external parser (the "OSC Handler") to handle the characters from the control string. OSC control strings
        are not structured in the same way as device control strings, so there is no choice of parsers."""
        self.operating_system_command = ''
        LOG.info("Initialize OSC handler")
        self.osc_handler.start(_code)

    def osc_put(self, code):
        """This action passes characters from the control string to the OSC Handler as they arrive.
        There is therefore no need to buffer characters until the end of the control string is recognised."""
        self.operating_system_command += chr(code)
        self.osc_handler.put(code)

    def osc_end(self, _code=None):
        """This action is called when the OSC string is terminated by ST, CAN, SUB or ESC,
        to allow the OSC handler to finish neatly."""
        self.os_commands.add(self.operating_system_command)
        LOG.info("Finish OSC handler")
        self.osc_handler.end_of_data()

    # Private helper functions

    def stats_dict_inc(self, stats_dict, code):
        """Increase count in map, or create new entry with count 1 if it doesn't exist yet."""
        stats_dict[code] = stats_dict.get(code, 0) + 1

    def _log_counts(self, title, counts, label=str):
        """Log a title followed by the entries of a count map, most frequent first."""
        LOG.info(title)
        for key in sorted(counts, key=counts.get, reverse=True):
            LOG.info("%s : %s", label(key), counts[key])

    def log_statistics(self):
        """Gather statistics and dump to log"""
        LOG.info("##########################################################################")
        LOG.info("######                      S T A T I S T I C S                    #######")
        LOG.info("##########################################################################")
        self._log_counts("-- Visited States:", self.states_visited, lambda state: state.name)
        self._log_counts("-- Executed Actions:", self.actions_performed, lambda action: action.name)
        self._log_counts("-- Control Functions:", self.control_functions_seen, lambda cf: f"{cf:02x}")
        self._log_counts("-- Escape Sequences:", self.escape_sequences_seen)
        self._log_counts("-- Control Sequences:", self.control_sequences_seen)
        self._log_counts("-- Device Control Functions:", self.device_control_functions_seen)
        LOG.info("-- Device Control strings:")
        for dcs in self.device_control_strings:
            LOG.info("  %s", dcs)
        LOG.info("-- Operating System Commands:")
        for osc in self.os_commands:
            LOG.info("  %s", osc)
def parse(logfile, chunk_size=65536):
    """Read the input file and input its bytes one by one to a VT500Parser instance.

    The file is read in chunks instead of with one read call per byte. The statistics
    the parser gathered are written to the log at the end.

    :param logfile: an open file object, preferably opened in binary mode
    :param chunk_size: the number of bytes to request from the file per read call
    """
    parser = VT500Parser()
    while True:
        chunk = logfile.read(chunk_size)
        if not chunk:
            break
        for item in chunk:
            # Iterating over bytes yields integers already. The characters of a file
            # opened in text mode have to be converted to their code first.
            parser.input(item if isinstance(item, int) else ord(item))
    # Gather statistics and dump to log
    parser.log_statistics()
def main():
    """Parse the log file named by the first command line argument.

    Prints a usage hint and exits with status 1 if no file is given.
    """
    if len(sys.argv) <= 1:
        print("Log file missing. Specify log file to parse.")
        # sys.exit instead of the builtin exit(): the latter is provided by the site
        # module for interactive use and is not guaranteed to exist. A missing
        # argument is an error, so the exit status is non-zero.
        sys.exit(1)
    with open(sys.argv[1], 'rb') as logfile:
        LOG.info("Parsing file %s", sys.argv[1])
        parse(logfile)
if __name__ == '__main__':
    # Script entry point: give the custom trace level a readable name, send all log
    # output (including traces) to a freshly truncated parser.log, then run the parser.
    logging.addLevelName(LOG_TRACE, "TRACE")
    LOG_FORMAT = "%(levelname)s :%(module)s - %(message)s"
    logging.basicConfig(
        filename="parser.log",
        filemode='w',
        format=LOG_FORMAT,
        level=LOG_TRACE,
    )
    main()