-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathparser_x86.py
111 lines (94 loc) · 3.11 KB
/
parser_x86.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Author: Joe Near
# License: GPLv3
from lark import Lark
# Parser for whole x86 assembly programs (LALR, start rule `prog`).
#
# A program is zero or more blocks.  A block is a `.globl NAME` directive,
# an `.align NUMBER` directive, or a `NAME:` label followed by zero or more
# instructions.  Parse-tree nodes are named by the `->` aliases in the
# grammar (e.g. `movq`, `indirect_jmp`, `reg_a`, `mem_a`).
#
# The instruction and operand rules accept the same syntax as the ones in
# `x86_parser_instrs`; keep the two grammars in sync when editing either.
#
# NOTE: every mnemonic is listed exactly once in `instr`.  Listing the same
# alternative twice (as was once the case for `xorq`) adds nothing to the
# accepted language.
x86_parser = Lark(r"""
    // One instruction: mnemonic followed by its operands.
    ?instr: "movq" arg "," arg   -> movq
          | "addq" arg "," arg   -> addq
          | "subq" arg "," arg   -> subq
          | "cmpq" arg "," arg   -> cmpq
          | "xorq" arg "," arg   -> xorq
          | "leaq" arg "," arg   -> leaq
          | "negq" arg           -> negq
          | "jmp" CNAME          -> jmp
          | "jmp" "*" arg        -> indirect_jmp
          | "je" CNAME           -> je
          | "jl" CNAME           -> jl
          | "jle" CNAME          -> jle
          | "jg" CNAME           -> jg
          | "jge" CNAME          -> jge
          | "sete" arg           -> sete
          | "setl" arg           -> setl
          | "setle" arg          -> setle
          | "setg" arg           -> setg
          | "setge" arg          -> setge
          | "movzbq" arg "," arg -> movzbq
          | "callq" CNAME        -> callq
          | "callq" "*" arg      -> indirect_callq
          | "pushq" arg          -> pushq
          | "popq" arg           -> popq
          | "retq"               -> retq

    // Assembler directives, or a label with the instructions that follow it.
    block: ".globl" CNAME
         | ".align" NUMBER
         | CNAME ":" (instr)*

    // Operands: $immediate, %register, #variable, (%reg), offset(%reg),
    // and name(%reg).
    ?arg: "$" atom               -> int_a
        | "%" reg                -> reg_a
        | "#" CNAME              -> var_a
        | "(" "%" reg ")"        -> direct_mem_a
        | atom "(" "%" reg ")"   -> mem_a
        | CNAME "(" "%" reg ")"  -> global_val_a

    // Integer literal, optionally negated (negation may be nested).
    ?atom: NUMBER                -> int_a
         | "-" atom              -> neg_a

    // Recognised register names.
    !?reg: "rsp" | "rbp" | "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi"
         | "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"
         | "al" | "rip"

    prog: block*

    %import common.NUMBER
    %import common.CNAME
    %import common.WS
    %ignore WS
""", start='prog', parser='lalr')
# Parser for a bare sequence of x86 instructions (LALR, start rule `instrs`).
#
# Unlike `x86_parser`, this grammar has no labels or `.globl` / `.align`
# directives: the input is simply zero or more instructions.  Parse-tree
# nodes are named by the `->` aliases in the grammar (e.g. `movq`,
# `indirect_callq`, `reg_a`, `mem_a`).
#
# The instruction and operand rules accept the same syntax as the ones in
# `x86_parser`; keep the two grammars in sync when editing either.
#
# NOTE: every mnemonic is listed exactly once in `instr`.  Listing the same
# alternative twice (as was once the case for `xorq`) adds nothing to the
# accepted language.
x86_parser_instrs = Lark(r"""
    // One instruction: mnemonic followed by its operands.
    ?instr: "movq" arg "," arg   -> movq
          | "addq" arg "," arg   -> addq
          | "subq" arg "," arg   -> subq
          | "cmpq" arg "," arg   -> cmpq
          | "xorq" arg "," arg   -> xorq
          | "leaq" arg "," arg   -> leaq
          | "negq" arg           -> negq
          | "jmp" CNAME          -> jmp
          | "jmp" "*" arg        -> indirect_jmp
          | "je" CNAME           -> je
          | "jl" CNAME           -> jl
          | "jle" CNAME          -> jle
          | "jg" CNAME           -> jg
          | "jge" CNAME          -> jge
          | "sete" arg           -> sete
          | "setl" arg           -> setl
          | "setle" arg          -> setle
          | "setg" arg           -> setg
          | "setge" arg          -> setge
          | "movzbq" arg "," arg -> movzbq
          | "callq" CNAME        -> callq
          | "callq" "*" arg      -> indirect_callq
          | "pushq" arg          -> pushq
          | "popq" arg           -> popq
          | "retq"               -> retq

    instrs: instr*

    // Operands: $immediate, %register, #variable, (%reg), offset(%reg),
    // and name(%reg).
    ?arg: "$" atom               -> int_a
        | "%" reg                -> reg_a
        | "#" CNAME              -> var_a
        | "(" "%" reg ")"        -> direct_mem_a
        | atom "(" "%" reg ")"   -> mem_a
        | CNAME "(" "%" reg ")"  -> global_val_a

    // Integer literal, optionally negated (negation may be nested).
    ?atom: NUMBER                -> int_a
         | "-" atom              -> neg_a

    // Recognised register names.
    !?reg: "rsp" | "rbp" | "rax" | "rbx" | "rcx" | "rdx" | "rsi" | "rdi"
         | "r8" | "r9" | "r10" | "r11" | "r12" | "r13" | "r14" | "r15"
         | "al" | "rip"

    %import common.NUMBER
    %import common.CNAME
    %import common.WS
    %ignore WS
""", start='instrs', parser='lalr')