# parser_utils.py
from __future__ import annotations

import re
from enum import Enum, auto
from typing import List


class TokenId(Enum):
    NUMBER = auto()
    IDENTIFIER = auto()
    OP_PLUS = auto()
    OP_MINUS = auto()
    OP_MUL = auto()
    OP_DIV = auto()
    OP_GT = auto()
    OP_LT = auto()
    OP_EQ = auto()
    OP_NEQ = auto()
    ASSIGN = auto()
    SEMICOLON = auto()
    COMMA = auto()
    RBRACE_LEFT = auto()   # '(' -- round brace
    RBRACE_RIGHT = auto()  # ')'
    CBRACE_LEFT = auto()   # '{' -- curly brace
    CBRACE_RIGHT = auto()  # '}'
    IF = auto()
    WHILE = auto()
    FN_DEF = auto()
    RETURN = auto()
    # Special tokens, generated while creating the AST
    FUNC_CALL = auto()


class Token:
    def __init__(self, token_id: TokenId, value=None) -> None:
        self.token_id = token_id
        self.value = value

    def __repr__(self) -> str:
        return f'Token({self.token_id}, {self.value})'

    def __str__(self) -> str:
        return f'({self.token_id}, {self.value})'


class AstNode:
    def __init__(self, token: Token, children: List[AstNode] | None = None):
        self.token = token
        # Avoid a mutable default argument: a shared [] default would be
        # reused across every node created without explicit children.
        self.children = children if children is not None else []
        if not isinstance(self.children, list):
            raise TypeError(f"children must be a list, got {type(self.children)}")

    def __repr__(self) -> str:
        return f"{self.token} -> {self.children}"

    def token_id(self) -> TokenId:
        return self.token.token_id

    def token_value(self) -> str:
        return self.token.value

    def left(self) -> AstNode:
        # Only meaningful on binary nodes (exactly two children).
        return self.children[0]

    def right(self) -> AstNode:
        # Only meaningful on binary nodes (exactly two children).
        return self.children[1]


# Patterns are tried in insertion order, so a longer operator must appear
# before any pattern that matches its prefix (e.g. '==' before '=').
# Whitespace maps to None and produces no token.
_token_map = {
    r'\s+': None,
    # Keywords
    r'if\b': TokenId.IF,
    r'while\b': TokenId.WHILE,
    r'def\b': TokenId.FN_DEF,
    r'return\b': TokenId.RETURN,
    # Numbers / Identifiers
    r'[0-9]+': TokenId.NUMBER,
    r'[a-zA-Z_][a-zA-Z0-9_]*': TokenId.IDENTIFIER,
    # Braces
    r'{': TokenId.CBRACE_LEFT,
    r'}': TokenId.CBRACE_RIGHT,
    r'\(': TokenId.RBRACE_LEFT,
    r'\)': TokenId.RBRACE_RIGHT,
    # Boolean operators
    r'>': TokenId.OP_GT,
    r'<': TokenId.OP_LT,
    r'==': TokenId.OP_EQ,
    r'!=': TokenId.OP_NEQ,
    # Math operators
    r'\+': TokenId.OP_PLUS,
    r'-': TokenId.OP_MINUS,
    r'\*': TokenId.OP_MUL,
    r'/': TokenId.OP_DIV,
    # Assignment operators
    r'=': TokenId.ASSIGN,
    # Misc syntax
    r';': TokenId.SEMICOLON,
    r',': TokenId.COMMA,
}


def tokenize(src: str, token_map=_token_map) -> List[Token]:
    parts = []
    while src:
        for pattern, token_id in token_map.items():
            if m := re.match(pattern, src):
                if token_id:  # whitespace maps to None and emits no token
                    parts.append(Token(token_id, m[0]))
                src = src[len(m[0]):]
                # Restart from the highest-priority pattern; without this
                # break, later patterns could consume input out of order.
                break
        else:
            # No pattern matched; bail out instead of looping forever.
            raise SyntaxError(f"unrecognized input: {src[:20]!r}")
    return parts
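
# A quick illustration of the expected output (a sketch only; 'x = 1;' is
# arbitrary example input, not anything this module depends on):
#   tokenize('x = 1;')
#   -> [Token(TokenId.IDENTIFIER, x), Token(TokenId.ASSIGN, =),
#       Token(TokenId.NUMBER, 1), Token(TokenId.SEMICOLON, ;)]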


def match(tokens: List[Token], token_id: TokenId | List[TokenId]) -> Token | None:
    """Consume and return the next token if its id is (one of) token_id."""
    tok = look(tokens)
    token_id = token_id if isinstance(token_id, list) else [token_id]
    if not tok:
        print(f"Error, expected {token_id}, got nothing")
        return None
    if tok not in token_id:
        print(f"Error, expected {token_id}, got {tok}: {tokens[0]}")
        return None
    return tokens.pop(0)


def look(tokens: List[Token], offset=0) -> TokenId | None:
    """Peek at the token id at `offset` without consuming it."""
    return tokens[offset].token_id if len(tokens) > offset else None
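

# Minimal usage sketch (an assumption for illustration, not part of the
# parser itself): tokenize a small snippet, then consume the leading tokens
# with look()/match().
if __name__ == '__main__':
    toks = tokenize('if (x == 1) { return x; }')
    assert look(toks) == TokenId.IF
    match(toks, TokenId.IF)
    match(toks, TokenId.RBRACE_LEFT)              # '('
    match(toks, TokenId.IDENTIFIER)               # x
    match(toks, [TokenId.OP_EQ, TokenId.OP_NEQ])  # '==' (either comparison accepted)
    match(toks, TokenId.NUMBER)                   # 1
    match(toks, TokenId.RBRACE_RIGHT)             # ')'
    print('remaining:', toks)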