Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a5eb9e4

Browse files
committedDec 25, 2023
Refactor and simplify solution to start from variables
1 parent cb05e90 commit a5eb9e4

9 files changed

+196
-239
lines changed
 

‎src/wake/cli.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
import logging
33
import os
44
import sys
5-
from src.wake import parser
65

76
from wake import engine
7+
from wake.parser import Parser
8+
from wake.typedef import AnyDict
89

910
logging.basicConfig(level=logging.DEBUG)
1011
logger = logging.getLogger(__name__)
@@ -23,17 +24,14 @@ def main() -> None:
2324
with open(FILE_NAME, "r", encoding="utf-8") as fd:
2425
contents = fd.read()
2526

26-
parser_obj = parser.Parser()
27-
model: parser.Model = parser_obj.parse_makefile(contents)
28-
if not model.labels:
27+
parser_obj = Parser()
28+
ast: AnyDict = parser_obj.parse_makefile(contents)
29+
if not ast:
2930
raise ValueError(f"'{FILE_NAME}' is missing label definitions.")
3031

3132
label_name = sys.argv[1]
3233

33-
for label in model.labels:
34-
if label_name in [label.name, label.short]:
35-
break
36-
else:
34+
if label not in ast:
3735
raise ValueError(f"Could not find label '{label_name}'")
3836

3937
args = sys.argv[2:]

‎src/wake/engine.py

-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from collections import namedtuple
33

44
from wake import typedef
5-
from src.wake import parser
65

76
Std = namedtuple("Std", ["in_", "out", "err"])
87

‎src/wake/parser.py

+8-101
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,19 @@
1-
import collections
2-
import re
3-
1+
from wake.tokenizer import EOF, EOF_TOKEN, Token, Tokenizer
42
from wake.typedef import AnyDict
53

64

7-
Pattern = collections.namedtuple("Pattern", ["regexp", "name"])
8-
Token = collections.namedtuple("Token", ["type", "value"], defaults=["", ""])
9-
10-
EOF = "EOF"
11-
EOF_TOKEN = Token(type=EOF, value="")
12-
13-
NUMBER = r"\d+"
14-
STRING = r"([\"'])(?:(?=(\\?))\2.)*?\1"
15-
COMMENT = r"#.*$"
16-
NL = r"\n"
17-
NON_WS = r"\S*"
18-
WS = r"[^\S\r\n]*"
19-
IDENTIFIER = r"[a-zA-Z_]+(?:(?:_(?!-)|-(?!_))|[a-zA-Z0-9])*[a-zA-Z0-9_]+"
20-
21-
spec: list[Pattern] = [
22-
Pattern(WS, "WS"),
23-
Pattern(COMMENT, "COMMENT"),
24-
Pattern(NUMBER, "NUMBER"),
25-
Pattern(STRING, "STRING"),
26-
# Pattern(NON_WS, "NON_WS"),
27-
Pattern(IDENTIFIER, "IDENTIFIER"),
28-
Pattern(NL, "NL"),
29-
]
30-
31-
32-
class Tokenizer:
33-
def __init__(self) -> None:
34-
self.string = ""
35-
self.cursor = 0
36-
37-
def get_next(self) -> Token:
38-
"""Gets the next token in the input string.
39-
40-
Returns:
41-
Token: The next token or EOF_TOKEN
42-
if there are no more tokens.
43-
44-
Raises:
45-
SyntaxError: Unexpected token.
46-
"""
47-
if not self.has_more_tokens():
48-
return EOF_TOKEN
49-
50-
string = self.string[self.cursor :]
51-
for regexp, token_type in spec:
52-
value = self._match(r"^" + regexp, string)
53-
if not value:
54-
continue
55-
56-
if token_type == "WS":
57-
return self.get_next()
58-
59-
if token_type == "COMMENT":
60-
return self.get_next()
61-
62-
return Token(
63-
type=token_type,
64-
value=value,
65-
)
66-
67-
raise SyntaxError(f"Unexpected token: '{string[0]}'.")
68-
69-
def _match(self, pattern: str, string: str) -> str | None:
70-
match = re.match(pattern, string, flags=re.IGNORECASE)
71-
if match:
72-
self.cursor += len(match[0])
73-
return match[0]
74-
return None
75-
76-
def has_more_tokens(self) -> bool:
77-
"""Checks if there are more tokens in the input string.
78-
79-
Returns:
80-
bool: True if cursor is not at the end of the input,
81-
False otherwise.
82-
"""
83-
return self.cursor < len(self.string)
84-
85-
865
class Parser:
876
def __init__(self) -> None:
887
self.string = ""
898
self.tokenizer = Tokenizer()
909
self.lookahead = EOF_TOKEN
9110

11+
def parse(self, string) -> Token:
12+
self.string = string
13+
self.tokenizer.string = string
14+
self.lookahead = self.tokenizer.get_next()
15+
return self.variables()
16+
9217
def _consume(self, token_type: str) -> Token:
9318
"""Consumes token of particular type.
9419
@@ -113,32 +38,14 @@ def _consume(self, token_type: str) -> Token:
11338

11439
self.lookahead = self.tokenizer.get_next()
11540
return token
116-
117-
def parse(self, string) -> Token:
118-
self.string = string
119-
self.tokenizer.string = string
120-
self.lookahead = self.tokenizer.get_next()
121-
return self.variables()
122-
123-
def wakefile(self) -> AnyDict:
124-
# statement list
125-
value: dict[str, AnyDict] = {}
126-
127-
if variables := self.variables():
128-
value.update(variables=variables)
129-
if procecures := self.procecures():
130-
value.update(procecures=procecures)
131-
132-
return dict(type="wakefile", value=value)
133-
13441
def variables(self) -> list[AnyDict]:
13542
variables = []
13643
while self.lookahead != "LABEL":
13744
statement = self.statement()
13845
variables.append(statement)
13946
return variables
14047

141-
def procecures(self) -> list[AnyDict]:
48+
def recipes(self) -> list[AnyDict]:
14249
procecures = []
14350
return procecures
14451

‎src/wake/tokenizer.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import collections
2+
import re
3+
4+
Pattern = collections.namedtuple("Pattern", ["regexp", "name"])
5+
Token = collections.namedtuple("Token", ["type", "value"], defaults=["", ""])
6+
7+
EOF = "EOF"
8+
EOF_TOKEN = Token(type=EOF, value="")
9+
10+
NUMBER = r"\d+"
11+
STRING = r"([\"'])(?:(?=(\\?))\2.)*?\1"
12+
COMMENT = r"#.*$"
13+
NL = r"\n"
14+
NON_WS = r"\S*"
15+
WS = r"[^\S\r\n]*"
16+
IDENTIFIER = r"[a-zA-Z_]+(?:(?:_(?!-)|-(?!_))|[a-zA-Z0-9])*[a-zA-Z0-9_]+"
17+
18+
spec: list[Pattern] = [
19+
Pattern(WS, "WS"),
20+
Pattern(COMMENT, "COMMENT"),
21+
Pattern(NUMBER, "NUMBER"),
22+
Pattern(STRING, "STRING"),
23+
# Pattern(NON_WS, "NON_WS"),
24+
Pattern(IDENTIFIER, "IDENTIFIER"),
25+
Pattern(NL, "NL"),
26+
]
27+
28+
29+
class Tokenizer:
30+
def __init__(self) -> None:
31+
self.string = ""
32+
self.cursor = 0
33+
34+
def get_next(self) -> Token:
35+
"""Gets the next token in the input string.
36+
37+
Returns:
38+
Token: The next token or EOF_TOKEN
39+
if there are no more tokens.
40+
41+
Raises:
42+
SyntaxError: Unexpected token.
43+
"""
44+
if not self.has_more_tokens():
45+
return EOF_TOKEN
46+
47+
string = self.string[self.cursor :]
48+
for regexp, token_type in spec:
49+
value = self._match(r"^" + regexp, string)
50+
if not value:
51+
continue
52+
53+
if token_type == "WS":
54+
return self.get_next()
55+
56+
if token_type == "COMMENT":
57+
return self.get_next()
58+
59+
return Token(
60+
type=token_type,
61+
value=value,
62+
)
63+
64+
raise SyntaxError(f"Unexpected token: '{string[0]}'.")
65+
66+
def _match(self, pattern: str, string: str) -> str | None:
67+
match = re.match(pattern, string, flags=re.IGNORECASE)
68+
if match:
69+
self.cursor += len(match[0])
70+
return match[0]
71+
return None
72+
73+
def has_more_tokens(self) -> bool:
74+
"""Checks if there are more tokens in the input string.
75+
76+
Returns:
77+
bool: True if cursor is not at the end of the input,
78+
False otherwise.
79+
"""
80+
return self.cursor < len(self.string)

‎src/wake/typedef.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
11
import typing
22

3-
43
AnyDict = dict[str, typing.Any]

‎tests/test_cli.py

-9
This file was deleted.

‎tests/test_exec.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import pytest
2+
3+
@pytest.fixture
4+
def fixture_name(request):
5+
return request.param
6+
7+
@pytest.mark.parametrize('fixture_name', ['foo', 'bar'], indirect=True)
8+
def test_indirect(fixture_name):
9+
assert fixture_name in ["foo", "bar"]

‎tests/test_parser.py

+93-97
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,108 @@
11
import pytest
22

3-
from wake import pars
3+
from wake.parser import Parser
44

55

66
@pytest.fixture
7-
def parser() -> pars.Parser:
8-
return pars.Parser()
7+
def parser() -> Parser:
8+
return Parser()
99

1010

11-
@pytest.mark.parametrize(
12-
"string, expected",
13-
[
14-
(" 'one'", "one"),
15-
('"two" ', "two"),
16-
(' "two" ', "two"),
17-
],
18-
)
19-
def test_ignore_whitespace(parser, string: pars.Parser, expected):
20-
actual = parser.parse(string)
21-
literal = dict(type="literal", value=dict(type="string", value=expected))
22-
assert actual == literal
11+
# @pytest.mark.parametrize(
12+
# "string, expected",
13+
# [
14+
# (" 'one'", "one"),
15+
# ('"two" ', "two"),
16+
# (' "two" ', "two"),
17+
# ],
18+
# )
19+
# def test_ignore_whitespace(parser, string: Parser, expected):
20+
# actual = parser.parse(string)
21+
# literal = {"type": "literal", "value": {"type": "string", "value": expected}}
22+
# assert actual == literal
2323

2424

25-
class TestLiteral:
26-
@pytest.mark.parametrize(
27-
"string, expected",
28-
[
29-
("5", 5),
30-
("55", 55),
31-
("355", 355),
32-
],
33-
)
34-
def test_number(self, parser, string, expected):
35-
actual = parser.parse(string)
36-
literal = dict(type="literal", value=dict(type="number", value=expected))
37-
assert actual == literal
25+
# class TestLiteral:
26+
# @pytest.mark.parametrize(
27+
# "string, expected",
28+
# [
29+
# ("5", 5),
30+
# ("55", 55),
31+
# ("355", 355),
32+
# ],
33+
# )
34+
# def test_number(self, parser, string, expected):
35+
# actual = parser.parse(string)
36+
# literal = {"type": "literal", "value": {"type": "number", "value": expected}}
37+
# assert actual == literal
3838

39-
@pytest.mark.parametrize(
40-
"string, expected",
41-
[
42-
("'one'", "one"),
43-
('"two"', "two"),
44-
],
45-
)
46-
def test_string(self, parser, string, expected):
47-
actual = parser.parse(string)
48-
literal = dict(type="literal", value=dict(type="string", value=expected))
49-
assert actual == literal
39+
# @pytest.mark.parametrize(
40+
# "string, expected",
41+
# [
42+
# ("'one'", "one"),
43+
# ('"two"', "two"),
44+
# ],
45+
# )
46+
# def test_string(self, parser, string, expected):
47+
# actual = parser.parse(string)
48+
# literal = {"type": "literal", "value": {"type": "string", "value": expected}}
49+
# assert actual == literal
5050

51-
@pytest.mark.parametrize(
52-
"string, expected",
53-
[
54-
("'one'", dict(type="string", value="one")),
55-
('"two"', dict(type="string", value="two")),
56-
("55", dict(type="number", value=55)),
57-
],
58-
)
59-
def test_literal(self, parser, string, expected):
60-
actual = parser.parse(string)
61-
literal = dict(type="literal", value=expected)
62-
assert actual == literal
51+
# @pytest.mark.parametrize(
52+
# "string, expected",
53+
# [
54+
# ("'one'", {"type": "string", "value": "one"}),
55+
# ('"two"', {"type": "string", "value": "two"}),
56+
# ("55", {"type": "number", "value": 55}),
57+
# ],
58+
# )
59+
# def test_literal(self, parser, string, expected):
60+
# actual = parser.parse(string)
61+
# literal = {"type": "literal", "value": expected}
62+
# assert actual == literal
6363

6464

65-
class TestVariable:
66-
@pytest.mark.parametrize(
67-
"string, expected",
68-
[
69-
('VAR="foo"', ""),
70-
("VAR=foo", ""),
71-
("VAR=foo_bar", ""),
72-
("VAR=foo-bar", ""),
73-
("VAR=_foo", ""),
74-
("VAR=foo", ""),
75-
],
76-
)
77-
def test_variable_ok(self, parser, string, expected):
78-
actual = parser.parse(string)
79-
variable = dict(type="variable", value=dict(type="string", value=expected))
80-
assert actual == variable
65+
# class TestVariable:
66+
# @pytest.mark.parametrize(
67+
# "string, expected",
68+
# [
69+
# ('VAR="foo"', ""),
70+
# ("VAR=foo", ""),
71+
# ("VAR=foo_bar", ""),
72+
# ("VAR=foo-bar", ""),
73+
# ("VAR=_foo", ""),
74+
# ("VAR=foo", ""),
75+
# ],
76+
# )
77+
# def test_variable_ok(self, parser, string, expected):
78+
# actual = parser.parse(string)
79+
# variable = {"type": "variable", "value": {"type": "string", "value": expected}}
80+
# assert actual == variable
8181

82-
@pytest.mark.parametrize(
83-
"string, expected",
84-
[
85-
("aVaR", None)
86-
("_var_", None)
87-
("V_a__r", None)
88-
("VAR", None)
89-
("var", None)
90-
("___VAr___", None)
91-
],
92-
)
93-
def test_variable_ok(self, parser, string, expected):
94-
actual = parser.parse(string)
95-
variable = dict(type="variable", value=dict(type="string", value=expected))
96-
assert actual == variable
82+
# @pytest.mark.parametrize(
83+
# "string, expected",
84+
# [
85+
# ("aVaR", None)("_var_", None)("V_a__r", None)("VAR", None)("var", None)(
86+
# "___VAr___", None
87+
# )("_557", None)("v557", None)("v5_5_", None)
88+
# ],
89+
# )
90+
# def test_variable_ok_2(self, parser, string, expected):
91+
# actual = parser.string = string
92+
# variable = {"type": "variable", "value": {"type": "identifier", "value": expected}}
93+
# assert actual == variable
9794

98-
99-
@pytest.mark.parametrize(
100-
"string, expected_err",
101-
[
102-
('VAR=="foo"', SyntaxError),
103-
("VAR=fo-_o", SyntaxError),
104-
("VAR=foo_bar", SyntaxError),
105-
("VAR=foo-bar", SyntaxError),
106-
("VAR=_foo", SyntaxError),
107-
("VAR=foo", SyntaxError),
108-
],
109-
)
110-
def test_variable_fail(self, parser, string, expected_err):
111-
with pytest.raises(ValueError):
112-
_ = parser.parse(string)
95+
# @pytest.mark.parametrize(
96+
# "string, expected_err",
97+
# [
98+
# ('VAR=="foo"', SyntaxError),
99+
# ("VAR=fo-_o", SyntaxError),
100+
# ("VAR=foo_bar", SyntaxError),
101+
# ("VAR=foo-bar", SyntaxError),
102+
# ("VAR=_foo", SyntaxError),
103+
# ("VAR=foo", SyntaxError),
104+
# ],
105+
# )
106+
# def test_variable_fail(self, parser, string, expected_err):
107+
# with pytest.raises(ValueError):
108+
# _ = parser.parse(string)

‎tests/test_tokenizer.py

-22
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.