Refactor and simplify solution to start from variables

stanislavsabev · stanislavsabev · commit a5eb9e44c562 · 2023-12-25T18:05:54.000+01:00
diff --git a/src/wake/cli.py b/src/wake/cli.py
@@ -2,9 +2,10 @@
 import logging
 import os
 import sys
-from src.wake import parser
 
 from wake import engine
+from wake.parser import Parser
+from wake.typedef import AnyDict
 
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -23,17 +24,14 @@ def main() -> None:
     with open(FILE_NAME, "r", encoding="utf-8") as fd:
         contents = fd.read()
 
-    parser_obj = parser.Parser()
-    model: parser.Model = parser_obj.parse_makefile(contents)
-    if not model.labels:
+    parser_obj = Parser()
+    ast: AnyDict = parser_obj.parse_makefile(contents)
+    if not ast:
         raise ValueError(f"'{FILE_NAME}' is missing label definitions.")
 
     label_name = sys.argv[1]
 
-    for label in model.labels:
-        if label_name in [label.name, label.short]:
-            break
-    else:
+    if label not in ast:
         raise ValueError(f"Could not find label '{label_name}'")
 
     args = sys.argv[2:]
diff --git a/src/wake/engine.py b/src/wake/engine.py
@@ -2,7 +2,6 @@
 from collections import namedtuple
 
 from wake import typedef
-from src.wake import parser
 
 Std = namedtuple("Std", ["in_", "out", "err"])
 
diff --git a/src/wake/parser.py b/src/wake/parser.py
@@ -1,94 +1,19 @@
-import collections
-import re
-
+from wake.tokenizer import EOF, EOF_TOKEN, Token, Tokenizer
 from wake.typedef import AnyDict
 
 
-Pattern = collections.namedtuple("Pattern", ["regexp", "name"])
-Token = collections.namedtuple("Token", ["type", "value"], defaults=["", ""])
-
-EOF = "EOF"
-EOF_TOKEN = Token(type=EOF, value="")
-
-NUMBER = r"\d+"
-STRING = r"([\"'])(?:(?=(\\?))\2.)*?\1"
-COMMENT = r"#.*$"
-NL = r"\n"
-NON_WS = r"\S*"
-WS = r"[^\S\r\n]*"
-IDENTIFIER = r"[a-zA-Z_]+(?:(?:_(?!-)|-(?!_))|[a-zA-Z0-9])*[a-zA-Z0-9_]+"
-
-spec: list[Pattern] = [
-    Pattern(WS, "WS"),
-    Pattern(COMMENT, "COMMENT"),
-    Pattern(NUMBER, "NUMBER"),
-    Pattern(STRING, "STRING"),
-    # Pattern(NON_WS, "NON_WS"),
-    Pattern(IDENTIFIER, "IDENTIFIER"),
-    Pattern(NL, "NL"),
-]
-
-
-class Tokenizer:
-    def __init__(self) -> None:
-        self.string = ""
-        self.cursor = 0
-
-    def get_next(self) -> Token:
-        """Gets the next token in the input string.
-
-        Returns:
-            Token: The next token or EOF_TOKEN
-                if there are no more tokens.
-
-        Raises:
-            SyntaxError: Unexpected token.
-        """
-        if not self.has_more_tokens():
-            return EOF_TOKEN
-
-        string = self.string[self.cursor :]
-        for regexp, token_type in spec:
-            value = self._match(r"^" + regexp, string)
-            if not value:
-                continue
-
-            if token_type == "WS":
-                return self.get_next()
-
-            if token_type == "COMMENT":
-                return self.get_next()
-
-            return Token(
-                type=token_type,
-                value=value,
-            )
-
-        raise SyntaxError(f"Unexpected token: '{string[0]}'.")
-
-    def _match(self, pattern: str, string: str) -> str | None:
-        match = re.match(pattern, string, flags=re.IGNORECASE)
-        if match:
-            self.cursor += len(match[0])
-            return match[0]
-        return None
-
-    def has_more_tokens(self) -> bool:
-        """Checks if there are more tokens in the input string.
-
-        Returns:
-            bool: True if cursor is not at the end of the input,
-                False otherwise.
-        """
-        return self.cursor < len(self.string)
-
-
 class Parser:
     def __init__(self) -> None:
         self.string = ""
         self.tokenizer = Tokenizer()
         self.lookahead = EOF_TOKEN
 
+    def parse(self, string) -> Token:
+        self.string = string
+        self.tokenizer.string = string
+        self.lookahead = self.tokenizer.get_next()
+        return self.variables()
+
     def _consume(self, token_type: str) -> Token:
         """Consumes token of particular type.
 
@@ -113,32 +38,14 @@ def _consume(self, token_type: str) -> Token:
 
         self.lookahead = self.tokenizer.get_next()
         return token
-
-    def parse(self, string) -> Token:
-        self.string = string
-        self.tokenizer.string = string
-        self.lookahead = self.tokenizer.get_next()
-        return self.variables()
-
-    def wakefile(self) -> AnyDict:
-        # statement list
-        value: dict[str, AnyDict] = {}
-
-        if variables := self.variables():
-            value.update(variables=variables)
-        if procecures := self.procecures():
-            value.update(procecures=procecures)
-
-        return dict(type="wakefile", value=value)
-
     def variables(self) -> list[AnyDict]:
         variables = []
         while self.lookahead != "LABEL":
             statement = self.statement()
             variables.append(statement)
         return variables
 
-    def procecures(self) -> list[AnyDict]:
+    def recipes(self) -> list[AnyDict]:
         procecures = []
         return procecures
 
diff --git a/src/wake/tokenizer.py b/src/wake/tokenizer.py
@@ -0,0 +1,80 @@
+import collections
+import re
+
+Pattern = collections.namedtuple("Pattern", ["regexp", "name"])
+Token = collections.namedtuple("Token", ["type", "value"], defaults=["", ""])
+
+EOF = "EOF"
+EOF_TOKEN = Token(type=EOF, value="")
+
+NUMBER = r"\d+"
+STRING = r"([\"'])(?:(?=(\\?))\2.)*?\1"
+COMMENT = r"#.*$"
+NL = r"\n"
+NON_WS = r"\S*"
+WS = r"[^\S\r\n]*"
+IDENTIFIER = r"[a-zA-Z_]+(?:(?:_(?!-)|-(?!_))|[a-zA-Z0-9])*[a-zA-Z0-9_]+"
+
+spec: list[Pattern] = [
+    Pattern(WS, "WS"),
+    Pattern(COMMENT, "COMMENT"),
+    Pattern(NUMBER, "NUMBER"),
+    Pattern(STRING, "STRING"),
+    # Pattern(NON_WS, "NON_WS"),
+    Pattern(IDENTIFIER, "IDENTIFIER"),
+    Pattern(NL, "NL"),
+]
+
+
+class Tokenizer:
+    def __init__(self) -> None:
+        self.string = ""
+        self.cursor = 0
+
+    def get_next(self) -> Token:
+        """Gets the next token in the input string.
+
+        Returns:
+            Token: The next token or EOF_TOKEN
+                if there are no more tokens.
+
+        Raises:
+            SyntaxError: Unexpected token.
+        """
+        if not self.has_more_tokens():
+            return EOF_TOKEN
+
+        string = self.string[self.cursor :]
+        for regexp, token_type in spec:
+            value = self._match(r"^" + regexp, string)
+            if not value:
+                continue
+
+            if token_type == "WS":
+                return self.get_next()
+
+            if token_type == "COMMENT":
+                return self.get_next()
+
+            return Token(
+                type=token_type,
+                value=value,
+            )
+
+        raise SyntaxError(f"Unexpected token: '{string[0]}'.")
+
+    def _match(self, pattern: str, string: str) -> str | None:
+        match = re.match(pattern, string, flags=re.IGNORECASE)
+        if match:
+            self.cursor += len(match[0])
+            return match[0]
+        return None
+
+    def has_more_tokens(self) -> bool:
+        """Checks if there are more tokens in the input string.
+
+        Returns:
+            bool: True if cursor is not at the end of the input,
+                False otherwise.
+        """
+        return self.cursor < len(self.string)
diff --git a/src/wake/typedef.py b/src/wake/typedef.py
@@ -1,4 +1,3 @@
 import typing
 
-
 AnyDict = dict[str, typing.Any]
diff --git a/tests/test_cli.py b/tests/test_cli.py
diff --git a/tests/test_exec.py b/tests/test_exec.py
@@ -0,0 +1,9 @@
+import pytest
+
+@pytest.fixture
+def fixture_name(request):
+    return request.param
+
+@pytest.mark.parametrize('fixture_name', ['foo', 'bar'], indirect=True)
+def test_indirect(fixture_name):
+    assert fixture_name in ["foo", "bar"]
diff --git a/tests/test_parser.py b/tests/test_parser.py
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`	`1`	`import typing`
`2`	`2`
`3`		`-`
`4`	`3`	`AnyDict = dict[str, typing.Any]`
-Original file line number
+Diff line change
 import pytest
 -from wake import pars
 +from wake.parser import Parser
 @pytest.fixture
 -def parser() -> pars.Parser:
 -    return pars.Parser()
 +def parser() -> Parser:
 +    return Parser()
 -@pytest.mark.parametrize(
 -    "string, expected",
 -    [
 -        ("   'one'", "one"),
 -        ('"two"    ', "two"),
 -        ('   "two"    ', "two"),
 -    ],
 -)
 -def test_ignore_whitespace(parser, string: pars.Parser, expected):
 -    actual = parser.parse(string)
 -    literal = dict(type="literal", value=dict(type="string", value=expected))
 -    assert actual == literal
 +# @pytest.mark.parametrize(
 +#     "string, expected",
 +#     [
 +#         ("   'one'", "one"),
 +#         ('"two"    ', "two"),
 +#         ('   "two"    ', "two"),
 +#     ],
 +# )
 +# def test_ignore_whitespace(parser, string: Parser, expected):
 +#     actual = parser.parse(string)
 +#     literal = {"type": "literal", "value": {"type": "string", "value": expected}}
 +#     assert actual == literal
 -class TestLiteral:
 -    @pytest.mark.parametrize(
 -        "string, expected",
 -        [
 -            ("5", 5),
 -            ("55", 55),
 -            ("355", 355),
 -        ],
 -    )
 -    def test_number(self, parser, string, expected):
 -        actual = parser.parse(string)
 -        literal = dict(type="literal", value=dict(type="number", value=expected))
 -        assert actual == literal
 +# class TestLiteral:
 +#     @pytest.mark.parametrize(
 +#         "string, expected",
 +#         [
 +#             ("5", 5),
 +#             ("55", 55),
 +#             ("355", 355),
 +#         ],
 +#     )
 +#     def test_number(self, parser, string, expected):
 +#         actual = parser.parse(string)
 +#         literal = {"type": "literal", "value": {"type": "number", "value": expected}}
 +#         assert actual == literal
 -    @pytest.mark.parametrize(
 -        "string, expected",
 -        [
 -            ("'one'", "one"),
 -            ('"two"', "two"),
 -        ],
 -    )
 -    def test_string(self, parser, string, expected):
 -        actual = parser.parse(string)
 -        literal = dict(type="literal", value=dict(type="string", value=expected))
 -        assert actual == literal
 +#     @pytest.mark.parametrize(
 +#         "string, expected",
 +#         [
 +#             ("'one'", "one"),
 +#             ('"two"', "two"),
 +#         ],
 +#     )
 +#     def test_string(self, parser, string, expected):
 +#         actual = parser.parse(string)
 +#         literal = {"type": "literal", "value": {"type": "string", "value": expected}}
 +#         assert actual == literal
 -    @pytest.mark.parametrize(
 -        "string, expected",
 -        [
 -            ("'one'", dict(type="string", value="one")),
 -            ('"two"', dict(type="string", value="two")),
 -            ("55", dict(type="number", value=55)),
 -        ],
 -    )
 -    def test_literal(self, parser, string, expected):
 -        actual = parser.parse(string)
 -        literal = dict(type="literal", value=expected)
 -        assert actual == literal
 +#     @pytest.mark.parametrize(
 +#         "string, expected",
 +#         [
 +#             ("'one'", {"type": "string", "value": "one"}),
 +#             ('"two"', {"type": "string", "value": "two"}),
 +#             ("55", {"type": "number", "value": 55}),
 +#         ],
 +#     )
 +#     def test_literal(self, parser, string, expected):
 +#         actual = parser.parse(string)
 +#         literal = {"type": "literal", "value": expected}
 +#         assert actual == literal
 -class TestVariable:
 -    @pytest.mark.parametrize(
 -        "string, expected",
 -        [
 -            ('VAR="foo"', ""),
 -            ("VAR=foo", ""),
 -            ("VAR=foo_bar", ""),
 -            ("VAR=foo-bar", ""),
 -            ("VAR=_foo", ""),
 -            ("VAR=foo", ""),
 -        ],
 -    )
 -    def test_variable_ok(self, parser, string, expected):
 -        actual = parser.parse(string)
 -        variable = dict(type="variable", value=dict(type="string", value=expected))
 -        assert actual == variable
 +# class TestVariable:
 +#     @pytest.mark.parametrize(
 +#         "string, expected",
 +#         [
 +#             ('VAR="foo"', ""),
 +#             ("VAR=foo", ""),
 +#             ("VAR=foo_bar", ""),
 +#             ("VAR=foo-bar", ""),
 +#             ("VAR=_foo", ""),
 +#             ("VAR=foo", ""),
 +#         ],
 +#     )
 +#     def test_variable_ok(self, parser, string, expected):
 +#         actual = parser.parse(string)
 +#         variable = {"type": "variable", "value": {"type": "string", "value": expected}}
 +#         assert actual == variable
 -    @pytest.mark.parametrize(
 -        "string, expected",
 -        [
 -            ("aVaR", None)
 -            ("_var_", None)
 -            ("V_a__r", None)
 -            ("VAR", None)
 -            ("var", None)
 -            ("___VAr___", None)
 -        ],
 -    )
 -    def test_variable_ok(self, parser, string, expected):
 -        actual = parser.parse(string)
 -        variable = dict(type="variable", value=dict(type="string", value=expected))
 -        assert actual == variable
 +#     @pytest.mark.parametrize(
 +#         "string, expected",
 +#         [
 +#             ("aVaR", None)("_var_", None)("V_a__r", None)("VAR", None)("var", None)(
 +#                 "___VAr___", None
 +#             )("_557", None)("v557", None)("v5_5_", None)
 +#         ],
 +#     )
 +#     def test_variable_ok_2(self, parser, string, expected):
 +#         actual = parser.string = string
 +#         variable = {"type": "variable", "value": {"type": "identifier", "value": expected}}
 +#         assert actual == variable
+-
 -    @pytest.mark.parametrize(
 -        "string, expected_err",
 -        [
 -            ('VAR=="foo"', SyntaxError),
 -            ("VAR=fo-_o", SyntaxError),
 -            ("VAR=foo_bar", SyntaxError),
 -            ("VAR=foo-bar", SyntaxError),
 -            ("VAR=_foo", SyntaxError),
 -            ("VAR=foo", SyntaxError),
 -        ],
 -    )
 -    def test_variable_fail(self, parser, string, expected_err):
 -        with pytest.raises(ValueError):
 -            _ = parser.parse(string)
 +#     @pytest.mark.parametrize(
 +#         "string, expected_err",
 +#         [
 +#             ('VAR=="foo"', SyntaxError),
 +#             ("VAR=fo-_o", SyntaxError),
 +#             ("VAR=foo_bar", SyntaxError),
 +#             ("VAR=foo-bar", SyntaxError),
 +#             ("VAR=_foo", SyntaxError),
 +#             ("VAR=foo", SyntaxError),
 +#         ],
 +#     )
 +#     def test_variable_fail(self, parser, string, expected_err):
 +#         with pytest.raises(ValueError):
 +#             _ = parser.parse(string)