Skip to content

Commit

Permalink
feat(parser): Add Parser to Project
Browse files Browse the repository at this point in the history
  • Loading branch information
erfanghobadian committed Dec 19, 2021
1 parent ee77219 commit 68538a1
Show file tree
Hide file tree
Showing 97 changed files with 815 additions and 14 deletions.
Empty file added __init__.py
Empty file.
18 changes: 18 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import sys
from parser.parse import Parser

if __name__ == '__main__':
    # CLI shape assumed: argv = [prog, <flag>, input, <flag>, output, <flag>, table]
    # NOTE(review): only the even positions are read; confirm the flag layout
    # against whatever invokes this script.
    input_path = sys.argv[2]
    output_path = sys.argv[4]
    table_path = sys.argv[6]

    # BUG FIX: table_path was parsed but never used -- the output path was
    # passed to Parser as the parse-table CSV. Parser's signature is
    # Parser(file_name, table_name).
    parser = Parser(input_path, table_path)
    try:
        parser.parse()
    except StopIteration:
        # The token generator running dry is treated as "all input consumed
        # successfully" by this driver.
        # BUG FIX: the result file was opened with mode 'r', so f.write()
        # raised io.UnsupportedOperation; open for writing instead.
        with open(output_path, 'w') as f:
            f.write('Syntax is correct!')
    except Exception as e:
        print(f"Compilation failed with {e.args[0]} errors.")
        with open(output_path, 'w') as f:  # BUG FIX: was mode 'r' (see above)
            f.write('Syntax is wrong!')
Empty file added parser/__init__.py
Empty file.
101 changes: 101 additions & 0 deletions parser/parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import csv
import sys

from scanner import Lexer
from utils import errors

START_STATE = 0


class Parser:
    """Table-driven parser.

    Loads an action/goto table from a CSV file and consumes tokens produced
    by ``scanner.Lexer``. ``parse()`` returns on ACCEPT, raises
    ``errors.ParserException`` on a table error, and lets ``StopIteration``
    from the token generator propagate (the caller treats that as success).
    """

    def __init__(self, file_name, table_name):
        """Read the source file and build the (state, symbol) -> action map.

        file_name:  path to the source program (also handed to the lexer).
        table_name: path to the parse-table CSV; first column is the state
                    number, the header row names the grammar symbols.
        """
        # NOTE(review): removed the unused ``self.table = {}`` attribute;
        # nothing in this class ever read or wrote it.
        self.grammar = {}
        with open(file_name, 'r') as file:
            self.src_code = file.readlines()
        with open(table_name, 'r') as file:
            reader = csv.reader(file)
            header = next(reader)
            for row in reader:
                state = int(row[0].strip())
                for i in range(1, len(row)):
                    symbol = header[i].strip()
                    # Keep the first non-ERROR entry seen for each cell, so a
                    # later ERROR never overwrites a real action.
                    prev_data = self.grammar.get((state, symbol))
                    if not prev_data or prev_data == 'ERROR':
                        self.grammar[(state, symbol)] = row[i].strip()

        self.lexer = Lexer(file_name)

    def parse(self):
        """Drive the table until ACCEPT, an error, or end of input.

        Raises:
            StopIteration: token stream exhausted (caller's success path).
            errors.ParserException: ERROR cell hit or malformed table entry.
            KeyError: (state, token.type) missing from the table entirely.
        """
        state = START_STATE
        parse_stack = []
        token_generator = self.lexer.get_token()
        token = next(token_generator)
        while True:
            if token is None:
                break
            # Skip tokens the grammar ignores; StopIteration raised here
            # deliberately bubbles up to the caller.
            while token.type in ('COMMENT', 'SPACE', 'NEW_LINE'):
                token = next(token_generator)
            # BUG FIX: removed leftover debug prints of (state, token) and
            # the decoded action that polluted stdout on every token.
            data = self.grammar[(state, token.type)].split(' ')
            if len(data) == 1:
                # Either an explicit ERROR cell or a malformed single-field
                # entry -- both are fatal.
                # BUG FIX: the ERROR branch previously raised
                # ParserException(ParserException), passing the exception
                # class itself as the message. NOTE(review): a dedicated
                # syntax-error code may have been intended for the ERROR
                # case -- confirm against utils.errors.
                raise errors.ParserException(errors.INVALID_GRAMMAR)

            if len(data) == 2:
                if data[0] == "ACCEPT":
                    print("Compilation completed with 0 errors.")
                    return
                elif data[0] == "REDUCE":
                    # Pop the saved state and follow its entry for the
                    # reduced symbol; the target state is encoded like "S12".
                    nxt_data = self.grammar[(parse_stack.pop(), data[1])].split(' ')
                    state = int(nxt_data[1][1:])
                else:
                    raise errors.ParserException(errors.INVALID_GRAMMAR)
            if len(data) == 3:
                if data[0] == "SHIFT":
                    state = int(data[1][1:])  # strip the leading "S"
                    token = next(token_generator)
                elif data[0] == "GOTO":
                    state = int(data[1][1:])
                    token = next(token_generator)
                elif data[0] == "PUSH_GOTO":
                    # Remember the current state for the matching REDUCE.
                    parse_stack.append(state)
                    state = int(data[1][1:])

                else:
                    raise errors.ParserException(errors.INVALID_GRAMMAR)


# filenames = next(os.walk('../tests_parser/in'), (None, None, []))[2] # [] if no file
# filenames = sorted(filenames)
# p = Parser(f'../tests_parser/in/28_class1.cool', '../table.csv')
# p.parse()


def main():
pass


if __name__ == '__main__':
main()
print(sys.argv)

# for f in filenames:
# try:
# p = Parser(f'../tests_parser/in/{f}', '../table.csv')
# p.parse()
# except StopIteration:
# continue
# # print("Compilation completed with 0 errors.", f)
# except Exception as e:
# # print(e)
# # print(f"Compilation failed with {e.args[0]} errors.", f)
# with open(f'../tests_parser/out/{f.split(".")[0]}.out', 'r') as file:
# res = file.read()
# if res != 'Syntax is wrong!':
# print(f"Expected: Syntax is wrong!\nGot: {res}", f)



1 change: 1 addition & 0 deletions scanner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .lex import Lexer
28 changes: 22 additions & 6 deletions scanner/lex.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import os

from ply import lex
import statics
import os

from . import statics

class Lexer:
def __init__(self, filename):
Expand All @@ -29,7 +26,7 @@ def tokenize(self):
t_RBRACE = r'}'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_COLON = r','
t_COMMA = r','
t_SEMICOLON = r';'
t_DOT = r'\.'
t_AND = r'&&'
Expand Down Expand Up @@ -99,7 +96,7 @@ def t_REAL_NUMBER(self, t):
return t

def t_INTEGER(self, t):
r'\d+'
r'[-]?[0-9]+'
t.value = int(t.value)
return t

Expand All @@ -123,4 +120,23 @@ def t_error(self, t):
print("Illegal character '%s'" % t.value[0])
t.lexer.skip(1)

def get_token(self):
    """Generate the token stream for the parser.

    Runs the lexer to completion first, remapping concrete token types into
    the coarser terminals the parse table uses (TYPE / CONSTANT / OPERATOR),
    then yields the collected tokens in order.
    """
    self.tokenize()
    collected = []
    tok = self.lexer.token()
    while tok:
        # Collapse families of concrete token types into the single
        # terminal name the grammar knows about.
        if tok.type in statics.types:
            tok.type = 'TYPE'
        elif tok.type in statics.constants:
            tok.type = 'CONSTANT'
        elif tok.type in statics.parser_operators:
            tok.type = 'OPERATOR'
        collected.append(tok)
        tok = self.lexer.token()

    yield from collected



4 changes: 2 additions & 2 deletions scanner/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def run(self):


if __name__ == '__main__':
scanner = Scanner("tests/input1.cool")
scanner = Scanner("../tests/input1.cool")
scanner.run()
scanner = Scanner("tests/input2.cool")
scanner = Scanner("../tests/input2.cool")
scanner.run()
41 changes: 40 additions & 1 deletion scanner/statics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Lexer token names collapsed into the generic TYPE terminal before
# parsing (see Lexer.get_token).
types = [
    'INT',
    'FLOAT',
    'STRING_KEYWORD',
    'BOOL',
    'REAL'
]

# Lexer token names collapsed into the generic CONSTANT terminal.
constants = [
    'INTEGER',
    'REAL_NUMBER',
    'STRING'
]

reserved = {
'let': 'LET',
'void': 'VOID',
Expand Down Expand Up @@ -35,6 +49,31 @@
'BACKSLASH'
]


# Lexer token names collapsed into the generic OPERATOR terminal for the
# parser (see Lexer.get_token). Kept separate from the scanner's own
# ``operators`` list, which serves the lexer's token declarations.
parser_operators = [
    'PLUS',
    'MINUS',
    'TIMES',
    'DIVIDE',
    'MOD',
    'EQUALS',
    'NOT_EQUALS',
    'LESS_THAN',
    'LESS_THAN_EQUALS',
    'GREATER_THAN',
    'GREATER_THAN_EQUALS',
    'AND',
    'OR',
    'NOT',
    'ADDITION_ASSIGNMENT',
    'SUBTRACTION_ASSIGNMENT',
    'MULTIPLICATION_ASSIGNMENT',
    'DIVISION_ASSIGNMENT',
    'BITWISE_AND',
    'BITWISE_OR',
    'BITWISE_XOR',
]

operators = [
'PLUS',
'MINUS',
Expand All @@ -57,7 +96,7 @@
'LBRACE',
'RBRACE',
'SEMICOLON',
'COLON',
'COMMA',
'DOT',
'ADDITION_ASSIGNMENT',
'SUBTRACTION_ASSIGNMENT',
Expand Down
Loading

0 comments on commit 68538a1

Please sign in to comment.