syntaxanalyzer.h
// Syntax Analyzer
#pragma once
#include <iostream>
#include <list>
#include <string>
#include <vector>
#include "lexicalscanner.h"
#include "parsetree.h"
#include "symboltable.h"
#include "instruction.h"
using namespace std;
class SyntaxAnalyzer
{
protected:
    vector<Record> lexemes;
    int currentLexeme;
    int previousLexeme;
    Record currentToken;
    bool printProduction = true;
    string currentProduction = "";
    string lastError = "";
    LexicalScanner & lexicalScanner;
    ErrorHandler & errorHandler;
    SymbolTable & symbolTable;
    Node * currentNode;
    ParseTree * parseTree;
    vector<Instruction> instr_table;
    int instr_address = 0; // next instruction address for intermediate code generation
public:
    SyntaxAnalyzer(LexicalScanner & lexicalScanner, SymbolTable & symbolTable, ErrorHandler & errorHandler)
        : lexemes(), currentLexeme(0), previousLexeme(0),
          lexicalScanner(lexicalScanner), errorHandler(errorHandler), symbolTable(symbolTable),
          currentNode(nullptr), parseTree(nullptr) {
    }
    // Gets the next token from the lexemes list, reading a new token from the
    // lexical scanner when the list has been exhausted. Buffering tokens in a
    // list supports backtracking: when the syntax analyzer tries rules one by
    // one (a brute-force approach, as opposed to a predictive one) and a rule
    // fails, the same tokens must be re-read from the start for the next rule.
    // Returns a pointer to the Record object of the next lexeme in the list.
    Record * getNextToken()
    {
        // If there is a buffered token at currentLexeme, return a pointer to it
        if (currentLexeme < lexemes.size())
        {
            currentToken = lexemes[currentLexeme++];
            return &currentToken;
        } else {
            // Otherwise all buffered tokens have been consumed (or this is the
            // first call); read the next token from the lexical scanner, append
            // it to the list, and return a pointer to it
            Record token = lexicalScanner.lexer();
            cout << token << endl;
            lexemes.push_back(token);
            currentToken = lexemes[currentLexeme++];
            return &currentToken;
        }
    }
    // Peeks ahead at the next token in the list without consuming it
    // Returns a pointer to the next token in the list
    Record * lookAhead()
    {
        Record * token = getNextToken();
        backup(); // back up so that the peek does not affect processing
        return token;
    }
    // Gets the token currently being processed by the syntax analyzer
    // Returns a pointer to the token, or nullptr if no token has been read yet
    Record * getCurrentToken() {
        if (!lexemes.empty()) {
            return &lexemes[currentLexeme > 0 ? currentLexeme - 1 : 0];
        } else {
            return nullptr;
        }
    }
    // Backs up currentLexeme by one position
    void backup() {
        if (currentLexeme != 0) {
            currentLexeme--;
        }
    }
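    // Usage sketch (hypothetical caller code in a derived analyzer; the control
    // flow below is illustrative, not part of this header): a candidate rule
    // consumes tokens with getNextToken() and, if it does not match, calls
    // backup() once per consumed token so the next rule is tried from the same
    // position, e.g.
    //     Record * t = getNextToken();
    //     if (!isId(*t)) {
    //         backup();       // restore the position before trying the next production
    //         return false;   // let the caller try another rule
    //     }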
    // Prints a production rule when production printing is enabled
    void print(const string & rule) {
        if (printProduction) {
            cout << " " << rule << endl;
        }
    }
    // Builds the parse tree for the input program; implemented by derived analyzers
    virtual ParseTree * createParseTree() = 0;
    // Appends an instruction to the intermediate-code table and advances the address
    void gen_instr(string op, string oprnd)
    {
        Instruction instruction = Instruction(op, oprnd);
        instr_table.push_back(instruction);
        instr_address++;
    }
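    // Usage sketch: the mnemonics below (PUSHM, PUSHI, ADD, POPM) are illustrative
    // assumptions, not defined in this header; gen_instr() accepts any strings.
    // Emitting intermediate code for an assignment such as "a = a + 5;" might look like:
    //     gen_instr("PUSHM", "a");   // push the value stored in a
    //     gen_instr("PUSHI", "5");   // push the literal 5
    //     gen_instr("ADD", "nil");   // pop two values and push their sum
    //     gen_instr("POPM", "a");    // store the result back into a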
    // Functions that determine the lexemes and tokens
    bool isIf(const Record & lexeme);
    bool isWhile(const Record & lexeme);
    bool isType(const Record & lexeme);
    bool isId(const Record & lexeme);
    bool isNumber(const Record & lexeme);
    bool isThen(const Record & lexeme);
    bool isWhileEnd(const Record & lexeme);
    bool isDo(const Record & lexeme);
    bool isEndIf(const Record & lexeme);
    bool isEndDo(const Record & lexeme);
    bool isOpenBracket(const Record & lexeme);
    bool isCloseBracket(const Record & lexeme);
    bool isElse(const Record & lexeme);
    bool isEndElse(const Record & lexeme);
    bool isSemiColon(const Record & lexeme);
    bool isOperator(const Record & lexeme);
    bool isRelativeOperator(const Record & lexeme);
    bool isError(const Record & lexeme);
    bool isBoolValue(const Record & lexeme);
    const vector<string> relativeOperators = {"<", ">", "<=", ">=", "==", "<>"};
    bool isPlus(const Record & lexeme) {
        return lexeme.token == "OPERATOR" && lexeme.lexeme == "+";
    }
    bool isMinus(const Record & lexeme) {
        return lexeme.token == "OPERATOR" && lexeme.lexeme == "-";
    }
    bool isMultiply(const Record & lexeme) {
        return lexeme.token == "OPERATOR" && lexeme.lexeme == "*";
    }
    bool isDivideBy(const Record & lexeme) {
        return lexeme.token == "OPERATOR" && lexeme.lexeme == "/";
    }
    bool isLeftParen(const Record & lexeme) {
        return lexeme.token == "SEPARATOR" && lexeme.lexeme == "(";
    }
    bool isRightParan(const Record & lexeme) {
        return lexeme.token == "SEPARATOR" && lexeme.lexeme == ")";
    }
    bool isEOF(const Record & lexeme) {
        return lexeme.token == "EOF" && lexeme.lexeme == "$";
    }
    bool isEquals(const Record & lexeme) {
        return lexeme.token == "OPERATOR" && lexeme.lexeme == "=";
    }
    bool isBeginBlock(const Record & lexeme) {
        return lexeme.token == "SEPARATOR" && lexeme.lexeme == "{";
    }
    bool isEndBlock(const Record & lexeme) {
        return lexeme.token == "SEPARATOR" && lexeme.lexeme == "}";
    }
    // Returns the vector of lexemes read so far
    vector<Record> & getTokenList() {
        return lexemes;
    }
    // Prints the intermediate-code table, omitting "nil" operands
    void printIC(ostream & stream) {
        for (size_t i = 0; i < instr_table.size(); ++i) {
            auto it = instr_table[i];
            stream << it.op << " " << ((it.oprnd != "nil") ? it.oprnd : "") << endl;
        }
    }
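    // Output sketch: a table holding ("PUSHI", "5") and ("ADD", "nil"), using the
    // illustrative mnemonics above, prints one instruction per line:
    //     PUSHI 5
    //     ADD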
};
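// Usage sketch (hypothetical; the derived class name and its constructor are
// illustrative, not part of this header): a concrete analyzer overrides
// createParseTree() and drives the analysis, e.g.
//     class RDSyntaxAnalyzer : public SyntaxAnalyzer {
//         // ... implements createParseTree() using the token helpers above
//     };
//     RDSyntaxAnalyzer analyzer(scanner, symbolTable, errorHandler);
//     ParseTree * tree = analyzer.createParseTree();
//     analyzer.printIC(cout);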