-
Notifications
You must be signed in to change notification settings - Fork 70
/
Copy pathlexer.js
137 lines (109 loc) · 3.91 KB
/
lexer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
const constants = require("./constants.js");
/**
 * Turns a character stream into tokens, with a single token of lookahead.
 *
 * The input stream must provide peek(), next(), isEndOfFile(),
 * isNotEndOfFile() and throwError(msg); those are the only members used here.
 * Every token is a plain object of the form { type, value }.
 */
class Lexer {
    /**
     * @param {object} inputStream - character source the tokens are read from
     */
    constructor (inputStream) {
        this.inputStream = inputStream;
        // Lookahead slot shared by peek() and next(); null means nothing is buffered.
        this.current = null;
    }

    /**
     * @param {string} whiteSpaceChar - character to classify
     * @returns {boolean} true when the character is listed in constants.LIST.WHITESPACES
     */
    isWhiteSpace (whiteSpaceChar) {
        return constants.LIST.WHITESPACES.includes(whiteSpaceChar);
    }

    /**
     * @param {string} punctuationChar - character to classify
     * @returns {boolean} true when the character is listed in constants.LIST.PUNCTUATIONS
     */
    isPunctuation (punctuationChar) {
        return constants.LIST.PUNCTUATIONS.includes(punctuationChar);
    }

    /**
     * @param {string} id - character (or text) to test
     * @returns {boolean} true when constants.REGEX.IDENTIFIER matches
     */
    isIdentifier (id) {
        /**
         * @todo replace this with a dictionary matching, rather than use regex (much faster)
         */
        // NOTE(review): test() is stateful if the pattern carries the g or y flag - confirm in constants.js
        return constants.REGEX.IDENTIFIER.test(id);
    }

    /**
     * @param {string} operatorChar - character to classify
     * @returns {boolean} true when the character is listed in constants.LIST.OPERATORS
     */
    isOperator (operatorChar) {
        return constants.LIST.OPERATORS.includes(operatorChar);
    }

    /**
     * @param {string} keywordChar - identifier text to look up
     * @returns {boolean} true when the text is listed in constants.LIST.KEYWORDS
     */
    isKeyword (keywordChar) {
        return constants.LIST.KEYWORDS.includes(keywordChar);
    }

    /**
     * @param {string} digitChar - character to test
     * @returns {boolean} true when constants.REGEX.DIGIT matches
     */
    isDigit (digitChar) {
        return constants.REGEX.DIGIT.test(digitChar);
    }

    /**
     * Consumes characters for as long as the predicate accepts the upcoming one.
     *
     * @param {function(string): boolean} predicate - called with the peeked character
     * @returns {string} the consumed characters, possibly empty
     */
    readWhile (predicate) {
        let str = "";
        while (this.inputStream.isNotEndOfFile() && predicate(this.inputStream.peek())) {
            str += this.inputStream.next();
        }
        return str;
    }

    /**
     * Reads a quoted string; the stream must be positioned on the opening quote.
     * Reports an error through throwError() when the closing quote is missing.
     *
     * @returns {{type: *, value: string}} STRING token holding the text between the quotes
     */
    readString () {
        const stringEnd = constants.SYM.STR_QUOTE;
        this.inputStream.next(); // skip the opening quote
        const str = this.readWhile((ch) => ch !== stringEnd);
        if (this.inputStream.peek() === stringEnd) {
            this.inputStream.next(); // skip the closing quote
        } else {
            this.throwError(`Expecting '${stringEnd}' but found unexpected char`);
        }
        return { type: constants.STRING, value: str, };
    }

    /**
     * Reads an identifier, strips its accent marks, and classifies it as a
     * keyword or a variable.
     *
     * @returns {{type: *, value: string}} KEYWORD or VARIABLE token
     */
    readIdentifier () {
        // Arrow wrapper keeps `this` bound even if isIdentifier starts relying on it.
        const rawIdentifier = this.readWhile((ch) => this.isIdentifier(ch));
        const identifier = this.getIdentifierWithoutAccentMarks(rawIdentifier);
        return {
            type: this.isKeyword(identifier) ? constants.KEYWORD : constants.VARIABLE,
            value: identifier,
        };
    }

    /**
     * @param {string} identifier - text that may contain accented letters
     * @returns {string} the text with combining marks U+0300..U+036F removed
     */
    getIdentifierWithoutAccentMarks (identifier) {
        // NFD splits an accented letter into base letter + combining mark, which is then dropped.
        return identifier.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
    }

    /**
     * Reads digits with at most one period; a second period ends the number.
     *
     * @returns {{type: *, value: number}} NUMBER token, value parsed with parseFloat
     */
    readNumber () {
        let hasDot = false;
        const num = this.readWhile((ch) => {
            if (ch === constants.SYM.PERIOD) {
                if (hasDot) return false;
                hasDot = true;
                return true;
            }
            return this.isDigit(ch);
        });
        return { type: constants.NUMBER, value: parseFloat(num), };
    }

    /**
     * Discards the rest of the current line, including its newline when present.
     */
    skipComments () {
        this.readWhile((ch) => ch !== constants.SYM.NEW_LINE);
        // A comment on the last line has no trailing newline left to consume.
        if (this.inputStream.isNotEndOfFile()) this.inputStream.next();
    }

    /**
     * Reads the next token straight from the input stream, skipping white space
     * and comments. Ignores the lookahead slot; callers normally use peek()/next().
     *
     * @returns {?{type: *, value: *}} the token, or null at end of input
     */
    readNext () {
        let ch;
        // Loop instead of recursing so a long run of comment lines cannot exhaust the call stack.
        for (;;) {
            this.readWhile((c) => this.isWhiteSpace(c));
            if (this.inputStream.isEndOfFile()) return null;
            ch = this.inputStream.peek();
            if (ch !== constants.SYM.COMMENT) break;
            this.skipComments();
        }
        if (ch === constants.SYM.STR_QUOTE) return this.readString();
        if (this.isDigit(ch)) return this.readNumber();
        if (this.isIdentifier(ch)) return this.readIdentifier();
        if (this.isPunctuation(ch)) return { type: constants.PUNCTUATION, value: this.inputStream.next(), };
        if (this.isOperator(ch)) return { type: constants.OPERATOR, value: this.readWhile((c) => this.isOperator(c)), };
        this.throwError(`Cant handle character '${ch}'`);
    }

    /**
     * Returns the upcoming token without consuming it.
     *
     * @returns {?{type: *, value: *}} the buffered or newly read token, or null at end of input
     */
    peek () {
        return this.current || (this.current = this.readNext());
    }

    /**
     * Consumes and returns the upcoming token.
     *
     * This does not always call readNext(): if the token was already peeked,
     * readNext() has run and the input stream has advanced, so the buffered
     * token is handed out instead.
     *
     * @returns {?{type: *, value: *}} the token, or null at end of input
     */
    next () {
        const token = this.current;
        this.current = null;
        return token || this.readNext();
    }

    /**
     * @returns {boolean} true when no token is left
     */
    isEndOfFile () {
        return this.peek() == null;
    }

    /**
     * @returns {boolean} true when at least one token is left
     */
    isNotEndOfFile () {
        return this.peek() != null;
    }

    /**
     * Delegates error reporting to the input stream.
     *
     * @param {string} msg - description of the problem
     */
    throwError (msg) {
        this.inputStream.throwError(msg);
    }
}
module.exports = Lexer;