@@ -38,6 +38,7 @@ class Parser
38
38
unique_ptr<LookAroundExprAST> parse_lookaround (const TokenValue &token_value);
39
39
unique_ptr<FlagExprAST> parse_flag (const TokenValue &token_value);
40
40
unique_ptr<AnchorExprAST> parse_anchor (const TokenValue &token_value);
41
+ unique_ptr<EOFExprAST> parse_eof (const TokenValue &token_value);
41
42
};
42
43
43
44
Parser::Parser (Lexer &lexer, bool show_error) : lexer_(lexer), error_flag_(false ), show_error_(show_error)
@@ -108,8 +109,7 @@ inline unique_ptr<ExprAST> Parser::parse_token(const Token &token)
108
109
ptr = std::move (parse_anchor (token.get_token_value ()));
109
110
break ;
110
111
case TokenType::END_OF_FILE:
111
- // we are good
112
- // eof = true;
112
+ ptr = std::move (parse_eof (token.get_token_value ()));
113
113
break ;
114
114
case TokenType::UNDEFINED:
115
115
error_flag_ = true ;
@@ -130,6 +130,7 @@ inline unique_ptr<ExprAST> Parser::parse_token(const Token &token)
130
130
inline unique_ptr<CharacterExprAST> Parser::parse_character (const TokenValue &token_value)
131
131
{
132
132
unique_ptr<CharacterExprAST> ptr = nullptr ;
133
+
133
134
if (token_value == TokenValue::LITERALLY || token_value == TokenValue::ONE_OF || token_value == TokenValue::RAW)
134
135
{
135
136
// expect string literal following
@@ -138,56 +139,34 @@ inline unique_ptr<CharacterExprAST> Parser::parse_character(const TokenValue &to
138
139
{
139
140
error_flag_ = true ;
140
141
error_msg_ = " missing string literal" ;
142
+ return ptr;
141
143
}
142
- else
144
+
145
+ string val;
146
+ switch (token_value)
143
147
{
144
- string val;
145
- switch (token_value)
146
- {
147
- case TokenValue::LITERALLY:
148
- val = " (?:" + next_token.get_value () + " )" ;
149
- break ;
150
- case TokenValue::ONE_OF:
151
- val = " [" + next_token.get_value () + " ]" ;
152
- break ;
153
- case TokenValue::RAW:
154
- val = next_token.get_value ();
155
- break ;
156
- default :
157
- break ;
158
- }
159
- ptr = make_unique<CharacterExprAST>(val);
160
- lexer_.get_next_token (); // so we eat the leagal token
148
+ case TokenValue::LITERALLY:
149
+ val = " (?:" + next_token.get_value () + " )" ;
150
+ break ;
151
+ case TokenValue::ONE_OF:
152
+ val = " [" + next_token.get_value () + " ]" ;
153
+ break ;
154
+ case TokenValue::RAW:
155
+ val = next_token.get_value ();
156
+ break ;
157
+ default :
158
+ break ;
161
159
}
160
+ ptr = make_unique<CharacterExprAST>(val);
161
+ lexer_.get_next_token (); // so we eat the leagal token
162
+ return ptr;
162
163
}
163
164
164
- else if (token_value == TokenValue::LETTER || token_value == TokenValue::UPPERCASE_LETTER || token_value == TokenValue::DIGIT)
165
+ if (token_value == TokenValue::LETTER || token_value == TokenValue::UPPERCASE_LETTER || token_value == TokenValue::DIGIT)
165
166
{
166
- Token next_token = lexer_.get_next_token ();
167
+ Token guess_from = lexer_.get_next_token ();
167
168
168
- if (next_token.get_token_value () == TokenValue::FROM)
169
- {
170
- Token next_next_token = lexer_.get_next_token ();
171
- if (next_next_token.get_token_value () == TokenValue::TO)
172
- {
173
- // so we have the modifier
174
- string az = next_next_token.get_value ();
175
- if (az.length () == 2 )
176
- {
177
- az.insert (1 , " -" );
178
- az.insert (0 , " [" );
179
- az.append (" ]" );
180
- ptr = make_unique<CharacterExprAST>(az);
181
- lexer_.get_next_token (); // so we eat the leagal tokens from and to
182
- }
183
- }
184
- else
185
- {
186
- error_flag_ = true ;
187
- error_msg_ = " \" from\" found, but \" to\" not found" ;
188
- }
189
- }
190
- else
169
+ if (guess_from.get_token_value () != TokenValue::FROM)
191
170
{
192
171
string val;
193
172
switch (token_value)
@@ -205,50 +184,76 @@ inline unique_ptr<CharacterExprAST> Parser::parse_character(const TokenValue &to
205
184
break ;
206
185
}
207
186
ptr = make_unique<CharacterExprAST>(val);
208
- lexer_.get_next_token (); // so we eat the leagal token
187
+ // now we already at the one after letter/digit/...
188
+ // because we already move to here for guessing from
189
+ return ptr;
209
190
}
210
- }
211
- else
212
- {
213
- string val;
214
- switch (token_value)
215
- {
216
- case TokenValue::ANY_CHARACTER:
217
- val = " \\ w" ;
218
- break ;
219
- case TokenValue::NO_CHARACTER:
220
- val = " \\ W" ;
221
- break ;
222
- case TokenValue::ANYTHING:
223
- val = " ." ;
224
- break ;
225
- case TokenValue::NEW_LINE:
226
- val = " \\ n" ;
227
- break ;
228
- case TokenValue::WHITESPACE:
229
- val = " \\ s" ;
230
- break ;
231
- case TokenValue::NO_WHITESPACE:
232
- val = " \\ S" ;
233
- break ;
234
- case TokenValue::TAB:
235
- val = " \\ t" ;
236
- break ;
237
- default :
238
- break ;
239
- }
240
- if (val.length () != 0 )
191
+
192
+ Token guess_to = lexer_.get_next_token ();
193
+
194
+ if (guess_from.get_token_value () != TokenValue::FROM)
241
195
{
242
- ptr = make_unique<CharacterExprAST>(val);
243
- lexer_.get_next_token (); // so we eat the leagal token
196
+ error_flag_ = true ;
197
+ error_msg_ = " \" from\" found, but \" to\" not found" ;
198
+ return ptr;
244
199
}
245
- else
200
+
201
+ string az = guess_to.get_value ();
202
+
203
+ if (az.length () != 2 )
246
204
{
247
205
error_flag_ = true ;
248
- error_msg_ = " unknown error" ;
206
+ error_msg_ = " the range \" from\" and \" to\" is not well defined" ;
207
+ return ptr;
249
208
}
209
+
210
+ az.insert (1 , " -" );
211
+ az.insert (0 , " [" );
212
+ az.append (" ]" );
213
+ ptr = make_unique<CharacterExprAST>(az);
214
+ lexer_.get_next_token (); // so we eat the leagal token to
215
+ return ptr;
216
+ }
217
+
218
+ string val;
219
+ switch (token_value)
220
+ {
221
+ case TokenValue::ANY_CHARACTER:
222
+ val = " \\ w" ;
223
+ break ;
224
+ case TokenValue::NO_CHARACTER:
225
+ val = " \\ W" ;
226
+ break ;
227
+ case TokenValue::ANYTHING:
228
+ val = " ." ;
229
+ break ;
230
+ case TokenValue::NEW_LINE:
231
+ val = " \\ n" ;
232
+ break ;
233
+ case TokenValue::WHITESPACE:
234
+ val = " \\ s" ;
235
+ break ;
236
+ case TokenValue::NO_WHITESPACE:
237
+ val = " \\ S" ;
238
+ break ;
239
+ case TokenValue::TAB:
240
+ val = " \\ t" ;
241
+ break ;
242
+ default :
243
+ break ;
244
+ }
245
+ if (val.length () != 0 )
246
+ {
247
+ ptr = make_unique<CharacterExprAST>(val);
248
+ lexer_.get_next_token (); // so we eat the leagal token
249
+ }
250
+ else
251
+ {
252
+ error_flag_ = true ;
253
+ error_msg_ = " unknown error" ;
250
254
}
251
255
256
+
252
257
return std::move (ptr);
253
258
}
254
259
@@ -400,7 +405,7 @@ inline unique_ptr<GroupExprAST> Parser::parse_group(const TokenValue &token_valu
400
405
&& lexer_.get_token ().get_token_type () != TokenType::END_OF_FILE
401
406
&& lexer_.get_token ().get_token_type () != TokenType::UNDEFINED);
402
407
// after parsing the sub_query_ptr_vec, current token should be ")"!!!
403
- std::cout << " now tokn [] " << lexer_. get_token (). get_value () << " [] \n " ;
408
+
404
409
if (lexer_.get_token ().get_token_value () != TokenValue::GROUP_END)
405
410
{
406
411
ptr = nullptr ;
@@ -579,6 +584,13 @@ inline unique_ptr<AnchorExprAST> Parser::parse_anchor(const TokenValue &token_va
579
584
580
585
return std::move (ptr);
581
586
}
587
+
588
+ inline unique_ptr<EOFExprAST> Parser::parse_eof (const TokenValue &token_value)
589
+ {
590
+ // maybe we check the value in the future?
591
+ return make_unique<EOFExprAST>();
592
+ }
593
+
582
594
}
583
595
584
596
#endif // !SIMPLEREGEXLANGUAGE_PARSER_H_
0 commit comments