Skip to content

Commit

Permalink
refactoring for single-char tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
PhilippImhof committed Mar 6, 2023
1 parent 76f6bf4 commit 0902aa3
Showing 1 changed file with 15 additions and 36 deletions.
51 changes: 15 additions & 36 deletions classes/parser/lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,6 @@ private function read_next_token() {
if ($currentchar === InputStream::EOF) {
return self::EOF;
}
// If it is a π character, we will allow it.
if ($currentchar === 'π') {
return $this->read_single_char_token(Token::CONSTANT);
}
// If we have a " or ' character, this is the start of a string.
if ($currentchar === '"' || $currentchar === "'") {
return $this->read_string();
Expand All @@ -165,38 +161,21 @@ private function read_next_token() {
if (preg_match('/[-+*\/%=&|~^<>!?:]/', $currentchar)) {
return $this->read_operator();
}
// Brackets, braces and parentheses are tokens on their own, they are always returned as an individual token.
// We will have a separate category for opening and closing brackets.
if ($currentchar === '(') {
return $this->read_single_char_token(Token::OPENING_PAREN);
}
if ($currentchar === ')') {
return $this->read_single_char_token(Token::CLOSING_PAREN);
}
if ($currentchar === '[') {
return $this->read_single_char_token(Token::OPENING_BRACKET);
}
if ($currentchar === ']') {
return $this->read_single_char_token(Token::CLOSING_BRACKET);
}
if ($currentchar === '{') {
return $this->read_single_char_token(Token::OPENING_BRACE);
}
if ($currentchar === '}') {
return $this->read_single_char_token(Token::CLOSING_BRACE);
}
// The comma is used as an argument separator (or similar) token.
if ($currentchar === ',') {
return $this->read_single_char_token(Token::ARG_SEPARATOR);
}
// The backslash can be used to access a function in case the user has defined
// a variable with the same name, e.g. variable sin and function \sin.
if ($currentchar === '\\') {
return $this->read_single_char_token(Token::PREFIX);
}
// Finally, it might be a semicolon a.k.a end-of-statement marker.
if ($currentchar === ';') {
return $this->read_single_char_token(Token::END_OF_STATEMENT);
// Single-character tokens: brackets, parentheses and braces (with separate types for
// opening and closing), the comma as argument separator, the backslash prefix (used to
// access a function when a variable of the same name exists, e.g. \sin), the semicolon
// as end-of-statement marker and the constant π.
// The map is the single source of truth: membership is tested against its keys rather
// than against a separate regex, so the two cannot get out of sync. This also keeps the
// multibyte character π working, which a byte-oriented character class would not match.
$singlechartypes = [
    '[' => Token::OPENING_BRACKET,
    '(' => Token::OPENING_PAREN,
    '{' => Token::OPENING_BRACE,
    ']' => Token::CLOSING_BRACKET,
    ')' => Token::CLOSING_PAREN,
    '}' => Token::CLOSING_BRACE,
    ',' => Token::ARG_SEPARATOR,
    '\\' => Token::PREFIX,
    ';' => Token::END_OF_STATEMENT,
    'π' => Token::CONSTANT,
];
if (array_key_exists($currentchar, $singlechartypes)) {
    return $this->read_single_char_token($singlechartypes[$currentchar]);
}
// If we are still here, that's not good at all. We need to read the char (it is only peeked so far)
// in order for the inputstream to be at the right position.
Expand Down

0 comments on commit 0902aa3

Please sign in to comment.