diff --git a/README.md b/README.md index c5c4275..7f377e6 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,19 @@ RustyLR will provide you a LR(1) and LALR(1) Deterministic Finite Automata (DFA) ``` [dependencies] -rusty_lr = "0.7.3" +rusty_lr = "0.8.0" ``` ## Features - pure Rust implementation - readable error messages, both for grammar building and parsing - compile-time DFA construction from CFGs ( with proc-macro ) - - customizable reducing action + - customizable reduce action - resolving conflicts of ambiguous grammar - tracing parser action with callback #### Why proc-macro, not external executable? - - Decent built-in lexer, with consideration of unicode. + - Decent built-in lexer, with consideration of unicode and comments. - Can generate *pretty* error messages, by just passing `Span` data. - With modern IDE, can see errors in real-time with specific location. @@ -55,39 +55,39 @@ lalr1! { %left plus; %left star; - // s{N} is slice of shifted terminal symbols captured by N'th symbol - // v{N} is value of N'th symbol ( if it has value ) + // data that each token holds can be accessed by its name // s is slice of shifted terminal symbols captured by current rule - // userdata canbe accessed by `data` ( &mut i32, for current situation ) - A(i32) : A plus A { - println!("{:?} {:?} {:?}", s0, s1, s2 ); - // ^ ^ ^ - // | | |- slice of 2nd 'A' - // | |- slice of 'plus' + // userdata can be accessed by `data` ( &mut i32, for this situation ) + A(i32) : A plus a2=A { + println!("{:?} {:?} {:?}", A.slice, *plus, a2.slice ); + // ^ ^ ^ + // | | |- slice of 2nd 'A' + // | |- &Token // |- slice of 1st 'A' println!( "{:?}", s ); *data += 1; - v0 + v2 // --> this will be new value of current 'A' - // ^ ^ - // | |- value of 2nd 'A' + A.value + a2.value // --> this will be new value of current 'A' + // ^ ^ + // | |- value of 2nd 'A' // |- value of 1st 'A' } - | M { v0 } + | M { M.value } ; - M(i32) : M star M { v0 * v2 } - | P { v0 } + M(i32) : M star m2=M { *M * *m2 } + 
| P { *P } ; P(i32) : num { - if let Token::Num(n) = v0 { *n } - else { return Err(format!("{:?}", s0)); } - // ^^^^^^^^^ reduce action returns Result<(), String> + if let Token::Num(n) = *num { *n } + else { return Err(format!("{:?}", num)); } + // ^^^^^^^^^^^^^^^^^^^^^^^^^^ + // reduce action returns Result<(), String> } - | lparen E rparen { v1 } + | lparen E rparen { *E } ; - E(i32) : A { v0 }; + E(i32) : A { *A }; } ``` @@ -134,9 +134,9 @@ fn main() { The result will be: ``` -[Num(3)] [Plus] [Num(4)] +[Num(3)] Plus [Num(4)] [Num(3), Plus, Num(4)] -[Num(1)] [Plus] [Num(2), Star, LParen, Num(3), Plus, Num(4), RParen] +[Num(1)] Plus [Num(2), Star, LParen, Num(3), Plus, Num(4), RParen] [Num(1), Plus, Num(2), Star, LParen, Num(3), Plus, Num(4), RParen] 15 userdata: 2 @@ -413,45 +413,73 @@ Define the type of userdata passed to `feed()` function. '%left' ';' '%right' ';' ``` -Set the shift/reduce precedence of terminal symbols. `` must be defined in `%token`. +Set the shift/reduce precedence for terminal symbols. `` must be defined in `%token`. #### Production rules ``` - ':' * - '|' * + ':' * + '|' * ... ';' ``` Define the production rules. -`` must be valid terminal or non-terminal symbols. ``` -(optional) + : ... (1) + | '=' ... (2) + ; +``` +For (1), `` must be valid terminal or non-terminal symbols. In this case, the data of the token will be mapped to the variable with the same name as ``. +For (2), the data of the token will be mapped to the variable on the left side of '='. +For more information about the token data and variable, refer to the [reduce action](#reduceaction) below. + +#### RuleType (optional) +``` : '(' ')' | ; ``` -`` is optional, this will define the type of value that this production rule holds. +Define the type of value that this production rule holds. +#### ReduceAction ``` -(optional) : '{' '}' | ; ``` -`` is optional, -this will define the action to be executed when the rule is matched and reduced. 
+Define the action to be executed when the rule is matched and reduced. If `` is defined, `` itself must be the value of `` (i.e. no semicolon at the end of the statement). -**Predefined variables** can be used in ``: - - `s0`, `s1`, `s2`, ... : slice of shifted terminal symbols `&[]` captured by N'th symbol +**predefined variables** can be used in ``: - `s` : slice of shifted terminal symbols `&[]` captured by current rule. - - `v0`, `v1`, `v2`, ... : value of N'th symbol. - If N'th symbol is Terminal, it will be `&`, - and if it is NonTerminal, it will be `mut `. - `data` : userdata passed to `feed()` function. +To access the data of each token, you can directly use the name of the token as a variable. +For non-terminal symbols, the type of data is [`rusty_lr::NonTermData<'a, TermType, RuleType>`](rusty_lr/src/nontermdata.rs). +For terminal symbols, the type of data is [`rusty_lr::TermData<'a, TermType>`](rusty_lr/src/termdata.rs). +If multiple variables are defined with the same name, the front-most one will be used. + +For example, the following code will print the value of each `A`, and the slice of each `A` and `plus` token in the production rule `E -> A plus A`. +```rust +%token plus ...; + +E : A plus a2=A + { + println!("Value of 1st A: {}", A.value); // A.value or *A + println!("Slice of 1st A: {:?}", A.slice); + println!("Value of 2nd A: {}", a2.value); // a2.value or *a2 + println!("Slice of 2nd A: {:?}", a2.slice); + + if let Token::Plus(plus) = *plus { + println!( "Plus token: {:?}", plus ); + } + } + ; + +A(i32): ... ; +``` + `Result<(), String>` can be returned from ``. Returned `Err` will be delivered to the caller of `feed()` function. diff --git a/example/calculator/src/parser.rs b/example/calculator/src/parser.rs index eeb469a..59ff827 100644 --- a/example/calculator/src/parser.rs +++ b/example/calculator/src/parser.rs @@ -70,37 +70,37 @@ lalr1! 
{ %left plus; %left star; - // s{N} is slice of shifted terminal symbols captured by N'th symbol - // v{N} is value of N'th symbol ( if it has value ) + // data that each token holds can be accessed by its name // s is slice of shifted terminal symbols captured by current rule - // userdata canbe accessed by `data` ( &mut i32, for current situation ) - A(i32) : A plus A { - println!("{:?} {:?} {:?}", s0, s1, s2 ); - // ^ ^ ^ - // | | |- slice of 2nd 'A' - // | |- slice of 'plus' + // userdata can be accessed by `data` ( &mut i32, for this situation ) + A(i32) : A plus a2=A { + println!("{:?} {:?} {:?}", A.slice, *plus, a2.slice ); + // ^ ^ ^ + // | | |- slice of 2nd 'A' + // | |- &Token // |- slice of 1st 'A' println!( "{:?}", s ); *data += 1; - v0 + v2 // --> this will be new value of current 'A' - // ^ ^ - // | |- value of 2nd 'A' + A.value + a2.value // --> this will be new value of current 'A' + // ^ ^ + // | |- value of 2nd 'A' // |- value of 1st 'A' } - | M { v0 } + | M { M.value } ; - M(i32) : M star M { v0 * v2 } - | P { v0 } + M(i32) : M star m2=M { *M * *m2 } + | P { *P } ; P(i32) : num { - if let Token::Num(n) = v0 { *n } - else { return Err(format!("{:?}", s0)); } - // ^^^^^^^^^ reduce action returns Result<(), String> + if let Token::Num(n) = *num { *n } + else { return Err(format!("{:?}", num)); } + // ^^^^^^^^^^^^^^^^^^^^^^^^^^ + // reduce action returns Result<(), String> } - | lparen E rparen { v1 } + | lparen E rparen { *E } ; - E(i32) : A { v0 }; + E(i32) : A { *A }; } diff --git a/rusty_lr/Cargo.toml b/rusty_lr/Cargo.toml index 26e1aed..8cc26aa 100644 --- a/rusty_lr/Cargo.toml +++ b/rusty_lr/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rusty_lr" -version = "0.7.3" +version = "0.8.0" edition = "2021" license = "MIT" description = "LR(1) and LALR(1) parser generator and code generation" @@ -11,7 +11,7 @@ categories = ["parsing"] [dependencies] rusty_lr_core = "0.7.2" -rusty_lr_derive = "0.7.3" +rusty_lr_derive = "0.8.0" # rusty_lr_core = { path = 
"../rusty_lr_core" } # rusty_lr_derive = { path = "../rusty_lr_derive" } diff --git a/rusty_lr/src/lib.rs b/rusty_lr/src/lib.rs index abba2ce..bd69588 100644 --- a/rusty_lr/src/lib.rs +++ b/rusty_lr/src/lib.rs @@ -10,7 +10,20 @@ //! - resolving conflicts of ambiguous grammar //! - tracing parser action with callback, also error handling //! - readable error messages, both for grammar building and parsing +//! +//! #### Why proc-macro, not external executable? +//! - Decent built-in lexer, with consideration of unicode and comments. +//! - Can generate *pretty* error messages, by just passing `Span` data. +//! - With modern IDE, can see errors in real-time with specific location. + +pub(crate) mod nontermdata; +pub(crate) mod termdata; // re-exports pub use rusty_lr_core::*; pub use rusty_lr_derive::*; + +/// type for NonTerminal data in reduce action +pub use nontermdata::NonTermData; +/// type for Terminal data in reduce action +pub use termdata::TermData; diff --git a/rusty_lr/src/nontermdata.rs b/rusty_lr/src/nontermdata.rs new file mode 100644 index 0000000..2b13072 --- /dev/null +++ b/rusty_lr/src/nontermdata.rs @@ -0,0 +1,35 @@ +use std::ops::Deref; +use std::ops::DerefMut; + +/// type for NonTerminal data in reduce action +#[derive(Debug, Clone)] +pub struct NonTermData<'a, Term, T> { + /// the slice of terms that this non-terminal data is reduced from + pub slice: &'a [Term], + /// the value of this non-terminal data + pub value: T, + /// the range of terms that this non-terminal data is reduced from + pub range: std::ops::Range, +} + +impl<'a, Term, T> NonTermData<'a, Term, T> { + pub fn new(slice: &'a [Term], value: T, range: std::ops::Range) -> Self { + Self { + slice, + value, + range, + } + } +} +impl<'a, Term, T> Deref for NonTermData<'_, Term, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.value + } +} +impl<'a, Term, T> DerefMut for NonTermData<'_, Term, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value 
+ } +} diff --git a/rusty_lr/src/termdata.rs b/rusty_lr/src/termdata.rs new file mode 100644 index 0000000..02d7839 --- /dev/null +++ b/rusty_lr/src/termdata.rs @@ -0,0 +1,28 @@ +use std::ops::Deref; +use std::ops::DerefMut; + +/// type for Terminal data in reduce action +#[derive(Debug, Clone)] +pub struct TermData<'a, Term> { + /// the terminal symbol + pub value: &'a Term, + /// the index of the terminal symbol + pub index: usize, +} +impl<'a, Term> TermData<'a, Term> { + pub fn new(value: &'a Term, index: usize) -> Self { + Self { value, index } + } +} +impl<'a, Term> Deref for TermData<'a, Term> { + type Target = &'a Term; + + fn deref(&self) -> &Self::Target { + &self.value + } +} +impl<'a, Term> DerefMut for TermData<'a, Term> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value + } +} diff --git a/rusty_lr_derive/Cargo.toml b/rusty_lr_derive/Cargo.toml index c01397e..ad78b40 100644 --- a/rusty_lr_derive/Cargo.toml +++ b/rusty_lr_derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rusty_lr_derive" -version = "0.7.3" +version = "0.8.0" edition = "2021" license = "MIT" description = "yacc-like proc-macro definitions for parser code generation" diff --git a/rusty_lr_derive/src/callback.rs b/rusty_lr_derive/src/callback.rs index 2939091..2d52c49 100644 --- a/rusty_lr_derive/src/callback.rs +++ b/rusty_lr_derive/src/callback.rs @@ -4,6 +4,7 @@ use super::rule::RuleLine; use super::rule::RuleLines; use super::term::TermType; use super::token::Token; +use super::token::TokenMapped; use proc_macro2::Group; use proc_macro2::TokenStream; @@ -17,8 +18,8 @@ pub struct Callback { pub ruletype_stack: Vec>, pub rulelines_stack: Vec, pub ruleline_stack: Vec, - pub tokens_stack: Vec>, - pub token_stack: Vec, + pub tokens_stack: Vec>, + pub token_stack: Vec, pub action_stack: Vec>, pub rustcode_stack: Vec, } @@ -60,12 +61,20 @@ impl rusty_lr_core::Callback for Callback { // Ident let ident = match self.termstack.pop() { Some(TermType::Ident(ident)) => 
ident.unwrap(), - _ => unreachable!("Rule0 - Ident"), + _ => unreachable!("Rule{} - Ident", rule), }; - let ruletype = self.ruletype_stack.pop().expect("Rule0 - RuleType"); + let ruletype = if let Some(rt) = self.ruletype_stack.pop() { + rt + } else { + unreachable!("Rule{} - RuleType", rule); + }; - let rulelines = self.rulelines_stack.pop().expect("Rule0 - RuleLines"); + let rulelines = if let Some(rl) = self.rulelines_stack.pop() { + rl + } else { + unreachable!("Rule{} - RuleLines", rule); + }; let span = ident.span(); let name = ident.to_string(); @@ -96,11 +105,11 @@ impl rusty_lr_core::Callback for Callback { return Err(ParseError::InvalidRuletypeDelimiter(ruletype.span())); } } else { - unreachable!("Rule1 - Group"); + unreachable!("Rule{} - Group", rule); } self.ruletype_stack.push(ruletype); } else { - unreachable!("Rule1"); + unreachable!("Rule{}", rule); } } @@ -121,10 +130,10 @@ impl rusty_lr_core::Callback for Callback { rulelines.rule_lines.push(ruleline); self.rulelines_stack.push(rulelines); } else { - unreachable!("Rule3 - 2"); + unreachable!("Rule{} - 2", rule); } } else { - unreachable!("Rule3 - 1"); + unreachable!("Rule{} - 1", rule); } } @@ -137,7 +146,7 @@ impl rusty_lr_core::Callback for Callback { }; self.rulelines_stack.push(rulelines); } else { - unreachable!("Rule4"); + unreachable!("Rule{}", rule); } } @@ -147,13 +156,13 @@ impl rusty_lr_core::Callback for Callback { // Action let action = match self.action_stack.pop() { Some(action) => action, - None => unreachable!("Rule5 - Action"), + None => unreachable!("Rule{} - Action", rule), }; // RuleDef let mut tokens = match self.tokens_stack.pop() { Some(tokens) => tokens, - _ => unreachable!("Rule5 - RuleDef"), + _ => unreachable!("Rule{} - RuleDef", rule), }; tokens.reverse(); @@ -178,12 +187,12 @@ impl rusty_lr_core::Callback for Callback { 9 => { let token = match self.token_stack.pop() { Some(token) => token, - _ => unreachable!("Rule9 - Token"), + _ => unreachable!("Rule{} - Token", 
rule), }; let mut tokens = match self.tokens_stack.pop() { Some(tokens) => tokens, - _ => unreachable!("Rule9 - Tokens"), + _ => unreachable!("Rule{} - Tokens", rule), }; tokens.push(token); self.tokens_stack.push(tokens); @@ -193,7 +202,7 @@ impl rusty_lr_core::Callback for Callback { 10 => { let token = match self.token_stack.pop() { Some(token) => token, - _ => unreachable!("Rule9 - Token"), + _ => unreachable!("Rule{} - Token", rule), }; self.tokens_stack.push(vec![token]); } @@ -201,15 +210,39 @@ impl rusty_lr_core::Callback for Callback { // Token: Ident 11 => match self.termstack.pop() { Some(TermType::Ident(ident)) => { - self.token_stack.push(Token::NonTerm(ident.unwrap())); + self.token_stack.push(TokenMapped { + token: Token::NonTerm(ident.unwrap()), + mapped: None, + }); } _ => { - unreachable!("Rule11 - Ident"); + unreachable!("Rule{} - Ident", rule); } }, + // Token: Ident '=' Ident + 12 => { + let ident = if let Some(TermType::Ident(ident)) = self.termstack.pop() { + ident.unwrap() + } else { + unreachable!("Rule{} - Ident1", rule); + }; + // '=' + self.termstack.pop(); + + let mapped = if let Some(TermType::Ident(ident)) = self.termstack.pop() { + ident.unwrap() + } else { + unreachable!("Rule{} - Ident2", rule); + }; + + self.token_stack.push(TokenMapped { + token: Token::NonTerm(ident), + mapped: Some(mapped), + }); + } // Action: Group - 12 => match self.termstack.pop() { + 13 => match self.termstack.pop() { Some(TermType::Group(group)) => { if let Some(action) = &group { // check if action is enclosed with '{' and '}' @@ -217,22 +250,22 @@ impl rusty_lr_core::Callback for Callback { return Err(ParseError::InvliadReduceActionDelimiter(action.span())); } } else { - unreachable!("Rule12 - Group"); + unreachable!("Rule{} - Group", rule); } self.action_stack.push(group); } _ => { - unreachable!("Rule12 - Group"); + unreachable!("Rule{} - Group", rule); } }, // Action: - 13 => { + 14 => { self.action_stack.push(None); } // TokenDef: '%token' Ident 
RustCode ';' - 14 => { + 15 => { // ';' self.termstack.pop(); @@ -242,12 +275,12 @@ impl rusty_lr_core::Callback for Callback { // Ident let ident = match self.termstack.pop() { Some(TermType::Ident(ident)) => ident.unwrap(), - _ => unreachable!("Rule14 - Ident"), + _ => unreachable!("Rule{} - Ident", rule), }; let rustcode = match self.rustcode_stack.pop() { Some(rustcode) => rustcode, - _ => unreachable!("Rule14 - RustCode"), + _ => unreachable!("Rule{} - RustCode", rule), }; // '%token' @@ -271,7 +304,6 @@ impl rusty_lr_core::Callback for Callback { } // AnyTokenNoSemi: - 15 => {} 16 => {} 17 => {} 18 => {} @@ -285,45 +317,47 @@ impl rusty_lr_core::Callback for Callback { 26 => {} 27 => {} 28 => {} + 29 => {} + 30 => {} // AnyTokens: AnyTokenNoSemi AnyTokens - 29 => { + 31 => { // AnyTokenNoSemi let token = match self.termstack.pop() { Some(token) => token.stream(), - _ => unreachable!("Rule29 - AnyTokenNoSemi"), + _ => unreachable!("Rule{} - AnyTokenNoSemi", rule), }; // AnyTokens let rustcode = match self.rustcode_stack.pop() { Some(tokens) => tokens, - _ => unreachable!("Rule29 - AnyTokens"), + _ => unreachable!("Rule{} - AnyTokens", rule), }; self.rustcode_stack.push(quote! 
{ #token #rustcode }); } // AnyTokens: AnyTokenNoSemi - 30 => { + 32 => { // AnyTokenNoSemi let token = match self.termstack.pop() { Some(token) => token.stream(), - _ => unreachable!("Rule29 - AnyTokenNoSemi"), + _ => unreachable!("Rule{} - AnyTokenNoSemi", rule), }; self.rustcode_stack.push(token); } // RustCode: AnyTokens - 31 => {} + 33 => {} // StartDef: '%start' Ident ';' - 32 => { + 34 => { // ';' self.termstack.pop(); // Ident let ident = match self.termstack.pop() { Some(TermType::Ident(ident)) => ident.unwrap(), - _ => unreachable!("Rule32 - Ident"), + _ => unreachable!("Rule{} - Ident", rule), }; // '%start' @@ -342,14 +376,14 @@ impl rusty_lr_core::Callback for Callback { } // EofDef: '%eof' RustCode ';' - 33 => { + 35 => { // ';' self.termstack.pop(); // RustCode let rustcode = match self.rustcode_stack.pop() { Some(rustcode) => rustcode, - _ => unreachable!("Rule33 - RustCode"), + _ => unreachable!("Rule{} - RustCode", rule), }; // '%eof' @@ -367,14 +401,14 @@ impl rusty_lr_core::Callback for Callback { } // TokenTypeDef: '%tokentype' RustCode ';' - 34 => { + 36 => { // ';' self.termstack.pop(); // RustCode let rustcode = match self.rustcode_stack.pop() { Some(rustcode) => rustcode, - _ => unreachable!("Rule34 - RustCode"), + _ => unreachable!("Rule{} - RustCode", rule), }; // '%tokentype' @@ -392,14 +426,14 @@ impl rusty_lr_core::Callback for Callback { } // UserDataDef: '%userdata' RustCode ';' - 35 => { + 37 => { // ';' self.termstack.pop(); // RustCode let rustcode = match self.rustcode_stack.pop() { Some(rustcode) => rustcode, - _ => unreachable!("Rule35 - RustCode"), + _ => unreachable!("Rule{} - RustCode", rule), }; // '%userdata' @@ -417,14 +451,14 @@ impl rusty_lr_core::Callback for Callback { } // ReduceDef: '%left' Ident ';' - 36 => { + 38 => { // ';' self.termstack.pop(); // Ident let ident = match self.termstack.pop() { Some(TermType::Ident(ident)) => ident.unwrap(), - _ => unreachable!("Rule36 - Ident"), + _ => unreachable!("Rule{} - 
Ident", rule), }; self.grammar .reduce_types @@ -435,14 +469,14 @@ impl rusty_lr_core::Callback for Callback { } // ReduceDef: '%right' Ident ';' - 37 => { + 39 => { // ';' self.termstack.pop(); // Ident let ident = match self.termstack.pop() { Some(TermType::Ident(ident)) => ident.unwrap(), - _ => unreachable!("Rule37 - Ident"), + _ => unreachable!("Rule{} - Ident", rule), }; self.grammar .reduce_types @@ -453,46 +487,46 @@ impl rusty_lr_core::Callback for Callback { } // Grammar: Rule Grammar - 38 => {} + 40 => {} // Grammar: Rule - 39 => {} + 41 => {} // Grammar: TokenDef Grammar - 40 => {} + 42 => {} // Grammar: TokenDef - 41 => {} + 43 => {} // Grammar: StartDef Grammar - 42 => {} + 44 => {} // Grammar: StartDef - 43 => {} + 45 => {} // Grammar: EofDef Grammar - 44 => {} + 46 => {} // Grammar: EofDef - 45 => {} + 47 => {} // Grammar: TokenTypeDef Grammar - 46 => {} + 48 => {} // Grammar: TokenTypeDef - 47 => {} + 49 => {} // Grammar: UserDataDef Grammar - 48 => {} + 50 => {} // Grammar: UserDataDef - 49 => {} + 51 => {} // Grammar: ReduceDef Grammar - 50 => {} + 52 => {} // Grammar: ReduceDef - 51 => {} + 53 => {} _ => unreachable!("Invalid RuleID: {}", rule), } diff --git a/rusty_lr_derive/src/emit.rs b/rusty_lr_derive/src/emit.rs index 3914bd6..91a9c72 100644 --- a/rusty_lr_derive/src/emit.rs +++ b/rusty_lr_derive/src/emit.rs @@ -30,7 +30,7 @@ impl Grammar { for rule in rules.rule_lines.iter() { let mut tokens = Vec::with_capacity(rule.tokens.len()); for token in rule.tokens.iter() { - match token { + match &token.token { Token::Term(term) => { tokens.push(rlr::Token::Term(term.to_string())); } @@ -225,26 +225,39 @@ impl Grammar { let s = &self.#terms_stack_name[rusty_lr_macro_generated_new_begin..rusty_lr_macro_generated_new_end]; }); } - for (idx, token) in rule.tokens.iter().enumerate().rev() { - let var_name = format_ident!("v{}", idx); - let slice_name = format_ident!("s{}", idx); - match token { - Token::Term(_) => { + for token in 
rule.tokens.iter().rev() { + match &token.token { + Token::Term(term) => { + let mapped = token.mapped.as_ref().unwrap_or(term); token_pop_stream.extend(quote! { - let #slice_name = &self.#terms_stack_name[self.#range_stack_name.pop().unwrap()]; - let #var_name = &#slice_name[0]; + let index = self.#range_stack_name.pop().unwrap().start; + let #mapped = ::rusty_lr::TermData::new( + &self.#terms_stack_name[index], + index + ); }); } Token::NonTerm(nonterm) => { - token_pop_stream.extend(quote! { - let #slice_name = &self.#terms_stack_name[self.#range_stack_name.pop().unwrap()]; - }); - + let mapped = token.mapped.as_ref().unwrap_or(nonterm); // if typename is defined for this nonterm, pop from stack and assign to v{i} if self.rules.get(&nonterm.to_string()).unwrap().1.is_some() { let stack_name = Self::stack_name(nonterm); token_pop_stream.extend(quote! { - let mut #var_name = self.#stack_name.pop().unwrap(); + let range = self.#range_stack_name.pop().unwrap(); + let #mapped = ::rusty_lr::NonTermData::new( + &self.#terms_stack_name[range.clone()], + self.#stack_name.pop().unwrap(), + range, + ); + }); + } else { + token_pop_stream.extend(quote! { + let range = self.#range_stack_name.pop().unwrap(); + let #mapped = ::rusty_lr::NonTermData::new( + &self.#terms_stack_name[range.clone()], + (), + range, + ); }); } } @@ -325,14 +338,14 @@ impl Grammar { }; Ok(quote! 
{ - #[allow(unused_braces, unused_parens)] + #[allow(unused_braces, unused_parens, unused_variables, non_snake_case)] pub struct #stack_struct_name { pub #terms_stack_name: Vec<#term_typename>, pub #range_stack_name: Vec>, pub state_stack: Vec, #stack_def_streams } - #[allow(unused_braces, unused_parens)] + #[allow(unused_braces, unused_parens, unused_variables, non_snake_case)] impl #stack_struct_name { pub fn new() -> Self { Self { @@ -360,12 +373,12 @@ impl Grammar { self.#range_stack_name.push(l..l+1); } } - #[allow(unused_braces, unused_parens)] + #[allow(unused_braces, unused_parens, unused_variables, non_snake_case)] pub struct #struct_name { pub rules: Vec<::rusty_lr::ProductionRule<#term_typename, &'static str>>, pub states: Vec<::rusty_lr::State<#term_typename, &'static str>>, } - #[allow(unused_braces, unused_parens)] + #[allow(unused_braces, unused_parens, unused_variables, non_snake_case)] impl #struct_name { pub fn new() -> Self { #write_parser diff --git a/rusty_lr_derive/src/grammar.rs b/rusty_lr_derive/src/grammar.rs index ec8108e..7bc0078 100644 --- a/rusty_lr_derive/src/grammar.rs +++ b/rusty_lr_derive/src/grammar.rs @@ -48,7 +48,6 @@ impl Grammar { } pub fn stack_name(name: &Ident) -> Ident { let span = name.span(); - let name = name.to_string().to_lowercase(); let mut ident = format_ident!("rustylr_macro_generated_{}_stack", name); ident.set_span(span); ident @@ -78,6 +77,7 @@ impl Grammar { // ; // // Token: Ident ; + // Token: Ident '=' Ident ; // // Action: Group // | @@ -161,6 +161,14 @@ impl Grammar { grammar.add_rule("TokensOne", vec![Token::NonTerm("Token")]); grammar.add_rule("Token", vec![Token::Term(TermType::Ident(None))]); + grammar.add_rule( + "Token", + vec![ + Token::Term(TermType::Ident(None)), + Token::Term(TermType::Equal(None)), + Token::Term(TermType::Ident(None)), + ], + ); grammar.add_rule("Action", vec![Token::Term(TermType::Group(None))]); grammar.add_rule("Action", vec![]); @@ -194,6 +202,7 @@ impl Grammar { ); 
grammar.add_rule("AnyTokenNoSemi", vec![Token::Term(TermType::Group(None))]); grammar.add_rule("AnyTokenNoSemi", vec![Token::Term(TermType::Literal(None))]); + grammar.add_rule("AnyTokenNoSemi", vec![Token::Term(TermType::Equal(None))]); grammar.add_rule( "AnyTokenNoSemi", vec![Token::Term(TermType::OtherPunct(None))], @@ -364,10 +373,10 @@ impl Grammar { for (_, (_name, _ruletype, rules)) in grammar.rules.iter_mut() { for rule in rules.rule_lines.iter_mut() { for token in rule.tokens.iter_mut() { - if let Token::NonTerm(ident) = token.clone() { + if let Token::NonTerm(ident) = token.token.clone() { if grammar.terminals.contains_key(&ident.to_string()) { // set the token to Term - *token = Token::Term(ident); + token.token = Token::Term(ident); } } } @@ -383,7 +392,7 @@ impl Grammar { for (_, (_name, _ruletype, rules)) in grammar.rules.iter() { for rule in rules.rule_lines.iter() { for token in rule.tokens.iter() { - if let Token::NonTerm(ident) = token { + if let Token::NonTerm(ident) = &token.token { if !grammar.rules.contains_key(&ident.to_string()) { return Err(ParseError::NonTerminalNotDefined(ident.clone())); } diff --git a/rusty_lr_derive/src/rule.rs b/rusty_lr_derive/src/rule.rs index ac121a7..0a70a6f 100644 --- a/rusty_lr_derive/src/rule.rs +++ b/rusty_lr_derive/src/rule.rs @@ -1,9 +1,9 @@ -use super::token::Token; +use super::token::TokenMapped; use proc_macro2::TokenStream; #[derive(Debug)] pub struct RuleLine { - pub tokens: Vec, + pub tokens: Vec, pub reduce_action: Option, } diff --git a/rusty_lr_derive/src/term.rs b/rusty_lr_derive/src/term.rs index ba83a93..c4af935 100644 --- a/rusty_lr_derive/src/term.rs +++ b/rusty_lr_derive/src/term.rs @@ -19,6 +19,7 @@ pub(crate) enum TermType { UserData(Option<(proc_macro2::Punct, proc_macro2::Ident)>), // %userdata Group(Option), Literal(Option), + Equal(Option), OtherPunct(Option), Eof, } @@ -39,8 +40,9 @@ impl TermType { TermType::UserData(_) => 12, TermType::Group(_) => 13, TermType::Literal(_) => 14, - 
TermType::OtherPunct(_) => 15, - TermType::Eof => 16, + TermType::Equal(_) => 15, + TermType::OtherPunct(_) => 16, + TermType::Eof => 17, } } pub fn stream(self) -> TokenStream { @@ -80,6 +82,7 @@ impl TermType { } TermType::Group(group) => group.unwrap().to_token_stream(), TermType::Literal(lit) => lit.unwrap().to_token_stream(), + TermType::Equal(punct) => punct.unwrap().to_token_stream(), TermType::OtherPunct(punct) => punct.unwrap().to_token_stream(), TermType::Eof => unreachable!("Eof should not be converted to TokenStream"), } @@ -101,6 +104,7 @@ impl TermType { TermType::UserData(punct_ident) => punct_ident.as_ref().map(|(p, i)| i.span()), TermType::Group(group) => group.as_ref().map(|g| g.span()), TermType::Literal(lit) => lit.as_ref().map(|l| l.span()), + TermType::Equal(punct) => punct.as_ref().map(|p| p.span()), TermType::OtherPunct(punct) => punct.as_ref().map(|p| p.span()), TermType::Eof => None, } @@ -123,6 +127,7 @@ impl std::fmt::Display for TermType { TermType::UserData(_) => write!(f, "%userdata"), TermType::Group(_) => write!(f, "TokenTree::Group"), TermType::Literal(_) => write!(f, "TokenTree::Literal"), + TermType::Equal(_) => write!(f, "="), TermType::OtherPunct(_) => write!(f, "TokenTree::Punct"), TermType::Eof => write!(f, "$"), } diff --git a/rusty_lr_derive/src/token.rs b/rusty_lr_derive/src/token.rs index 9fb7234..9a4bd00 100644 --- a/rusty_lr_derive/src/token.rs +++ b/rusty_lr_derive/src/token.rs @@ -5,3 +5,10 @@ pub enum Token { NonTerm(Ident), Term(Ident), } + +/// for syntax '=' +#[derive(Debug, Clone)] +pub struct TokenMapped { + pub token: Token, + pub mapped: Option, +} diff --git a/rusty_lr_derive/src/tokenizer.rs b/rusty_lr_derive/src/tokenizer.rs index 5d437b7..72655a2 100644 --- a/rusty_lr_derive/src/tokenizer.rs +++ b/rusty_lr_derive/src/tokenizer.rs @@ -54,6 +54,7 @@ impl Tokenizer { } ret } + '=' => Some(TermType::Equal(Some(punct))), _ => Some(TermType::OtherPunct(Some(punct))), },