Skip to content

Commit

Permalink
add parenthesis group pattern syntax
Browse files Browse the repository at this point in the history
    - fix(parser) ParenGroup fallbacks to splitted token
    - add(parser) Group Pattern, PatternArg
    - remove(parser) map_to() from Pattern, use typename() instead
    - bump version to 2.4.0
  • Loading branch information
ehwan committed Aug 21, 2024
1 parent fd26b4d commit 03ea7a7
Show file tree
Hide file tree
Showing 12 changed files with 853 additions and 453 deletions.
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ Each `Pattern` follows the syntax:
- `P*` : Zero or more repetition of `P`.
- `P+` : One or more repetition of `P`.
- `P?` : Zero or one repetition of `P`.
- `(P1 P2 P3)` : Grouping of patterns.
- `P / term`, `P / [term1 term_start-term_last]`, `P / [^term1 term_start-term_last]` :
Lookaheads; `P` followed by one of the terminals in the given set. Lookaheads are not consumed.

Expand Down Expand Up @@ -412,7 +413,7 @@ A(i32): ... ;

To access the data of each token, you can directly use the name of the token as a variable.
- For non-terminal symbols, the type of variable is `RuleType`.
- For terminal symbols, the type of variable is `%tokentype`.
- For terminal symbols, the type of variable is [`%tokentype`](#token-type-must-defined).
- If multiple variables are defined with the same name, the front-most variable will be used.
- You can remap the variable name by using `=` operator.

Expand Down Expand Up @@ -445,6 +446,30 @@ E: digit=[zero-nine] {
};
```

For group `(P1 P2 P3)`:
- If none of the patterns holds a value, the group itself will not hold any value.
- If only one of the patterns holds a value, the group will hold the value of that pattern, and the variable name will be the same as that pattern's.
(i.e. if `P1` holds a value and the others don't, then `(P1 P2 P3)` will hold the value of `P1`, which can be accessed via the name `P1`)
- If multiple patterns hold values, the group will hold a `Tuple` of those values. There is no default variable name for the group; you must define the variable name explicitly with the `=` operator.

```rust
NoRuleType: ... ;

I(i32): ... ;

// I will be chosen
A: (NoRuleType I NoRuleType) {
println!( "Value of I: {:?}", I ); // can access by 'I'
I
};

// ( i32, i32 )
B: i2=( I NoRuleType I ) {
println!( "Value of I: {:?}", i2 ); // must explicitly define the variable name
};

```

---

### Exclamation mark `!`
Expand Down
6 changes: 3 additions & 3 deletions rusty_lr/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr"
version = "2.3.0"
version = "2.4.0"
edition = "2021"
license = "MIT"
description = "yacc-like, LR(1) and LALR(1) parser generator with custom reduce action"
Expand All @@ -11,8 +11,8 @@ categories = ["parsing", "compilers", "parser-implementations"]

[dependencies]
rusty_lr_core = "2.4"
rusty_lr_derive = "1.10"
rusty_lr_buildscript = { version = "0.5", optional = true }
rusty_lr_derive = "1.11"
rusty_lr_buildscript = { version = "0.6", optional = true }
# rusty_lr_core = { path = "../rusty_lr_core" }
# rusty_lr_derive = { path = "../rusty_lr_derive" }
# rusty_lr_buildscript = { path = "../rusty_lr_buildscript", optional = true }
Expand Down
4 changes: 2 additions & 2 deletions rusty_lr_buildscript/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr_buildscript"
version = "0.5.0"
version = "0.6.0"
edition = "2021"
license = "MIT"
description = "buildscipt tools for rusty_lr"
Expand All @@ -14,7 +14,7 @@ categories = ["parsing"]
proc-macro2 = { version = "1.0.86", features = ["span-locations"] }
quote = "1.0"
# rusty_lr_parser = { path = "../rusty_lr_parser" }
rusty_lr_parser = "3.8"
rusty_lr_parser = "3.9"
# rusty_lr_core = { path = "../rusty_lr_core", features = ["fxhash", "builder"] }
rusty_lr_core = { version = "2.4", features = ["fxhash", "builder"] }
codespan-reporting = "0.11"
4 changes: 2 additions & 2 deletions rusty_lr_derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr_derive"
version = "1.10.0"
version = "1.11.0"
edition = "2021"
license = "MIT"
description = "proc-macro definitions for rusty_lr"
Expand All @@ -15,4 +15,4 @@ proc-macro = true
[dependencies]
proc-macro2 = "1.0.86"
# rusty_lr_parser = { path = "../rusty_lr_parser" }
rusty_lr_parser = "3.8"
rusty_lr_parser = "3.9"
4 changes: 2 additions & 2 deletions rusty_lr_executable/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rustylr"
version = "0.11.0"
version = "0.12.0"
edition = "2021"
license = "MIT"
description = "executable for rusty_lr"
Expand All @@ -11,5 +11,5 @@ categories = ["parsing"]

[dependencies]
clap = { version = "4.5.7", features = ["derive"] }
rusty_lr_buildscript = "0.5"
rusty_lr_buildscript = "0.6"
# rusty_lr_buildscript = { path = "../rusty_lr_buildscript" }
2 changes: 1 addition & 1 deletion rusty_lr_parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr_parser"
version = "3.8.0"
version = "3.9.0"
edition = "2021"
license = "MIT"
description = "macro line parser for rusty_lr"
Expand Down
2 changes: 1 addition & 1 deletion rusty_lr_parser/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ impl Grammar {
let pattern = pattern.into_pattern(&grammar, false)?;
let token_rule = pattern.get_rule(&mut grammar, (begin_span, end_span))?;
let mapto = match pattern.typename(&grammar) {
Some(_) => mapto.or_else(|| pattern.map_to()),
Some((_, mapto_)) => mapto.or(Some(mapto_)),
None => None,
};

Expand Down
19 changes: 19 additions & 0 deletions rusty_lr_parser/src/parser/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ pub enum PatternArgs {
/// lookaheads will not be consumed.
/// span of the rightmost of this pattern
Lookaheads(Box<PatternArgs>, TerminalSetOrIdent),

/// ( Pattern+ )
/// span of '(' and ')'
Group(Vec<PatternArgs>, Span, Span),
}

impl PatternArgs {
Expand Down Expand Up @@ -107,6 +111,20 @@ impl PatternArgs {
);
Ok(pattern)
}
PatternArgs::Group(group, _, _) => {
if group.len() == 1 {
return group
.into_iter()
.next()
.unwrap()
.into_pattern(grammar, put_exclamation);
}
let mut patterns = Vec::with_capacity(group.len());
for pattern in group.into_iter() {
patterns.push(pattern.into_pattern(grammar, put_exclamation)?);
}
Ok(Pattern::Group(patterns))
}
}
}
pub fn span_pair(&self) -> (Span, Span) {
Expand All @@ -125,6 +143,7 @@ impl PatternArgs {
PatternArgs::Lookaheads(base, terminal_set) => {
(base.span_pair().0, terminal_set.span_pair().1)
}
PatternArgs::Group(_, open, close) => (*open, *close),
}
}
}
Expand Down
33 changes: 14 additions & 19 deletions rusty_lr_parser/src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,24 +310,19 @@ pub fn feed_recursive(
},
TokenTree::Group(group) => match group.delimiter() {
Delimiter::Parenthesis => {
// for now, splitted for brace is not in syntax, so ignore it
parser.feed(
context,
Lexed::ParenGroup(Some(group.clone())),
grammar_args,
)?;

// feed the compound token
// if parser
// .feed(context, Lexed::ParenGroup(Some(group.clone())))
// .is_err()
// {
// // compound token failed
// // feed the splitted tokens
// parser.feed(context, Lexed::LParen(group.span_open()))?;
// feed_recursive(group.stream(), parser, context)?;
// parser.feed(context, Lexed::RParen(group.span_close()))?;
// }
if let Err(GrammarParseError::InvalidTerminal(err)) =
parser.feed(context, Lexed::ParenGroup(Some(group)), grammar_args)
{
let group = if let Lexed::ParenGroup(group) = err.term {
group.unwrap()
} else {
unreachable!();
};
// feed the splitted tokens
parser.feed(context, Lexed::LParen(group.span_open()), grammar_args)?;
feed_recursive(group.stream(), parser, context, grammar_args)?;
parser.feed(context, Lexed::RParen(group.span_close()), grammar_args)?;
}
}
Delimiter::Brace => {
// for now, splitted for brace is not in syntax, so ignore it
Expand Down Expand Up @@ -365,7 +360,7 @@ pub fn feed_recursive(
}
_ => {
// for now, compound for nonegroup is not in syntax, so ignore it
parser.feed(context, Lexed::NoneGroup(Some(group.clone())), grammar_args)?;
parser.feed(context, Lexed::NoneGroup(Some(group)), grammar_args)?;

// feed the compound token
// if parser
Expand Down
13 changes: 13 additions & 0 deletions rusty_lr_parser/src/parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,19 @@ Pattern(PatternArgs): ident {
| Pattern slash TerminalSetOrIdent {
PatternArgs::Lookaheads( Box::new(Pattern), TerminalSetOrIdent )
}
| lparen Pattern+ rparen {
let open = if let Lexed::LParen(lparen) = lparen {
lparen
} else {
unreachable!( "Pattern-Group-Open" );
};
let close = if let Lexed::RParen(rparen) = rparen {
rparen
} else {
unreachable!( "Pattern-Group-Close" );
};
PatternArgs::Group(Pattern, open, close)
}
;


Expand Down
Loading

0 comments on commit 03ea7a7

Please sign in to comment.