-
Notifications
You must be signed in to change notification settings - Fork 80
Pratt parser #798
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pratt parser #798
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,375 @@ | ||
| //! Expression parsing support | ||
| //! | ||
| //! When writing an expression parser it is necessary to first define its operands and its | ||
| //! operators, together with their precedence and associativity. | ||
|
|
||
| use core::marker::PhantomData; | ||
|
|
||
| use crate::{ | ||
| combinator::{opt, trace}, | ||
| error::ParserError, | ||
| stream::{Stream, StreamIsPartial}, | ||
| Parser, Result, | ||
| }; | ||
|
|
||
| use super::{empty, fail}; | ||
|
|
||
| /// Parses an expression based on operator precedence. | ||
| /// | ||
| /// `parse_operand` parses a single operand. The returned [`Expression`] starts at precedence | ||
| /// level `0` with its prefix, postfix and infix parsers all set to [`fail`]; supply real | ||
| /// operator parsers with [`Expression::prefix`], [`Expression::postfix`] and | ||
| /// [`Expression::infix`]. | ||
| #[doc(alias = "pratt")] | ||
| #[doc(alias = "separated")] | ||
| #[doc(alias = "shunting_yard")] | ||
| #[doc(alias = "precedence_climbing")] | ||
| #[inline(always)] | ||
| pub fn expression<I, ParseOperand, O, E>( | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shall we |
||
| parse_operand: ParseOperand, | ||
| ) -> Expression< | ||
| I, | ||
| O, | ||
| ParseOperand, | ||
| impl Parser<I, Prefix<I, O, E>, E>, | ||
| impl Parser<I, Postfix<I, O, E>, E>, | ||
| impl Parser<I, Infix<I, O, E>, E>, | ||
| E, | ||
| > | ||
| where | ||
| I: Stream + StreamIsPartial, | ||
| ParseOperand: Parser<I, O, E>, | ||
| E: ParserError<I>, | ||
| { | ||
| Expression { | ||
| precedence_level: 0, | ||
| parse_operand, | ||
| // No operators are configured yet: every operator slot starts as `fail`. | ||
| parse_prefix: fail, | ||
| parse_postfix: fail, | ||
| parse_infix: fail, | ||
| i: Default::default(), | ||
| o: Default::default(), | ||
| e: Default::default(), | ||
| } | ||
| } | ||
|
|
||
| /// Concrete type for the expression parser | ||
| /// | ||
| /// Created by [`expression`]; it stores the operand parser, one parser per operator kind | ||
| /// (prefix, postfix, infix) and the precedence level the parse starts from. | ||
| pub struct Expression<I, O, ParseOperand, Pre, Post, Pix, E> | ||
| where | ||
| I: Stream + StreamIsPartial, | ||
| ParseOperand: Parser<I, O, E>, | ||
| E: ParserError<I>, | ||
| { | ||
| // Minimum operator power handed to the parsing loop when this parser runs. | ||
| precedence_level: i64, | ||
| parse_operand: ParseOperand, | ||
| parse_prefix: Pre, | ||
| parse_postfix: Post, | ||
| parse_infix: Pix, | ||
| // `I`, `O` and `E` are not stored in any field above, so they are carried as markers. | ||
| i: PhantomData<I>, | ||
| o: PhantomData<O>, | ||
| e: PhantomData<E>, | ||
| } | ||
|
|
||
| impl<I, O, ParseOperand, Pre, Post, Pix, E> Expression<I, O, ParseOperand, Pre, Post, Pix, E> | ||
| where | ||
| ParseOperand: Parser<I, O, E>, | ||
| I: Stream + StreamIsPartial, | ||
| E: ParserError<I>, | ||
| { | ||
| /// Declare the unary prefix operator for the current expression | ||
| /// | ||
| /// `parser` recognises the operator and yields the [`Prefix`] describing it. It replaces the | ||
| /// prefix parser held so far; every other setting is carried over unchanged. | ||
| #[inline(always)] | ||
| pub fn prefix<NewParsePrefix>( | ||
| self, | ||
| parser: NewParsePrefix, | ||
| ) -> Expression<I, O, ParseOperand, NewParsePrefix, Post, Pix, E> | ||
| where | ||
| NewParsePrefix: Parser<I, Prefix<I, O, E>, E>, | ||
| { | ||
| Expression { | ||
| precedence_level: self.precedence_level, | ||
| parse_operand: self.parse_operand, | ||
| parse_prefix: parser, | ||
| parse_postfix: self.parse_postfix, | ||
| parse_infix: self.parse_infix, | ||
| i: Default::default(), | ||
| o: Default::default(), | ||
| e: Default::default(), | ||
| } | ||
| } | ||
|
|
||
| /// Declare the unary postfix operator for the current expression | ||
| /// | ||
| /// `parser` recognises the operator and yields the [`Postfix`] describing it. It replaces the | ||
| /// postfix parser held so far; every other setting is carried over unchanged. | ||
| #[inline(always)] | ||
| pub fn postfix<NewParsePostfix>( | ||
| self, | ||
| parser: NewParsePostfix, | ||
| ) -> Expression<I, O, ParseOperand, Pre, NewParsePostfix, Pix, E> | ||
| where | ||
| NewParsePostfix: Parser<I, Postfix<I, O, E>, E>, | ||
| { | ||
| Expression { | ||
| precedence_level: self.precedence_level, | ||
| parse_operand: self.parse_operand, | ||
| parse_prefix: self.parse_prefix, | ||
| parse_postfix: parser, | ||
| parse_infix: self.parse_infix, | ||
| i: Default::default(), | ||
| o: Default::default(), | ||
| e: Default::default(), | ||
| } | ||
| } | ||
|
|
||
| /// Declare the binary infix operator for the current expression | ||
| /// | ||
| /// `parser` recognises the operator and yields the [`Infix`] describing it. It replaces the | ||
| /// infix parser held so far; every other setting is carried over unchanged. | ||
| #[inline(always)] | ||
| pub fn infix<NewParseInfix>( | ||
| self, | ||
| parser: NewParseInfix, | ||
| ) -> Expression<I, O, ParseOperand, Pre, Post, NewParseInfix, E> | ||
| where | ||
| NewParseInfix: Parser<I, Infix<I, O, E>, E>, | ||
| { | ||
| Expression { | ||
| precedence_level: self.precedence_level, | ||
| parse_operand: self.parse_operand, | ||
| parse_prefix: self.parse_prefix, | ||
| parse_postfix: self.parse_postfix, | ||
| parse_infix: parser, | ||
| i: Default::default(), | ||
| o: Default::default(), | ||
| e: Default::default(), | ||
| } | ||
| } | ||
|
|
||
| /// Set the precedence level for the current expression | ||
| /// | ||
| /// `level` is used as the minimum operator power when parsing starts: postfix and infix | ||
| /// operators whose power is below it are left unparsed. | ||
| #[inline(always)] | ||
| pub fn current_precedence_level( | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably we could drop |
||
| mut self, | ||
| level: i64, | ||
| ) -> Expression<I, O, ParseOperand, Pre, Post, Pix, E> { | ||
| self.precedence_level = level; | ||
| self | ||
| } | ||
| } | ||
|
|
||
| impl<I, O, Pop, Pre, Post, Pix, E> Parser<I, O, E> for Expression<I, O, Pop, Pre, Post, Pix, E> | ||
| where | ||
| I: Stream + StreamIsPartial, | ||
| Pop: Parser<I, O, E>, | ||
| Pix: Parser<I, Infix<I, O, E>, E>, | ||
| Pre: Parser<I, Prefix<I, O, E>, E>, | ||
| Post: Parser<I, Postfix<I, O, E>, E>, | ||
| E: ParserError<I>, | ||
| { | ||
| /// Parses a whole expression by delegating to `expression_impl`, starting from the | ||
| /// configured precedence level and wrapped in a `trace` named `"expression"`. | ||
| #[inline(always)] | ||
| fn parse_next(&mut self, input: &mut I) -> Result<O, E> { | ||
| trace("expression", move |i: &mut I| { | ||
| expression_impl( | ||
| i, | ||
| &mut self.parse_operand, | ||
| &mut self.parse_prefix, | ||
| &mut self.parse_postfix, | ||
| &mut self.parse_infix, | ||
| self.precedence_level, | ||
| ) | ||
| }) | ||
| .parse_next(input) | ||
| } | ||
| } | ||
|
|
||
| /// Parses one (sub-)expression whose operators all have a power of at least `min_power`. | ||
| /// | ||
| /// An operand is tried first; if none matches, a prefix operator is parsed and applied to the | ||
| /// sub-expression that follows it. Postfix and infix operators are then folded into the result | ||
| /// until the input is exhausted, no operator matches, or the next operator is rejected (its | ||
| /// power is below `min_power`, or it repeats a non-associative operator of the same power). | ||
| /// A rejected operator is not consumed: the input is rewound to just before it. | ||
| fn expression_impl<I, O, Pop, Pre, Post, Pix, E>( | ||
| i: &mut I, | ||
| parse_operand: &mut Pop, | ||
| prefix: &mut Pre, | ||
| postfix: &mut Post, | ||
| infix: &mut Pix, | ||
| min_power: i64, | ||
| ) -> Result<O, E> | ||
| where | ||
| I: Stream + StreamIsPartial, | ||
| Pop: Parser<I, O, E>, | ||
| Pix: Parser<I, Infix<I, O, E>, E>, | ||
| Pre: Parser<I, Prefix<I, O, E>, E>, | ||
| Post: Parser<I, Postfix<I, O, E>, E>, | ||
| E: ParserError<I>, | ||
| { | ||
| let operand = opt(trace("operand", parse_operand.by_ref())).parse_next(i)?; | ||
| let mut operand = if let Some(operand) = operand { | ||
| operand | ||
| } else { | ||
| // Prefix unary operators | ||
| let len = i.eof_offset(); | ||
| let Prefix(power, fold_prefix) = trace("prefix", prefix.by_ref()).parse_next(i)?; | ||
| // infinite loop check: the parser must always consume | ||
| if i.eof_offset() == len { | ||
| return Err(E::assert(i, "`prefix` parsers must always consume")); | ||
| } | ||
| // The operand of a prefix operator only absorbs operators at least as strong as it. | ||
| let operand = expression_impl(i, parse_operand, prefix, postfix, infix, power)?; | ||
| fold_prefix(i, operand)? | ||
| }; | ||
|
|
||
| // A variable to stop the `'parse` loop when `Assoc::Neither` with the same | ||
| // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar | ||
| // associativity rules as `Assoc::Left`, but we stop parsing when the next operator | ||
| // is the same as the current one. | ||
| let mut prev_op_is_neither = None; | ||
| 'parse: while i.eof_offset() > 0 { | ||
| // Postfix unary operators | ||
| let start = i.checkpoint(); | ||
| if let Some(Postfix(power, fold_postfix)) = | ||
| opt(trace("postfix", postfix.by_ref())).parse_next(i)? | ||
| { | ||
| // control precedence over the prefix e.g.: | ||
| // `--(i++)` or `(--i)++` | ||
| if power < min_power { | ||
| i.reset(&start); | ||
| break 'parse; | ||
| } | ||
| operand = fold_postfix(i, operand)?; | ||
|
|
||
| continue 'parse; | ||
| } | ||
|
|
||
| // Infix binary operators | ||
| let start = i.checkpoint(); | ||
| let parse_result = opt(trace("infix", infix.by_ref())).parse_next(i)?; | ||
| if let Some(infix_op) = parse_result { | ||
| let mut is_neither = None; | ||
| // `rpower` is the minimum power used to parse the right-hand side: | ||
| // - right-associative: the operator's own power, so an equal-power operator nests to | ||
| // the right while every weaker operator ends the right-hand side. (Using `p - 1` | ||
| // would also swallow operators of the adjacent lower level, e.g. `a ^ b + c` would | ||
| // become `a ^ (b + c)`, and could overflow at `i64::MIN`.) | ||
| // - left- and non-associative: one higher, so an equal-power operator ends it. | ||
| let (lpower, rpower, fold_infix) = match infix_op { | ||
| Infix::Right(p, f) => (p, p, f), | ||
| Infix::Left(p, f) => (p, p + 1, f), | ||
| Infix::Neither(p, f) => { | ||
| is_neither = Some(p); | ||
| (p, p + 1, f) | ||
| } | ||
| }; | ||
| if lpower < min_power | ||
| // MSRV: `is_some_and` | ||
| || match prev_op_is_neither { | ||
| None => false, | ||
| Some(p) => lpower == p, | ||
| } | ||
| { | ||
| // Leave the operator for an enclosing call (or the caller) to handle. | ||
| i.reset(&start); | ||
| break 'parse; | ||
| } | ||
| prev_op_is_neither = is_neither; | ||
| let rhs = expression_impl(i, parse_operand, prefix, postfix, infix, rpower)?; | ||
| operand = fold_infix(i, operand, rhs)?; | ||
|
|
||
| continue 'parse; | ||
| } | ||
|
|
||
| break 'parse; | ||
| } | ||
|
|
||
| Ok(operand) | ||
| } | ||
|
|
||
| /// Unary prefix operator | ||
| /// | ||
| /// Field `0` is the operator's power: it is the minimum power used to parse the operand that | ||
| /// follows the operator. Field `1` folds that operand into the resulting value. | ||
| pub struct Prefix<I, O, E>(pub i64, pub fn(&mut I, O) -> Result<O, E>); | ||
|
|
||
| // Written by hand: `#[derive(Clone)]` would add `I: Clone`, `O: Clone` and `E: Clone` bounds, | ||
| // although only an `i64` and a function pointer are copied. | ||
| impl<I, O, E> Clone for Prefix<I, O, E> { | ||
| #[inline(always)] | ||
| fn clone(&self) -> Self { | ||
| Prefix(self.0, self.1) | ||
| } | ||
| } | ||
|
|
||
| // Lets a `Prefix` value stand where a parser producing a `Prefix` is expected, e.g. as the | ||
| // result arm of a `dispatch!` that has already consumed the operator token. | ||
| impl<I: Stream, O, E: ParserError<I>> Parser<I, Prefix<I, O, E>, E> for Prefix<I, O, E> { | ||
| /// Yields a copy of `self`; the parse itself is delegated to `empty`. | ||
| #[inline(always)] | ||
| fn parse_next(&mut self, input: &mut I) -> Result<Prefix<I, O, E>, E> { | ||
| empty.value(self.clone()).parse_next(input) | ||
| } | ||
| } | ||
|
|
||
| /// Unary postfix operator | ||
| /// | ||
| /// Field `0` is the operator's power: the operator is only applied when it is not below the | ||
| /// minimum power of the expression being parsed. Field `1` folds the operand parsed so far | ||
| /// into the resulting value. | ||
| pub struct Postfix<I, O, E>(pub i64, pub fn(&mut I, O) -> Result<O, E>); | ||
|
|
||
| // Written by hand: `#[derive(Clone)]` would add `I: Clone`, `O: Clone` and `E: Clone` bounds, | ||
| // although only an `i64` and a function pointer are copied. | ||
| impl<I, O, E> Clone for Postfix<I, O, E> { | ||
| #[inline(always)] | ||
| fn clone(&self) -> Self { | ||
| Postfix(self.0, self.1) | ||
| } | ||
| } | ||
|
|
||
| // Lets a `Postfix` value stand where a parser producing a `Postfix` is expected, e.g. as the | ||
| // result arm of a `dispatch!` that has already consumed the operator token. Implemented for | ||
| // `Postfix` itself (not for a bare `(i64, fn)` tuple) so it matches `Prefix` and `Infix`. | ||
| impl<I: Stream, O, E: ParserError<I>> Parser<I, Postfix<I, O, E>, E> for Postfix<I, O, E> { | ||
| /// Yields a copy of `self`; the parse itself is delegated to `empty`. | ||
| #[inline(always)] | ||
| fn parse_next(&mut self, input: &mut I) -> Result<Postfix<I, O, E>, E> { | ||
| empty.value(self.clone()).parse_next(input) | ||
| } | ||
| } | ||
|
Comment on lines
+289
to
+296
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question for @39555. I think you meant to implement this for impl<I: Stream, O, E: ParserError<I>> Parser<I, Postfix<I, O, E>, E> for Postfix<I, O, E> {
#[inline(always)]
fn parse_next(&mut self, input: &mut I) -> Result<Postfix<I, O, E>, E> {
empty.value(self.clone()).parse_next(input)
}
}I noticed this while porting the example in #622 to my own fork: https://github.com/ssmendon/winnow/blob/pratt-parsing/examples/c_expression/parser.rs Specifically, I noticed this syntax doesn't work due to unsatisfied trait bounds: dispatch! {take(2usize);
"++" => Postfix(20, |_: &mut _, a| Ok(Expr::PostIncr(Box::new(a)))),
"--" => Postfix(20, |_: &mut _, a| Ok(Expr::PostDecr(Box::new(a)))),
_ => fail,
},Notes:
|
||
|
|
||
| /// Binary infix operator | ||
| pub enum Infix<I, O, E> { | ||
| /// Left-associative operator | ||
| /// | ||
| /// It is evaluated from the leftmost term, moving rightward | ||
| /// e.g. `a + b + c + d` | ||
| Left(i64, fn(&mut I, O, O) -> Result<O, E>), | ||
| /// Right-associative operator | ||
| /// | ||
| /// It is evaluated from the rightmost term, moving leftward | ||
| /// | ||
| /// e.g. `a ^ b ^ c ^ d` | ||
| Right(i64, fn(&mut I, O, O) -> Result<O, E>), | ||
| /// Non-associative operator | ||
| /// | ||
| /// Its evaluation folds neither left-to-right nor right-to-left. | ||
| /// e.g. in `a == b == c` parsing stops before the second `==` | ||
| Neither(i64, fn(&mut I, O, O) -> Result<O, E>), | ||
| } | ||
|
|
||
| // Written by hand: `#[derive(Clone)]` would add `I: Clone`, `O: Clone` and `E: Clone` bounds, | ||
| // although every variant only holds an `i64` and a function pointer. | ||
| impl<I, O, E> Clone for Infix<I, O, E> { | ||
| #[inline(always)] | ||
| fn clone(&self) -> Self { | ||
| match self { | ||
| Infix::Left(p, f) => Infix::Left(*p, *f), | ||
| Infix::Right(p, f) => Infix::Right(*p, *f), | ||
| Infix::Neither(p, f) => Infix::Neither(*p, *f), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Lets an `Infix` value stand where a parser producing an `Infix` is expected, e.g. as the | ||
| // result arm of a `dispatch!` that has already consumed the operator token. | ||
| impl<I: Stream, O, E: ParserError<I>> Parser<I, Infix<I, O, E>, E> for Infix<I, O, E> { | ||
| /// Yields a copy of `self`; the parse itself is delegated to `empty`. | ||
| #[inline(always)] | ||
| fn parse_next(&mut self, input: &mut I) -> Result<Infix<I, O, E>, E> { | ||
| empty.value(self.clone()).parse_next(input) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use crate::ascii::digit1; | ||
| use crate::combinator::fail; | ||
| use crate::dispatch; | ||
| use crate::error::ContextError; | ||
| use crate::token::any; | ||
|
|
||
| use super::*; | ||
|
|
||
| // Integer arithmetic over `&str`: unary `+`/`-` (power 12) bind tighter than every binary | ||
| // operator (powers 5, 7 and 9). | ||
| fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> { | ||
| move |i: &mut &str| { | ||
| use Infix::*; | ||
| expression(digit1.parse_to::<i32>()) | ||
| .current_precedence_level(0) | ||
| .prefix(dispatch! {any; | ||
| '+' => Prefix(12, |_, a| Ok(a)), | ||
| '-' => Prefix(12, |_, a: i32| Ok(-a)), | ||
| _ => fail | ||
| }) | ||
| .infix(dispatch! {any; | ||
| '+' => Left(5, |_, a, b| Ok(a + b)), | ||
| '-' => Left(5, |_, a, b| Ok(a - b)), | ||
| '*' => Left(7, |_, a, b| Ok(a * b)), | ||
| '/' => Left(7, |_, a, b| Ok(a / b)), | ||
| '%' => Left(7, |_, a, b| Ok(a % b)), | ||
| // NOTE: on `i32`, `^` is bitwise XOR, not exponentiation. | ||
| '^' => Left(9, |_, a, b| Ok(a ^ b)), | ||
| _ => fail | ||
| }) | ||
| .parse_next(i) | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_expression() { | ||
| // (-3) + ((-3) * 4) | ||
| assert_eq!(parser().parse("-3+-3*4"), Ok(-15)); | ||
| // (+2) + (3 * 4) | ||
| assert_eq!(parser().parse("+2+3*4"), Ok(14)); | ||
| // (2 * 3) + 4 | ||
| assert_eq!(parser().parse("2*3+4"), Ok(10)); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -165,6 +165,8 @@ mod debug; | |
| mod multi; | ||
| mod sequence; | ||
|
|
||
| pub mod expression; | ||
|
|
||
| #[cfg(test)] | ||
| mod tests; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should include the example from #622
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd probably name that
c_expr, rather than pratt. Trying to decide how it should show up in our docs, maybe a sub-page of Languages but have Arithmetic cross-link to it? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm on vacation and haven't had much time to look at this code, but I started porting the example to the new API. If it helps: https://github.com/ssmendon/winnow/blob/devel-pratt/examples/pratt/parser.rs#L72-L163
All commits: https://github.com/ssmendon/winnow/commits/devel-pratt
Some notes:
Prefix's fields public to use the API in ssmendon@1d5454e