diff --git a/Cargo.toml b/Cargo.toml
index 86a2653ef..01dbfe14b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -197,6 +197,17 @@ required-features = ["alloc"]
 name = "string"
 required-features = ["alloc"]
 
+[[example]]
+name = "pratt"
+required-features = ["std"]
+
+
+[[bench]]
+name = "pratt"
+path = "examples/pratt/bench.rs"
+harness = false
+required-features = ["std"]
+
 [[bench]]
 name = "arithmetic"
 path = "examples/arithmetic/bench.rs"
diff --git a/examples/pratt/bench.rs b/examples/pratt/bench.rs
new file mode 100644
index 000000000..b2847f46b
--- /dev/null
+++ b/examples/pratt/bench.rs
@@ -0,0 +1,16 @@
+mod parser;
+
+use criterion::black_box;
+use winnow::prelude::*;
+
+/// Benchmarks `parser::pratt_parser` on one expression that mixes prefix, postfix and infix
+/// operators.
+fn pratt(c: &mut criterion::Criterion) {
+    let i =
+        "a = 2*-2 / ( &**foo.a->p! -+1) + 3^1 / 4 == 1 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2))";
+    // Fail fast if the sample stops parsing, instead of measuring the error path.
+    parser::pratt_parser.parse(i).expect("should parse");
+    c.bench_function("pratt_parser", |b| {
+        // `black_box` on both input and output keeps the optimizer from folding the work away.
+        b.iter(|| black_box(parser::pratt_parser.parse(black_box(i)).unwrap()));
+    });
+}
+
+criterion::criterion_group!(benches, pratt);
+criterion::criterion_main!(benches);
diff --git a/examples/pratt/main.rs b/examples/pratt/main.rs
new file mode 100644
index 000000000..58f5a7c87
--- /dev/null
+++ b/examples/pratt/main.rs
@@ -0,0 +1,44 @@
+use winnow::prelude::*;
+
+mod parser;
+
+/// Parses the expression given on the command line (default `1 + 1`) and prints the result,
+/// or `FAILED` followed by the parse error.
+fn main() -> Result<(), lexopt::Error> {
+    let args = Args::parse()?;
+
+    let input = args.input.as_deref().unwrap_or("1 + 1");
+    match parser::pratt_parser.parse(input) {
+        Ok(result) => {
+            println!("{result}");
+        }
+        Err(err) => {
+            println!("FAILED");
+            println!("{err}");
+        }
+    }
+
+    Ok(())
+}
+
+/// Command-line arguments of the example.
+#[derive(Default)]
+struct Args {
+    /// Expression to parse; `None` when no positional argument was given.
+    input: Option<String>,
+}
+
+impl Args {
+    /// Reads the process arguments: positional values are stored as the input (the last one
+    /// wins), anything else is rejected as unexpected.
+    fn parse() -> Result<Self, lexopt::Error> {
+        use lexopt::prelude::*;
+
+        let mut res = Args::default();
+
+        let mut args = lexopt::Parser::from_env();
+        while let Some(arg) = args.next()? {
+            match arg {
+                Value(input) => {
+                    res.input = Some(input.string()?);
+                }
+                _ => return Err(arg.unexpected()),
+            }
+        }
+        Ok(res)
+    }
+}
diff --git a/examples/pratt/parser.rs b/examples/pratt/parser.rs
new file mode 100644
index 000000000..33a2bfcaf
--- /dev/null
+++ b/examples/pratt/parser.rs
@@ -0,0 +1,631 @@
+use winnow::combinator::{cut_err, empty, fail, not, opt, peek, separated_pair, trace};
+use winnow::error::ContextError;
+use winnow::prelude::*;
+use winnow::stream::AsChar as _;
+use winnow::token::{any, take, take_while};
+use winnow::{
+    ascii::{digit1, multispace0},
+    combinator::alt,
+    combinator::delimited,
+    dispatch,
+    token::one_of,
+};
+
+/// Syntax tree built by [`pratt_parser`].
+#[derive(Debug)]
+pub(crate) enum Expr {
+    Name(String),
+    Value(i64),
+
+    Assign(Box<Expr>, Box<Expr>),
+
+    Addr(Box<Expr>),
+    Deref(Box<Expr>),
+
+    Dot(Box<Expr>, Box<Expr>),
+    ArrowOp(Box<Expr>, Box<Expr>),
+    Neg(Box<Expr>),
+    Add(Box<Expr>, Box<Expr>),
+    Sub(Box<Expr>, Box<Expr>),
+    Mul(Box<Expr>, Box<Expr>),
+    Div(Box<Expr>, Box<Expr>),
+    Pow(Box<Expr>, Box<Expr>),
+    Fac(Box<Expr>),
+
+    PreIncr(Box<Expr>),
+    PostIncr(Box<Expr>),
+    PreDecr(Box<Expr>),
+    PostDecr(Box<Expr>),
+
+    And(Box<Expr>, Box<Expr>),
+    Or(Box<Expr>, Box<Expr>),
+
+    // `==`
+    Eq(Box<Expr>, Box<Expr>),
+    // `!=`
+    NotEq(Box<Expr>, Box<Expr>),
+    // `!`
+    Not(Box<Expr>),
+    Greater(Box<Expr>, Box<Expr>),
+    GreaterEqual(Box<Expr>, Box<Expr>),
+    Less(Box<Expr>, Box<Expr>),
+    LessEqual(Box<Expr>, Box<Expr>),
+
+    // A parenthesized expression.
+    Paren(Box<Expr>),
+    // Callee plus an optional argument expression (several arguments form a `Comma` chain).
+    FunctionCall(Box<Expr>, Option<Box<Expr>>),
+    // condition, then-branch, else-branch
+    Ternary(Box<Expr>, Box<Expr>, Box<Expr>),
+    // foo[...]
+    Index(Box<Expr>, Box<Expr>),
+    // a, b
+    Comma(Box<Expr>, Box<Expr>),
+
+    // %
+    Rem(Box<Expr>, Box<Expr>),
+    BitXor(Box<Expr>, Box<Expr>),
+    BitAnd(Box<Expr>, Box<Expr>),
+    BitwiseNot(Box<Expr>),
+}
+
+// Parser definition
+
+/// Parses a C-like expression into an [`Expr`] tree.
+///
+/// Operands are identifiers, decimal integers and parenthesized sub-expressions. Operators are
+/// handed to `precedence::precedence` as three tables (prefix, postfix, infix); each entry pairs
+/// a binding power with a function that builds the matching `Expr` node. Higher powers bind
+/// tighter.
+pub(crate) fn pratt_parser(i: &mut &str) -> PResult<Expr> {
+    use winnow::combinator::precedence::{self, Assoc};
+    // precedence is based on https://en.cppreference.com/w/c/language/operator_precedence
+    // but specified in reverse order, because the `cppreference` table
+    // uses `descending` precedence, but we need ascending one
+    fn parser<'i>(start_power: i64) -> impl Parser<&'i str, Expr, ContextError> {
+        move |i: &mut &str| {
+            precedence::precedence(
+                start_power,
+                trace(
+                    "operand",
+                    delimited(
+                        multispace0,
+                        dispatch! {peek(any);
+                            // Once `(` is seen the closing `)` is mandatory (`cut_err`).
+                            '(' => delimited('(', parser(0).map(|e| Expr::Paren(Box::new(e))), cut_err(')')),
+                            _ => alt((
+                                identifier.map(|s| Expr::Name(s.into())),
+                                digit1.parse_to::<i64>().map(Expr::Value)
+                            )),
+                        },
+                        multispace0,
+                    ),
+                ),
+                trace(
+                    "prefix",
+                    delimited(
+                        multispace0,
+                        dispatch! {any;
+                            '+' => alt((
+                                // ++
+                                '+'.value((18, (|_: &mut _, a| Ok(Expr::PreIncr(Box::new(a)))) as _)),
+                                // unary plus is dropped from the tree
+                                empty.value((18, (|_: &mut _, a| Ok(a)) as _))
+                            )),
+                            '-' => alt((
+                                // --
+                                '-'.value((18, (|_: &mut _, a| Ok(Expr::PreDecr(Box::new(a)))) as _)),
+                                empty.value((18, (|_: &mut _, a| Ok(Expr::Neg(Box::new(a)))) as _))
+                            )),
+                            '&' => empty.value((18, (|_: &mut _, a| Ok(Expr::Addr(Box::new(a)))) as _)),
+                            '*' => empty.value((18, (|_: &mut _, a| Ok(Expr::Deref(Box::new(a)))) as _)),
+                            '!' => empty.value((18, (|_: &mut _, a| Ok(Expr::Not(Box::new(a)))) as _)),
+                            '~' => empty.value((18, (|_: &mut _, a| Ok(Expr::BitwiseNot(Box::new(a)))) as _)),
+                            _ => fail
+                        },
+                        multispace0,
+                    ),
+                ),
+                trace(
+                    "postfix",
+                    delimited(
+                        multispace0,
+                        alt((
+                            dispatch! {any;
+                                // `not('=')` keeps `!=` from being read as a factorial
+                                '!' => not('=').value((19, (|_: &mut _, a| Ok(Expr::Fac(Box::new(a)))) as _)),
+                                // The fold function parses the rest of the ternary: `then : else`.
+                                '?' => empty.value((3, (|i: &mut &str, cond| {
+                                    let (left, right) = cut_err(separated_pair(parser(0), delimited(multispace0, ':', multispace0), parser(3))).parse_next(i)?;
+                                    Ok(Expr::Ternary(Box::new(cond), Box::new(left), Box::new(right)))
+                                }) as _)),
+                                '[' => empty.value((20, (|i: &mut &str, a| {
+                                    let index = delimited(multispace0, parser(0), (multispace0, cut_err(']'), multispace0)).parse_next(i)?;
+                                    Ok(Expr::Index(Box::new(a), Box::new(index)))
+                                }) as _)),
+                                '(' => empty.value((20, (|i: &mut &str, a| {
+                                    let args = delimited(multispace0, opt(parser(0)), (multispace0, cut_err(')'), multispace0)).parse_next(i)?;
+                                    Ok(Expr::FunctionCall(Box::new(a), args.map(Box::new)))
+                                }) as _)),
+                                _ => fail,
+                            },
+                            dispatch! {take(2usize);
+                                "++" => empty.value((20, (|_: &mut _, a| Ok(Expr::PostIncr(Box::new(a)))) as _)),
+                                "--" => empty.value((20, (|_: &mut _, a| Ok(Expr::PostDecr(Box::new(a)))) as _)),
+                                _ => fail,
+                            },
+                        )),
+                        multispace0,
+                    ),
+                ),
+                trace(
+                    "infix",
+                    alt((
+                        dispatch! {any;
+                            '*' => alt((
+                                // **
+                                "*".value((Assoc::Right(28), (|_: &mut _, a, b| Ok(Expr::Pow(Box::new(a), Box::new(b)))) as _)),
+                                empty.value((Assoc::Left(16), (|_: &mut _, a, b| Ok(Expr::Mul(Box::new(a), Box::new(b)))) as _)),
+                            )),
+                            '/' => empty.value((Assoc::Left(16), (|_: &mut _, a, b| Ok(Expr::Div(Box::new(a), Box::new(b)))) as _)),
+                            '%' => empty.value((Assoc::Left(16), (|_: &mut _, a, b| Ok(Expr::Rem(Box::new(a), Box::new(b)))) as _)),
+
+                            '+' => empty.value((Assoc::Left(14), (|_: &mut _, a, b| Ok(Expr::Add(Box::new(a), Box::new(b)))) as _)),
+                            '-' => alt((
+                                // shell-style comparisons: `-ne`, `-eq`, `-gt`, `-ge`, `-lt`, `-le`
+                                dispatch!{take(2usize);
+                                    "ne" => empty.value((Assoc::Neither(10), (|_: &mut _, a, b| Ok(Expr::NotEq(Box::new(a), Box::new(b)))) as _)),
+                                    "eq" => empty.value((Assoc::Neither(10), (|_: &mut _, a, b| Ok(Expr::Eq(Box::new(a), Box::new(b)))) as _)),
+                                    "gt" => empty.value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::Greater(Box::new(a), Box::new(b)))) as _)),
+                                    "ge" => empty.value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::GreaterEqual(Box::new(a), Box::new(b)))) as _)),
+                                    "lt" => empty.value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::Less(Box::new(a), Box::new(b)))) as _)),
+                                    "le" => empty.value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::LessEqual(Box::new(a), Box::new(b)))) as _)),
+                                    _ => fail
+                                },
+                                // ->
+                                '>'.value((Assoc::Left(20), (|_: &mut _, a, b| Ok(Expr::ArrowOp(Box::new(a), Box::new(b)))) as _)),
+                                empty.value((Assoc::Left(14), (|_: &mut _, a, b| Ok(Expr::Sub(Box::new(a), Box::new(b)))) as _))
+                            )),
+                            '.' => empty.value((Assoc::Left(20), (|_: &mut _, a, b| Ok(Expr::Dot(Box::new(a), Box::new(b)))) as _)),
+                            '&' => alt((
+                                // &&
+                                "&".value((Assoc::Left(6), (|_: &mut _, a, b| Ok(Expr::And(Box::new(a), Box::new(b)))) as _) ),
+
+                                // NOTE(review): power 12 puts bitwise `&` at the relational level; in the
+                                // cited C table it binds looser than `==` - confirm this is intended.
+                                empty.value((Assoc::Left(12), (|_: &mut _, a, b| Ok(Expr::BitAnd(Box::new(a), Box::new(b)))) as _)),
+                            )),
+                            '^' => empty.value((Assoc::Left(8), (|_: &mut _, a, b| Ok(Expr::BitXor(Box::new(a), Box::new(b)))) as _)),
+                            '=' => alt((
+                                // ==
+                                "=".value((Assoc::Neither(10), (|_: &mut _, a, b| Ok(Expr::Eq(Box::new(a), Box::new(b)))) as _)),
+                                empty.value((Assoc::Right(2), (|_: &mut _, a, b| Ok(Expr::Assign(Box::new(a), Box::new(b)))) as _))
+                            )),
+
+                            '>' => alt((
+                                // >=
+                                "=".value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::GreaterEqual(Box::new(a), Box::new(b)))) as _)),
+                                empty.value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::Greater(Box::new(a), Box::new(b)))) as _))
+                            )),
+                            '<' => alt((
+                                // <=
+                                "=".value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::LessEqual(Box::new(a), Box::new(b)))) as _)),
+                                empty.value((Assoc::Neither(12), (|_: &mut _, a, b| Ok(Expr::Less(Box::new(a), Box::new(b)))) as _))
+                            )),
+                            ',' => empty.value((Assoc::Left(0), (|_: &mut _, a, b| Ok(Expr::Comma(Box::new(a), Box::new(b)))) as _)),
+                            _ => fail
+                        },
+                        dispatch! {take(2usize);
+                            "!=" => empty.value((Assoc::Neither(10), (|_: &mut _, a, b| Ok(Expr::NotEq(Box::new(a), Box::new(b)))) as _)),
+                            "||" => empty.value((Assoc::Left(4), (|_: &mut _, a, b| Ok(Expr::Or(Box::new(a), Box::new(b)))) as _)),
+                            _ => fail
+                        },
+                    )),
+                ),
+            ).parse_next(i)
+        }
+    }
+    parser(0).parse_next(i)
+}
+
+/// Recognizes an identifier: a letter or `_`, followed by any number of alphanumerics or `_`.
+fn identifier<'i>(i: &mut &'i str) -> PResult<&'i str> {
+    trace(
+        "identifier",
+        (
+            one_of(|c: char| c.is_alpha() || c == '_'),
+            take_while(0.., |c: char| c.is_alphanum() || c == '_'),
+        ),
+    )
+    .take()
+    .parse_next(i)
+}
+
+impl Expr {
+    /// Writes the tree one node per line, children indented one level deeper than their parent.
+    fn fmt_ast_with_indent(
+        &self,
+        indent: u32,
+        f: &mut core::fmt::Formatter<'_>,
+    ) -> core::fmt::Result {
+        for _ in 0..indent {
+            write!(f, " ")?;
+        }
+        macro_rules! binary_fmt {
+            ($a:ident, $b:ident, $name:literal) => {{
+                writeln!(f, $name)?;
+                $a.fmt_ast_with_indent(indent + 1, f)?;
+                $b.fmt_ast_with_indent(indent + 1, f)
+            }};
+        }
+        macro_rules! unary_fmt {
+            ($a:ident, $name:literal) => {{
+                writeln!(f, $name)?;
+                $a.fmt_ast_with_indent(indent + 1, f)
+            }};
+        }
+        match self {
+            Self::Name(name) => writeln!(f, "NAME {name}"),
+            Self::Value(value) => writeln!(f, "VAL {value}"),
+            Self::Addr(a) => unary_fmt!(a, "ADDR"),
+            Self::Deref(a) => unary_fmt!(a, "DEREF"),
+            Self::Neg(a) => unary_fmt!(a, "NEG"),
+            Self::Fac(a) => unary_fmt!(a, "FAC"),
+            Self::PreIncr(a) => unary_fmt!(a, "PRE_INCR"),
+            Self::PostIncr(a) => unary_fmt!(a, "POST_INCR"),
+            Self::PreDecr(a) => unary_fmt!(a, "PRE_DECR"),
+            Self::PostDecr(a) => unary_fmt!(a, "POST_DECR"),
+            Self::Not(a) => unary_fmt!(a, "NOT"),
+            Self::BitwiseNot(a) => unary_fmt!(a, "BIT_NOT"),
+            Self::Paren(a) => unary_fmt!(a, "PAREN"),
+            Self::Assign(a, b) => binary_fmt!(a, b, "ASSIGN"),
+            Self::ArrowOp(a, b) => binary_fmt!(a, b, "ARROW"),
+            // `.` gets its own label so it can be told apart from `->` in the dump.
+            Self::Dot(a, b) => binary_fmt!(a, b, "DOT"),
+            Self::FunctionCall(a, b) => {
+                writeln!(f, "CALL")?;
+                a.fmt_ast_with_indent(indent + 1, f)?;
+                if let Some(b) = b {
+                    b.fmt_ast_with_indent(indent + 1, f)?;
+                }
+                Ok(())
+            }
+            Self::Add(a, b) => binary_fmt!(a, b, "ADD"),
+            Self::Sub(a, b) => binary_fmt!(a, b, "SUB"),
+            Self::Mul(a, b) => binary_fmt!(a, b, "MUL"),
+            Self::Div(a, b) => binary_fmt!(a, b, "DIV"),
+            Self::Pow(a, b) => binary_fmt!(a, b, "POW"),
+            Self::And(a, b) => binary_fmt!(a, b, "AND"),
+            Self::Or(a, b) => binary_fmt!(a, b, "OR"),
+            Self::Eq(a, b) => binary_fmt!(a, b, "EQ"),
+            Self::NotEq(a, b) => binary_fmt!(a, b, "NEQ"),
+            Self::Greater(a, b) => binary_fmt!(a, b, "GREATER"),
+            Self::GreaterEqual(a, b) => binary_fmt!(a, b, "GTEQ"),
+            Self::Less(a, b) => binary_fmt!(a, b, "LESS"),
+            Self::LessEqual(a, b) => binary_fmt!(a, b, "LESSEQ"),
+            Self::BitXor(a, b) => binary_fmt!(a, b, "BIT_XOR"),
+            Self::Rem(a, b) => binary_fmt!(a, b, "REM"),
+            Self::BitAnd(a, b) => binary_fmt!(a, b, "BIT_AND"),
+            Self::Index(a, b) => binary_fmt!(a, b, "INDEX"),
+            Self::Comma(a, b) => binary_fmt!(a, b, "COMMA"),
+            Self::Ternary(cond, a, b) => {
+                writeln!(f, "TERNARY")?;
+                cond.fmt_ast_with_indent(indent + 1, f)?;
+                a.fmt_ast_with_indent(indent + 2, f)?;
+                b.fmt_ast_with_indent(indent + 2, f)
+            }
+        }
+    }
+    /// Writes the tree in fully parenthesized prefix notation, e.g. `(+ 1 (* 2 3))`.
+    ///
+    /// Names and values are written bare; `Paren` is transparent and prints only its content.
+    fn fmt_delimited(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            Self::Name(name) => return write!(f, "{name}"),
+            Self::Value(value) => return write!(f, "{value}"),
+            Self::Paren(a) => return a.fmt_delimited(f),
+            _ => (),
+        }
+        macro_rules! unary {
+            ($op:literal, $a:ident) => {{
+                write!(f, $op)?;
+                $a.fmt_delimited(f)?;
+            }};
+        }
+        macro_rules! binary {
+            ($op:literal, $a:ident, $b:ident) => {{
+                write!(f, "{} ", $op)?;
+                $a.fmt_delimited(f)?;
+                write!(f, " ")?;
+                $b.fmt_delimited(f)?;
+            }};
+        }
+        write!(f, "(")?;
+        match self {
+            Self::Assign(a, b) => binary!("=", a, b),
+            Self::FunctionCall(a, b) => {
+                write!(f, "call ")?;
+                a.fmt_delimited(f)?;
+                if let Some(b) = b {
+                    write!(f, " ")?;
+                    b.fmt_delimited(f)?;
+                }
+            }
+            Self::ArrowOp(a, b) => binary!("->", a, b),
+            Self::Dot(a, b) => binary!(".", a, b),
+            Self::Addr(a) => unary!("&", a),
+            Self::Deref(a) => unary!("*", a),
+            Self::Neg(a) => unary!("-", a),
+            Self::Fac(a) => unary!("!", a),
+            Self::Not(a) => unary!("!", a),
+            Self::BitwiseNot(a) => unary!("~", a),
+            Self::PreIncr(a) => unary!("pre++", a),
+            Self::PostIncr(a) => unary!("post++", a),
+            Self::PreDecr(a) => unary!("pre--", a),
+            Self::PostDecr(a) => unary!("post--", a),
+            Self::Add(a, b) => binary!("+", a, b),
+            Self::Sub(a, b) => binary!("-", a, b),
+            Self::Mul(a, b) => binary!("*", a, b),
+            Self::Div(a, b) => binary!("/", a, b),
+            Self::Pow(a, b) => binary!("**", a, b),
+            Self::And(a, b) => binary!("&&", a, b),
+            Self::Or(a, b) => binary!("||", a, b),
+            Self::Eq(a, b) => binary!("==", a, b),
+            Self::NotEq(a, b) => binary!("!=", a, b),
+            Self::Greater(a, b) => binary!(">", a, b),
+            Self::GreaterEqual(a, b) => binary!(">=", a, b),
+            Self::Less(a, b) => binary!("<", a, b),
+            Self::LessEqual(a, b) => binary!("<=", a, b),
+            Self::BitXor(a, b) => binary!("^", a, b),
+            Self::Rem(a, b) => binary!("%", a, b),
+            Self::BitAnd(a, b) => binary!("&", a, b),
+            Self::Index(a, b) => binary!("[]", a, b),
+            Self::Comma(a, b) => binary!(",", a, b),
+            Self::Ternary(cond, a, b) => {
+                write!(f, "? ")?;
+                cond.fmt_delimited(f)?;
+                write!(f, " ")?;
+                a.fmt_delimited(f)?;
+                write!(f, " ")?;
+                b.fmt_delimited(f)?;
+            }
+            // Listed explicitly (no `_` arm) so that a new variant is a compile error here.
+            Self::Name(_) | Self::Value(_) | Self::Paren(_) => {
+                unreachable!("handled by the early returns above")
+            }
+        }
+
+        write!(f, ")")
+    }
+}
+
+/// Prints the indented tree dump, a blank line, then the prefix-notation form.
+impl core::fmt::Display for Expr {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.fmt_ast_with_indent(0, f)?;
+        writeln!(f)?;
+        self.fmt_delimited(f)
+    }
+}
+
+#[cfg(test)]
+mod test {
+
+    #[allow(clippy::useless_attribute)]
+    #[allow(unused_imports)] // its dead for benches
+    use super::*;
+    use winnow::error::ParseError;
+
+    #[allow(dead_code)]
+    // to invoke fmt_delimited()
+    struct PrefixNotation(Expr);
+
+    impl core::fmt::Display for PrefixNotation {
+        fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+            self.0.fmt_delimited(f)
+        }
+    }
+
+    /// Parses `i` and renders the result in prefix notation.
+    #[allow(dead_code)]
+    fn parse(i: &str) -> Result<String, ParseError<&str, ContextError>> {
+        pratt_parser
+            .parse(i)
+            .map(|r| PrefixNotation(r).to_string())
+    }
+
+    #[allow(dead_code)]
+    fn parse_ok(i: &str, expect: &str) {
+        assert_eq!(parse(i).unwrap(), expect);
+    }
+
+    #[test]
+    fn op() {
+        parse_ok(" 1 ", "1");
+    }
+
+    #[test]
+    fn neither() {
+        assert!(parse("1 == 2 == 3").is_err());
+        assert!(parse("1 -le 2 -gt 3").is_err());
+        assert!(parse("1 < 2 < 3").is_err());
+        assert!(parse("1 != 2 == 3").is_err());
+    }
+
+    #[test]
+    fn equal() {
+        parse_ok("x=3", "(= x 3)");
+        parse_ok("x = 2*3", "(= x (* 2 3))");
+        parse_ok("x = y", "(= x y)");
+        parse_ok("a = b = 10", "(= a (= b 10))");
+        parse_ok("x = ((y*4)-2)", "(= x (- (* y 4) 2))");
+    }
+
+    #[test]
+    fn unary() {
+        parse_ok("- - a", "(-(-a))");
+        parse_ok("+ - a", "(-a)");
+        parse_ok("++ -- a", "(pre++(pre--a))");
+        parse_ok("a ++ --", "(post--(post++a))");
+        parse_ok("!x", "(!x)");
+        parse_ok("x--", "(post--x)");
+        parse_ok("x[1]--", "(post--([] x 1))");
+        parse_ok("--x", "(pre--x)");
+        parse_ok("++x[1]", "(pre++([] x 1))");
+        parse_ok("!x--", "(!(post--x))");
+        parse_ok("~x++", "(~(post++x))");
+        parse_ok("x++ - y++", "(- (post++x) (post++y))");
+        parse_ok("++x - ++y", "(- (pre++x) (pre++y))");
+        parse_ok("--1 * 2", "(* (pre--1) 2)");
+        parse_ok("--f . g", "(pre--(. f g))");
+    }
+
+    #[test]
+    fn same_precedence() {
+        // left associative
+        parse_ok("1 + 2 + 3", "(+ (+ 1 2) 3)");
+        parse_ok("1 - 2 - 3", "(- (- 1 2) 3)");
+        parse_ok("1 * 2 * 3", "(* (* 1 2) 3)");
+        parse_ok("1 / 2 / 3", "(/ (/ 1 2) 3)");
+        parse_ok("1 % 2 % 3", "(% (% 1 2) 3)");
+        parse_ok("1 ^ 2 ^ 3", "(^ (^ 1 2) 3)");
+        parse_ok("+-+1", "(-1)");
+        parse_ok("f . g . h", "(. (. f g) h)");
+        parse_ok("++--++1", "(pre++(pre--(pre++1)))");
+        // right associative
+        parse_ok("2 ** 3 ** 2", "(** 2 (** 3 2))");
+    }
+
+    #[test]
+    fn different_precedence() {
+        parse_ok("1 + 2 * 3", "(+ 1 (* 2 3))");
+        parse_ok("1 + 2 * 3 - 4 / 5", "(- (+ 1 (* 2 3)) (/ 4 5))");
+        parse_ok("a + b * c * d + e", "(+ (+ a (* (* b c) d)) e)");
+        parse_ok("1 + ++2 * 3 * 5 + 6", "(+ (+ 1 (* (* (pre++2) 3) 5)) 6)");
+        parse_ok("**3 + &1", "(+ (*(*3)) (&1))");
+        parse_ok("x*y - y*z", "(- (* x y) (* y z))");
+        parse_ok("x/y - y%z", "(- (/ x y) (% y z))");
+        parse_ok("1<2 * 3", "(< 1 (* 2 3))");
+        parse_ok(
+            " 1 + 2 + f . g . h * 3 * 4",
+            "(+ (+ 1 2) (* (* (. (. f g) h) 3) 4))",
+        );
+    }
+
+    #[test]
+    fn prefix_postfix_power() {
+        // https://en.cppreference.com/w/c/language/operator_precedence
+        // `post++` has `1`, `pre--` and `*` have 2
+        parse_ok("--**3++", "(pre--(*(*(post++3))))");
+        parse_ok("**--3++", "(*(*(pre--(post++3))))");
+        parse_ok("&foo()[0]", "(&([] (call foo) 0))");
+        parse_ok("-9!", "(-(!9))");
+        parse_ok("f . g !", "(!(. f g))");
+    }
+
+    #[test]
+    fn prefix_infix() {
+        parse_ok("x - -y", "(- x (-y))");
+        parse_ok("-1 * -2", "(* (-1) (-2))");
+        parse_ok("-x * -y", "(* (-x) (-y))");
+        parse_ok("x - -234", "(- x (-234))");
+    }
+
+    #[test]
+    fn ternary() {
+        parse_ok("a ? 2 + c : -2 * 2", "(? a (+ 2 c) (* (-2) 2))");
+        parse_ok("a ? b : c ? d : e", "(? a b (? c d e))");
+        parse_ok("2! > 1 ? 3 : 1", "(? (> (!2) 1) 3 1)");
+        parse_ok(
+            "2 > 1 ? 1 -ne 3 ? 4 : 5 : 1",
+            "(? (> 2 1) (? (!= 1 3) 4 5) 1)",
+        );
+        parse_ok("a > b ? 0 : 1", "(? (> a b) 0 1)");
+        parse_ok("a > b ? x+1 : y+1", "(? (> a b) (+ x 1) (+ y 1))");
+        parse_ok(
+            "1 ? true1 : 2 ? true2 : false",
+            "(? 1 true1 (? 2 true2 false))",
+        );
+        parse_ok(
+            "1 ? true1 : (2 ? true2 : false)",
+            "(? 1 true1 (? 2 true2 false))",
+        );
+
+        parse_ok(
+            "1 ? (2 ? true : false1) : false2",
+            "(? 1 (? 2 true false1) false2)",
+        );
+        parse_ok(
+            "1 ? 2 ? true : false1 : false2",
+            "(? 1 (? 2 true false1) false2)",
+        );
+    }
+
+    #[test]
+    fn comma() {
+        parse_ok("x=1,y=2,z=3", "(, (, (= x 1) (= y 2)) (= z 3))");
+        parse_ok("a, b, c", "(, (, a b) c)");
+        parse_ok("(a, b, c)", "(, (, a b) c)");
+        parse_ok("f(a, b, c), d", "(, (call f (, (, a b) c)) d)");
+        parse_ok("(a, b, c), d", "(, (, (, a b) c) d)");
+    }
+
+    #[test]
+    fn comma_ternary() {
+        parse_ok("x ? 1 : 2, y ? 3 : 4", "(, (? x 1 2) (? y 3 4))");
+        // Comma expressions can be inside
+        parse_ok("a , b ? c, d : e, f", "(, (, a (? b (, c d) e)) f)");
+        parse_ok("a = 0 ? b : c = d", "(= a (= (? 0 b c) d))");
+    }
+
+    #[test]
+    fn braces() {
+        parse_ok("4*(2+3)", "(* 4 (+ 2 3))");
+        parse_ok("(2+3)*4", "(* (+ 2 3) 4)");
+        parse_ok("(((0)))", "0");
+    }
+
+    #[test]
+    fn logical() {
+        parse_ok("a && b || c && d", "(|| (&& a b) (&& c d))");
+        parse_ok("!a && !b", "(&& (!a) (!b))");
+        parse_ok("a != b && c == d", "(&& (!= a b) (== c d))");
+    }
+
+    #[test]
+    fn array() {
+        parse_ok("x[1,2]", "([] x (, 1 2))");
+        parse_ok("x[1]", "([] x 1)");
+        parse_ok("x[a+b]", "([] x (+ a b))");
+        parse_ok("c = pal[i*8]", "(= c ([] pal (* i 8)))");
+        parse_ok("f[x] = 1", "(= ([] f x) 1)");
+        parse_ok("x[0][1]", "([] ([] x 0) 1)");
+    }
+
+    #[test]
+    fn function_call() {
+        parse_ok("a()", "(call a)");
+        parse_ok("a(+1)", "(call a 1)");
+        parse_ok("a()+1", "(+ (call a) 1)");
+        parse_ok("f(a, b, c)", "(call f (, (, a b) c))");
+        parse_ok("print(x)", "(call print x)");
+        parse_ok(
+            "x = y(2)*3 + y(4)*5",
+            "(= x (+ (* (call y 2) 3) (* (call y 4) 5)))",
+        );
+        parse_ok("x(1,2)+y(3,4)", "(+ (call x (, 1 2)) (call y (, 3 4)))");
+        parse_ok("x(a,b,c[d])", "(call x (, (, a b) ([] c d)))");
+        parse_ok(
+            "x(1,2)*j+y(3,4)*k+z(5,6)*l",
+            "(+ (+ (* (call x (, 1 2)) j) (* (call y (, 3 4)) k)) (* (call z (, 5 6)) l))",
+        );
+        parse_ok("print(test(2,3))", "(call print (call test (, 2 3)))");
+        parse_ok("min(255,n*2)", "(call min (, 255 (* n 2)))");
+    }
+
+    #[test]
+    fn member_access() {
+        parse_ok("a.b", "(. a b)");
+        parse_ok("a.b.c", "(. (. a b) c)");
+        parse_ok("a->b", "(-> a b)");
+        parse_ok("++a->b", "(pre++(-> a b))");
+        parse_ok("a++ ->b", "(-> (post++a) b)");
+        parse_ok("a.(x)", "(. a x)");
+        parse_ok("a.(x+3)", "(. a (+ x 3))");
+    }
+
+    #[test]
+    fn errors() {
+        assert!(parse("x + a b").is_err());
+        assert!(parse("x[a b]").is_err());
+        assert!(parse("x[a)]").is_err());
+        assert!(parse("x(a])").is_err());
+        assert!(parse("[a + b]").is_err());
+        assert!(parse("[a b]").is_err());
+        assert!(parse("+").is_err());
+        assert!(parse("a +").is_err());
+        assert!(parse("<=").is_err());
+        assert!(parse("<= - a + b").is_err());
+        assert!(parse("a b").is_err());
+        assert!(parse("a + b @").is_err());
+        assert!(parse("a + b )").is_err());
+        assert!(parse("( a + b").is_err());
+        assert!(parse("( a + b) c").is_err());
+        assert!(parse("f ( a + b ) c").is_err());
+        assert!(parse("@ a + b").is_err());
+        assert!(parse("a @ b").is_err());
+        assert!(parse("(a @ b)").is_err());
+        assert!(parse(")").is_err());
+    }
+}
diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs
index df791adaa..574c906bf 100644
--- a/src/combinator/mod.rs
+++ b/src/combinator/mod.rs
@@ -166,6 +166,8 @@ mod multi;
 mod parser;
 mod sequence;
 
+pub mod precedence;
+
 #[cfg(test)]
 mod tests;
 
@@ -174,6 +176,7 @@ pub use self::core::*;
 pub use self::debug::*;
 pub use self::multi::*;
 pub use self::parser::*;
+pub use self::precedence::*;
 pub use self::sequence::*;
 
 #[allow(unused_imports)]
diff --git a/src/combinator/precedence.rs b/src/combinator/precedence.rs
new file mode 100644
index 000000000..4566694d4
--- /dev/null
+++ b/src/combinator/precedence.rs
@@ -0,0 +1,208 @@
+use crate::{
+    combinator::{opt, trace},
+    error::{ErrMode, ParserError},
+    stream::{Stream, StreamIsPartial},
+    PResult, Parser,
+};
+
+/// Parses an expression based on operator precedence.
+///
+/// `operand` parses an atom. `prefix` and `postfix` each yield a binding power together with a
+/// function that folds the operator into its operand. `infix` yields an [`Assoc`] (binding
+/// power plus associativity) together with a function that folds the two sides.
+///
+/// A higher power binds tighter. Parsing starts at `start_power`: an operator whose power is
+/// lower than the current minimum is not consumed and parsing stops in front of it.
+///
+/// A `prefix` parser must consume input, and a postfix operator (its parser plus its fold
+/// function) must make progress; otherwise an assertion error is raised instead of looping.
+#[doc(alias = "pratt")]
+#[doc(alias = "separated")]
+#[doc(alias = "shunting_yard")]
+#[doc(alias = "precedence_climbing")]
+#[inline(always)]
+pub fn precedence<I, ParseOperand, ParseInfix, ParsePrefix, ParsePostfix, Operand, E>(
+    start_power: i64,
+    mut operand: ParseOperand,
+    mut prefix: ParsePrefix,
+    mut postfix: ParsePostfix,
+    mut infix: ParseInfix,
+) -> impl Parser<I, Operand, E>
+where
+    I: Stream + StreamIsPartial,
+    ParseOperand: Parser<I, Operand, E>,
+    ParseInfix: Parser<I, (Assoc, fn(&mut I, Operand, Operand) -> PResult<Operand, E>), E>,
+    ParsePrefix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
+    ParsePostfix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
+    E: ParserError<I>,
+{
+    trace("precedence", move |i: &mut I| {
+        precedence_impl(
+            i,
+            &mut operand,
+            &mut prefix,
+            &mut postfix,
+            &mut infix,
+            start_power,
+        )
+    })
+}
+
+/// Binding power and associativity of an infix operator.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Assoc {
+    /// `a op b op c` groups as `(a op b) op c`.
+    Left(i64),
+    /// `a op b op c` groups as `a op (b op c)`.
+    Right(i64),
+    /// Operators of this power do not chain: parsing stops in front of the second one in
+    /// `a op b op c`.
+    Neither(i64),
+}
+
+// recursive function
+//
+// Parses one operand (or a prefix operator followed by its operand), then keeps folding postfix
+// and infix operators into it for as long as their power is at least `min_power`.
+fn precedence_impl<I, ParseOperand, ParseInfix, ParsePrefix, ParsePostfix, Operand, E>(
+    i: &mut I,
+    parse_operand: &mut ParseOperand,
+    prefix: &mut ParsePrefix,
+    postfix: &mut ParsePostfix,
+    infix: &mut ParseInfix,
+    min_power: i64,
+) -> PResult<Operand, E>
+where
+    I: Stream + StreamIsPartial,
+    ParseOperand: Parser<I, Operand, E>,
+    ParseInfix: Parser<I, (Assoc, fn(&mut I, Operand, Operand) -> PResult<Operand, E>), E>,
+    ParsePrefix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
+    ParsePostfix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
+    E: ParserError<I>,
+{
+    let operand = opt(parse_operand.by_ref()).parse_next(i)?;
+    let mut operand = if let Some(operand) = operand {
+        operand
+    } else {
+        // Prefix unary operators
+        let len = i.eof_offset();
+        let (power, fold_prefix) = prefix.parse_next(i)?;
+        // infinite loop check: the parser must always consume
+        if i.eof_offset() == len {
+            return Err(ErrMode::assert(i, "`prefix` parsers must always consume"));
+        }
+        let operand = precedence_impl(i, parse_operand, prefix, postfix, infix, power)?;
+        fold_prefix(i, operand)?
+    };
+
+    // A variable to stop the `'parse` loop when `Assoc::Neither` with the same
+    // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar
+    // associativity rules as `Assoc::Left`, but we stop parsing when the next operator
+    // is the same as the current one.
+    let mut prev_op_is_neither = None;
+    'parse: while i.eof_offset() > 0 {
+        // Postfix unary operators
+        let start = i.checkpoint();
+        let len = i.eof_offset();
+        if let Some((power, fold_postfix)) = opt(postfix.by_ref()).parse_next(i)? {
+            // control precedence over the prefix e.g.:
+            // `--(i++)` or `(--i)++`
+            if power < min_power {
+                i.reset(&start);
+                break 'parse;
+            }
+            operand = fold_postfix(i, operand)?;
+            // infinite loop check: checked after the fold, because the fold function is allowed
+            // to do the consuming (e.g. it parses the `[...]` of an index expression)
+            if i.eof_offset() == len {
+                return Err(ErrMode::assert(i, "`postfix` parsers must always consume"));
+            }
+
+            continue 'parse;
+        }
+
+        // Infix binary operators
+        let start = i.checkpoint();
+        let parse_result = opt(infix.by_ref()).parse_next(i)?;
+        if let Some((assoc, fold_infix)) = parse_result {
+            // `lpower` decides whether this operator may be taken at all; `rpower` is the
+            // minimum power for operators inside its right-hand side.
+            let (lpower, rpower, is_neither) = match assoc {
+                // Same power on the right: an equal-power operator nests into the right-hand
+                // side, while looser operators (including the adjacent level) stay outside.
+                Assoc::Right(p) => (p, p, None),
+                // One more on the right: an equal-power operator ends the right-hand side.
+                // `saturating_add` avoids overflow for `i64::MAX`.
+                Assoc::Left(p) => (p, p.saturating_add(1), None),
+                Assoc::Neither(p) => (p, p.saturating_add(1), Some(p)),
+            };
+            if lpower < min_power || prev_op_is_neither.is_some_and(|p| lpower == p) {
+                i.reset(&start);
+                break 'parse;
+            }
+            prev_op_is_neither = is_neither;
+            let rhs = precedence_impl(i, parse_operand, prefix, postfix, infix, rpower)?;
+            operand = fold_infix(i, operand, rhs)?;
+
+            continue 'parse;
+        }
+
+        break 'parse;
+    }
+
+    Ok(operand)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::ascii::{digit1, space0};
+    use crate::combinator::{delimited, empty, fail, peek};
+    use crate::dispatch;
+    use crate::error::ContextError;
+    use crate::token::any;
+
+    use super::*;
+
+    // Product of `1..=x`; the empty range makes this `1` for `x <= 0` instead of recursing
+    // without bound on negative input.
+    fn factorial(x: i32) -> i32 {
+        (1..=x).product()
+    }
+    // Small integer calculator used to exercise `precedence`.
+    fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
+        move |i: &mut &str| {
+            precedence(
+                0,
+                trace(
+                    "operand",
+                    delimited(
+                        space0,
+                        dispatch! {peek(any);
+                            '(' => delimited('(', parser(), ')'),
+                            _ => digit1.parse_to::<i32>()
+                        },
+                        space0,
+                    ),
+                ),
+                trace(
+                    "prefix",
+                    dispatch! {any;
+                        '+' => empty.value((9, (|_: &mut _, a| Ok(a)) as _)),
+                        '-' => empty.value((9, (|_: &mut _, a: i32| Ok(-a)) as _)),
+                        _ => fail
+                    },
+                ),
+                trace(
+                    "postfix",
+                    dispatch! {any;
+                        '!' => empty.value((9, (|_: &mut _, a| {Ok(factorial(a))}) as _)),
+                        _ => fail
+                    },
+                ),
+                trace(
+                    "infix",
+                    dispatch! {any;
+                        '+' => empty.value((Assoc::Left(5), (|_: &mut _, a, b| Ok(a + b)) as _ )),
+                        '-' => empty.value((Assoc::Left(5), (|_: &mut _, a, b| Ok(a - b)) as _)),
+                        '*' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a * b)) as _)),
+                        '/' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a / b)) as _)),
+                        '%' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a % b)) as _)),
+                        '^' => empty.value((Assoc::Right(9), (|_: &mut _, a, b| Ok(a ^ b)) as _)),
+                        _ => fail
+                    },
+                ),
+            )
+            .parse_next(i)
+        }
+    }
+
+    #[test]
+    fn test_precedence() {
+        assert_eq!(parser().parse("-3!+-3 * 4"), Ok(-18));
+        assert_eq!(parser().parse("+2 + 3 * 4"), Ok(14));
+        assert_eq!(parser().parse("2 * 3+4"), Ok(10));
+    }
+    #[test]
+    fn test_unary() {
+        assert_eq!(parser().parse("-2"), Ok(-2));
+        assert_eq!(parser().parse("4!"), Ok(24));
+        assert_eq!(parser().parse("2 + 4!"), Ok(26));
+        assert_eq!(parser().parse("-2 + 2"), Ok(0));
+    }
+}