Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fed8c90
feat: implement Pratt parser
39555 Nov 10, 2024
ee4459d
commit suggestion
39555 Nov 16, 2024
4b1499d
remove spaces from #[doc(alias = "...")]
39555 Nov 16, 2024
acf4577
remove `UnaryOp` and `BinaryOp` in favor of `Fn`
39555 Nov 16, 2024
a816a1c
remove redundant trait impl
39555 Nov 16, 2024
2a80e65
remove `allow_unused`, move `allow(non_snake_case)` to where it shoul…
39555 Nov 16, 2024
29fe18d
stop dumping pratt into `combinator` namespace
39555 Nov 16, 2024
5a4f4b4
move important things to go first
39555 Nov 16, 2024
919a1cb
strip fancy api for now
39555 Nov 16, 2024
0273a29
remove wrong and long doc for now
39555 Nov 16, 2024
f218911
fix: precedence for associativity, remove `trace()`
39555 Nov 16, 2024
3d7ef41
switch from `&dyn Fn(O) -> O` to `fn(O) -> O`
39555 Nov 17, 2024
a6cbc1a
feat: pass Input into operator closures
39555 Nov 17, 2024
29b64fa
add `trace` for `tests` parser
39555 Nov 17, 2024
b31a3a3
feat: operator closures must return PResult
39555 Nov 18, 2024
33c82f3
feat: allow the user to specify starting power
39555 Nov 18, 2024
040dd85
feat: enum `Assoc` for infix operators. Add `Neither` associativity
39555 Nov 19, 2024
6d88dff
fix: switch to i64, fix precedence checking
39555 Nov 19, 2024
8f18fc2
example: pratt expression parser
39555 Nov 17, 2024
a4ad844
feat: complex postfix operators
39555 Nov 17, 2024
54cb315
pratt_example: operator closures return PResult
39555 Nov 18, 2024
d6da343
test: add tests
39555 Nov 18, 2024
c1a8535
specify the parser start precedence
39555 Nov 18, 2024
a85291b
style: fix indentation
39555 Nov 18, 2024
39cc484
refactor: remove unnecessarily multispace0
39555 Nov 18, 2024
c52c10d
fix: failed tests
39555 Nov 18, 2024
d3c3d0a
use `Assoc` enum. tests for associativity `Neither`
39555 Nov 19, 2024
b7b0629
fix: switch to i64
39555 Nov 19, 2024
5e7fb65
tests ill-formed expressions
39555 Nov 19, 2024
7b6e3e0
update benchmark
39555 Nov 19, 2024
63e30e1
PoC: Pratt parsing with `shunting yard` algorithm
39555 Nov 14, 2024
4ff9b25
rename `unwind_operators_stack` -> `unwind_operators_stack_to`
39555 Nov 16, 2024
7b82b0e
refactor: make steps more distinct
39555 Nov 16, 2024
be02d0a
update shunting_yard
39555 Nov 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,17 @@ required-features = ["alloc"]
name = "string"
required-features = ["alloc"]

[[example]]
name = "pratt"
required-features = ["std"]


[[bench]]
name = "pratt"
path = "examples/pratt/bench.rs"
harness = false
required-features = ["std"]

[[bench]]
name = "arithmetic"
path = "examples/arithmetic/bench.rs"
Expand Down
16 changes: 16 additions & 0 deletions examples/pratt/bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
mod parser;

use criterion::black_box;
use winnow::prelude::*;

/// Criterion benchmark that measures `parser::pratt_parser` on one fixed expression
/// mixing prefix, postfix, infix and ternary operators.
fn pratt(c: &mut criterion::Criterion) {
    let expression =
        "a = 2*-2 * (a ? 1 + 2 * 4 - --a.bar + 2 : 2) / ( &**foo.a->p! -+1) + 3^1 / 4 == 1 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2))";

    // Parse once up front so an invalid input aborts with a clear message
    // before any measurement starts.
    parser::pratt_parser
        .parse(expression)
        .expect("should parse");

    c.bench_function("pratt_parser", |bencher| {
        bencher.iter(|| {
            let parsed = parser::pratt_parser.parse(expression).unwrap();
            black_box(parsed)
        });
    });
}

criterion::criterion_group!(benches, pratt);
criterion::criterion_main!(benches);
44 changes: 44 additions & 0 deletions examples/pratt/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use winnow::prelude::*;

mod parser;

/// Entry point of the example: parses the expression supplied on the command line
/// (or `1 + 1` when none is given) and prints either the result or the parse error.
///
/// A parse failure is only reported on stdout; the process still returns `Ok(())`.
/// Only invalid command-line arguments produce an `Err`.
fn main() -> Result<(), lexopt::Error> {
    let args = Args::parse()?;
    let expression = args.input.as_deref().unwrap_or("1 + 1");

    match parser::pratt_parser.parse(expression) {
        Err(err) => {
            println!("FAILED");
            println!("{err}");
        }
        Ok(result) => println!("{result}"),
    }

    Ok(())
}

/// Command-line arguments of the example binary.
#[derive(Default)]
struct Args {
/// Expression to parse; stays `None` when no positional value was supplied.
input: Option<String>,
}

impl Args {
    /// Builds [`Args`] from the process arguments.
    ///
    /// Each positional value overwrites `input`, so the last one wins. Any other
    /// kind of argument is rejected with the error produced by `Arg::unexpected`.
    fn parse() -> Result<Self, lexopt::Error> {
        use lexopt::prelude::*;

        let mut parsed = Self::default();
        let mut cli = lexopt::Parser::from_env();

        loop {
            match cli.next()? {
                Some(Value(raw)) => parsed.input = Some(raw.string()?),
                Some(other) => return Err(other.unexpected()),
                None => break,
            }
        }

        Ok(parsed)
    }
}
776 changes: 776 additions & 0 deletions examples/pratt/parser.rs

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/combinator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ mod multi;
mod parser;
mod sequence;

pub mod shunting_yard;
pub mod precedence;

#[cfg(test)]
mod tests;

Expand Down
208 changes: 208 additions & 0 deletions src/combinator/precedence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
use crate::{
combinator::{opt, trace},
error::{ErrMode, ParserError},
stream::{Stream, StreamIsPartial},
PResult, Parser,
};

/// Parses an expression based on operator precedence.
///
/// The returned parser first reads an operand (optionally preceded by prefix
/// operators) and then keeps applying postfix and infix operators for as long as
/// their binding power is not below the current minimum.
///
/// # Arguments
///
/// - `start_power`: minimum binding power used for the outermost expression.
///   Postfix and infix operators with a lower power are left unparsed.
/// - `operand`: parser for a single operand; it is tried before `prefix`.
/// - `prefix`: parser for a prefix operator, yielding `(power, fold)`. The operand
///   that follows is parsed with `power` as its minimum binding power and is then
///   passed to `fold`.
/// - `postfix`: parser for a postfix operator, yielding `(power, fold)`.
/// - `infix`: parser for a binary operator, yielding its [`Assoc`] (associativity
///   plus binding power) and a `fold` that combines the left and right operands.
///
/// Every `fold` function also receives the input stream and returns a `PResult`,
/// so it can fail or parse further input itself.
#[doc(alias = "pratt")]
#[doc(alias = "separated")]
#[doc(alias = "shunting_yard")]
#[doc(alias = "precedence_climbing")]
#[inline(always)]
pub fn precedence<I, ParseOperand, ParseInfix, ParsePrefix, ParsePostfix, Operand: 'static, E>(
    start_power: i64,
    mut operand: ParseOperand,
    mut prefix: ParsePrefix,
    mut postfix: ParsePostfix,
    mut infix: ParseInfix,
) -> impl Parser<I, Operand, E>
where
    I: Stream + StreamIsPartial,
    ParseOperand: Parser<I, Operand, E>,
    ParseInfix: Parser<I, (Assoc, fn(&mut I, Operand, Operand) -> PResult<Operand, E>), E>,
    ParsePrefix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
    ParsePostfix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
    E: ParserError<I>,
{
    // The closure owns the sub-parsers and lends them to the recursive
    // implementation on every invocation.
    trace("precedence", move |input: &mut I| {
        precedence_impl(
            input,
            &mut operand,
            &mut prefix,
            &mut postfix,
            &mut infix,
            start_power,
        )
    })
}

/// Associativity of an infix operator together with its binding power.
///
/// The `i64` payload of every variant is the operator's binding power. It is
/// compared against the minimum power of the expression currently being parsed to
/// decide whether the operator is consumed or left for an enclosing expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Assoc {
    /// Left-associative: `a op b op c` groups as `(a op b) op c`.
    Left(i64),
    /// Right-associative: `a op b op c` groups as `a op (b op c)`.
    Right(i64),
    /// Non-associative: binds like [`Assoc::Left`], but parsing stops when the
    /// operator is directly followed by another `Neither` operator of the same
    /// power, e.g. the second `==` in `a == b == c` is not consumed.
    Neither(i64),
}

/// Recursive core of the precedence parser.
///
/// Parses one sub-expression in which every postfix and infix operator has a
/// binding power of at least `min_power`:
///
/// 1. Read an operand. If `parse_operand` backtracks, a prefix operator is required
///    instead; its operand is parsed recursively with the prefix's own power as the
///    new minimum and then folded.
/// 2. Repeatedly try a postfix operator, then an infix operator. An operator whose
///    power is below `min_power` is "un-read" by resetting the input to the
///    checkpoint taken before it, and the loop ends so an enclosing call can
///    handle it.
///
/// Returns the folded operand, or the error of whichever sub-parser or fold
/// function failed.
fn precedence_impl<I, ParseOperand, ParseInfix, ParsePrefix, ParsePostfix, Operand: 'static, E>(
    i: &mut I,
    parse_operand: &mut ParseOperand,
    prefix: &mut ParsePrefix,
    postfix: &mut ParsePostfix,
    infix: &mut ParseInfix,
    min_power: i64,
) -> PResult<Operand, E>
where
    I: Stream + StreamIsPartial,
    ParseOperand: Parser<I, Operand, E>,
    ParseInfix: Parser<I, (Assoc, fn(&mut I, Operand, Operand) -> PResult<Operand, E>), E>,
    ParsePrefix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
    ParsePostfix: Parser<I, (i64, fn(&mut I, Operand) -> PResult<Operand, E>), E>,
    E: ParserError<I>,
{
    let operand = opt(parse_operand.by_ref()).parse_next(i)?;
    let mut operand = if let Some(operand) = operand {
        operand
    } else {
        // Prefix unary operators
        let len = i.eof_offset();
        let (power, fold_prefix) = prefix.parse_next(i)?;
        // infinite loop check: the parser must always consume
        if i.eof_offset() == len {
            return Err(ErrMode::assert(i, "`prefix` parsers must always consume"));
        }
        let operand = precedence_impl(i, parse_operand, prefix, postfix, infix, power)?;
        fold_prefix(i, operand)?
    };

    // A variable to stop the `'parse` loop when `Assoc::Neither` with the same
    // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar
    // associativity rules as `Assoc::Left`, but we stop parsing when the next operator
    // is the same as the current one.
    let mut prev_op_is_neither = None;
    'parse: while i.eof_offset() > 0 {
        // Postfix unary operators
        let start = i.checkpoint();
        if let Some((power, fold_postfix)) = opt(postfix.by_ref()).parse_next(i)? {
            // control precedence over the prefix e.g.:
            // `--(i++)` or `(--i)++`
            if power < min_power {
                i.reset(&start);
                break 'parse;
            }
            operand = fold_postfix(i, operand)?;

            continue 'parse;
        }

        // Infix binary operators
        let start = i.checkpoint();
        let parse_result = opt(infix.by_ref()).parse_next(i)?;
        if let Some((assoc, fold_infix)) = parse_result {
            let mut is_neither = None;
            // `lpower` decides whether this operator may be consumed at all;
            // `rpower` is the minimum power for operators inside its right operand:
            // - left / non-associative: strictly stronger (`p + 1`), so an operator of
            //   equal power ends the right operand and is folded by this loop instead;
            // - right-associative: equal power is enough (`p`), so `a op b op c` nests
            //   to the right. Using `p - 1` here would additionally pull operators of
            //   the next-lower level into the right operand, which mis-parses grammars
            //   whose precedence levels are adjacent integers.
            let (lpower, rpower) = match assoc {
                Assoc::Right(p) => (p, p),
                Assoc::Left(p) => (p, p + 1),
                Assoc::Neither(p) => {
                    is_neither = Some(p);
                    (p, p + 1)
                }
            };
            if lpower < min_power || prev_op_is_neither.is_some_and(|p| lpower == p) {
                i.reset(&start);
                break 'parse;
            }
            prev_op_is_neither = is_neither;
            let rhs = precedence_impl(i, parse_operand, prefix, postfix, infix, rpower)?;
            operand = fold_infix(i, operand, rhs)?;

            continue 'parse;
        }

        // Neither a postfix nor an infix operator follows: the expression ends here.
        break 'parse;
    }

    Ok(operand)
}

// Unit tests exercising `precedence` with a small integer-arithmetic grammar.
#[cfg(test)]
mod tests {
use crate::ascii::{digit1, space0};
use crate::combinator::{delimited, empty, fail, peek};
use crate::dispatch;
use crate::error::ContextError;
use crate::token::any;

use super::*;

// Recursive factorial, used as the fold of the postfix `!` operator.
// Only meaningful for `x >= 0`: a negative `x` never reaches the `x == 0` base case.
fn factorial(x: i32) -> i32 {
if x == 0 {
1
} else {
x * factorial(x - 1)
}
}
// Builds the arithmetic parser under test on top of `precedence`, starting at power 0.
// A fresh parser is created for every parenthesised sub-expression (`parser()` is
// called recursively inside the `'('` branch).
fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
move |i: &mut &str| {
precedence(
0,
// Operand: an integer literal or a parenthesised expression, with optional
// surrounding spaces.
trace(
"operand",
delimited(
space0,
dispatch! {peek(any);
'(' => delimited('(', parser(), ')'),
_ => digit1.parse_to::<i32>()
},
space0,
),
),
// Prefix operators: unary `+` and `-`, both with power 9.
trace(
"prefix",
dispatch! {any;
'+' => empty.value((9, (|_: &mut _, a| Ok(a)) as _)),
'-' => empty.value((9, (|_: &mut _, a: i32| Ok(-a)) as _)),
_ => fail
},
),
// Postfix operators: factorial `!` with power 9.
trace(
"postfix",
dispatch! {any;
'!' => empty.value((9, (|_: &mut _, a| {Ok(factorial(a))}) as _)),
_ => fail
},
),
// Infix operators: `+ -` at power 5, `* / %` at power 7 (all left-associative)
// and right-associative `^` at power 9.
// NOTE(review): the fold for `^` uses Rust's `^`, which is bitwise XOR on `i32` —
// confirm that exponentiation was not intended.
trace(
"infix",
dispatch! {any;
'+' => empty.value((Assoc::Left(5), (|_: &mut _, a, b| Ok(a + b)) as _ )),
'-' => empty.value((Assoc::Left(5), (|_: &mut _, a, b| Ok(a - b)) as _)),
'*' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a * b)) as _)),
'/' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a / b)) as _)),
'%' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a % b)) as _)),
'^' => empty.value((Assoc::Right(9), (|_: &mut _, a, b| Ok(a ^ b)) as _)),
_ => fail
},
),
)
.parse_next(i)
}
}

// `*` (power 7) must bind tighter than `+` (power 5).
#[test]
fn test_precedence() {
// TODO(review): the two assertions below are disabled; re-enable or delete them.
// assert_eq!(parser().parse("-3!+-3 * 4"), Ok(-18));
// assert_eq!(parser().parse("+2 + 3 * 4"), Ok(14));
assert_eq!(parser().parse("2 * 3+4"), Ok(10));
}
// Prefix and postfix operators, alone and combined with an infix operator.
#[test]
fn test_unary() {
assert_eq!(parser().parse("-2"), Ok(-2));
assert_eq!(parser().parse("4!"), Ok(24));
assert_eq!(parser().parse("2 + 4!"), Ok(26));
assert_eq!(parser().parse("-2 + 2"), Ok(0));
}
}
Loading