diff --git a/Cargo.lock b/Cargo.lock index 43666d64fbb..384660ee204 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1283,10 +1283,10 @@ dependencies = [ "gix-hash 0.11.4", "gix-testtools", "itoa", - "nom", "pretty_assertions", "serde", "thiserror", + "winnow", ] [[package]] @@ -1873,11 +1873,11 @@ dependencies = [ "gix-testtools", "gix-validate 0.8.0", "itoa", - "nom", "pretty_assertions", "serde", "smallvec", "thiserror", + "winnow", ] [[package]] @@ -2116,10 +2116,10 @@ dependencies = [ "gix-utils 0.1.5", "gix-validate 0.8.0", "memmap2 0.7.1", - "nom", "serde", "tempfile", "thiserror", + "winnow", ] [[package]] @@ -2274,11 +2274,11 @@ dependencies = [ "gix-worktree 0.17.1", "io-close", "is_ci", - "nom", "once_cell", "parking_lot", "tar", "tempfile", + "winnow", "xz2", ] @@ -4771,9 +4771,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.5.1" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25b5872fa2e10bd067ae946f927e726d7d603eaeb6e02fa6a350e0722d2b8c11" +checksum = "d09770118a7eb1ccaf4a594a221334119a44a814fcb0d31c5b85e83e97227a97" dependencies = [ "memchr", ] diff --git a/cargo-smart-release/Cargo.lock b/cargo-smart-release/Cargo.lock index 4c1d9a57741..9a99692ac8d 100644 --- a/cargo-smart-release/Cargo.lock +++ b/cargo-smart-release/Cargo.lock @@ -2667,9 +2667,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.5.1" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25b5872fa2e10bd067ae946f927e726d7d603eaeb6e02fa6a350e0722d2b8c11" +checksum = "83817bbecf72c73bad717ee86820ebf286203d2e04c3951f3cd538869c897364" dependencies = [ "memchr", ] diff --git a/cargo-smart-release/Cargo.toml b/cargo-smart-release/Cargo.toml index b8bcee66b32..ee628557d13 100644 --- a/cargo-smart-release/Cargo.toml +++ b/cargo-smart-release/Cargo.toml @@ -34,7 +34,7 @@ toml_edit = "0.19.1" semver = "1.0.4" crates-index = { version = "2.1.0", default-features = false, features = ["git-performance", "git-https"] } cargo_toml = "0.15.1" -winnow = "0.5.1" +winnow = "0.5.12" git-conventional = "0.12.0" time = "0.3.23" pulldown-cmark = "0.9.0" diff --git a/gix-actor/Cargo.toml b/gix-actor/Cargo.toml index 669e3a53031..6ddfb64b36c 100644 --- a/gix-actor/Cargo.toml +++ b/gix-actor/Cargo.toml @@ -23,7 +23,7 @@ gix-date = { version = "^0.7.1", path = "../gix-date" } thiserror = "1.0.38" btoi = "0.4.2" bstr = { version = "1.3.0", default-features = false, features = ["std", "unicode"]} -nom = { version = "7", default-features = false, features = ["std"]} +winnow = { version = "0.5.14", features = ["simd"] } itoa = "1.0.1" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/gix-actor/src/identity.rs b/gix-actor/src/identity.rs index 0e7c0e6d508..382c0615ae6 100644 --- a/gix-actor/src/identity.rs +++ b/gix-actor/src/identity.rs @@ -1,14 +1,16 @@ use bstr::ByteSlice; +use winnow::error::StrContext; +use winnow::prelude::*; use crate::{signature::decode, Identity, IdentityRef}; impl<'a> IdentityRef<'a> { /// Deserialize an identity from the given `data`. - pub fn from_bytes(data: &'a [u8]) -> Result> + pub fn from_bytes(mut data: &'a [u8]) -> Result> where - E: nom::error::ParseError<&'a [u8]> + nom::error::ContextError<&'a [u8]>, + E: winnow::error::ParserError<&'a [u8]> + winnow::error::AddContext<&'a [u8], StrContext>, { - decode::identity(data).map(|(_, t)| t) + decode::identity.parse_next(&mut data) } /// Create an owned instance from this shared one. diff --git a/gix-actor/src/signature/decode.rs b/gix-actor/src/signature/decode.rs index 2d80df72f6e..87673b451e9 100644 --- a/gix-actor/src/signature/decode.rs +++ b/gix-actor/src/signature/decode.rs @@ -2,101 +2,75 @@ pub(crate) mod function { use bstr::ByteSlice; use btoi::btoi; use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch, Time}; - use nom::multi::many1_count; - use nom::{ - branch::alt, - bytes::complete::{tag, take, take_until, take_while_m_n}, - character::is_digit, - error::{context, ContextError, ParseError}, - sequence::{terminated, tuple}, - IResult, + use winnow::{ + combinator::alt, + combinator::separated_pair, + combinator::terminated, + error::{AddContext, ParserError, StrContext}, + prelude::*, + stream::AsChar, + token::{take, take_until0, take_while}, }; - use std::cell::RefCell; use crate::{IdentityRef, SignatureRef}; const SPACE: &[u8] = b" "; /// Parse a signature from the bytes input `i` using `nom`. - pub fn decode<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( - i: &'a [u8], - ) -> IResult<&'a [u8], SignatureRef<'a>, E> { - use nom::Parser; - let tzsign = RefCell::new(b'-'); // TODO: there should be no need for this. - let (i, (identity, _, time, _tzsign_count, hours, minutes)) = context( - " <> <+|->", - tuple(( - identity, - tag(b" "), - context("", |i| { - terminated(take_until(SPACE), take(1usize))(i).and_then(|(i, v)| { - btoi::(v) - .map(|v| (i, v)) - .map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes))) - }) + pub fn decode<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], + ) -> PResult, E> { + separated_pair( + identity, + b" ", + ( + terminated(take_until0(SPACE), take(1usize)) + .verify_map(|v| btoi::(v).ok()) + .context(StrContext::Expected("".into())), + alt(( + take_while(1.., b'-').map(|_| Sign::Minus), + take_while(1.., b'+').map(|_| Sign::Plus), + )) + .context(StrContext::Expected("+|-".into())), + take_while(2, AsChar::is_dec_digit) + .verify_map(|v| btoi::(v).ok()) + .context(StrContext::Expected("HH".into())), + take_while(1..=2, AsChar::is_dec_digit) + .verify_map(|v| btoi::(v).ok()) + .context(StrContext::Expected("MM".into())), + ) + .map(|(time, sign, hours, minutes)| { + let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 }; + Time { + seconds: time, + offset, + sign, + } }), - context( - "+|-", - alt(( - many1_count(tag(b"-")).map(|_| *tzsign.borrow_mut() = b'-'), // TODO: this should be a non-allocating consumer of consecutive tags - many1_count(tag(b"+")).map(|_| *tzsign.borrow_mut() = b'+'), - )), - ), - context("HH", |i| { - take_while_m_n(2usize, 2, is_digit)(i).and_then(|(i, v)| { - btoi::(v) - .map(|v| (i, v)) - .map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes))) - }) - }), - context("MM", |i| { - take_while_m_n(1usize, 2, is_digit)(i).and_then(|(i, v)| { - btoi::(v) - .map(|v| (i, v)) - .map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes))) - }) - }), - )), - )(i)?; - - let tzsign = tzsign.into_inner(); - debug_assert!(tzsign == b'-' || tzsign == b'+', "parser assure it's +|- only"); - let sign = if tzsign == b'-' { Sign::Minus } else { Sign::Plus }; // - let offset = (hours * 3600 + minutes * 60) * if sign == Sign::Minus { -1 } else { 1 }; - - Ok(( - i, - SignatureRef { - name: identity.name, - email: identity.email, - time: Time { - seconds: time, - offset, - sign, - }, - }, - )) + ) + .context(StrContext::Expected(" <> <+|->".into())) + .map(|(identity, time)| SignatureRef { + name: identity.name, + email: identity.email, + time, + }) + .parse_next(i) } /// Parse an identity from the bytes input `i` (like `name `) using `nom`. - pub fn identity<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( - i: &'a [u8], - ) -> IResult<&'a [u8], IdentityRef<'a>, E> { - let (i, (name, email)) = context( - " <>", - tuple(( - context("", terminated(take_until(&b" <"[..]), take(2usize))), - context("", terminated(take_until(&b">"[..]), take(1usize))), - )), - )(i)?; - - Ok(( - i, - IdentityRef { + pub fn identity<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], + ) -> PResult, E> { + ( + terminated(take_until0(&b" <"[..]), take(2usize)).context(StrContext::Expected("".into())), + terminated(take_until0(&b">"[..]), take(1usize)).context(StrContext::Expected("".into())), + ) + .map(|(name, email): (&[u8], &[u8])| IdentityRef { name: name.as_bstr(), email: email.as_bstr(), - }, - )) + }) + .context(StrContext::Expected(" <>".into())) + .parse_next(i) } } pub use function::identity; @@ -107,12 +81,14 @@ mod tests { use bstr::ByteSlice; use gix_date::{time::Sign, OffsetInSeconds, SecondsSinceUnixEpoch}; use gix_testtools::to_bstr_err; - use nom::IResult; + use winnow::prelude::*; use crate::{signature, SignatureRef, Time}; - fn decode(i: &[u8]) -> IResult<&[u8], SignatureRef<'_>, nom::error::VerboseError<&[u8]>> { - signature::decode(i) + fn decode<'i>( + i: &mut &'i [u8], + ) -> PResult, winnow::error::TreeError<&'i [u8], winnow::error::StrContext>> { + signature::decode.parse_next(i) } fn signature( @@ -132,7 +108,8 @@ mod tests { #[test] fn tz_minus() { assert_eq!( - decode(b"Sebastian Thiel 1528473343 -0230") + decode + .parse_peek(b"Sebastian Thiel 1528473343 -0230") .expect("parse to work") .1, signature("Sebastian Thiel", "byronimo@gmail.com", 1528473343, Sign::Minus, -9000) @@ -142,7 +119,8 @@ mod tests { #[test] fn tz_plus() { assert_eq!( - decode(b"Sebastian Thiel 1528473343 +0230") + decode + .parse_peek(b"Sebastian Thiel 1528473343 +0230") .expect("parse to work") .1, signature("Sebastian Thiel", "byronimo@gmail.com", 1528473343, Sign::Plus, 9000) @@ -152,7 +130,8 @@ mod tests { #[test] fn negative_offset_0000() { assert_eq!( - decode(b"Sebastian Thiel 1528473343 -0000") + decode + .parse_peek(b"Sebastian Thiel 1528473343 -0000") .expect("parse to work") .1, signature("Sebastian Thiel", "byronimo@gmail.com", 1528473343, Sign::Minus, 0) @@ -162,7 +141,8 @@ mod tests { #[test] fn negative_offset_double_dash() { assert_eq!( - decode(b"name 1288373970 --700") + decode + .parse_peek(b"name 1288373970 --700") .expect("parse to work") .1, signature("name", "name@example.com", 1288373970, Sign::Minus, -252000) @@ -172,7 +152,7 @@ mod tests { #[test] fn empty_name_and_email() { assert_eq!( - decode(b" <> 12345 -1215").expect("parse to work").1, + decode.parse_peek(b" <> 12345 -1215").expect("parse to work").1, signature("", "", 12345, Sign::Minus, -44100) ); } @@ -180,22 +160,22 @@ mod tests { #[test] fn invalid_signature() { assert_eq!( - decode(b"hello < 12345 -1215") + decode.parse_peek(b"hello < 12345 -1215") .map_err(to_bstr_err) .expect_err("parse fails as > is missing") .to_string(), - "Parse error:\nTakeUntil at: 12345 -1215\nin section '', at: 12345 -1215\nin section ' <>', at: hello < 12345 -1215\nin section ' <> <+|->', at: hello < 12345 -1215\n" + "in slice at ' 12345 -1215'\n 0: expected `` at ' 12345 -1215'\n 1: expected ` <>` at ' 12345 -1215'\n 2: expected ` <> <+|->` at ' 12345 -1215'\n" ); } #[test] fn invalid_time() { assert_eq!( - decode(b"hello <> abc -1215") + decode.parse_peek(b"hello <> abc -1215") .map_err(to_bstr_err) .expect_err("parse fails as > is missing") .to_string(), - "Parse error:\nMapRes at: -1215\nin section '', at: abc -1215\nin section ' <> <+|->', at: hello <> abc -1215\n" + "in predicate verification at 'abc -1215'\n 0: expected `` at 'abc -1215'\n 1: expected ` <> <+|->` at 'abc -1215'\n" ); } } diff --git a/gix-actor/src/signature/mod.rs b/gix-actor/src/signature/mod.rs index 8634ba595af..c70a4f3cdcd 100644 --- a/gix-actor/src/signature/mod.rs +++ b/gix-actor/src/signature/mod.rs @@ -1,15 +1,17 @@ mod _ref { use bstr::ByteSlice; + use winnow::error::StrContext; + use winnow::prelude::*; use crate::{signature::decode, IdentityRef, Signature, SignatureRef}; impl<'a> SignatureRef<'a> { /// Deserialize a signature from the given `data`. - pub fn from_bytes(data: &'a [u8]) -> Result, nom::Err> + pub fn from_bytes(mut data: &'a [u8]) -> Result, winnow::error::ErrMode> where - E: nom::error::ParseError<&'a [u8]> + nom::error::ContextError<&'a [u8]>, + E: winnow::error::ParserError<&'a [u8]> + winnow::error::AddContext<&'a [u8], StrContext>, { - decode(data).map(|(_, t)| t) + decode.parse_next(&mut data) } /// Create an owned instance from this shared one. diff --git a/gix-actor/tests/identity/mod.rs b/gix-actor/tests/identity/mod.rs index 2085e1fc549..3f1e71cc10c 100644 --- a/gix-actor/tests/identity/mod.rs +++ b/gix-actor/tests/identity/mod.rs @@ -9,7 +9,7 @@ fn round_trip() -> gix_testtools::Result { b".. whitespace \t is explicitly allowed - unicode aware trimming must be done elsewhere " ]; for input in DEFAULTS { - let signature: Identity = gix_actor::IdentityRef::from_bytes::<()>(input)?.into(); + let signature: Identity = gix_actor::IdentityRef::from_bytes::<()>(input).unwrap().into(); let mut output = Vec::new(); signature.write_to(&mut output)?; assert_eq!(output.as_bstr(), input.as_bstr()); diff --git a/gix-actor/tests/signature/mod.rs b/gix-actor/tests/signature/mod.rs index bd66814fb6b..e0c7e663d2e 100644 --- a/gix-actor/tests/signature/mod.rs +++ b/gix-actor/tests/signature/mod.rs @@ -73,7 +73,7 @@ fn round_trip() -> Result<(), Box> { ]; for input in DEFAULTS { - let signature: Signature = gix_actor::SignatureRef::from_bytes::<()>(input)?.into(); + let signature: Signature = gix_actor::SignatureRef::from_bytes::<()>(input).unwrap().into(); let mut output = Vec::new(); signature.write_to(&mut output)?; assert_eq!(output.as_bstr(), input.as_bstr()); diff --git a/gix-config/Cargo.toml b/gix-config/Cargo.toml index a1b2cd68544..79c875fa25c 100644 --- a/gix-config/Cargo.toml +++ b/gix-config/Cargo.toml @@ -25,7 +25,7 @@ gix-ref = { version = "^0.34.0", path = "../gix-ref" } gix-glob = { version = "^0.10.2", path = "../gix-glob" } log = "0.4.17" -winnow = "0.5" +winnow = { version = "0.5.14", features = ["simd"] } memchr = "2" thiserror = "1.0.26" unicode-bom = "2.0.2" diff --git a/gix-object/Cargo.toml b/gix-object/Cargo.toml index 43f39d2dfb0..4d61c29b017 100644 --- a/gix-object/Cargo.toml +++ b/gix-object/Cargo.toml @@ -25,7 +25,7 @@ serde = ["dep:serde", "bstr/serde", "smallvec/serde", "gix-hash/serde", "gix-act ## details information about the error location will be collected. ## Use it in applications which expect broken or invalid objects or for debugging purposes. Incorrectly formatted objects aren't at all ## common otherwise. -verbose-object-parsing-errors = ["nom/std"] +verbose-object-parsing-errors = [] [dependencies] gix-features = { version = "^0.32.1", path = "../gix-features", features = ["rustsha1"] } @@ -38,7 +38,7 @@ btoi = "0.4.2" itoa = "1.0.1" thiserror = "1.0.34" bstr = { version = "1.3.0", default-features = false, features = ["std", "unicode"] } -nom = { version = "7", default-features = false, features = ["std"]} +winnow = { version = "0.5.14", features = ["simd"] } smallvec = { version = "1.4.0", features = ["write"] } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/gix-object/src/commit/decode.rs b/gix-object/src/commit/decode.rs index 821feaabba5..1d361b255c1 100644 --- a/gix-object/src/commit/decode.rs +++ b/gix-object/src/commit/decode.rs @@ -1,71 +1,75 @@ use std::borrow::Cow; -use nom::{ - branch::alt, - bytes::complete::{is_not, tag}, - combinator::{all_consuming, opt}, - error::{context, ContextError, ParseError}, - multi::many0, - IResult, Parser, -}; use smallvec::SmallVec; +use winnow::{ + combinator::alt, + combinator::preceded, + combinator::repeat, + combinator::terminated, + combinator::{eof, opt, rest}, + error::{AddContext, ParserError, StrContext}, + prelude::*, + token::take_till1, +}; use crate::{parse, parse::NL, BStr, ByteSlice, CommitRef}; -pub fn message<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { +pub fn message<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], +) -> PResult<&'a BStr, E> { if i.is_empty() { // newline + [message] - return Err(nom::Err::Error(E::add_context( - i, - "newline + ", - E::from_error_kind(i, nom::error::ErrorKind::Eof), - ))); + return Err( + winnow::error::ErrMode::from_error_kind(i, winnow::error::ErrorKind::Eof) + .add_context(i, StrContext::Expected("newline + ".into())), + ); } - let (i, _) = context("a newline separates headers from the message", tag(NL))(i)?; - Ok((&[], i.as_bstr())) + preceded(NL, rest.map(ByteSlice::as_bstr)) + .context(StrContext::Expected( + "a newline separates headers from the message".into(), + )) + .parse_next(i) } -pub fn commit<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( - i: &'a [u8], -) -> IResult<&'a [u8], CommitRef<'_>, E> { - let (i, tree) = context("tree <40 lowercase hex char>", |i| { - parse::header_field(i, b"tree", parse::hex_hash) - })(i)?; - let (i, parents) = context( - "zero or more 'parent <40 lowercase hex char>'", - many0(|i| parse::header_field(i, b"parent", parse::hex_hash)), - )(i)?; - let (i, author) = context("author ", |i| { - parse::header_field(i, b"author", parse::signature) - })(i)?; - let (i, committer) = context("committer ", |i| { - parse::header_field(i, b"committer", parse::signature) - })(i)?; - let (i, encoding) = context( - "encoding ", - opt(|i| parse::header_field(i, b"encoding", is_not(NL))), - )(i)?; - let (i, extra_headers) = context( - " ", - many0(alt(( - parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), - |i| { - parse::any_header_field(i, is_not(NL)).map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr())))) +pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], +) -> PResult, E> { + ( + (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash)) + .context(StrContext::Expected("tree <40 lowercase hex char>".into())), + repeat(0.., |i: &mut _| parse::header_field(i, b"parent", parse::hex_hash)) + .map(|p: Vec<_>| p) + .context(StrContext::Expected( + "zero or more 'parent <40 lowercase hex char>'".into(), + )), + (|i: &mut _| parse::header_field(i, b"author", parse::signature)) + .context(StrContext::Expected("author ".into())), + (|i: &mut _| parse::header_field(i, b"committer", parse::signature)) + .context(StrContext::Expected("committer ".into())), + opt(|i: &mut _| parse::header_field(i, b"encoding", take_till1(NL))) + .context(StrContext::Expected("encoding ".into())), + repeat( + 0.., + alt(( + parse::any_header_field_multi_line.map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), + |i: &mut _| { + parse::any_header_field(i, take_till1(NL)).map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) + }, + )), + ) + .context(StrContext::Expected(" ".into())), + terminated(message, eof), + ) + .map( + |(tree, parents, author, committer, encoding, extra_headers, message)| CommitRef { + tree, + parents: SmallVec::from(parents), + author, + committer, + encoding: encoding.map(ByteSlice::as_bstr), + message, + extra_headers, }, - ))), - )(i)?; - let (i, message) = all_consuming(message)(i)?; - - Ok(( - i, - CommitRef { - tree, - parents: SmallVec::from(parents), - author, - committer, - encoding: encoding.map(ByteSlice::as_bstr), - message, - extra_headers, - }, - )) + ) + .parse_next(i) } diff --git a/gix-object/src/commit/message/body.rs b/gix-object/src/commit/message/body.rs index 855f031be93..779d4ac0bd0 100644 --- a/gix-object/src/commit/message/body.rs +++ b/gix-object/src/commit/message/body.rs @@ -1,11 +1,13 @@ use std::ops::Deref; -use nom::{ - bytes::complete::{tag, take_until1}, - combinator::all_consuming, - error::{ErrorKind, ParseError}, - sequence::terminated, - IResult, +use winnow::{ + combinator::eof, + combinator::rest, + combinator::separated_pair, + combinator::terminated, + error::{ErrorKind, ParserError}, + prelude::*, + token::take_until1, }; use crate::{ @@ -32,12 +34,14 @@ pub struct TrailerRef<'a> { pub value: &'a BStr, } -fn parse_single_line_trailer<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, &'a BStr), E> { - let (value, token) = terminated(take_until1(b":".as_ref()), tag(b": "))(i.trim_end())?; +fn parse_single_line_trailer<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<(&'a BStr, &'a BStr), E> { + *i = i.trim_end(); + let (token, value) = separated_pair(take_until1(b":".as_ref()), b": ", rest).parse_next(i)?; + if token.trim_end().len() != token.len() || value.trim_start().len() != value.len() { - Err(nom::Err::Failure(E::from_error_kind(i, ErrorKind::Fail))) + Err(winnow::error::ErrMode::from_error_kind(i, ErrorKind::Fail).cut()) } else { - Ok((&[], (token.as_bstr(), value.as_bstr()))) + Ok((token.as_bstr(), value.as_bstr())) } } @@ -48,15 +52,15 @@ impl<'a> Iterator for Trailers<'a> { if self.cursor.is_empty() { return None; } - for line in self.cursor.lines_with_terminator() { + for mut line in self.cursor.lines_with_terminator() { self.cursor = &self.cursor[line.len()..]; - if let Some(trailer) = - all_consuming(parse_single_line_trailer::<()>)(line) - .ok() - .map(|(_, (token, value))| TrailerRef { - token: token.trim().as_bstr(), - value: value.trim().as_bstr(), - }) + if let Some(trailer) = terminated(parse_single_line_trailer::<()>, eof) + .parse_next(&mut line) + .ok() + .map(|(token, value)| TrailerRef { + token: token.trim().as_bstr(), + value: value.trim().as_bstr(), + }) { return Some(trailer); } @@ -118,7 +122,7 @@ mod test_parse_trailer { use super::*; fn parse(input: &str) -> (&BStr, &BStr) { - parse_single_line_trailer::<()>(input.as_bytes()).unwrap().1 + parse_single_line_trailer::<()>.parse_peek(input.as_bytes()).unwrap().1 } #[test] @@ -141,8 +145,8 @@ mod test_parse_trailer { #[test] fn extra_whitespace_before_token_or_value_is_error() { - assert!(parse_single_line_trailer::<()>(b"foo : bar").is_err()); - assert!(parse_single_line_trailer::<()>(b"foo: bar").is_err()) + assert!(parse_single_line_trailer::<()>.parse_peek(b"foo : bar").is_err()); + assert!(parse_single_line_trailer::<()>.parse_peek(b"foo: bar").is_err()) } #[test] diff --git a/gix-object/src/commit/message/decode.rs b/gix-object/src/commit/message/decode.rs index 6224909bdf1..8a63bdc782e 100644 --- a/gix-object/src/commit/message/decode.rs +++ b/gix-object/src/commit/message/decode.rs @@ -1,57 +1,46 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take_till1}, - combinator::all_consuming, - error::ParseError, - sequence::pair, - IResult, +use winnow::{ + combinator::alt, combinator::eof, combinator::preceded, combinator::rest, combinator::terminated, + error::ParserError, prelude::*, stream::Offset, stream::Stream, token::take_till1, }; use crate::bstr::{BStr, ByteSlice}; -pub(crate) fn newline<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> { - alt((tag(b"\r\n"), tag(b"\n")))(i) +pub(crate) fn newline<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a [u8], E> { + alt((b"\n", b"\r\n")).parse_next(i) } -fn subject_and_body<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { - let mut c = i; - let mut consumed_bytes = 0; - while !c.is_empty() { - c = match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r')(c) { - Ok((i1, segment)) => { - consumed_bytes += segment.len(); - match pair::<_, _, _, E, _, _>(newline, newline)(i1) { - Ok((body, _)) => { - return Ok(( - &[], - ( - i[0usize..consumed_bytes].as_bstr(), - (!body.is_empty()).then(|| body.as_bstr()), - ), - )); +fn subject_and_body<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<(&'a BStr, Option<&'a BStr>), E> { + let start_i = *i; + let start = i.checkpoint(); + while !i.is_empty() { + match take_till1::<_, _, E>(|c| c == b'\n' || c == b'\r').parse_next(i) { + Ok(_) => { + let consumed_bytes = i.offset_from(&start); + match preceded((newline::, newline::), rest).parse_next(i) { + Ok(body) => { + let body = (!body.is_empty()).then(|| body.as_bstr()); + return Ok((start_i[0usize..consumed_bytes].as_bstr(), body)); } - Err(_) => match i1.get(1..) { - Some(next) => { - consumed_bytes += 1; - next - } + Err(_) => match i.next_token() { + Some(_) => {} None => break, }, } } - Err(_) => match c.get(1..) { - Some(next) => { - consumed_bytes += 1; - next - } + Err(_) => match i.next_token() { + Some(_) => {} None => break, }, - }; + } } - Ok((&[], (i.as_bstr(), None))) + + i.reset(start); + rest.map(|r: &[u8]| (r.as_bstr(), None)).parse_next(i) } /// Returns title and body, without separator -pub fn message(input: &[u8]) -> (&BStr, Option<&BStr>) { - all_consuming(subject_and_body::<()>)(input).expect("cannot fail").1 +pub fn message(mut input: &[u8]) -> (&BStr, Option<&BStr>) { + terminated(subject_and_body::<()>, eof) + .parse_next(&mut input) + .expect("cannot fail") } diff --git a/gix-object/src/commit/mod.rs b/gix-object/src/commit/mod.rs index a7ac7f6b22a..f1748b558a4 100644 --- a/gix-object/src/commit/mod.rs +++ b/gix-object/src/commit/mod.rs @@ -58,8 +58,8 @@ mod write; /// Lifecycle impl<'a> CommitRef<'a> { /// Deserialize a commit from the given `data` bytes while avoiding most allocations. - pub fn from_bytes(data: &'a [u8]) -> Result, crate::decode::Error> { - decode::commit(data).map(|(_, t)| t).map_err(crate::decode::Error::from) + pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + decode::commit(&mut data).map_err(crate::decode::Error::with_err) } } diff --git a/gix-object/src/commit/ref_iter.rs b/gix-object/src/commit/ref_iter.rs index e4f643accb8..e411bc7fd51 100644 --- a/gix-object/src/commit/ref_iter.rs +++ b/gix-object/src/commit/ref_iter.rs @@ -3,11 +3,13 @@ use std::ops::Range; use bstr::BStr; use gix_hash::{oid, ObjectId}; -use nom::{ - branch::alt, - bytes::complete::is_not, - combinator::{all_consuming, opt}, - error::context, +use winnow::{ + combinator::alt, + combinator::terminated, + combinator::{eof, opt}, + error::StrContext, + prelude::*, + token::take_till1, }; use crate::commit::SignedData; @@ -149,13 +151,21 @@ fn missing_field() -> crate::decode::Error { } impl<'a> CommitRefIter<'a> { + #[inline] fn next_inner(i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + Self::next_inner_(i, state).map_err(crate::decode::Error::with_err) + } + + fn next_inner_( + mut i: &'a [u8], + state: &mut State, + ) -> Result<(&'a [u8], Token<'a>), winnow::error::ErrMode> { use State::*; Ok(match state { Tree => { - let (i, tree) = context("tree <40 lowercase hex char>", |i| { - parse::header_field(i, b"tree", parse::hex_hash) - })(i)?; + let tree = (|i: &mut _| parse::header_field(i, b"tree", parse::hex_hash)) + .context(StrContext::Expected("tree <40 lowercase hex char>".into())) + .parse_next(&mut i)?; *state = State::Parents; ( i, @@ -165,10 +175,9 @@ impl<'a> CommitRefIter<'a> { ) } Parents => { - let (i, parent) = context( - "commit <40 lowercase hex char>", - opt(|i| parse::header_field(i, b"parent", parse::hex_hash)), - )(i)?; + let parent = opt(|i: &mut _| parse::header_field(i, b"parent", parse::hex_hash)) + .context(StrContext::Expected("commit <40 lowercase hex char>".into())) + .parse_next(&mut i)?; match parent { Some(parent) => ( i, @@ -180,7 +189,7 @@ impl<'a> CommitRefIter<'a> { *state = State::Signature { of: SignatureKind::Author, }; - return Self::next_inner(i, state); + return Self::next_inner_(i, state); } } } @@ -196,7 +205,9 @@ impl<'a> CommitRefIter<'a> { (&b"committer"[..], "committer ") } }; - let (i, signature) = context(err_msg, |i| parse::header_field(i, field_name, parse::signature))(i)?; + let signature = (|i: &mut _| parse::header_field(i, field_name, parse::signature)) + .context(StrContext::Expected(err_msg.into())) + .parse_next(&mut i)?; ( i, match who { @@ -206,37 +217,35 @@ impl<'a> CommitRefIter<'a> { ) } Encoding => { - let (i, encoding) = context( - "encoding ", - opt(|i| parse::header_field(i, b"encoding", is_not(NL))), - )(i)?; + let encoding = opt(|i: &mut _| parse::header_field(i, b"encoding", take_till1(NL))) + .context(StrContext::Expected("encoding ".into())) + .parse_next(&mut i)?; *state = State::ExtraHeaders; match encoding { Some(encoding) => (i, Token::Encoding(encoding.as_bstr())), - None => return Self::next_inner(i, state), + None => return Self::next_inner_(i, state), } } ExtraHeaders => { - let (i, extra_header) = context( - " ", - opt(alt(( - |i| parse::any_header_field_multi_line(i).map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Owned(o)))), - |i| { - parse::any_header_field(i, is_not(NL)) - .map(|(i, (k, o))| (i, (k.as_bstr(), Cow::Borrowed(o.as_bstr())))) - }, - ))), - )(i)?; + let extra_header = opt(alt(( + |i: &mut _| parse::any_header_field_multi_line(i).map(|(k, o)| (k.as_bstr(), Cow::Owned(o))), + |i: &mut _| { + parse::any_header_field(i, take_till1(NL)) + .map(|(k, o)| (k.as_bstr(), Cow::Borrowed(o.as_bstr()))) + }, + ))) + .context(StrContext::Expected(" ".into())) + .parse_next(&mut i)?; match extra_header { Some(extra_header) => (i, Token::ExtraHeader(extra_header)), None => { *state = State::Message; - return Self::next_inner(i, state); + return Self::next_inner_(i, state); } } } Message => { - let (i, message) = all_consuming(decode::message)(i)?; + let message = terminated(decode::message, eof).parse_next(&mut i)?; debug_assert!( i.is_empty(), "we should have consumed all data - otherwise iter may go forever" diff --git a/gix-object/src/lib.rs b/gix-object/src/lib.rs index 83d75a9d949..56e0019fd8c 100644 --- a/gix-object/src/lib.rs +++ b/gix-object/src/lib.rs @@ -258,16 +258,12 @@ pub struct Data<'a> { pub mod decode { #[cfg(feature = "verbose-object-parsing-errors")] mod _decode { - use crate::bstr::{BString, ByteSlice}; - /// The type to be used for parse errors. - pub type ParseError<'a> = nom::error::VerboseError<&'a [u8]>; - /// The owned type to be used for parse errors. - pub type ParseErrorOwned = nom::error::VerboseError; + pub type ParseError = winnow::error::ContextError; pub(crate) fn empty_error() -> Error { Error { - inner: nom::error::VerboseError:: { errors: Vec::new() }, + inner: winnow::error::ContextError::new(), } } @@ -275,22 +271,13 @@ pub mod decode { #[derive(Debug, Clone)] pub struct Error { /// The actual error - pub inner: ParseErrorOwned, + pub inner: ParseError, } - impl<'a> From>> for Error { - fn from(v: nom::Err>) -> Self { - Error { - inner: match v { - nom::Err::Error(err) | nom::Err::Failure(err) => nom::error::VerboseError { - errors: err - .errors - .into_iter() - .map(|(i, v)| (i.as_bstr().to_owned(), v)) - .collect(), - }, - nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), - }, + impl Error { + pub(crate) fn with_err(err: winnow::error::ErrMode) -> Self { + Self { + inner: err.into_inner().expect("we don't have streaming parsers"), } } } @@ -306,9 +293,7 @@ pub mod decode { #[cfg(not(feature = "verbose-object-parsing-errors"))] mod _decode { /// The type to be used for parse errors, discards everything and is zero size - pub type ParseError<'a> = (); - /// The owned type to be used for parse errors, discards everything and is zero size - pub type ParseErrorOwned = (); + pub type ParseError = (); pub(crate) fn empty_error() -> Error { Error { inner: () } @@ -318,16 +303,13 @@ pub mod decode { #[derive(Debug, Clone)] pub struct Error { /// The actual error - pub inner: ParseErrorOwned, + pub inner: ParseError, } - impl<'a> From>> for Error { - fn from(v: nom::Err>) -> Self { - Error { - inner: match v { - nom::Err::Error(err) | nom::Err::Failure(err) => err, - nom::Err::Incomplete(_) => unreachable!("we don't have streaming parsers"), - }, + impl Error { + pub(crate) fn with_err(err: winnow::error::ErrMode) -> Self { + Self { + inner: err.into_inner().expect("we don't have streaming parsers"), } } } @@ -339,7 +321,7 @@ pub mod decode { } } pub(crate) use _decode::empty_error; - pub use _decode::{Error, ParseError, ParseErrorOwned}; + pub use _decode::{Error, ParseError}; impl std::error::Error for Error {} /// Returned by [`loose_header()`] diff --git a/gix-object/src/parse.rs b/gix-object/src/parse.rs index 20dd443c007..4b60429a645 100644 --- a/gix-object/src/parse.rs +++ b/gix-object/src/parse.rs @@ -1,11 +1,11 @@ use bstr::{BStr, BString, ByteVec}; -use nom::{ - bytes::complete::{is_not, tag, take_until, take_while_m_n}, - combinator::{peek, recognize}, - error::{context, ContextError, ParseError}, - multi::many1_count, - sequence::{preceded, terminated, tuple}, - IResult, +use winnow::{ + combinator::repeat, + combinator::{preceded, terminated}, + error::{AddContext, ParserError, StrContext}, + prelude::*, + token::{take_till1, take_until0, take_while}, + Parser, }; use crate::ByteSlice; @@ -14,68 +14,63 @@ pub(crate) const NL: &[u8] = b"\n"; pub(crate) const SPACE: &[u8] = b" "; const SPACE_OR_NL: &[u8] = b" \n"; -pub(crate) fn any_header_field_multi_line<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( - i: &'a [u8], -) -> IResult<&'a [u8], (&'a [u8], BString), E> { - let (i, (k, o)) = context( - "name ", - peek(tuple(( - terminated(is_not(SPACE_OR_NL), tag(SPACE)), - recognize(tuple(( - is_not(NL), - tag(NL), - many1_count(terminated(tuple((tag(SPACE), take_until(NL))), tag(NL))), - ))), - ))), - )(i)?; - assert!(!o.is_empty(), "we have parsed more than one value here"); - let end = &o[o.len() - 1] as *const u8 as usize; - let start_input = &i[0] as *const u8 as usize; - - let bytes = o[..o.len() - 1].as_bstr(); - let mut out = BString::from(Vec::with_capacity(bytes.len())); - let mut lines = bytes.lines(); - out.push_str(lines.next().expect("first line")); - for line in lines { - out.push(b'\n'); - out.push_str(&line[1..]); // cut leading space - } - Ok((&i[end - start_input + 1..], (k, out))) +pub(crate) fn any_header_field_multi_line<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], +) -> PResult<(&'a [u8], BString), E> { + ( + terminated(take_till1(SPACE_OR_NL), SPACE), + ( + take_till1(NL), + NL, + repeat(1.., terminated((SPACE, take_until0(NL)), NL)).map(|()| ()), + ) + .recognize() + .map(|o: &[u8]| { + let bytes = o.as_bstr(); + let mut out = BString::from(Vec::with_capacity(bytes.len())); + let mut lines = bytes.lines(); + out.push_str(lines.next().expect("first line")); + for line in lines { + out.push(b'\n'); + out.push_str(&line[1..]); // cut leading space + } + out + }), + ) + .context(StrContext::Expected("name ".into())) + .parse_next(i) } -pub(crate) fn header_field<'a, T, E: ParseError<&'a [u8]>>( - i: &'a [u8], +pub(crate) fn header_field<'a, T, E: ParserError<&'a [u8]>>( + i: &mut &'a [u8], name: &'static [u8], - parse_value: impl Fn(&'a [u8]) -> IResult<&'a [u8], T, E>, -) -> IResult<&'a [u8], T, E> { - terminated(preceded(terminated(tag(name), tag(SPACE)), parse_value), tag(NL))(i) + parse_value: impl Parser<&'a [u8], T, E>, +) -> PResult { + terminated(preceded(terminated(name, SPACE), parse_value), NL).parse_next(i) } -pub(crate) fn any_header_field<'a, T, E: ParseError<&'a [u8]>>( - i: &'a [u8], - parse_value: impl Fn(&'a [u8]) -> IResult<&'a [u8], T, E>, -) -> IResult<&'a [u8], (&'a [u8], T), E> { - terminated( - tuple((terminated(is_not(SPACE_OR_NL), tag(SPACE)), parse_value)), - tag(NL), - )(i) +pub(crate) fn any_header_field<'a, T, E: ParserError<&'a [u8]>>( + i: &mut &'a [u8], + parse_value: impl Parser<&'a [u8], T, E>, +) -> PResult<(&'a [u8], T), E> { + terminated((terminated(take_till1(SPACE_OR_NL), SPACE), parse_value), NL).parse_next(i) } fn is_hex_digit_lc(b: u8) -> bool { matches!(b, b'0'..=b'9' | b'a'..=b'f') } -pub fn hex_hash<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { - take_while_m_n( - gix_hash::Kind::shortest().len_in_hex(), - gix_hash::Kind::longest().len_in_hex(), +pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a BStr, E> { + take_while( + gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), is_hex_digit_lc, - )(i) - .map(|(i, hex)| (i, hex.as_bstr())) + ) + .map(ByteSlice::as_bstr) + .parse_next(i) } -pub(crate) fn signature<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>( - i: &'a [u8], -) -> IResult<&'a [u8], gix_actor::SignatureRef<'a>, E> { +pub(crate) fn signature<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], +) -> PResult, E> { gix_actor::signature::decode(i) } diff --git a/gix-object/src/tag/decode.rs b/gix-object/src/tag/decode.rs index ba9460af980..804c70285af 100644 --- a/gix-object/src/tag/decode.rs +++ b/gix-object/src/tag/decode.rs @@ -1,90 +1,79 @@ -use nom::{ - branch::alt, - bytes::complete::{tag, take_until, take_while, take_while1}, - character::is_alphabetic, - combinator::{all_consuming, opt, recognize}, - error::{context, ContextError, ParseError}, - sequence::{preceded, tuple}, - IResult, +use winnow::{ + combinator::alt, + combinator::delimited, + combinator::rest, + combinator::{eof, opt}, + combinator::{preceded, terminated}, + error::{AddContext, ParserError, StrContext}, + prelude::*, + stream::AsChar, + token::{take_until0, take_while}, }; use crate::{parse, parse::NL, BStr, ByteSlice, TagRef}; -pub fn git_tag<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], TagRef<'a>, E> { - let (i, target) = context("object <40 lowercase hex char>", |i| { - parse::header_field(i, b"object", parse::hex_hash) - })(i)?; - - let (i, kind) = context("type ", |i| { - parse::header_field(i, b"type", take_while1(is_alphabetic)) - })(i)?; - let kind = crate::Kind::from_bytes(kind) - .map_err(|_| nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::MapRes)))?; - - let (i, tag_version) = context("tag ", |i| { - parse::header_field(i, b"tag", take_while1(|b| b != NL[0])) - })(i)?; - - let (i, signature) = context( - "tagger ", - opt(|i| parse::header_field(i, b"tagger", parse::signature)), - )(i)?; - let (i, (message, pgp_signature)) = all_consuming(message)(i)?; - Ok(( - i, - TagRef { - target, - name: tag_version.as_bstr(), - target_kind: kind, - message, - tagger: signature, - pgp_signature, - }, - )) +pub fn git_tag<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + i: &mut &'a [u8], +) -> PResult, E> { + ( + (|i: &mut _| parse::header_field(i, b"object", parse::hex_hash)) + .context(StrContext::Expected("object <40 lowercase hex char>".into())), + (|i: &mut _| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) + .verify_map(|kind| crate::Kind::from_bytes(kind).ok()) + .context(StrContext::Expected("type ".into())), + (|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) + .context(StrContext::Expected("tag ".into())), + opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature)) + .context(StrContext::Expected("tagger ".into())), + terminated(message, eof), + ) + .map( + |(target, kind, tag_version, signature, (message, pgp_signature))| TagRef { + target, + name: tag_version.as_bstr(), + target_kind: kind, + message, + tagger: signature, + pgp_signature, + }, + ) + .parse_next(i) } -pub fn message<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a BStr, Option<&'a BStr>), E> { +pub fn message<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<(&'a BStr, Option<&'a BStr>), E> { const PGP_SIGNATURE_BEGIN: &[u8] = b"\n-----BEGIN PGP SIGNATURE-----"; const PGP_SIGNATURE_END: &[u8] = b"-----END PGP SIGNATURE-----"; if i.is_empty() { - return Ok((i, (i.as_bstr(), None))); - } - let (i, _) = tag(NL)(i)?; - fn all_to_end<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], (&'a [u8], &'a [u8]), E> { - if i.is_empty() { - // Empty message. That's OK. - return Ok((&[], (&[], &[]))); - } - // an empty signature message signals that there is none - the function signature is needed - // to work with 'alt(…)'. PGP signatures are never empty - Ok((&[], (i, &[]))) + return Ok((b"".as_bstr(), None)); } - let (i, (message, signature)) = alt(( - tuple(( - take_until(PGP_SIGNATURE_BEGIN), - preceded( - tag(NL), - recognize(tuple(( - tag(&PGP_SIGNATURE_BEGIN[1..]), - take_until(PGP_SIGNATURE_END), - tag(PGP_SIGNATURE_END), - take_while(|_| true), - ))), + delimited( + NL, + alt(( + ( + take_until0(PGP_SIGNATURE_BEGIN), + preceded( + NL, + ( + &PGP_SIGNATURE_BEGIN[1..], + take_until0(PGP_SIGNATURE_END), + PGP_SIGNATURE_END, + rest, + ) + .recognize() + .map(|signature: &[u8]| { + if signature.is_empty() { + None + } else { + Some(signature.as_bstr()) + } + }), + ), ), + rest.map(|rest: &[u8]| (rest, None)), )), - all_to_end, - ))(i)?; - let (i, _) = opt(tag(NL))(i)?; - Ok(( - i, - ( - message.as_bstr(), - if signature.is_empty() { - None - } else { - Some(signature.as_bstr()) - }, - ), - )) + opt(NL), + ) + .map(|(message, signature)| (message.as_bstr(), signature)) + .parse_next(i) } diff --git a/gix-object/src/tag/mod.rs b/gix-object/src/tag/mod.rs index 1cd353ffb72..3813bfc1425 100644 --- a/gix-object/src/tag/mod.rs +++ b/gix-object/src/tag/mod.rs @@ -10,10 +10,8 @@ pub mod ref_iter; impl<'a> TagRef<'a> { /// Deserialize a tag from `data`. - pub fn from_bytes(data: &'a [u8]) -> Result, crate::decode::Error> { - decode::git_tag(data) - .map(|(_, t)| t) - .map_err(crate::decode::Error::from) + pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + decode::git_tag(&mut data).map_err(crate::decode::Error::with_err) } /// The object this tag points to as `Id`. pub fn target(&self) -> gix_hash::ObjectId { diff --git a/gix-object/src/tag/ref_iter.rs b/gix-object/src/tag/ref_iter.rs index 1138016b5cb..fcb3e4bc3da 100644 --- a/gix-object/src/tag/ref_iter.rs +++ b/gix-object/src/tag/ref_iter.rs @@ -1,10 +1,13 @@ use bstr::BStr; use gix_hash::{oid, ObjectId}; -use nom::{ - bytes::complete::take_while1, - character::is_alphabetic, - combinator::{all_consuming, opt}, - error::{context, ParseError}, +use winnow::{ + combinator::terminated, + combinator::{eof, opt}, + error::ParserError, + error::StrContext, + prelude::*, + stream::AsChar, + token::take_while, }; use crate::{bstr::ByteSlice, parse, parse::NL, tag::decode, Kind, TagRefIter}; @@ -57,13 +60,21 @@ fn missing_field() -> crate::decode::Error { } impl<'a> TagRefIter<'a> { + #[inline] fn next_inner(i: &'a [u8], state: &mut State) -> Result<(&'a [u8], Token<'a>), crate::decode::Error> { + Self::next_inner_(i, state).map_err(crate::decode::Error::with_err) + } + + fn next_inner_( + mut i: &'a [u8], + state: &mut State, + ) -> Result<(&'a [u8], Token<'a>), winnow::error::ErrMode> { use State::*; Ok(match state { Target => { - let (i, target) = context("object <40 lowercase hex char>", |i| { - parse::header_field(i, b"object", parse::hex_hash) - })(i)?; + let target = (|i: &mut _| parse::header_field(i, b"object", parse::hex_hash)) + .context(StrContext::Expected("object <40 lowercase hex char>".into())) + .parse_next(&mut i)?; *state = TargetKind; ( i, @@ -73,36 +84,30 @@ impl<'a> TagRefIter<'a> { ) } TargetKind => { - let (i, kind) = context("type ", |i| { - parse::header_field(i, b"type", take_while1(is_alphabetic)) - })(i)?; - let kind = Kind::from_bytes(kind).map_err(|_| { - #[allow(clippy::let_unit_value)] - { - let err = crate::decode::ParseError::from_error_kind(i, nom::error::ErrorKind::MapRes); - nom::Err::Error(err) - } - })?; + let kind = (|i: &mut _| parse::header_field(i, b"type", take_while(1.., AsChar::is_alpha))) + .context(StrContext::Expected("type ".into())) + .parse_next(&mut i)?; + let kind = Kind::from_bytes(kind) + .map_err(|_| winnow::error::ErrMode::from_error_kind(&i, winnow::error::ErrorKind::Verify))?; *state = Name; (i, Token::TargetKind(kind)) } Name => { - let (i, tag_version) = context("tag ", |i| { - parse::header_field(i, b"tag", take_while1(|b| b != NL[0])) - })(i)?; + let tag_version = (|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0]))) + .context(StrContext::Expected("tag ".into())) + .parse_next(&mut i)?; *state = Tagger; (i, Token::Name(tag_version.as_bstr())) } Tagger => { - let (i, signature) = context( - "tagger ", - opt(|i| parse::header_field(i, b"tagger", parse::signature)), - )(i)?; + let signature = opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature)) + .context(StrContext::Expected("tagger ".into())) + .parse_next(&mut i)?; *state = Message; (i, Token::Tagger(signature)) } Message => { - let (i, (message, pgp_signature)) = all_consuming(decode::message)(i)?; + let (message, pgp_signature) = terminated(decode::message, eof).parse_next(&mut i)?; debug_assert!( i.is_empty(), "we should have consumed all data - otherwise iter may go forever" diff --git a/gix-object/src/tree/ref_iter.rs b/gix-object/src/tree/ref_iter.rs index 98014094858..14453c4bd86 100644 --- a/gix-object/src/tree/ref_iter.rs +++ b/gix-object/src/tree/ref_iter.rs @@ -1,7 +1,7 @@ use bstr::BStr; use std::convert::TryFrom; -use nom::error::ParseError; +use winnow::error::ParserError; use crate::{tree, tree::EntryRef, TreeRef, TreeRefIter}; @@ -14,8 +14,8 @@ impl<'a> TreeRefIter<'a> { impl<'a> TreeRef<'a> { /// Deserialize a Tree from `data`. - pub fn from_bytes(data: &'a [u8]) -> Result, crate::decode::Error> { - decode::tree(data).map(|(_, t)| t).map_err(crate::decode::Error::from) + pub fn from_bytes(mut data: &'a [u8]) -> Result, crate::decode::Error> { + decode::tree(&mut data).map_err(crate::decode::Error::with_err) } /// Find an entry named `name` knowing if the entry is a directory or not, using a binary search. @@ -68,12 +68,11 @@ impl<'a> Iterator for TreeRefIter<'a> { } None => { self.data = &[]; + let empty = &[] as &[u8]; #[allow(clippy::unit_arg)] - Some(Err(nom::Err::Error(crate::decode::ParseError::from_error_kind( - &[] as &[u8], - nom::error::ErrorKind::MapRes, - )) - .into())) + Some(Err(crate::decode::Error::with_err( + winnow::error::ErrMode::from_error_kind(&empty, winnow::error::ErrorKind::Verify), + ))) } } } @@ -117,14 +116,14 @@ mod decode { use std::convert::TryFrom; use bstr::ByteSlice; - use nom::{ - bytes::complete::{tag, take, take_while1, take_while_m_n}, - character::is_digit, - combinator::all_consuming, - error::ParseError, - multi::many0, - sequence::terminated, - IResult, + use winnow::{ + combinator::eof, + combinator::repeat, + combinator::terminated, + error::ParserError, + prelude::*, + stream::AsChar, + token::{take, take_while}, }; use crate::{parse::SPACE, tree, tree::EntryRef, TreeRef}; @@ -145,7 +144,7 @@ mod decode { let mode = tree::EntryMode::try_from(mode).ok()?; let (filename, i) = i.split_at(i.find_byte(0)?); let i = &i[1..]; - const HASH_LEN_FIXME: usize = 20; // TODO: know actual /desired length or we may overshoot + const HASH_LEN_FIXME: usize = 20; // TODO(SHA256): know actual/desired length or we may overshoot let (oid, i) = match i.len() { len if len < HASH_LEN_FIXME => return None, _ => i.split_at(20), @@ -160,25 +159,24 @@ mod decode { )) } - pub fn entry<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], EntryRef<'_>, E> { - let (i, mode) = terminated(take_while_m_n(5, 6, is_digit), tag(SPACE))(i)?; - let mode = tree::EntryMode::try_from(mode) - .map_err(|invalid| nom::Err::Error(E::from_error_kind(invalid, nom::error::ErrorKind::MapRes)))?; - let (i, filename) = terminated(take_while1(|b| b != NULL[0]), tag(NULL))(i)?; - let (i, oid) = take(20u8)(i)?; // TODO: make this compatible with other hash lengths - - Ok(( - i, - EntryRef { + pub fn entry<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult, E> { + ( + terminated(take_while(5..=6, AsChar::is_dec_digit), SPACE) + .verify_map(|mode| tree::EntryMode::try_from(mode).ok()), + terminated(take_while(1.., |b| b != NULL[0]), NULL), + take(20u8), // TODO(SHA256): make this compatible with other hash lengths + ) + .map(|(mode, filename, oid): (_, &[u8], _)| EntryRef { mode, filename: filename.as_bstr(), oid: gix_hash::oid::try_from_bytes(oid).expect("we counted exactly 20 bytes"), - }, - )) + }) + .parse_next(i) } - pub fn tree<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], TreeRef<'a>, E> { - let (i, entries) = all_consuming(many0(entry))(i)?; - Ok((i, TreeRef { entries })) + pub fn tree<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult, E> { + terminated(repeat(0.., entry), eof) + .map(|entries| TreeRef { entries }) + .parse_next(i) } } diff --git a/gix-ref/Cargo.toml b/gix-ref/Cargo.toml index 16e2d0a466f..11d49058788 100644 --- a/gix-ref/Cargo.toml +++ b/gix-ref/Cargo.toml @@ -31,7 +31,7 @@ gix-lock = { version = "^7.0.0", path = "../gix-lock" } gix-tempfile = { version = "^7.0.0", default-features = false, path = "../gix-tempfile" } thiserror = "1.0.34" -nom = { version = "7", default-features = false, features = ["std"]} +winnow = { version = "0.5.14", features = ["simd"] } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} # packed refs diff --git a/gix-ref/src/parse.rs b/gix-ref/src/parse.rs index 9656c81971f..afbd434913a 100644 --- a/gix-ref/src/parse.rs +++ b/gix-ref/src/parse.rs @@ -1,27 +1,22 @@ use gix_object::bstr::{BStr, ByteSlice}; -use nom::{ - branch::alt, - bytes::complete::{tag, take_while_m_n}, - error::ParseError, - IResult, -}; +use winnow::{combinator::alt, error::ParserError, prelude::*, token::take_while}; fn is_hex_digit_lc(b: u8) -> bool { matches!(b, b'0'..=b'9' | b'a'..=b'f') } /// Copy from https://github.com/Byron/gitoxide/blob/f270850ff92eab15258023b8e59346ec200303bd/gix-object/src/immutable/parse.rs#L64 -pub fn hex_hash<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { +pub fn hex_hash<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a BStr, E> { // NOTE: It's important to be able to read all hashes, do not parameterize it. Hashes can be rejected at a later stage // if needed. - take_while_m_n( - gix_hash::Kind::shortest().len_in_hex(), - gix_hash::Kind::longest().len_in_hex(), + take_while( + gix_hash::Kind::shortest().len_in_hex()..=gix_hash::Kind::longest().len_in_hex(), is_hex_digit_lc, - )(i) - .map(|(i, hex)| (i, hex.as_bstr())) + ) + .map(ByteSlice::as_bstr) + .parse_next(i) } -pub fn newline<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a [u8], E> { - alt((tag(b"\r\n"), tag(b"\n")))(i) +pub fn newline<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a [u8], E> { + alt((b"\r\n", b"\n")).parse_next(i) } diff --git a/gix-ref/src/store/file/log/line.rs b/gix-ref/src/store/file/log/line.rs index 396e826a4c3..434e89c43b1 100644 --- a/gix-ref/src/store/file/log/line.rs +++ b/gix-ref/src/store/file/log/line.rs @@ -74,12 +74,17 @@ impl<'a> From> for Line { /// pub mod decode { use gix_object::bstr::{BStr, ByteSlice}; - use nom::{ - bytes::complete::{tag, take_while}, + use winnow::{ + combinator::alt, + combinator::eof, + combinator::fail, combinator::opt, - error::{context, ContextError, ParseError}, - sequence::{terminated, tuple}, - IResult, + combinator::preceded, + combinator::rest, + combinator::terminated, + error::{AddContext, ParserError, StrContext}, + prelude::*, + token::take_while, }; use crate::{file::log::LineRef, parse::hex_hash}; @@ -118,52 +123,52 @@ pub mod decode { impl<'a> LineRef<'a> { /// Decode a line from the given bytes which are expected to start at a hex sha. - pub fn from_bytes(input: &'a [u8]) -> Result, Error> { - one::<()>(input).map(|(_, l)| l).map_err(|_| Error::new(input)) + pub fn from_bytes(mut input: &'a [u8]) -> Result, Error> { + one::<()>(&mut input).map_err(|_| Error::new(input)) } } - fn message<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> { + fn message<'a, E: ParserError<&'a [u8]>>(i: &mut &'a [u8]) -> PResult<&'a BStr, E> { if i.is_empty() { - Ok((&[], i.as_bstr())) + rest.map(ByteSlice::as_bstr).parse_next(i) } else { - terminated(take_while(|c| c != b'\n'), opt(tag(b"\n")))(i).map(|(i, o)| (i, o.as_bstr())) + terminated(take_while(0.., |c| c != b'\n'), opt(b'\n')) + .map(ByteSlice::as_bstr) + .parse_next(i) } } - fn one<'a, E: ParseError<&'a [u8]> + ContextError<&'a [u8]>>(bytes: &'a [u8]) -> IResult<&[u8], LineRef<'a>, E> { - let (i, (old, new, signature, message_sep, message)) = context( - " <> \\t", - tuple(( - context("", terminated(hex_hash, tag(b" "))), - context("", terminated(hex_hash, tag(b" "))), - context(" <> ", gix_actor::signature::decode), - opt(tag(b"\t")), - context("", message), + fn one<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>( + bytes: &mut &'a [u8], + ) -> PResult, E> { + ( + ( + terminated(hex_hash, b" ").context(StrContext::Expected("".into())), + terminated(hex_hash, b" ").context(StrContext::Expected("".into())), + gix_actor::signature::decode.context(StrContext::Expected(" <> ".into())), + ) + .context(StrContext::Expected( + " <> \\t".into(), + )), + alt(( + preceded( + b'\t', + message.context(StrContext::Expected("".into())), + ), + b'\n'.value(Default::default()), + eof.value(Default::default()), + fail.context(StrContext::Expected( + "log message must be separated from signature with whitespace".into(), + )), )), - )(bytes)?; - - if message_sep.is_none() { - if let Some(first) = message.first() { - if !first.is_ascii_whitespace() { - return Err(nom::Err::Error(E::add_context( - i, - "log message must be separated from signature with whitespace", - E::from_error_kind(i, nom::error::ErrorKind::MapRes), - ))); - } - } - } - - Ok(( - i, - LineRef { + ) + .map(|((old, new, signature), message)| LineRef { previous_oid: old, new_oid: new, signature, message, - }, - )) + }) + .parse_next(bytes) } #[cfg(test)] @@ -185,13 +190,15 @@ pub mod decode { mod invalid { use gix_testtools::to_bstr_err; - use nom::error::VerboseError; + use winnow::error::TreeError; + use winnow::prelude::*; use super::one; #[test] fn completely_bogus_shows_error_with_context() { - let err = one::>(b"definitely not a log entry") + let err = one::> + .parse_peek(b"definitely not a log entry") .map_err(to_bstr_err) .expect_err("this should fail"); assert!(err.to_string().contains(" ")); @@ -200,12 +207,15 @@ pub mod decode { #[test] fn missing_whitespace_between_signature_and_message() { let line = "0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 one 1234567890 -0000message"; - let err = one::>(line.as_bytes()) + let err = one::> + .parse_peek(line.as_bytes()) .map_err(to_bstr_err) .expect_err("this should fail"); - assert!(err - .to_string() - .contains("log message must be separated from signature with whitespace")); + assert!( + err.to_string() + .contains("log message must be separated from signature with whitespace"), + "expected\n `log message must be separated from signature with whitespace`\nin\n```\n{err}\n```" + ); } } @@ -217,7 +227,10 @@ pub mod decode { let line_with_nl = with_newline(line_without_nl.clone()); for input in &[line_without_nl, line_with_nl] { assert_eq!( - one::>(input).expect("successful parsing").1, + one::> + .parse_peek(input) + .expect("successful parsing") + .1, LineRef { previous_oid: NULL_SHA1.as_bstr(), new_oid: NULL_SHA1.as_bstr(), @@ -242,7 +255,9 @@ pub mod decode { let line_with_nl = with_newline(line_without_nl.clone()); for input in &[line_without_nl, line_with_nl] { - let (remaining, res) = one::>(input).expect("successful parsing"); + let (remaining, res) = one::> + .parse_peek(input) + .expect("successful parsing"); assert!(remaining.is_empty(), "all consuming even without trailing newline"); let actual = LineRef { previous_oid: b"a5828ae6b52137b913b978e16cd2334482eb4c1f".as_bstr(), @@ -270,10 +285,14 @@ pub mod decode { #[test] fn two_lines_in_a_row_with_and_without_newline() { let lines = b"0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 one 1234567890 -0000\t\n0000000000000000000000000000000000000000 0000000000000000000000000000000000000000 two 1234567890 -0000\thello"; - let (remainder, parsed) = one::>(lines).expect("parse single line"); + let (remainder, parsed) = one::> + .parse_peek(lines) + .expect("parse single line"); assert_eq!(parsed.message, b"".as_bstr(), "first message is empty"); - let (remainder, parsed) = one::>(remainder).expect("parse single line"); + let (remainder, parsed) = one::> + .parse_peek(remainder) + .expect("parse single line"); assert_eq!( parsed.message, b"hello".as_bstr(), diff --git a/gix-ref/src/store/file/loose/reference/decode.rs b/gix-ref/src/store/file/loose/reference/decode.rs index ece14bb484c..d2e71851d7c 100644 --- a/gix-ref/src/store/file/loose/reference/decode.rs +++ b/gix-ref/src/store/file/loose/reference/decode.rs @@ -2,12 +2,7 @@ use std::convert::{TryFrom, TryInto}; use gix_hash::ObjectId; use gix_object::bstr::BString; -use nom::{ - bytes::complete::{tag, take_while}, - combinator::{map, opt}, - sequence::terminated, - IResult, -}; +use winnow::{combinator::opt, combinator::terminated, prelude::*, token::take_while}; use crate::{ parse::{hex_hash, newline}, @@ -57,29 +52,26 @@ impl TryFrom for Target { impl Reference { /// Create a new reference of the given `parent` store with `relative_path` service as unique identifier /// at which the `path_contents` was read to obtain the refs value. - pub fn try_from_path(name: FullName, path_contents: &[u8]) -> Result { + pub fn try_from_path(name: FullName, mut path_contents: &[u8]) -> Result { Ok(Reference { name, - target: parse(path_contents) + target: parse(&mut path_contents) .map_err(|_| Error::Parse { content: path_contents.into(), })? - .1 .try_into()?, }) } } -fn parse(bytes: &[u8]) -> IResult<&[u8], MaybeUnsafeState> { - let is_space = |b: u8| b == b' '; - if let (path, Some(_ref_prefix)) = opt(terminated(tag("ref: "), take_while(is_space)))(bytes)? { - map( - terminated(take_while(|b| b != b'\r' && b != b'\n'), opt(newline)), - |path| MaybeUnsafeState::UnvalidatedPath(path.into()), - )(path) +fn parse(i: &mut &[u8]) -> PResult { + if let Some(_ref_prefix) = opt(terminated("ref: ", take_while(0.., b' '))).parse_next(i)? { + terminated(take_while(0.., |b| b != b'\r' && b != b'\n'), opt(newline)) + .map(|path| MaybeUnsafeState::UnvalidatedPath(path.into())) + .parse_next(i) } else { - map(terminated(hex_hash, opt(newline)), |hex| { - MaybeUnsafeState::Id(ObjectId::from_hex(hex).expect("prior validation")) - })(bytes) + terminated(hex_hash, opt(newline)) + .map(|hex| MaybeUnsafeState::Id(ObjectId::from_hex(hex).expect("prior validation"))) + .parse_next(i) } } diff --git a/gix-ref/src/store/packed/buffer.rs b/gix-ref/src/store/packed/buffer.rs index 6786e4a9f66..7135ab0f346 100644 --- a/gix-ref/src/store/packed/buffer.rs +++ b/gix-ref/src/store/packed/buffer.rs @@ -20,6 +20,8 @@ pub mod open { use std::path::PathBuf; use memmap2::Mmap; + use winnow::prelude::*; + use winnow::stream::Offset; use crate::store_impl::packed; @@ -45,10 +47,12 @@ pub mod open { }; let (offset, sorted) = { - let data = backing.as_ref(); - if *data.first().unwrap_or(&b' ') == b'#' { - let (records, header) = packed::decode::header::<()>(data).map_err(|_| Error::HeaderParsing)?; - let offset = records.as_ptr() as usize - data.as_ptr() as usize; + let mut input = backing.as_ref(); + if *input.first().unwrap_or(&b' ') == b'#' { + let header = packed::decode::header::<()> + .parse_next(&mut input) + .map_err(|_| Error::HeaderParsing)?; + let offset = input.offset_from(&backing.as_ref()); (offset, header.sorted) } else { (0, false) diff --git a/gix-ref/src/store/packed/decode.rs b/gix-ref/src/store/packed/decode.rs index 950d2981df5..da246fdc817 100644 --- a/gix-ref/src/store/packed/decode.rs +++ b/gix-ref/src/store/packed/decode.rs @@ -1,12 +1,12 @@ use std::convert::TryInto; use gix_object::bstr::{BStr, ByteSlice}; -use nom::{ - bytes::complete::{tag, take_while}, - combinator::{map, map_res, opt}, - error::{FromExternalError, ParseError}, - sequence::{delimited, preceded, terminated, tuple}, - IResult, +use winnow::{ + combinator::opt, + combinator::{delimited, preceded, terminated}, + error::{FromExternalError, ParserError}, + prelude::*, + token::take_while, }; use crate::{ @@ -37,46 +37,47 @@ impl Default for Header { } } -fn until_newline<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], &'a BStr, E> +fn until_newline<'a, E>(input: &mut &'a [u8]) -> PResult<&'a BStr, E> where - E: ParseError<&'a [u8]>, + E: ParserError<&'a [u8]>, { - map( - terminated(take_while(|b: u8| b != b'\r' && b != b'\n'), newline), - ByteSlice::as_bstr, - )(input) + terminated(take_while(0.., |b: u8| b != b'\r' && b != b'\n'), newline) + .map(ByteSlice::as_bstr) + .parse_next(input) } -pub fn header<'a, E>(input: &'a [u8]) -> IResult<&'a [u8], Header, E> +pub fn header<'a, E>(input: &mut &'a [u8]) -> PResult where - E: ParseError<&'a [u8]>, + E: ParserError<&'a [u8]>, { - let (rest, traits) = preceded(tag(b"# pack-refs with: "), until_newline)(input)?; - - let mut peeled = Peeled::Unspecified; - let mut sorted = false; - for token in traits.as_bstr().split_str(b" ") { - if token == b"fully-peeled" { - peeled = Peeled::Fully; - } else if token == b"peeled" { - peeled = Peeled::Partial; - } else if token == b"sorted" { - sorted = true; - } - } - - Ok((rest, Header { peeled, sorted })) + preceded(b"# pack-refs with: ", until_newline) + .map(|traits| { + let mut peeled = Peeled::Unspecified; + let mut sorted = false; + for token in traits.as_bstr().split_str(b" ") { + if token == b"fully-peeled" { + peeled = Peeled::Fully; + } else if token == b"peeled" { + peeled = Peeled::Partial; + } else if token == b"sorted" { + sorted = true; + } + } + Header { peeled, sorted } + }) + .parse_next(input) } -pub fn reference<'a, E: ParseError<&'a [u8]> + FromExternalError<&'a [u8], crate::name::Error>>( - input: &'a [u8], -) -> IResult<&'a [u8], packed::Reference<'a>, E> { - let (input, (target, name)) = tuple(( - terminated(hex_hash, tag(b" ")), - map_res(until_newline, TryInto::try_into), - ))(input)?; - let (rest, object) = opt(delimited(tag(b"^"), hex_hash, newline))(input)?; - Ok((rest, packed::Reference { name, target, object })) +pub fn reference<'a, E: ParserError<&'a [u8]> + FromExternalError<&'a [u8], crate::name::Error>>( + input: &mut &'a [u8], +) -> PResult, E> { + ( + terminated(hex_hash, b" "), + until_newline.try_map(TryInto::try_into), + opt(delimited(b"^", hex_hash, newline)), + ) + .map(|(target, name, object)| packed::Reference { name, target, object }) + .parse_next(input) } #[cfg(test)] diff --git a/gix-ref/src/store/packed/decode/tests.rs b/gix-ref/src/store/packed/decode/tests.rs index 6c8f315c1f6..92a1d0434d3 100644 --- a/gix-ref/src/store/packed/decode/tests.rs +++ b/gix-ref/src/store/packed/decode/tests.rs @@ -1,7 +1,8 @@ type Result = std::result::Result<(), Box>; mod reference { - use nom::error::VerboseError; + use winnow::error::TreeError; + use winnow::prelude::*; use super::Result; use crate::{ @@ -16,9 +17,13 @@ mod reference { #[test] fn invalid() { - assert!(decode::reference::<()>(b"# what looks like a comment",).is_err()); + assert!(decode::reference::<()> + .parse_peek(b"# what looks like a comment",) + .is_err()); assert!( - decode::reference::<()>(b"^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n",).is_err(), + decode::reference::<()> + .parse_peek(b"^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\n",) + .is_err(), "lonely peel" ); } @@ -27,7 +32,7 @@ mod reference { fn two_refs_in_a_row() -> Result { let input: &[u8] = b"d53c4b0f91f1b29769c9430f2d1c0bcab1170c75 refs/heads/alternates-after-packs-and-loose ^e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37\neaae9c1bc723209d793eb93f5587fa2604d5cd92 refs/heads/avoid-double-lookup\n"; - let (input, parsed) = decode::reference::>(input)?; + let (input, parsed) = decode::reference::>.parse_peek(input).unwrap(); assert_eq!( parsed, @@ -40,7 +45,7 @@ mod reference { assert_eq!(parsed.target(), hex_to_id("d53c4b0f91f1b29769c9430f2d1c0bcab1170c75")); assert_eq!(parsed.object(), hex_to_id("e9cdc958e7ce2290e2d7958cdb5aa9323ef35d37")); - let (input, parsed) = decode::reference::>(input)?; + let (input, parsed) = decode::reference::>.parse_peek(input).unwrap(); assert!(input.is_empty(), "exhausted"); assert_eq!( parsed.name, @@ -55,6 +60,7 @@ mod reference { mod header { use gix_object::bstr::ByteSlice; use gix_testtools::to_bstr_err; + use winnow::prelude::*; use super::Result; use crate::store_impl::packed::{ @@ -65,12 +71,15 @@ mod header { #[test] fn invalid() { assert!( - decode::header::<()>(b"# some user comment").is_err(), + decode::header::<()>.parse_peek(b"# some user comment").is_err(), "something the user put there" ); - assert!(decode::header::<()>(b"# pack-refs: ").is_err(), "looks right but isn't"); assert!( - decode::header::<()>(b" # pack-refs with: ").is_err(), + decode::header::<()>.parse_peek(b"# pack-refs: ").is_err(), + "looks right but isn't" + ); + assert!( + decode::header::<()>.parse_peek(b" # pack-refs with: ").is_err(), "does not start with #" ); } @@ -78,7 +87,9 @@ mod header { #[test] fn valid_fully_peeled_stored() -> Result { let input: &[u8] = b"# pack-refs with: peeled fully-peeled sorted \nsomething else"; - let (rest, header) = decode::header::>(input).map_err(to_bstr_err)?; + let (rest, header) = decode::header::> + .parse_peek(input) + .map_err(to_bstr_err)?; assert_eq!(rest.as_bstr(), "something else", "remainder starts after newline"); assert_eq!( @@ -94,7 +105,7 @@ mod header { #[test] fn valid_peeled_unsorted() -> Result { let input: &[u8] = b"# pack-refs with: peeled\n"; - let (rest, header) = decode::header::<()>(input)?; + let (rest, header) = decode::header::<()>.parse_peek(input).unwrap(); assert!(rest.is_empty()); assert_eq!( @@ -110,7 +121,7 @@ mod header { #[test] fn valid_empty() -> Result { let input: &[u8] = b"# pack-refs with: \n"; - let (rest, header) = decode::header::<()>(input)?; + let (rest, header) = decode::header::<()>.parse_peek(input).unwrap(); assert!(rest.is_empty()); assert_eq!( diff --git a/gix-ref/src/store/packed/find.rs b/gix-ref/src/store/packed/find.rs index 8c1dcb5b2cc..002f76b0f50 100644 --- a/gix-ref/src/store/packed/find.rs +++ b/gix-ref/src/store/packed/find.rs @@ -1,6 +1,7 @@ use std::convert::TryInto; use gix_object::bstr::{BStr, BString, ByteSlice}; +use winnow::prelude::*; use crate::{store_impl::packed, FullNameRef, PartialNameRef}; @@ -40,11 +41,14 @@ impl packed::Buffer { pub(crate) fn try_find_full_name(&self, name: &FullNameRef) -> Result>, Error> { match self.binary_search_by(name.as_bstr()) { - Ok(line_start) => Ok(Some( - packed::decode::reference::<()>(&self.as_ref()[line_start..]) - .map_err(|_| Error::Parse)? - .1, - )), + Ok(line_start) => { + let mut input = &self.as_ref()[line_start..]; + Ok(Some( + packed::decode::reference::<()> + .parse_next(&mut input) + .map_err(|_| Error::Parse)?, + )) + } Err((parse_failure, _)) => { if parse_failure { Err(Error::Parse) @@ -90,9 +94,10 @@ impl packed::Buffer { let mut encountered_parse_failure = false; a.binary_search_by_key(&full_name.as_ref(), |b: &u8| { let ofs = b as *const u8 as usize - a.as_ptr() as usize; - let line = &a[search_start_of_record(ofs)..]; - packed::decode::reference::<()>(line) - .map(|(_rest, r)| r.name.as_bstr().as_bytes()) + let mut line = &a[search_start_of_record(ofs)..]; + packed::decode::reference::<()> + .parse_next(&mut line) + .map(|r| r.name.as_bstr().as_bytes()) .map_err(|err| { encountered_parse_failure = true; err diff --git a/gix-ref/src/store/packed/iter.rs b/gix-ref/src/store/packed/iter.rs index d9c49956b4f..85934042f25 100644 --- a/gix-ref/src/store/packed/iter.rs +++ b/gix-ref/src/store/packed/iter.rs @@ -1,4 +1,8 @@ use gix_object::bstr::{BString, ByteSlice}; +use winnow::combinator::preceded; +use winnow::combinator::rest; +use winnow::prelude::*; +use winnow::stream::Stream as _; use crate::store_impl::{packed, packed::decode}; @@ -29,9 +33,9 @@ impl<'a> Iterator for packed::Iter<'a> { return None; } - match decode::reference::<()>(self.cursor) { - Ok((rest, reference)) => { - self.cursor = rest; + let start = self.cursor.checkpoint(); + match decode::reference::<()>.parse_next(&mut self.cursor) { + Ok(reference) => { self.current_line += 1; if let Some(ref prefix) = self.prefix { if !reference.name.as_bstr().starts_with_str(prefix) { @@ -42,6 +46,7 @@ impl<'a> Iterator for packed::Iter<'a> { Some(Ok(reference)) } Err(_) => { + self.cursor.reset(start); let (failed_line, next_cursor) = self .cursor .find_byte(b'\n') @@ -82,9 +87,12 @@ impl<'a> packed::Iter<'a> { current_line: 1, }) } else if packed[0] == b'#' { - let (refs, _header) = decode::header::<()>(packed).map_err(|_| Error::Header { - invalid_first_line: packed.lines().next().unwrap_or(packed).into(), - })?; + let mut input = packed; + let refs = preceded(decode::header::<()>, rest) + .parse_next(&mut input) + .map_err(|_| Error::Header { + invalid_first_line: packed.lines().next().unwrap_or(packed).into(), + })?; Ok(packed::Iter { cursor: refs, prefix, diff --git a/gix-ref/tests/file/log.rs b/gix-ref/tests/file/log.rs index 0e4bfdce62b..36f49543c34 100644 --- a/gix-ref/tests/file/log.rs +++ b/gix-ref/tests/file/log.rs @@ -186,7 +186,7 @@ mod iter { let mut iter = gix_ref::file::log::iter::forward(log_first_broken.as_bytes()); let err = iter.next().expect("error is not none").expect_err("the line is broken"); - assert_eq!(err.to_string(), "In line 1: \"0000000000000000000000000000000000000000 134385fbroken7062102c6a483440bfda2a03 committer 946771200 +0000\\tcommit\" did not match ' <> \\t'"); + assert_eq!(err.to_string(), "In line 1: \"134385fbroken7062102c6a483440bfda2a03 committer 946771200 +0000\\tcommit\" did not match ' <> \\t'"); assert!(iter.next().expect("a second line").is_ok(), "line parses ok"); assert!(iter.next().is_none(), "iterator exhausted"); } diff --git a/gix-revision/fuzz/Cargo.lock b/gix-revision/fuzz/Cargo.lock index 633500302bb..378a44ee46a 100644 --- a/gix-revision/fuzz/Cargo.lock +++ b/gix-revision/fuzz/Cargo.lock @@ -499,9 +499,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.5.12" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83817bbecf72c73bad717ee86820ebf286203d2e04c3951f3cd538869c897364" +checksum = "d09770118a7eb1ccaf4a594a221334119a44a814fcb0d31c5b85e83e97227a97" dependencies = [ "memchr", ] diff --git a/tests/tools/Cargo.toml b/tests/tools/Cargo.toml index 48aefff93d7..7eae9e8199d 100644 --- a/tests/tools/Cargo.toml +++ b/tests/tools/Cargo.toml @@ -21,7 +21,7 @@ gix-worktree = "0.17.0" gix-fs = { version = "^0.4.1", path = "../../gix-fs" } gix-tempfile = { version = "^7.0.0", default-features = false, features = ["signals"], path = "../../gix-tempfile" } -nom = { version = "7", default-features = false, features = ["std"]} +winnow = { version = "0.5.14", features = ["simd"] } fastrand = "2.0.0" bstr = { version = "1.5.0", default-features = false } crc = "3.0.0" diff --git a/tests/tools/src/lib.rs b/tests/tools/src/lib.rs index 0a85e7fae18..0247eb6aef4 100644 --- a/tests/tools/src/lib.rs +++ b/tests/tools/src/lib.rs @@ -15,7 +15,6 @@ pub use bstr; use bstr::{BStr, ByteSlice}; use io_close::Close; pub use is_ci; -use nom::error::VerboseError; pub use once_cell; use once_cell::sync::Lazy; use parking_lot::Mutex; @@ -691,15 +690,12 @@ fn extract_archive( Ok((archive_identity, platform)) } -/// Transform a verbose bom errors from raw bytes into a `BStr` to make printing/debugging human-readable. -pub fn to_bstr_err(err: nom::Err>) -> VerboseError<&BStr> { - let err = match err { - nom::Err::Error(err) | nom::Err::Failure(err) => err, - nom::Err::Incomplete(_) => unreachable!("not a streaming parser"), - }; - VerboseError { - errors: err.errors.into_iter().map(|(i, v)| (i.as_bstr(), v)).collect(), - } +/// Transform a verbose parser errors from raw bytes into a `BStr` to make printing/debugging human-readable. +pub fn to_bstr_err( + err: winnow::error::ErrMode>, +) -> winnow::error::TreeError<&BStr, winnow::error::StrContext> { + let err = err.into_inner().expect("not a streaming parser"); + err.map_input(ByteSlice::as_bstr) } fn family_name() -> &'static str {