Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ pub mod _tutorial;
/// ```
pub mod prelude {
pub use crate::stream::StreamIsPartial as _;
pub use crate::stream::A;
pub use crate::IResult;
pub use crate::PResult;
pub use crate::Parser;
Expand Down
29 changes: 29 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::combinator::*;
#[cfg(feature = "unstable-recover")]
use crate::error::FromRecoverableError;
use crate::error::{AddContext, FromExternalError, IResult, PResult, ParseError, ParserError};
use crate::stream::AsciiChar;
use crate::stream::{AsChar, Compare, Location, ParseSlice, Stream, StreamIsPartial};
#[cfg(feature = "unstable-recover")]
use crate::stream::{Recover, Recoverable};
Expand Down Expand Up @@ -743,6 +744,34 @@ where
}
}

/// Lets an [`AsciiChar`] be used directly as a parser.
///
/// This is a shortcut for [`one_of`][crate::token::one_of]: the character is
/// copied and handed to `one_of`, whose parser is then run on the input.
///
/// # Example
///
/// ```
/// # use winnow::prelude::*;
/// # use winnow::{error::ErrMode, error::{ErrorKind, InputError}};
/// fn parser<'s>(i: &mut &'s str) -> PResult<char, InputError<&'s str>> {
///     A!('a').parse_next(i)
/// }
/// assert_eq!(parser.parse_peek("abc"), Ok(("bc", 'a')));
/// assert_eq!(parser.parse_peek(" abc"), Err(ErrMode::Backtrack(InputError::new(" abc", ErrorKind::Verify))));
/// assert_eq!(parser.parse_peek("bc"), Err(ErrMode::Backtrack(InputError::new("bc", ErrorKind::Verify))));
/// assert_eq!(parser.parse_peek(""), Err(ErrMode::Backtrack(InputError::new("", ErrorKind::Token))));
/// ```
impl<I, E> Parser<I, <I as Stream>::Token, E> for AsciiChar
where
    I: StreamIsPartial + Stream,
    <I as Stream>::Token: AsChar + Clone,
    E: ParserError<I>,
{
    #[inline(always)]
    fn parse_next(&mut self, input: &mut I) -> PResult<<I as Stream>::Token, E> {
        // `AsciiChar` is `Copy`, so `one_of` receives its own copy of the character.
        let expected = *self;
        crate::token::one_of(expected).parse_next(input)
    }
}

/// This is a shortcut for [`one_of`][crate::token::one_of].
///
/// # Example
Expand Down
180 changes: 180 additions & 0 deletions src/stream/ascii.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
/// One of the 128 Unicode characters from U+0000 through U+007F,
/// often known as the [ASCII] subset.
///
/// Officially, this is the first [block] in Unicode, _Basic Latin_.
/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
///
/// This block was based on older 7-bit character code standards such as
/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
///
/// **Note:** This is a polyfill for [`ascii::Char`][std::ascii::Char]; the
/// text below uses the `ascii::Char` name from the standard library.
///
/// # When to use this
///
/// The main advantage of this subset is that it's always valid UTF-8. As such,
/// the `&[ascii::Char]` -> `&str` conversion (as well as other related
/// ones) is O(1): *no* runtime checks are needed.
///
/// If you're consuming strings, you should usually handle Unicode and thus
/// accept `str`s, not limit yourself to `ascii::Char`s.
///
/// However, certain formats are intentionally designed to produce ASCII-only
/// output in order to be 8-bit-clean. In those cases, it can be simpler and
/// faster to generate `ascii::Char`s instead of dealing with the variable width
/// properties of general UTF-8 encoded strings, while still allowing the result
/// to be used freely with other Rust things that deal in general `str`s.
///
/// For example, a UUID library might offer a way to produce the string
/// representation of a UUID as an `[ascii::Char; 36]` to avoid memory
/// allocation yet still allow it to be used as UTF-8 via `as_str` without
/// paying for validation (or needing `unsafe` code) the way it would if it
/// were provided as a `[u8; 36]`.
///
/// # Layout
///
/// This type is guaranteed to have a size and alignment of 1 byte: it is a
/// `#[repr(transparent)]` wrapper around a `u8`.
///
/// # Names
///
/// The variants on this type are [Unicode names][NamesList] of the characters
/// in upper camel case, with a few tweaks:
/// - For `<control>` characters, the primary alias name is used.
/// - `LATIN` is dropped, as this block has no non-latin letters.
/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
///
/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
/// [block]: https://www.unicode.org/glossary/index.html#block
/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
// `repr(transparent)` is what actually backs the layout guarantee documented
// above, which code reinterpreting `u8` data as `AsciiChar` depends on.
#[repr(transparent)]
pub struct AsciiChar(u8);

impl AsciiChar {
    /// Creates an ASCII character from the byte `b`,
    /// or returns `None` if it's too large.
    #[inline(always)]
    pub const fn from_u8(b: u8) -> Option<Self> {
        if b.is_ascii() {
            // SAFETY: Just checked that `b` is in-range
            Some(unsafe { Self::from_u8_unchecked(b) })
        } else {
            None
        }
    }

    /// Creates an ASCII character from the byte `b`,
    /// without checking whether it's valid.
    ///
    /// # Safety
    ///
    /// `b` must be in `0..=127`, or else this is UB.
    #[inline(always)]
    pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
        Self(b)
    }

    /// Gets this ASCII character as a byte.
    #[inline(always)]
    pub const fn to_u8(self) -> u8 {
        self.0
    }

    /// Gets this ASCII character as a `char` Unicode Scalar Value.
    #[inline(always)]
    pub const fn to_char(self) -> char {
        // Widening a `u8` to `char` is lossless.
        self.0 as char
    }
}

/// Formats the character exactly as the equivalent `char` would be displayed.
impl crate::lib::std::fmt::Display for AsciiChar {
    fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result {
        // Fully-qualified call: method syntax would require `Display` to be
        // imported and would be ambiguous if `Debug` were in scope too.
        crate::lib::std::fmt::Display::fmt(&self.to_char(), f)
    }
}

/// Formats the character exactly as the equivalent `char` would be debug-printed.
impl crate::lib::std::fmt::Debug for AsciiChar {
    fn fmt(&self, f: &mut crate::lib::std::fmt::Formatter<'_>) -> crate::lib::std::fmt::Result {
        // Fully-qualified call: method syntax would require `Debug` to be
        // imported and would be ambiguous if `Display` were in scope too.
        crate::lib::std::fmt::Debug::fmt(&self.to_char(), f)
    }
}

/// Create an [`AsciiChar`] with compile-time validation
///
/// Accepts a `char`, byte (`b'a'`), or integer literal. The value is checked
/// inside a `const`, so a literal outside the ASCII range fails the build
/// rather than producing an invalid [`AsciiChar`].
#[macro_export]
#[doc(hidden)] // forced to be visible in intended location
macro_rules! A {
    ($byte: literal) => {{
        #![allow(clippy::unnecessary_cast)] // not always the same type

        // Normalise every accepted literal kind to `char`; comparing as `char`
        // (rather than after `as u8`) avoids truncating wide characters.
        const BYTE: char = $byte as char;
        const MAX: char = 127 as char;
        const C: $crate::stream::AsciiChar = if BYTE <= MAX {
            // SAFETY: just checked that `BYTE` is within the ASCII range
            unsafe { $crate::stream::AsciiChar::from_u8_unchecked(BYTE as u8) }
        } else {
            panic!("`A!` requires a literal in the ASCII range (0..=127)")
        };
        C
    }};
}

/// Create a `&'static` slice of [`AsciiChar`] from a string literal, with
/// compile-time validation
///
/// Every byte of the literal is checked inside a `const`, so a literal
/// containing a non-ASCII character fails the build.
#[macro_export]
#[doc(hidden)] // forced to be visible in intended location
macro_rules! AS {
    ($s: literal) => {{
        const S: &str = $s;
        const AS: &[$crate::stream::AsciiChar] = {
            let bytes = S.as_bytes();
            // `for` loops are not usable in `const` contexts, hence the manual index.
            let mut i = 0;
            while i < bytes.len() {
                if bytes[i] > 127 {
                    panic!("`AS!` requires a string literal containing only ASCII characters")
                }
                i += 1;
            }
            // SAFETY: every byte was just checked to be in `0..=127`, and
            // `AsciiChar` is documented to have the size and alignment of a
            // single byte, so the slice can be reinterpreted in place.
            unsafe { ::core::mem::transmute::<&[u8], &[$crate::stream::AsciiChar]>(bytes) }
        };
        AS
    }};
}

#[cfg(test)]
mod test {
    use super::*;

    /// `A!` accepts an integer literal and can be used in a `const fn`.
    #[test]
    fn const_number() {
        const fn gen() -> AsciiChar {
            crate::stream::A!(97)
        }
        assert_eq!(gen(), AsciiChar::from_u8(b'a').unwrap());
    }

    /// `A!` accepts a byte literal and can be used in a `const fn`.
    #[test]
    fn const_u8() {
        const fn gen() -> AsciiChar {
            crate::stream::A!(b'a')
        }
        assert_eq!(gen(), AsciiChar::from_u8(b'a').unwrap());
    }

    /// `A!` accepts a `char` literal and can be used in a `const fn`.
    #[test]
    fn const_char() {
        const fn gen() -> AsciiChar {
            crate::stream::A!('a')
        }
        assert_eq!(gen(), AsciiChar::from_u8(b'a').unwrap());
    }

    /// `AS!` accepts a string literal and its result can initialise a `const`.
    #[test]
    fn const_str() {
        const fn gen() -> &'static [AsciiChar] {
            crate::stream::AS!("a")
        }
        // Constants are `'static` by default, so the lifetime is left implicit.
        const S: &[AsciiChar] = gen();
        assert_eq!(S, &[AsciiChar::from_u8(b'a').unwrap()][..]);
    }
}
Loading