Skip to content

Commit

Permalink
Run cargo fmt
Browse files Browse the repository at this point in the history
This commit runs `cargo fmt` on the repository to apply rustfmt
formatting. As we're preparing to publish the repository for public
consumption, using consistent automatic formatting for the source code
makes contribution easier (and avoids needless arguments over style
conventions and preferences).
  • Loading branch information
mtreinish committed Jan 8, 2024
1 parent 218ffec commit 9873459
Show file tree
Hide file tree
Showing 44 changed files with 1,260 additions and 666 deletions.
164 changes: 120 additions & 44 deletions crates/lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,14 @@ pub enum TokenKind {
///
/// Block comments can be recursive, so a sequence like `/* /* */`
/// will not be considered terminated and will result in a parsing error.
BlockComment { terminated: bool },
BlockComment {
terminated: bool,
},

/// Any whitespace character sequence.
Whitespace,

// ClassicalTypeName,

// ClassicalTypeName,
/// "ident" or "continue"
///
/// At this step, keywords are also considered identifiers.
Expand Down Expand Up @@ -82,7 +83,10 @@ pub enum TokenKind {
/// this type will need to check for and reject that case.
///
/// See [LiteralKind] for more details.
Literal { kind: LiteralKind, suffix_start: u32 },
Literal {
kind: LiteralKind,
suffix_start: u32,
},

// One-char tokens:
/// ";"
Expand Down Expand Up @@ -159,19 +163,38 @@ pub enum TokenKind {
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum LiteralKind {
/// "12_u8", "0o100", "0b120i99", "1f32".
Int { base: Base, empty_int: bool },
Int {
base: Base,
empty_int: bool,
},
/// "12.34f32", "1e3", but not "1f32".
Float { base: Base, empty_exponent: bool },
Float {
base: Base,
empty_exponent: bool,
},
/// "b'a'", "b'\\'", "b'''", "b';"
Byte { terminated: bool },
Byte {
terminated: bool,
},
/// ""abc"", ""abc"
Str { terminated: bool },
Str {
terminated: bool,
},
/// "10011" "100_11"
BitStr {terminated: bool, consecutive_underscores: bool },
BitStr {
terminated: bool,
consecutive_underscores: bool,
},
/// Int Timing literal
TimingInt { base: Base, empty_int: bool },
TimingInt {
base: Base,
empty_int: bool,
},
/// Float Timing literal
TimingFloat { base: Base, empty_exponent: bool },
TimingFloat {
base: Base,
empty_exponent: bool,
},
SimpleFloat,
}

Expand All @@ -188,13 +211,16 @@ pub enum Base {
Hexadecimal = 16,
}


/// Creates an iterator that produces tokens from the input string.
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
let mut cursor = Cursor::new(input);
std::iter::from_fn(move || {
let token = cursor.advance_token();
if token.kind != TokenKind::Eof { Some(token) } else { None }
if token.kind != TokenKind::Eof {
Some(token)
} else {
None
}
})
}

Expand Down Expand Up @@ -235,7 +261,7 @@ pub fn is_whitespace(c: char) -> bool {
/// a formal definition of valid identifier name.
pub fn is_id_start(c: char) -> bool {
// This is XID_Start OR '_' (which formally is not a XID_Start).
// c == '_' || c == '$' || unicode_xid::UnicodeXID::is_xid_start(c)
// c == '_' || c == '$' || unicode_xid::UnicodeXID::is_xid_start(c)
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
}

Expand Down Expand Up @@ -293,19 +319,39 @@ impl Cursor<'_> {
// Eat suffix, and return true if it is a timing suffix.
if self.timing_suffix() {
match literal_kind {
Float {base: baseval, empty_exponent: emptyval} => {
TokenKind::Literal {kind: TimingFloat {base: baseval, empty_exponent: emptyval}, suffix_start}
Float {
base: baseval,
empty_exponent: emptyval,
} => TokenKind::Literal {
kind: TimingFloat {
base: baseval,
empty_exponent: emptyval,
},
suffix_start,
},
Int {base: baseval, empty_int: emptyval } => {
TokenKind::Literal {kind: TimingInt {base: baseval, empty_int: emptyval}, suffix_start}
Int {
base: baseval,
empty_int: emptyval,
} => TokenKind::Literal {
kind: TimingInt {
base: baseval,
empty_int: emptyval,
},
suffix_start,
},
_ => {
// This is unreachable
TokenKind::Literal { kind: literal_kind, suffix_start }
TokenKind::Literal {
kind: literal_kind,
suffix_start,
}
}
}
} else {
TokenKind::Literal { kind: literal_kind, suffix_start }
TokenKind::Literal {
kind: literal_kind,
suffix_start,
}
}
}

Expand All @@ -325,8 +371,8 @@ impl Cursor<'_> {
'~' => Tilde,
'?' => Question,
':' => Colon,
// FIXME! GJL disabled this ?
// '$' => Dollar,
// FIXME! GJL disabled this ?
// '$' => Dollar,
'=' => Eq,
'!' => Bang,
'<' => Lt,
Expand All @@ -345,13 +391,17 @@ impl Cursor<'_> {

// String literal.
'"' => {
let (terminated, only_ones_and_zeros, consecutive_underscores) = self.double_quoted_string();
let (terminated, only_ones_and_zeros, consecutive_underscores) =
self.double_quoted_string();
let suffix_start = self.pos_within_token();
if terminated {
self.eat_literal_suffix();
}
let kind = match only_ones_and_zeros {
true => BitStr { terminated, consecutive_underscores },
true => BitStr {
terminated,
consecutive_underscores,
},
false => Str { terminated },
};
Literal { kind, suffix_start }
Expand Down Expand Up @@ -399,7 +449,9 @@ impl Cursor<'_> {
}
}

BlockComment { terminated: depth == 0 }
BlockComment {
terminated: depth == 0,
}
}

fn whitespace(&mut self) -> TokenKind {
Expand All @@ -416,7 +468,7 @@ impl Cursor<'_> {
// we see a prefix here, it is definitely an unknown prefix.

match self.first() {
// '#' | '"' | '\'' => UnknownPrefix,
// '#' | '"' | '\'' => UnknownPrefix,
c if !c.is_ascii() && c.is_emoji_char() => self.fake_ident_or_unknown_prefix(),
_ => Ident,
}
Expand All @@ -430,13 +482,13 @@ impl Cursor<'_> {
self.eat_while(is_id_continue);
self.fake_ident_or_unknown_prefix()
}
_ => { if !self.eat_decimal_digits() {
Dollar
_ => {
if !self.eat_decimal_digits() {
Dollar
} else {
HardwareIdent
}
}
else {
HardwareIdent
}
},
}
}

Expand Down Expand Up @@ -477,21 +529,30 @@ impl Cursor<'_> {
base = Base::Binary;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return Int {
base,
empty_int: true,
};
}
}
'o' => {
base = Base::Octal;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return Int {
base,
empty_int: true,
};
}
}
'x' => {
base = Base::Hexadecimal;
self.bump();
if !self.eat_hexadecimal_digits() {
return Int { base, empty_int: true };
return Int {
base,
empty_int: true,
};
}
}
// Not a base prefix; consume additional digits.
Expand All @@ -503,7 +564,12 @@ impl Cursor<'_> {
'.' | 'e' | 'E' => {}

// Just a 0.
_ => return Int { base, empty_int: false },
_ => {
return Int {
base,
empty_int: false,
}
}
}
} else {
// No base prefix, parse number in the usual way.
Expand All @@ -522,7 +588,8 @@ impl Cursor<'_> {
self.bump();
// n.b. example of `empty_exponent` : 3.4e; This is a syntax error
let mut empty_exponent = false;
if self.first().is_ascii_digit() { // preferred to is_digit(10), in rust lexer
// preferred to is_digit(10), in rust lexer
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
match self.first() {
'e' | 'E' => {
Expand All @@ -532,14 +599,23 @@ impl Cursor<'_> {
_ => (),
}
}
Float { base, empty_exponent }
Float {
base,
empty_exponent,
}
}
'e' | 'E' => {
self.bump();
let empty_exponent = !self.eat_float_exponent();
Float { base, empty_exponent }
Float {
base,
empty_exponent,
}
}
_ => Int { base, empty_int: false },
_ => Int {
base,
empty_int: false,
},
}
}

Expand Down Expand Up @@ -696,9 +772,9 @@ impl Cursor<'_> {
self.bump();
timing = true;
} else {
// TODO: greek mu is encoded in more than one way. We only get one here.
// TODO: greek mu is encoded in more than one way. We only get one here.
for (f, s) in [('d', 't'), ('n', 's'), ('u', 's'), ('m', 's'), ('µ', 's')] {
if self.first() == f && self.second() == s {
if self.first() == f && self.second() == s {
self.bump();
self.bump();
timing = true;
Expand All @@ -708,9 +784,9 @@ impl Cursor<'_> {
if timing {
if is_id_continue(self.first()) {
self.eat_while(is_id_continue);
return false
return false;
}
return true
return true;
}
self.eat_literal_suffix();
false
Expand Down
5 changes: 3 additions & 2 deletions crates/lexer/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use super::*;
use expect_test::{expect, Expect};

fn check_lexing(src: &str, expect: Expect) {
let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
let actual: String = tokenize(src)
.map(|token| format!("{:?}\n", token))
.collect();
expect.assert_eq(&actual)
}

Expand Down Expand Up @@ -99,7 +101,6 @@ fn nested_block_comments() {
// );
// }


#[test]
fn literal_suffixes() {
check_lexing(
Expand Down
Loading

0 comments on commit 9873459

Please sign in to comment.