Skip to content

Commit 3e51e1f

Browse files
committed
Read extended JSON output format and print byte strings as b"...".
This allows round-tripping values output by jaq.
1 parent 303f5d7 commit 3e51e1f

File tree

5 files changed

+71
-37
lines changed

5 files changed

+71
-37
lines changed

Cargo.lock

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,3 +15,4 @@ codegen-units = 1
1515

1616
[patch.crates-io]
1717
regex-lite = { git = 'https://github.com/01mf02/regex', package = 'regex-lite', branch = 'bytes' }
18+
hifijson = { git = "https://github.com/01mf02/hifijson.git" }

jaq-json/src/json.rs

Lines changed: 67 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,72 @@
11
//! JSON support.
2-
use crate::{Map, Num, Val};
2+
use crate::{Map, Num, Tag, Val};
33
use alloc::{string::ToString, vec::Vec};
44
use hifijson::token::{Expect, Lex, Token};
5-
use hifijson::{str, IterLexer, LexAlloc, SliceLexer};
5+
use hifijson::{IterLexer, LexAlloc, SliceLexer};
66
use std::io;
77

88
pub use crate::write::write;
99

10+
/// Eat whitespace/comments, then try to return a token.
11+
fn ws_tk<L: Lex>(lexer: &mut L) -> Option<Token> {
12+
loop {
13+
lexer.eat_whitespace();
14+
if lexer.peek_next() == Some(b'#') {
15+
lexer.skip_until(|c| c == b'\n');
16+
} else {
17+
break;
18+
}
19+
}
20+
lexer.peek_next().map(|next| lexer.token(next))
21+
}
22+
1023
/// Parse a sequence of JSON values.
1124
pub fn parse_many(slice: &[u8]) -> impl Iterator<Item = Result<Val, hifijson::Error>> + '_ {
1225
let mut lexer = SliceLexer::new(slice);
13-
core::iter::from_fn(move || Some(parse(lexer.ws_token()?, &mut lexer)))
26+
core::iter::from_fn(move || Some(parse(ws_tk(&mut lexer)?, &mut lexer)))
1427
}
1528

1629
/// Read a sequence of JSON values.
1730
pub fn read_many<'a>(read: impl io::BufRead + 'a) -> impl Iterator<Item = io::Result<Val>> + 'a {
1831
use crate::invalid_data;
1932
let mut lexer = IterLexer::new(read.bytes());
2033
core::iter::from_fn(move || {
21-
let v = parse(lexer.ws_token()?, &mut lexer);
22-
Some(v.map_err(|e| core::mem::take(&mut lexer.error).unwrap_or_else(|| invalid_data(e))))
34+
let v = ws_tk(&mut lexer).map(|token| parse(token, &mut lexer).map_err(invalid_data));
35+
// always return I/O error if present, regardless of the output value!
36+
lexer.error.take().map(Err).or(v)
2337
})
2438
}
2539

2640
/// Parse exactly one JSON value.
2741
pub fn parse_single(s: &[u8]) -> Result<Val, hifijson::Error> {
28-
SliceLexer::new(s).exactly_one(parse)
42+
SliceLexer::new(s).exactly_one(ws_tk, parse)
2943
}
3044

3145
/// Parse a JSON string as byte string, preserving invalid UTF-8 as-is.
32-
fn parse_string<L: LexAlloc>(lexer: &mut L) -> Result<Vec<u8>, hifijson::Error> {
46+
fn parse_string<L: LexAlloc>(lexer: &mut L, tag: Tag) -> Result<Vec<u8>, hifijson::Error> {
3347
let on_string = |bytes: &mut L::Bytes, out: &mut Vec<u8>| {
3448
out.extend(bytes.iter());
3549
Ok(())
3650
};
37-
lexer.str_fold(Vec::new(), on_string, |lexer, escape, out| {
38-
let c = lexer.escape_char(escape).map_err(str::Error::Escape)?;
39-
out.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
51+
let s = lexer.str_fold(Vec::new(), on_string, |lexer, out| {
52+
use hifijson::escape::Error;
53+
match (tag, lexer.take_next().ok_or(Error::Eof)?) {
54+
(Tag::Bytes, b'u') => Err(Error::InvalidKind(b'u'))?,
55+
(Tag::Bytes, b'x') => out.push(lexer.hex()?),
56+
(_, c) => out.extend(lexer.escape(c)?.encode_utf8(&mut [0; 4]).as_bytes()),
57+
}
4058
Ok(())
59+
});
60+
s.map_err(hifijson::Error::Str)
61+
}
62+
63+
fn parse_num<L: LexAlloc>(lexer: &mut L) -> Result<Num, hifijson::Error> {
64+
let (num, parts) = lexer.num_string()?;
65+
// if we are dealing with an integer ...
66+
Ok(if parts.dot.is_none() && parts.exp.is_none() {
67+
Num::try_from_int_str(&num, 10).unwrap()
68+
} else {
69+
Num::Dec(num.to_string().into())
4170
})
4271
}
4372

@@ -46,41 +75,45 @@ fn parse_string<L: LexAlloc>(lexer: &mut L) -> Result<Vec<u8>, hifijson::Error>
4675
/// If the underlying lexer reads input fallibly (for example `IterLexer`),
4776
/// the error returned by this function might be misleading.
4877
/// In that case, always check whether the lexer contains an error.
49-
fn parse(token: Token, lexer: &mut impl LexAlloc) -> Result<Val, hifijson::Error> {
50-
match token {
51-
Token::Null => Ok(Val::Null),
52-
Token::True => Ok(Val::Bool(true)),
53-
Token::False => Ok(Val::Bool(false)),
54-
Token::DigitOrMinus => Ok(Val::Num({
55-
let (num, parts) = lexer.num_string()?;
56-
// if we are dealing with an integer ...
57-
if parts.dot.is_none() && parts.exp.is_none() {
58-
Num::try_from_int_str(&num, 10).unwrap()
59-
} else {
60-
Num::Dec(num.to_string().into())
61-
}
62-
})),
63-
Token::Quote => Ok(Val::utf8_str(parse_string(lexer)?)),
64-
Token::LSquare => Ok(Val::Arr({
78+
fn parse<L: LexAlloc>(token: Token, lexer: &mut L) -> Result<Val, hifijson::Error> {
79+
Ok(match token {
80+
Token::Other(b'n') if lexer.strip_prefix(b"null") => Val::Null,
81+
Token::Other(b't') if lexer.strip_prefix(b"true") => Val::Bool(true),
82+
Token::Other(b'f') if lexer.strip_prefix(b"false") => Val::Bool(false),
83+
Token::Other(b'b') if lexer.strip_prefix(b"b\"") => {
84+
Val::byte_str(parse_string(lexer, Tag::Bytes)?)
85+
}
86+
Token::Other(b'N') if lexer.strip_prefix(b"NaN") => Val::Num(Num::Float(f64::NAN)),
87+
Token::Other(b'I') if lexer.strip_prefix(b"Infinity") => {
88+
Val::Num(Num::Float(f64::INFINITY))
89+
}
90+
Token::Minus => Val::Num(match lexer.peek_next() {
91+
Some(b'I') if lexer.strip_prefix(b"Infinity") => Num::Float(f64::NEG_INFINITY),
92+
Some(b'I') => Err(Expect::Value)?,
93+
_ => -parse_num(lexer)?,
94+
}),
95+
Token::Other(b'0'..=b'9') => Val::Num(parse_num(lexer)?),
96+
Token::Quote => Val::utf8_str(parse_string(lexer, Tag::Utf8)?),
97+
Token::LSquare => Val::Arr({
6598
let mut arr = Vec::new();
66-
lexer.seq(Token::RSquare, |token, lexer| {
99+
lexer.seq(Token::RSquare, ws_tk, |token, lexer| {
67100
arr.push(parse(token, lexer)?);
68101
Ok::<_, hifijson::Error>(())
69102
})?;
70103
arr.into()
71-
})),
72-
Token::LCurly => Ok(Val::obj({
104+
}),
105+
Token::LCurly => Val::obj({
73106
let mut obj = Map::default();
74-
lexer.seq(Token::RCurly, |token, lexer| {
107+
lexer.seq(Token::RCurly, ws_tk, |token, lexer| {
75108
let is_colon = |t: &Token| *t == Token::Colon;
76109
let key = parse(token, lexer)?;
77-
lexer.ws_token().filter(is_colon).ok_or(Expect::Colon)?;
78-
let value = parse(lexer.ws_token().ok_or(Expect::Value)?, lexer)?;
110+
ws_tk(lexer).filter(is_colon).ok_or(Expect::Colon)?;
111+
let value = parse(ws_tk(lexer).ok_or(Expect::Value)?, lexer)?;
79112
obj.insert(key, value);
80113
Ok::<_, hifijson::Error>(())
81114
})?;
82115
obj
83-
})),
116+
}),
84117
_ => Err(Expect::Value)?,
85-
}
118+
})
86119
}

jaq-json/src/num.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -235,6 +235,7 @@ impl core::ops::Neg for Num {
235235
Self::Int(x) => int_or_big(x.checked_neg(), [x], |[x]| -x),
236236
Self::BigInt(x) => Self::big_int(-&*x),
237237
Self::Float(x) => Self::Float(-x),
238+
// TODO: store sign next to string
238239
Self::Dec(n) => -Self::from_dec_str(&n),
239240
}
240241
}

jaq-json/src/write.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ macro_rules! write_utf8 {
5757
#[macro_export]
5858
macro_rules! write_bytes {
5959
($w:ident, $s: ident) => {{
60-
write!($w, "\"")?;
60+
write!($w, "b\"")?;
6161
$s.iter()
6262
.try_for_each(|c| write_byte!($w, *c, write!($w, "\\x{c:02x}")))?;
6363
write!($w, "\"")

0 commit comments

Comments (0)