diff --git a/Cargo.toml b/Cargo.toml index d50243f..749659a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,13 +9,14 @@ license = "Apache-2.0/MIT" categories = ["api-bindings"] keywords = ["json", "jq", "query"] readme = "README.md" -edition = "2018" +edition = "2021" [features] default = [] bundled = ["jq-sys/bundled"] [dependencies] +itertools = "0.12.1" jq-sys = "0.2.*" [dev-dependencies] @@ -23,6 +24,7 @@ criterion = "0.2" serde_json = "1.0" matches = "0.1.8" error-chain = "0.12.*" +clap = { version = "4.5.4", features = ["derive"] } [package.metadata.docs.rs] features = ["bundled"] diff --git a/examples/simple-cli.rs b/examples/simple-cli.rs index a1717e4..fdac3d0 100644 --- a/examples/simple-cli.rs +++ b/examples/simple-cli.rs @@ -1,13 +1,46 @@ -extern crate jq_rs; -use std::env; +use clap::Parser; +use jq_rs::Chunks; -fn main() { - let mut args = env::args().skip(1); +#[derive(Parser)] +struct Args { + #[arg(short)] + slurp: bool, + /// Concatenate multiple inputs together directly, instead of treating them as separate inputs. + #[arg(long)] + concat: bool, + program: String, + inputs: Vec, +} + +fn main() -> jq_rs::Result<()> { + let args = Args::parse(); - let program = args.next().expect("jq program"); - let input = args.next().expect("data input"); - match jq_rs::run(&program, &input) { - Ok(s) => print!("{}", s), // The output will include a trailing newline - Err(e) => eprintln!("{}", e), + let mut program = jq_rs::compile(&args.program)?; + + if args.slurp { + let mut outputs = Vec::new(); + match program.run_slurp( + Chunks( + args.inputs + .iter() + .map(String::as_bytes) + .flat_map(|input| [input, if args.concat { b"" } else { b"\n" }]), + ), + |value| outputs.push(value), + ) { + Ok(()) => { + for item in outputs { + println!("{}", item.as_dump_string().unwrap()); + } + }, + Err(e) => eprintln!("{}", e), + } + } else { + match program.run(args.inputs.get(0).map(String::as_str).unwrap_or("")) { + Ok(s) => print!("{}", s), // The output will include a trailing newline + Err(e) => eprintln!("{}", e), + } } + + Ok(()) } diff --git a/src/jq.rs b/src/jq.rs index 7e0e114..a356843 100644 --- a/src/jq.rs +++ b/src/jq.rs @@ -6,19 +6,71 @@ use crate::errors::{Error, Result}; use jq_sys::{ jq_compile, jq_format_error, jq_get_exit_code, jq_halted, jq_init, jq_next, jq_set_error_cb, - jq_start, jq_state, jq_teardown, jv, jv_copy, jv_dump_string, jv_free, jv_get_kind, - jv_invalid_get_msg, jv_invalid_has_msg, jv_kind_JV_KIND_INVALID, jv_kind_JV_KIND_NUMBER, - jv_kind_JV_KIND_STRING, jv_number_value, jv_parser, jv_parser_free, jv_parser_new, - jv_parser_next, jv_parser_set_buf, jv_string_value, + jq_start, jq_state, jq_teardown, jv, jv_array, jv_array_append, jv_copy, jv_dump_string, + jv_free, jv_get_kind, jv_invalid_get_msg, jv_invalid_has_msg, jv_kind_JV_KIND_INVALID, + jv_kind_JV_KIND_NUMBER, jv_kind_JV_KIND_STRING, jv_number_value, jv_parser, jv_parser_free, + jv_parser_new, jv_parser_next, jv_parser_remaining, jv_parser_set_buf, jv_string_value, }; use std::ffi::{CStr, CString}; +use std::iter; +use std::marker::PhantomData; +use std::mem::ManuallyDrop; use std::os::raw::{c_char, c_void}; +use itertools::{Itertools as _, Position}; + pub struct Jq { state: *mut jq_state, err_buf: String, } +/// A type that can be resolved into an iterator of JSON values, +/// which could serve as the inputs to a JQ program. +pub trait IntoJv<'a> { + /// Instantiates a new iterator of JSON objects. + fn into_jv(self) -> impl Iterator> + 'a; +} + +impl IntoJv<'static> for Vec { + fn into_jv(self) -> impl Iterator> { + self.into_iter().map(Ok) + } +} + +impl<'a> IntoJv<'a> for &'a [Jv] { + fn into_jv(self) -> impl Iterator> + 'a { + self.iter().cloned().map(Ok) + } +} + +impl<'a> IntoJv<'a> for &'a CStr { + fn into_jv(self) -> impl Iterator> + 'a { + self.to_bytes().into_jv() + } +} + +impl<'a> IntoJv<'a> for &'a str { + fn into_jv(self) -> impl Iterator> + 'a { + self.as_bytes().into_jv() + } +} + +impl<'a> IntoJv<'a> for &'a [u8] { + fn into_jv(self) -> impl Iterator> + 'a { + Chunks([self]).into_jv() + } +} + +/// An adaptor for [`IntoJv`] for discontinuous chunks of bytes. +pub struct Chunks(pub I); + +impl<'a, I: IntoIterator + 'a> IntoJv<'a> for Chunks { + fn into_jv(self) -> impl Iterator> + 'a { + let parser = Parser::new(); + parser.parse_multi(self.0.into_iter()) + } +} + impl Jq { pub fn compile_program(program: CString) -> Result { let mut jq = Jq { @@ -67,7 +119,7 @@ impl Jq { } fn get_exit_code(&self) -> ExitCode { - let exit_code = JV { + let exit_code = Jv { ptr: unsafe { jq_get_exit_code(self.state) }, }; @@ -84,18 +136,16 @@ impl Jq { } } - /// Run the jq program against an input. - pub fn execute(&mut self, input: CString) -> Result { - let mut parser = Parser::new(); - self.process(parser.parse(input)?) - } - - /// Unwind the parser and return the rendered result. - /// - /// When this results in `Err`, the String value should contain a message about - /// what failed. - fn process(&mut self, initial_value: JV) -> Result { - let mut buf = String::new(); + /// Run the jq program against the first value in the input. + pub fn execute<'a>( + &mut self, + input: impl IntoJv<'a>, + mut output_sink: impl FnMut(Jv), + ) -> Result<()> { + let Some(initial_value) = input.into_jv().next() else { + return Ok(()); // no input, so no output + }; + let initial_value = initial_value?; unsafe { // `jq_start` seems to be a consuming call. @@ -106,10 +156,50 @@ impl Jq { // it is no longer needed. drop(initial_value); - dump(self, &mut buf)?; + while let Some(result) = self.next_result() { + let result = result?; + + output_sink(result); + } + } + + Ok(()) + } + + /// Run the jq program against an iterator of inputs that would get slurped. + pub fn execute_slurped<'a>( + &mut self, + input: impl IntoJv<'a>, + output_sink: impl FnMut(Jv), + ) -> Result<()> { + let mut slurped = Jv { + ptr: unsafe { jv_array() }, + }; + + for item in input.into_jv() { + slurped.ptr = unsafe { jv_array_append(slurped.ptr, item?.into_raw()) }; } - Ok(buf) + self.execute(vec![slurped], output_sink) + } + + fn next_result(&mut self) -> Option> { + let value = Jv { + ptr: unsafe { jq_next(self.state) }, + }; + if value.is_valid() { + Some(Ok(value)) + } else { + if let Err(err) = unsafe { report_jq_err(self, &value) } { + return Some(Err(err)); + } + + value.get_msg().map(|reason| { + Err(Error::System { + reason: Some(reason), + }) + }) + } } } @@ -119,14 +209,15 @@ impl Drop for Jq { } } -struct JV { +/// A handle to a JSON value that can be passed into a JQ program or rendered as a string. +pub struct Jv { ptr: jv, } -impl JV { +impl Jv { /// Convert the current `JV` into the "dump string" rendering of itself. pub fn as_dump_string(&self) -> Result { - let dump = JV { + let dump = Jv { ptr: unsafe { jv_dump_string(jv_copy(self.ptr), 0) }, }; unsafe { get_string_value(jv_string_value(dump.ptr)) } @@ -136,7 +227,7 @@ impl JV { pub fn get_msg(&self) -> Option { if self.invalid_has_msg() { let reason = { - let msg = JV { + let msg = Jv { ptr: unsafe { // This call is gross since we're dipping outside of the // safe/drop-enabled wrapper to get a copy which will be freed @@ -157,6 +248,7 @@ impl JV { } } + /// Returns the underlying number if the value is a number. pub fn as_number(&self) -> Option { unsafe { if jv_get_kind(self.ptr) == jv_kind_JV_KIND_NUMBER { @@ -167,6 +259,7 @@ impl JV { } } + /// Returns the underlying string if the value is a number. pub fn as_string(&self) -> Result { unsafe { if jv_get_kind(self.ptr) == jv_kind_JV_KIND_STRING { @@ -177,16 +270,30 @@ impl JV { } } + /// Returns whether the value is a valid, serializable value. pub fn is_valid(&self) -> bool { unsafe { jv_get_kind(self.ptr) != jv_kind_JV_KIND_INVALID } } - pub fn invalid_has_msg(&self) -> bool { + fn invalid_has_msg(&self) -> bool { unsafe { jv_invalid_has_msg(jv_copy(self.ptr)) == 1 } } + + pub(crate) fn into_raw(self) -> jv { + let this = ManuallyDrop::new(self); + this.ptr + } } -impl Drop for JV { +impl Clone for Jv { + fn clone(&self) -> Self { + Jv { + ptr: unsafe { jv_copy(self.ptr) }, + } + } +} + +impl Drop for Jv { fn drop(&mut self) { unsafe { jv_free(self.ptr) }; } @@ -203,42 +310,75 @@ impl Parser { } } - pub fn parse(&mut self, input: CString) -> Result { - // For a single run, we could set this to `1` (aka `true`) but this will - // break the repeated `JqProgram` usage. - // It may be worth exposing this to the caller so they can set it for each - // use case, but for now we'll just "leave it open." - let is_last = 0; - - // Originally I planned to have a separate "set_buf" method, but it looks like - // the C api really wants you to set the buffer, then call `jv_parser_next()` in - // the same logical block. - // Mainly I think the important thing is to ensure the `input` outlives both the - // set_buf and next calls. - unsafe { - jv_parser_set_buf( - self.ptr, - input.as_ptr(), - input.as_bytes().len() as i32, - is_last, - ) - }; + pub fn parse_multi<'a>( + self, + inputs: impl Iterator + 'a, + ) -> impl Iterator> + 'a { + let jq = self.ptr; + + inputs + .with_position() + .flat_map(move |(pos, input)| { + unsafe { + let is_partial = match pos { + Position::First | Position::Middle => 1, + Position::Only | Position::Last => 0, + }; + jv_parser_set_buf( + jq, + input.as_ptr().cast::(), + input.len() as i32, + is_partial, + ); + } - let value = JV { - ptr: unsafe { jv_parser_next(self.ptr) }, - }; - if value.is_valid() { - Ok(value) - } else { - Err(Error::System { - reason: Some( - value - .get_msg() - .unwrap_or_else(|| "JQ: Parser error".to_string()), - ), + iter::repeat(()) + .take_while(move |()| unsafe { jv_parser_remaining(jq) } != 0) + .filter_map(move |()| { + let value = Jv { + ptr: unsafe { jv_parser_next(jq) }, + }; + + if value.is_valid() { + Some(Ok(value)) + } else if unsafe { jv_invalid_has_msg(jv_copy(value.ptr)) } != 0 { + Some(Err(Error::System { + reason: Some( + value + .get_msg() + .unwrap_or_else(|| "JQ: Parser error".to_string()), + ), + })) + } else { + None + } + }) }) + .take_while_inclusive(|result| result.is_ok()) // fuse the iterator when an error is encountered to avoid working on invalid objects + .chain(empty_iter_with_finalizer(move || { + drop(self); + })) + } +} + +fn empty_iter_with_finalizer(f: F) -> impl iter::FusedIterator { + struct Iter(Option, PhantomData); + + impl Iterator for Iter { + type Item = T; + + fn next(&mut self) -> Option { + if let Some(f) = self.0.take() { + f(); + } + + None } } + + impl iter::FusedIterator for Iter {} + + Iter(Some(f), PhantomData) } impl Drop for Parser { @@ -255,24 +395,7 @@ unsafe fn get_string_value(value: *const c_char) -> Result { Ok(s.to_owned()) } -/// Renders the data from the parser and pushes it into the buffer. -unsafe fn dump(jq: &Jq, buf: &mut String) -> Result<()> { - // Looks a lot like an iterator... - - let mut value = JV { - ptr: jq_next(jq.state), - }; - - while value.is_valid() { - let s = value.as_dump_string()?; - buf.push_str(&s); - buf.push('\n'); - - value = JV { - ptr: jq_next(jq.state), - }; - } - +unsafe fn report_jq_err(jq: &Jq, value: &Jv) -> Result<()> { if jq.is_halted() { use ExitCode::*; match jq.get_exit_code() { @@ -293,10 +416,6 @@ unsafe fn dump(jq: &Jq, buf: &mut String) -> Result<()> { JQ_OK | JQ_OK_NULL_KIND | JQ_OK_NO_OUTPUT => Ok(()), JQ_ERROR_UNKNOWN => Err(Error::Unknown), } - } else if let Some(reason) = value.get_msg() { - Err(Error::System { - reason: Some(reason), - }) } else { Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 1f4c90f..ed1ebae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -151,6 +151,8 @@ extern crate serde_json; mod errors; mod jq; +pub use jq::{Chunks, IntoJv, Jv}; + use std::ffi::CString; pub use errors::{Error, Result}; @@ -171,16 +173,36 @@ pub struct JqProgram { } impl JqProgram { - /// Runs a json string input against a pre-compiled jq program. + /// Runs a JSON string input against a pre-compiled jq program. pub fn run(&mut self, data: &str) -> Result { - if data.trim().is_empty() { - // During work on #4, #7, the parser test which allows us to avoid a memory - // error shows that an empty input just yields an empty response BUT our - // implementation would yield a parse error. - return Ok("".into()); - } - let input = CString::new(data)?; - self.jq.execute(input) + let mut outputs = Vec::new(); + self.run_raw(data, |value| outputs.push(value))?; + + outputs.into_iter().map(|jv| { + let mut dump = jv.as_dump_string()?; + dump.push('\n'); + Ok(dump) + }).collect() + } + + /// Runs the pre-compiled jq program with the given inputs and collects the results in the + /// sink. + pub fn run_raw<'a>( + &mut self, + data: impl IntoJv<'a>, + output_sink: impl FnMut(Jv), + ) -> Result<()> { + self.jq.execute(data, output_sink) + } + + /// Slurps the inputs into a JSON array, runs the pre-compiled jq program with the array + /// and collects the results in the sink. + pub fn run_slurp<'a>( + &mut self, + inputs: impl IntoJv<'a>, + output_sink: impl FnMut(Jv), + ) -> Result<()> { + self.jq.execute_slurped(inputs, output_sink) } } diff --git a/tests/slurp.rs b/tests/slurp.rs new file mode 100644 index 0000000..7d99643 --- /dev/null +++ b/tests/slurp.rs @@ -0,0 +1,13 @@ +use jq_rs::Chunks; + +#[test] +fn test_slurp() { + let mut program = jq_rs::compile(".").unwrap(); + + let mut outputs = Vec::new(); + program.run_slurp(Chunks(["123", "45 67", " ", "89 ", "12", " 345"].into_iter().map(str::as_bytes)), |output| outputs.push(output)).unwrap(); + + assert_eq!(outputs.len(), 1); + + assert_eq!(serde_json::from_str::<[i32; 5]>(&outputs[0].as_dump_string().unwrap()).unwrap(), [12345, 67, 89, 12, 345]); +}