diff --git a/src/cmd/diff.rs b/src/cmd/diff.rs index a6b51a66f..1f36470d2 100644 --- a/src/cmd/diff.rs +++ b/src/cmd/diff.rs @@ -23,6 +23,13 @@ Find the difference between two CSVs, but only for the first two columns and sort the result by the first and second column: qsv diff -k 0,1 --sort-columns 0,1 left.csv right.csv +Find the difference between two CSVs, but do not output headers in the result: + qsv diff --no-headers-result left.csv right.csv + +Find the difference between two CSVs. Both CSVs have no headers, but the result should have +headers, so generic headers will be used in the form of: _col_1, _col_2, etc.: + qsv diff --no-headers-left --no-headers-right left.csv right.csv + For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_diff.rs Usage: @@ -38,6 +45,9 @@ diff options: the right CSV to diff. (When not set, the first row is the header row and will be skipped during the diff. It will always appear in the output.) + --no-headers-result When set, the diff result won't have a header row in + its output. If not set and both CSVs have no headers, + headers in the result will be: _col_1,_col_2, etc. --delimiter-left The field delimiter for reading CSV data on the left. Must be a single character. (default: ,) --delimiter-right The field delimiter for reading CSV data on the right. @@ -61,11 +71,15 @@ Common options: -o, --output Write output to instead of stdout. 
"#; -use std::io::{self, Read, Write}; +use std::io::{self, Write}; -use csv_diff::{csv_diff::CsvByteDiffBuilder, diff_row::DiffByteRecord}; +use csv_diff::{ + csv_diff::CsvByteDiffBuilder, csv_headers::Headers, diff_result::DiffByteRecords, + diff_row::DiffByteRecord, +}; use serde::Deserialize; +use super::rename::rename_headers_all_generic; use crate::{ clitypes::CliError, config::{Config, Delimiter}, @@ -74,16 +88,17 @@ use crate::{ #[derive(Deserialize)] struct Args { - arg_input_left: Option, - arg_input_right: Option, - flag_output: Option, - flag_jobs: Option, - flag_no_headers_left: bool, - flag_no_headers_right: bool, - flag_delimiter_left: Option, - flag_delimiter_right: Option, - flag_key: Option, - flag_sort_columns: Option, + arg_input_left: Option, + arg_input_right: Option, + flag_output: Option, + flag_jobs: Option, + flag_no_headers_left: bool, + flag_no_headers_right: bool, + flag_delimiter_left: Option, + flag_delimiter_right: Option, + flag_no_headers_result: bool, + flag_key: Option, + flag_sort_columns: Option, } pub fn run(argv: &[&str]) -> CliResult<()> { @@ -123,15 +138,12 @@ pub fn run(argv: &[&str]) -> CliResult<()> { .transpose()?; let wtr = Config::new(&args.flag_output).writer()?; - let mut csv_rdr_left = rconfig_left.reader()?; - let mut csv_rdr_right = rconfig_right.reader()?; + let csv_rdr_left = rconfig_left.reader()?; + let csv_rdr_right = rconfig_right.reader()?; // set RAYON_NUM_THREADS util::njobs(args.flag_jobs); - let mut csv_diff_writer = CsvDiffWriter::new(wtr); - csv_diff_writer.write_headers(&mut csv_rdr_left, &mut csv_rdr_right)?; - let Ok(csv_diff) = CsvByteDiffBuilder::new() .primary_key_columns(primary_key_cols) .build() @@ -154,56 +166,53 @@ pub fn run(argv: &[&str]) -> CliResult<()> { }, } + let mut csv_diff_writer = CsvDiffWriter::new(wtr, args.flag_no_headers_result); Ok(csv_diff_writer.write_diff_byte_records(diff_byte_records)?) 
} struct CsvDiffWriter { csv_writer: csv::Writer, + no_headers: bool, } impl CsvDiffWriter { - fn new(csv_writer: csv::Writer) -> Self { - Self { csv_writer } + fn new(csv_writer: csv::Writer, no_headers: bool) -> Self { + Self { + csv_writer, + no_headers, + } } - fn write_headers( - &mut self, - rdr_left: &mut csv::Reader, - rdr_right: &mut csv::Reader, - ) -> csv::Result<()> { - match (rdr_left.has_headers(), rdr_right.has_headers()) { - (true, true) => { - let rdr_bh = rdr_left.byte_headers()?; - - rdr_bh.write_diffresult_header(&mut self.csv_writer)?; - // we also read the headers from the right CSV, so that both readers end up - // before the actual records. Otherwise, it would lead to errors when we - // diff the CSVs, because the header of one CSV would have been read and the other - // not. - #[allow(clippy::let_underscore_untyped)] - let _ = rdr_right.byte_headers()?; + fn write_headers(&mut self, headers: &Headers, num_columns: Option) -> csv::Result<()> { + match (headers.headers_left(), headers.headers_right()) { + (Some(lbh), Some(_rbh)) => { + // currently, `diff` can only handle two CSVs that have the same + // headers ordering, so in this case we can either choose the left + // or right headers, because both are the same + if !self.no_headers { + lbh.write_diffresult_header(&mut self.csv_writer)?; + } }, - (true, false) => { - let rdr_bh = rdr_left.byte_headers()?; - - rdr_bh.write_diffresult_header(&mut self.csv_writer)?; + (Some(bh), None) | (None, Some(bh)) => { + if !self.no_headers { + bh.write_diffresult_header(&mut self.csv_writer)?; + } }, - (false, true) => { - let rdr_bh = rdr_right.byte_headers()?; - - rdr_bh.write_diffresult_header(&mut self.csv_writer)?; + (None, None) => { + if let (Some(num_cols), false) = (num_columns.filter(|&c| c > 0), self.no_headers) { + let headers_generic = rename_headers_all_generic(num_cols); + let mut new_rdr = csv::Reader::from_reader(headers_generic.as_bytes()); + let new_headers = 
new_rdr.byte_headers()?; + new_headers.write_diffresult_header(&mut self.csv_writer)?; + } }, - // nothing to do, because there are no headers - (false, false) => {}, } Ok(()) } - fn write_diff_byte_records( - &mut self, - diff_byte_records: impl IntoIterator, - ) -> io::Result<()> { + fn write_diff_byte_records(&mut self, diff_byte_records: DiffByteRecords) -> io::Result<()> { + self.write_headers(diff_byte_records.headers(), diff_byte_records.num_columns())?; for dbr in diff_byte_records { self.write_diff_byte_record(&dbr)?; } diff --git a/src/cmd/rename.rs b/src/cmd/rename.rs index 815e000bd..2a5f612c6 100644 --- a/src/cmd/rename.rs +++ b/src/cmd/rename.rs @@ -66,13 +66,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> { let headers = rdr.byte_headers()?; if args.arg_headers.to_lowercase() == "_all_generic" { - let mut generic_headers = String::new(); - for (i, _) in headers.iter().enumerate() { - generic_headers.push_str(&format!("_col_{},", i + 1)); - } - // remove the trailing comma - generic_headers.pop(); - args.arg_headers = generic_headers; + args.arg_headers = rename_headers_all_generic(headers.len()); } let mut new_rdr = csv::Reader::from_reader(args.arg_headers.as_bytes()); @@ -95,3 +89,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> { wtr.flush()?; Ok(()) } + +pub(crate) fn rename_headers_all_generic(num_of_cols: usize) -> String { + let mut generic_headers = String::new(); + for i in 1..=num_of_cols { + generic_headers.push_str(&format!("_col_{},", i)); + } + // remove the trailing comma + generic_headers.pop(); + generic_headers +} diff --git a/tests/test_diff.rs b/tests/test_diff.rs index 16fd62387..d45c782ae 100644 --- a/tests/test_diff.rs +++ b/tests/test_diff.rs @@ -216,6 +216,215 @@ diffresult,case_enquiry_id,open_dt,target_dt,closed_dt,ontime,case_status,closur } } +#[test] +fn diff_with_no_headers_in_result() { + let wrk = Workdir::new("diff_no_headers_in_result"); + + let left = vec![svec!["h1", "h2", "h3"], svec!["1", "foo", "bar"]]; + 
wrk.create("left.csv", left); + + let right = vec![svec!["h1", "h2", "h3"], svec!["1", "foo_changed", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args(["left.csv", "right.csv", "--no-headers-result"]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["-", "1", "foo", "bar",], + svec!["+", "1", "foo_changed", "bar",], + ]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_no_diff_with_no_headers_in_result() { + let wrk = Workdir::new("diff_no_diff_with_no_headers_in_result"); + + let left = vec![svec!["h1", "h2", "h3"], svec!["1", "foo", "bar"]]; + wrk.create("left.csv", left); + + let right = vec![svec!["h1", "h2", "h3"], svec!["1", "foo", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args(["left.csv", "right.csv", "--no-headers-result"]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected: Vec> = vec![]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_only_left_has_headers_headers_in_result() { + let wrk = Workdir::new("diff_only_left_has_headers_headers_in_result"); + + let left = vec![svec!["h1", "h2", "h3"], svec!["1", "foo", "bar"]]; + wrk.create("left.csv", left); + + let right = vec![svec!["1", "foo_changed", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args(["left.csv", "right.csv", "--no-headers-right"]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["diffresult", "h1", "h2", "h3"], + svec!["-", "1", "foo", "bar",], + svec!["+", "1", "foo_changed", "bar",], + ]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_only_right_has_headers_headers_in_result() { + let wrk = Workdir::new("diff_only_left_has_headers_headers_in_result"); + + let left = vec![svec!["1", "foo", "bar"]]; + wrk.create("left.csv", left); + + let right = vec![svec!["h1", "h2", "h3"], svec!["1", "foo_changed", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = 
wrk.command("diff"); + cmd.args(["left.csv", "right.csv", "--no-headers-left"]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["diffresult", "h1", "h2", "h3"], + svec!["-", "1", "foo", "bar",], + svec!["+", "1", "foo_changed", "bar",], + ]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_with_generic_headers_in_result() { + let wrk = Workdir::new("diff_with_generic_headers_in_result"); + + let left = vec![svec!["1", "foo", "bar"]]; + wrk.create("left.csv", left); + + let right = vec![svec!["1", "foo_changed", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args([ + "left.csv", + "right.csv", + "--no-headers-left", + "--no-headers-right", + ]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["diffresult", "_col_1", "_col_2", "_col_3",], + svec!["-", "1", "foo", "bar",], + svec!["+", "1", "foo_changed", "bar",], + ]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_with_no_left_no_right_and_no_headers_in_result() { + let wrk = Workdir::new("diff_with_no_left_no_right_and_no_headers_in_result"); + + let left = vec![svec!["1", "foo", "bar"]]; + wrk.create("left.csv", left); + + let right = vec![svec!["1", "foo_changed", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args([ + "left.csv", + "right.csv", + "--no-headers-left", + "--no-headers-right", + "--no-headers-result", + ]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![ + svec!["-", "1", "foo", "bar",], + svec!["+", "1", "foo_changed", "bar",], + ]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_no_diff_with_generic_headers_in_result() { + let wrk = Workdir::new("diff_no_diff_with_generic_headers_in_result"); + + let left = vec![svec!["1", "foo", "bar"]]; + wrk.create("left.csv", left); + + let right = vec![svec!["1", "foo", "bar"]]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args([ + "left.csv", + 
"right.csv", + "--no-headers-left", + "--no-headers-right", + ]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![svec!["diffresult", "_col_1", "_col_2", "_col_3",]]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_no_diff_and_zero_columns_flag_true_for_headers_in_result_but_none_are_in_result() { + let wrk = Workdir::new( + "diff_no_diff_and_zero_columns_flag_true_for_headers_in_result_but_none_are_in_result", + ); + + let left: Vec> = vec![]; + wrk.create("left.csv", left); + + let right: Vec> = vec![]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args(["left.csv", "right.csv"]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected: Vec> = vec![]; + + assert_eq!(got, expected); +} + +#[test] +fn diff_left_has_one_column_right_has_none_headers_in_result() { + let wrk = Workdir::new( + "diff_no_diff_and_zero_columns_flag_true_for_headers_in_result_but_none_are_in_result", + ); + + let left = vec![svec!["h1"]]; + wrk.create("left.csv", left); + + let right: Vec> = vec![]; + wrk.create("right.csv", right); + + let mut cmd = wrk.command("diff"); + cmd.args(["left.csv", "right.csv", "--no-headers-right"]); + + let got: Vec> = wrk.read_stdout(&mut cmd); + let expected = vec![svec!["diffresult", "h1"]]; + + assert_eq!(got, expected); +} + fn create_file_with_delim(wrk: &Workdir, file_path_new: &str, file_path: &str, delimiter: u8) { let mut select_cmd = wrk.command("select"); select_cmd.args(["1-", file_path]);