Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

diff: add option/flag for headers in output #1395

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ console = { version = "0.15", optional = true }
cpc = { version = "1.9", optional = true }
crossbeam-channel = "0.5"
csv = "1.3"
csv-diff = "0.1.0-beta.4"
csv-diff = "0.1.0"
csv-index = "0.1"
csvs_convert = { version = "0.8", default-features = false, features = [
"converters",
Expand Down
107 changes: 58 additions & 49 deletions src/cmd/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ Find the difference between two CSVs, but only for the first two columns and
sort the result by the first and second column:
qsv diff -k 0,1 --sort-columns 0,1 left.csv right.csv

Find the difference between two CSVs, but do not output headers in the result:
qsv diff --no-headers-result left.csv right.csv

Find the difference between two CSVs. Both CSVs have no headers, but the result should have
headers, so generic headers will be used in the form of: _col_1, _col_2, etc.:
qsv diff --no-headers-left --no-headers-right left.csv right.csv

For more examples, see https://github.com/jqnatividad/qsv/blob/master/tests/test_diff.rs

Usage:
Expand All @@ -38,6 +45,9 @@ diff options:
the right CSV to diff. (When not set, the
first row is the header row and will be skipped during
the diff. It will always appear in the output.)
--no-headers-result When set, the diff result won't have a header row in
its output. If not set and both CSVs have no headers,
headers in the result will be: _col_1,_col_2, etc.
--delimiter-left <arg> The field delimiter for reading CSV data on the left.
Must be a single character. (default: ,)
--delimiter-right <arg> The field delimiter for reading CSV data on the right.
Expand All @@ -61,11 +71,15 @@ Common options:
-o, --output <file> Write output to <file> instead of stdout.
"#;

use std::io::{self, Read, Write};
use std::io::{self, Write};

use csv_diff::{csv_diff::CsvByteDiffBuilder, diff_row::DiffByteRecord};
use csv_diff::{
csv_diff::CsvByteDiffBuilder, csv_headers::Headers, diff_result::DiffByteRecords,
diff_row::DiffByteRecord,
};
use serde::Deserialize;

use super::rename::rename_headers_all_generic;
use crate::{
clitypes::CliError,
config::{Config, Delimiter},
Expand All @@ -74,16 +88,17 @@ use crate::{

#[derive(Deserialize)]
struct Args {
arg_input_left: Option<String>,
arg_input_right: Option<String>,
flag_output: Option<String>,
flag_jobs: Option<usize>,
flag_no_headers_left: bool,
flag_no_headers_right: bool,
flag_delimiter_left: Option<Delimiter>,
flag_delimiter_right: Option<Delimiter>,
flag_key: Option<String>,
flag_sort_columns: Option<String>,
arg_input_left: Option<String>,
arg_input_right: Option<String>,
flag_output: Option<String>,
flag_jobs: Option<usize>,
flag_no_headers_left: bool,
flag_no_headers_right: bool,
flag_delimiter_left: Option<Delimiter>,
flag_delimiter_right: Option<Delimiter>,
flag_no_headers_result: bool,
flag_key: Option<String>,
flag_sort_columns: Option<String>,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand Down Expand Up @@ -123,15 +138,12 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
.transpose()?;

let wtr = Config::new(&args.flag_output).writer()?;
let mut csv_rdr_left = rconfig_left.reader()?;
let mut csv_rdr_right = rconfig_right.reader()?;
let csv_rdr_left = rconfig_left.reader()?;
let csv_rdr_right = rconfig_right.reader()?;

// set RAYON_NUM_THREADS
util::njobs(args.flag_jobs);

let mut csv_diff_writer = CsvDiffWriter::new(wtr);
csv_diff_writer.write_headers(&mut csv_rdr_left, &mut csv_rdr_right)?;

let Ok(csv_diff) = CsvByteDiffBuilder::new()
.primary_key_columns(primary_key_cols)
.build()
Expand All @@ -154,56 +166,53 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
},
}

let mut csv_diff_writer = CsvDiffWriter::new(wtr, args.flag_no_headers_result);
Ok(csv_diff_writer.write_diff_byte_records(diff_byte_records)?)
}

struct CsvDiffWriter<W: Write> {
csv_writer: csv::Writer<W>,
no_headers: bool,
}

impl<W: Write> CsvDiffWriter<W> {
fn new(csv_writer: csv::Writer<W>) -> Self {
Self { csv_writer }
fn new(csv_writer: csv::Writer<W>, no_headers: bool) -> Self {
Self {
csv_writer,
no_headers,
}
}

fn write_headers<R: Read>(
&mut self,
rdr_left: &mut csv::Reader<R>,
rdr_right: &mut csv::Reader<R>,
) -> csv::Result<()> {
match (rdr_left.has_headers(), rdr_right.has_headers()) {
(true, true) => {
let rdr_bh = rdr_left.byte_headers()?;

rdr_bh.write_diffresult_header(&mut self.csv_writer)?;
// we also read the headers from the right CSV, so that both readers end up
// before the actual records. Otherwise, it would lead to errors when we
// diff the CSVs, because the header of one CSV would have been read and the other
// not.
#[allow(clippy::let_underscore_untyped)]
let _ = rdr_right.byte_headers()?;
fn write_headers(&mut self, headers: &Headers, num_columns: Option<usize>) -> csv::Result<()> {
match (headers.headers_left(), headers.headers_right()) {
(Some(lbh), Some(_rbh)) => {
// currently, `diff` can only handle two CSVs that have the same
// headers ordering, so in this case we can either choose the left
// or right headers, because both are the same
if !self.no_headers {
lbh.write_diffresult_header(&mut self.csv_writer)?;
}
},
(true, false) => {
let rdr_bh = rdr_left.byte_headers()?;

rdr_bh.write_diffresult_header(&mut self.csv_writer)?;
(Some(bh), None) | (None, Some(bh)) => {
if !self.no_headers {
bh.write_diffresult_header(&mut self.csv_writer)?;
}
},
(false, true) => {
let rdr_bh = rdr_right.byte_headers()?;

rdr_bh.write_diffresult_header(&mut self.csv_writer)?;
(None, None) => {
if let (Some(num_cols), false) = (num_columns.filter(|&c| c > 0), self.no_headers) {
let headers_generic = rename_headers_all_generic(num_cols);
let mut new_rdr = csv::Reader::from_reader(headers_generic.as_bytes());
let new_headers = new_rdr.byte_headers()?;
new_headers.write_diffresult_header(&mut self.csv_writer)?;
}
},
// nothing to do, because there are no headers
(false, false) => {},
}

Ok(())
}

fn write_diff_byte_records(
&mut self,
diff_byte_records: impl IntoIterator<Item = DiffByteRecord>,
) -> io::Result<()> {
fn write_diff_byte_records(&mut self, diff_byte_records: DiffByteRecords) -> io::Result<()> {
self.write_headers(diff_byte_records.headers(), diff_byte_records.num_columns())?;
for dbr in diff_byte_records {
self.write_diff_byte_record(&dbr)?;
}
Expand Down
18 changes: 11 additions & 7 deletions src/cmd/rename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let headers = rdr.byte_headers()?;

if args.arg_headers.to_lowercase() == "_all_generic" {
let mut generic_headers = String::new();
for (i, _) in headers.iter().enumerate() {
generic_headers.push_str(&format!("_col_{},", i + 1));
}
// remove the trailing comma
generic_headers.pop();
args.arg_headers = generic_headers;
args.arg_headers = rename_headers_all_generic(headers.len());
}

let mut new_rdr = csv::Reader::from_reader(args.arg_headers.as_bytes());
Expand All @@ -95,3 +89,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
wtr.flush()?;
Ok(())
}

/// Builds a comma-separated line of generic column names.
///
/// For `num_of_cols == 3` the result is `_col_1,_col_2,_col_3`; numbering
/// starts at 1. When `num_of_cols` is 0 an empty string is returned.
pub(crate) fn rename_headers_all_generic(num_of_cols: usize) -> String {
    // Joining the individual names places the separator only between
    // entries, so there is no trailing comma to strip afterwards.
    let column_names: Vec<String> = (1..=num_of_cols)
        .map(|position| format!("_col_{position}"))
        .collect();
    column_names.join(",")
}
Loading
Loading