diff --git a/src/cmd/joinp.rs b/src/cmd/joinp.rs
index e04b23a6e2..d25dd0a412 100644
--- a/src/cmd/joinp.rs
+++ b/src/cmd/joinp.rs
@@ -168,6 +168,7 @@ Common options:
 "#;
 
 use std::{
+    env,
     fs::File,
     io::{self, Write},
     path::Path,
@@ -473,6 +474,13 @@ impl Args {
             b','
         };
 
+        // comment char = first byte of QSV_COMMENT_CHAR; unset or empty disables it
+        let comment_char: Option<u8> = if let Ok(comment_char) = env::var("QSV_COMMENT_CHAR") {
+            comment_char.as_bytes().first().copied()
+        } else {
+            None
+        };
+
         let num_rows = if infer_len == 0 {
             None
         } else {
@@ -482,6 +490,7 @@ impl Args {
         let mut left_lf = LazyCsvReader::new(&self.arg_input1)
             .has_header(true)
             .with_missing_is_null(self.flag_nulls)
+            .with_comment_char(comment_char)
             .with_separator(delim)
             .with_infer_schema_length(num_rows)
             .with_try_parse_dates(try_parsedates)
@@ -497,6 +506,7 @@ impl Args {
         let mut right_lf = LazyCsvReader::new(&self.arg_input2)
             .has_header(true)
             .with_missing_is_null(self.flag_nulls)
+            .with_comment_char(comment_char)
             .with_separator(delim)
             .with_infer_schema_length(num_rows)
             .with_try_parse_dates(try_parsedates)
diff --git a/src/cmd/sqlp.rs b/src/cmd/sqlp.rs
index 7e990b689b..027e799cd5 100644
--- a/src/cmd/sqlp.rs
+++ b/src/cmd/sqlp.rs
@@ -403,6 +403,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
         b','
     };
 
+    // comment char = first byte of QSV_COMMENT_CHAR; unset or empty disables it
+    let comment_char: Option<u8> = if let Ok(comment_char) = env::var("QSV_COMMENT_CHAR") {
+        comment_char.as_bytes().first().copied()
+    } else {
+        None
+    };
+
     let optimization_state = if args.flag_no_optimizations {
         // use default optimization state
         polars::lazy::frame::OptState {
@@ -467,6 +474,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
         let lf = LazyCsvReader::new(table)
             .has_header(true)
             .with_missing_is_null(true)
+            .with_comment_char(comment_char)
             .with_null_values(Some(NullValues::AllColumns(rnull_values.clone())))
             .with_separator(delim)
             .with_infer_schema_length(args.flag_infer_len)
diff --git a/tests/test_joinp.rs b/tests/test_joinp.rs
index d7b6be6e9b..8ed95afeeb 100644
--- a/tests/test_joinp.rs
+++ b/tests/test_joinp.rs
@@ -20,6 +20,27 @@ macro_rules! joinp_test {
     };
 }
 
+macro_rules! joinp_test_comments {
+    ($name2:ident, $fun:expr) => {
+        mod $name2 {
+            use std::process;
+
+            #[allow(unused_imports)]
+            use super::{make_rows, setup};
+            use crate::workdir::Workdir;
+
+            #[test]
+            fn headers() {
+                let wrk = setup(stringify!($name2));
+                let mut cmd = wrk.command("joinp");
+                cmd.env("QSV_COMMENT_CHAR", "#");
+                cmd.args(&["city", "cities_comments.csv", "city", "places.csv"]);
+                $fun(wrk, cmd);
+            }
+        }
+    };
+}
+
 fn setup(name: &str) -> Workdir {
     let cities = vec![
         svec!["city", "state"],
@@ -28,6 +49,15 @@ fn setup(name: &str) -> Workdir {
         svec!["San Francisco", "CA"],
         svec!["Buffalo", "NY"],
     ];
+    let cities_comments = vec![
+        svec!["#this is a comment", ""],
+        svec!["city", "state"],
+        svec!["Boston", "MA"],
+        svec!["New York", "NY"],
+        svec!["#Washington", "DC"],
+        svec!["San Francisco", "CA"],
+        svec!["Buffalo", "NY"],
+    ];
     let places = vec![
         svec!["city", "place"],
         svec!["Boston", "Logan Airport"],
@@ -38,6 +68,7 @@ fn setup(name: &str) -> Workdir {
 
     let wrk = Workdir::new(name);
     wrk.create("cities.csv", cities);
+    wrk.create("cities_comments.csv", cities_comments);
     wrk.create("places.csv", places);
     wrk
 }
@@ -66,6 +97,22 @@ joinp_test!(joinp_inner, |wrk: Workdir, mut cmd: process::Command| {
     assert_eq!(got, expected);
 });
 
+joinp_test_comments!(
+    joinp_inner_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(
+            false,
+            vec![
+                svec!["Boston", "MA", "Logan Airport"],
+                svec!["Boston", "MA", "Boston Garden"],
+                svec!["Buffalo", "NY", "Ralph Wilson Stadium"],
+            ],
+        );
+        assert_eq!(got, expected);
+    }
+);
+
 joinp_test!(
     joinp_outer_left,
     |wrk: Workdir, mut cmd: process::Command| {
@@ -85,6 +132,25 @@ joinp_test!(
     }
 );
 
+joinp_test_comments!(
+    joinp_outer_left_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.arg("--left");
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(
+            false,
+            vec![
+                svec!["Boston", "MA", "Logan Airport"],
+                svec!["Boston", "MA", "Boston Garden"],
+                svec!["New York", "NY", ""],
+                svec!["San Francisco", "CA", ""],
+                svec!["Buffalo", "NY", "Ralph Wilson Stadium"],
+            ],
+        );
+        assert_eq!(got, expected);
+    }
+);
+
 joinp_test!(
     joinp_outer_left_filter_left,
     |wrk: Workdir, mut cmd: process::Command| {
@@ -101,6 +167,22 @@ joinp_test!(
     }
 );
 
+joinp_test_comments!(
+    joinp_outer_left_filter_left_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.arg("--left").args(["--filter-left", "city = 'Boston'"]);
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(
+            false,
+            vec![
+                svec!["Boston", "MA", "Logan Airport"],
+                svec!["Boston", "MA", "Boston Garden"],
+            ],
+        );
+        assert_eq!(got, expected);
+    }
+);
+
 joinp_test!(
     joinp_inner_filter_right,
     |wrk: Workdir, mut cmd: process::Command| {
@@ -111,6 +193,16 @@ joinp_test!(
     }
 );
 
+joinp_test_comments!(
+    joinp_inner_filter_right_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.args(["--filter-right", "place ~* 'w'"]);
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(false, vec![svec!["Buffalo", "NY", "Ralph Wilson Stadium"]]);
+        assert_eq!(got, expected);
+    }
+);
+
 joinp_test!(
     joinp_outer_left_validate_none,
     |wrk: Workdir, mut cmd: process::Command| {
@@ -130,6 +222,25 @@ joinp_test!(
     }
 );
 
+joinp_test_comments!(
+    joinp_outer_left_validate_none_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.arg("--left").args(["--validate", "none"]);
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(
+            false,
+            vec![
+                svec!["Boston", "MA", "Logan Airport"],
+                svec!["Boston", "MA", "Boston Garden"],
+                svec!["New York", "NY", ""],
+                svec!["San Francisco", "CA", ""],
+                svec!["Buffalo", "NY", "Ralph Wilson Stadium"],
+            ],
+        );
+        assert_eq!(got, expected);
+    }
+);
+
 // joinp_test!(
 //     joinp_outer_left_validate_manytoone,
 //     |wrk: Workdir, mut cmd: process::Command| {
@@ -142,7 +253,7 @@ joinp_test!(
 //         );
 //         wrk.assert_err(&mut cmd);
 //     }
-// );x
+// );
 
 joinp_test!(joinp_full, |wrk: Workdir, mut cmd: process::Command| {
     cmd.arg("--full");
@@ -172,6 +283,37 @@ joinp_test!(joinp_full, |wrk: Workdir, mut cmd: process::Command| {
     assert!(got == expected1 || got == expected2);
 });
 
+joinp_test_comments!(
+    joinp_full_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.arg("--full");
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected1 = make_rows(
+            false,
+            vec![
+                svec!["Boston", "MA", "Logan Airport"],
+                svec!["Boston", "MA", "Boston Garden"],
+                svec!["Buffalo", "NY", "Ralph Wilson Stadium"],
+                svec!["Orlando", "", "Disney World"],
+                svec!["San Francisco", "CA", ""],
+                svec!["New York", "NY", ""],
+            ],
+        );
+        let expected2 = make_rows(
+            false,
+            vec![
+                svec!["Boston", "MA", "Logan Airport"],
+                svec!["Boston", "MA", "Boston Garden"],
+                svec!["Buffalo", "NY", "Ralph Wilson Stadium"],
+                svec!["Orlando", "", "Disney World"],
+                svec!["New York", "NY", ""],
+                svec!["San Francisco", "CA", ""],
+            ],
+        );
+        assert!(got == expected1 || got == expected2);
+    }
+);
+
 joinp_test!(
     joinp_left_semi,
     |wrk: Workdir, mut cmd: process::Command| {
@@ -182,6 +324,16 @@ joinp_test!(
     }
 );
 
+joinp_test_comments!(
+    joinp_left_semi_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.arg("--left-semi");
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(true, vec![svec!["Boston", "MA"], svec!["Buffalo", "NY"]]);
+        assert_eq!(got, expected);
+    }
+);
+
 joinp_test!(
     joinp_left_anti,
     |wrk: Workdir, mut cmd: process::Command| {
@@ -195,6 +347,19 @@ joinp_test!(
     }
 );
 
+joinp_test_comments!(
+    joinp_left_anti_comments,
+    |wrk: Workdir, mut cmd: process::Command| {
+        cmd.arg("--left-anti");
+        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+        let expected = make_rows(
+            true,
+            vec![svec!["New York", "NY"], svec!["San Francisco", "CA"]],
+        );
+        assert_eq!(got, expected);
+    }
+);
+
 #[test]
 fn joinp_cross() {
     let wrk = Workdir::new("join_cross");
@@ -260,6 +425,48 @@ fn joinp_asof_date() {
     assert_eq!(got, expected);
 }
 
+#[test]
+fn joinp_asof_dat_comments() {
+    let wrk = Workdir::new("join_asof_date_comments");
+    wrk.create(
+        "gdp.csv",
+        vec![
+            svec!["#comment", "here"],
+            svec!["date", "gdp"],
+            svec!["2016-01-01", "4164"],
+            svec!["2017-01-01", "4411"],
+            svec!["2018-01-01", "4566"],
+            svec!["2019-01-01", "4696"],
+        ],
+    );
+    wrk.create(
+        "population.csv",
+        vec![
+            svec!["date", "population"],
+            svec!["2016-05-12", "82.19"],
+            svec!["2017-05-12", "82.66"],
+            svec!["#comment", "in the middle"],
+            svec!["2018-05-12", "83.12"],
+            svec!["2019-05-12", "83.52"],
+        ],
+    );
+
+    let mut cmd = wrk.command("joinp");
+    cmd.env("QSV_COMMENT_CHAR", "#");
+    cmd.arg("--asof")
+        .args(["date", "population.csv", "date", "gdp.csv"]);
+
+    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+    let expected = vec![
+        svec!["date", "population", "gdp"],
+        svec!["2016-05-12", "82.19", "4164"],
+        svec!["2017-05-12", "82.66", "4411"],
+        svec!["2018-05-12", "83.12", "4566"],
+        svec!["2019-05-12", "83.52", "4696"],
+    ];
+    assert_eq!(got, expected);
+}
+
 #[test]
 fn joinp_asofby_1() {
     let wrk = Workdir::new("join_asofby_timeseries");
diff --git a/tests/test_sqlp.rs b/tests/test_sqlp.rs
index 5be1415d6c..a5d8c4b76a 100644
--- a/tests/test_sqlp.rs
+++ b/tests/test_sqlp.rs
@@ -723,6 +723,42 @@ fn sqlp_boston311_try_parsedates_format() {
     assert_eq!(got, expected);
 }
 
+#[test]
+fn sqlp_comments() {
+    let wrk = Workdir::new("sqlp_comments");
+    // let test_file = wrk.load_test_file("inputcommenttest.csv");
+    wrk.create(
+        "comments.csv",
+        vec![
+            svec!["# test file to see how comments work", ""],
+            svec!["# this is another comment before the header", ""],
+            svec!["# DATA DICTIONARY", ""],
+            svec!["# column1 - alphabetic; id of the column", ""],
+            svec!["# column2 - numeric; just a number", ""],
+            svec!["column1", "column2"],
+            svec!["a", "1"],
+            svec!["#b", "2"],
+            svec!["c", "3"],
+            svec!["#d - this row is corrupted skip", "extra col2"],
+            svec!["e", "5"],
+        ],
+    );
+
+    let mut cmd = wrk.command("sqlp");
+    cmd.env("QSV_COMMENT_CHAR", "#");
+    cmd.arg("comments.csv")
+        .arg("select column1, column2 from comments order by column2 desc");
+
+    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
+    let expected = vec![
+        svec!["column1", "column2"],
+        svec!["e", "5"],
+        svec!["c", "3"],
+        svec!["a", "1"],
+    ];
+    assert_eq!(got, expected);
+}
+
 #[test]
 fn sqlp_boston311_explain() {
     let wrk = Workdir::new("sqlp_boston311_explain");