Skip to content

Commit

Permalink
Merge pull request #2298 from jqnatividad/slice-invert-option
Browse files Browse the repository at this point in the history
`slice`: add `--invert` option
  • Loading branch information
jqnatividad authored Nov 19, 2024
2 parents 7e4a599 + 229eef1 commit bb9bd8f
Show file tree
Hide file tree
Showing 2 changed files with 224 additions and 16 deletions.
87 changes: 71 additions & 16 deletions src/cmd/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ slice options:
The value is the field value. The output is a
JSON array. If --no-headers is set, then
the keys are the column indices (zero-based).
--invert slice all records EXCEPT those in the specified range.
Examples:
# Slice from the 3rd record to the end
Expand All @@ -46,6 +47,9 @@ Examples:
# Slice the last 10 records
qsv slice -s -10 data.csv
# Get everything except the last 10 records
qsv slice -s -10 --invert data.csv
# Slice the first three records of the last 10 records
qsv slice -s -10 -l 3 data.csv
Expand All @@ -60,6 +64,9 @@ Examples:
qsv slice -s 9 -e 19 --json data.csv
qsv slice -s 9 -l 10 --json data.csv
# Slice records 1 to 9 and 21 to the end as JSON
qsv slice -s 9 -l 10 --invert --json data.csv
Common options:
-h, --help Display this message
-o, --output <file> Write output to <file> instead of stdout.
Expand Down Expand Up @@ -92,6 +99,7 @@ struct Args {
flag_output: Option<String>,
flag_no_headers: bool,
flag_delimiter: Option<Delimiter>,
flag_invert: bool,
}

pub fn run(argv: &[&str]) -> CliResult<()> {
Expand All @@ -111,11 +119,18 @@ impl Args {
let (start, end) = self.range()?;
if self.flag_json {
let headers = rdr.byte_headers()?.clone();
let records = rdr
.byte_records()
.skip(start)
.take(end - start)
.map(|r| r.unwrap());
let records = rdr.byte_records().enumerate().filter_map(move |(i, r)| {
let should_include = if self.flag_invert {
i < start || i >= end
} else {
i >= start && i < end
};
if should_include {
Some(r.unwrap())
} else {
None
}
});
util::write_json(
self.flag_output.as_ref(),
self.flag_no_headers,
Expand All @@ -125,36 +140,76 @@ impl Args {
} else {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut rdr, &mut wtr)?;
for r in rdr.byte_records().skip(start).take(end - start) {
wtr.write_byte_record(&r?)?;

for (i, r) in rdr.byte_records().enumerate() {
if self.flag_invert == (i < start || i >= end) {
wtr.write_byte_record(&r?)?;
}
}
Ok(wtr.flush()?)
}
}

fn with_index(&self, mut indexed_file: Indexed<fs::File, fs::File>) -> CliResult<()> {
let (start, end) = self.range()?;
if end - start == 0 {
if end - start == 0 && !self.flag_invert {
return Ok(());
}
indexed_file.seek(start as u64)?;

if self.flag_json {
let headers = indexed_file.byte_headers()?.clone();
let records = indexed_file
.byte_records()
.take(end - start)
.map(|r| r.unwrap());
let total_rows = util::count_rows(&self.rconfig())?;
let records = if self.flag_invert {
let mut records: Vec<csv::ByteRecord> =
Vec::with_capacity(start + (total_rows as usize - end));
// Get records before start
indexed_file.seek(0)?;
for r in indexed_file.byte_records().take(start) {
records.push(r.unwrap());
}

// Get records after end
indexed_file.seek(end as u64)?;
for r in indexed_file.byte_records().take(total_rows as usize - end) {
records.push(r.unwrap());
}
records
} else {
indexed_file.seek(start as u64)?;
indexed_file
.byte_records()
.take(end - start)
.map(|r| r.unwrap())
.collect::<Vec<_>>()
};
util::write_json(
self.flag_output.as_ref(),
self.flag_no_headers,
&headers,
records,
records.into_iter(),
)
} else {
let mut wtr = self.wconfig().writer()?;
self.rconfig().write_headers(&mut *indexed_file, &mut wtr)?;
for r in indexed_file.byte_records().take(end - start) {
wtr.write_byte_record(&r?)?;

let total_rows = util::count_rows(&self.rconfig())? as usize;
if self.flag_invert {
// Get records before start
indexed_file.seek(0)?;
for r in indexed_file.byte_records().take(start) {
wtr.write_byte_record(&r?)?;
}

// Get records after end
indexed_file.seek(end as u64)?;
for r in indexed_file.byte_records().take(total_rows - end) {
wtr.write_byte_record(&r?)?;
}
} else {
indexed_file.seek(start as u64)?;
for r in indexed_file.byte_records().take(end - start) {
wtr.write_byte_record(&r?)?;
}
}
Ok(wtr.flush()?)
}
Expand Down
153 changes: 153 additions & 0 deletions tests/test_slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,156 @@ fn slice_neg_index_withindex() {
fn slice_neg_index_no_headers_withindex() {
test_index("slice_neg_index_no_headers_withindex", -2, "d", false, true);
}

/// Runs `slice --invert` against the shared fixture and checks the output.
///
/// * `start` - value passed to `--start`, if any.
/// * `end` - value passed to `--end`; when `as_len` is set it is converted to a
///   `--len` value instead (`end - start` for a non-negative start, `end`
///   unchanged for a negative or missing start).
/// * `expected` - the single-column field values expected in the output, in order.
/// * `headers` - whether the fixture has a header row; when `false`,
///   `--no-headers` is passed.
/// * `use_index` - forwarded to `setup`.
/// * `json_output` - when set, output is written to a file with `--json` and
///   compared as serialized JSON; otherwise stdout is compared as CSV rows.
fn test_slice_invert(
    name: &str,
    start: Option<isize>,
    end: Option<usize>,
    expected: &[&str],
    headers: bool,
    use_index: bool,
    as_len: bool,
    json_output: bool,
) {
    let (wrk, mut cmd) = setup(name, headers, use_index);

    if let Some(first) = start {
        cmd.arg("--start").arg(&first.to_string());
    }

    match (end, as_len) {
        (Some(stop), true) => {
            // Only a non-negative start is subtracted from `end` to obtain the
            // length; a negative (or absent) start leaves `end` as the length.
            let offset = start
                .filter(|first| *first >= 0)
                .map_or(0, isize::unsigned_abs);
            cmd.arg("--len").arg(&(stop - offset).to_string());
        },
        (Some(stop), false) => {
            cmd.arg("--end").arg(&stop.to_string());
        },
        (None, _) => {},
    }

    if !headers {
        cmd.arg("--no-headers");
    }
    cmd.arg("--invert");

    if !json_output {
        // CSV path: compare the rows read from stdout, header row included.
        let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

        let mut want: Vec<Vec<String>> = Vec::with_capacity(expected.len() + 1);
        if headers {
            want.push(svec!["header"]);
        }
        want.extend(expected.iter().map(|&cell| vec![cell.to_owned()]));

        assert_eq!(got, want);
        return;
    }

    // JSON path: write to a file, then compare the re-serialized document.
    let output_file = wrk.path("output.json").to_string_lossy().to_string();
    cmd.arg("--json").args(&["--output", &output_file]);
    wrk.assert_success(&mut cmd);

    let raw = wrk.read_to_string(&output_file);
    let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();
    let got = parsed.to_string();

    // With headers the key is the column name; without, the column index.
    let key = if headers { "header" } else { "0" };
    let objects: Vec<String> = expected
        .iter()
        .map(|&value| format!("{{\"{}\":\"{}\"}}", key, value))
        .collect();
    let expected = format!("[{}]", objects.join(","));

    assert_eq!(got, expected);
}

/// Inverting the range `--start 0 --end 1` is expected to yield every record
/// after the first one.
#[test]
fn slice_invert_simple() {
    let remaining = ["b", "c", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, false, false);

    test_slice_invert(
        "slice_invert_simple",
        Some(0),
        Some(1),
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Inverting the range `--start 1 --end 3` is expected to keep the records on
/// both sides of the excluded middle section.
#[test]
fn slice_invert_middle() {
    let remaining = ["a", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, false, false);

    test_slice_invert(
        "slice_invert_middle",
        Some(1),
        Some(3),
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Same range as the middle-slice case (`--start 1 --end 3`), but with
/// `use_index` enabled in the test setup.
#[test]
fn slice_invert_with_index() {
    let remaining = ["a", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, true, false, false);

    test_slice_invert(
        "slice_invert_with_index",
        Some(1),
        Some(3),
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Inverting the range `--start 1 --end 3` with JSON output requested; the
/// expected values are compared against the serialized JSON array.
#[test]
fn slice_invert_json() {
    let remaining = ["a", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, false, true);

    test_slice_invert(
        "slice_invert_json",
        Some(1),
        Some(3),
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Inverting a negative start (`--start -2`, no end) is expected to yield
/// everything except the last two records.
#[test]
fn slice_invert_negative() {
    let remaining = ["a", "b", "c"];
    let (headers, use_index, as_len, json_output) = (true, false, false, false);

    test_slice_invert(
        "slice_invert_negative",
        Some(-2),
        None,
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

/// Inverting a range given as start plus length: start 1 and end 2 are turned
/// into `--start 1 --len 1` by the helper, so only one record is excluded.
#[test]
fn slice_invert_with_len() {
    let remaining = ["a", "c", "d", "e"];
    let (headers, use_index, as_len, json_output) = (true, false, true, false);

    test_slice_invert(
        "slice_invert_with_len",
        Some(1),
        Some(2),
        &remaining,
        headers,
        use_index,
        as_len,
        json_output,
    );
}

0 comments on commit bb9bd8f

Please sign in to comment.