Skip to content

Commit

Permalink
Merge pull request #1470 from jqnatividad/1469-apply-regex_replace-empty
Browse files Browse the repository at this point in the history
`apply` & `applydp`: `operations regex_replace` now supports an empty `--replacement` via the special value "<EMPTY>"
  • Loading branch information
jqnatividad authored Dec 11, 2023
2 parents d69798a + c67c23d commit caf2c03
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 20 deletions.
38 changes: 27 additions & 11 deletions src/cmd/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ It has 36 supported operations:
* replace: Replace all matches of a pattern (using --comparand)
with a string (using --replacement) (Rust replace)
* regex_replace: Replace all regex matches in --comparand w/ --replacement.
Specify <EMPTY> as --replacement to remove matches.
* titlecase - capitalizes English text using Daring Fireball titlecase style
https://daringfireball.net/2008/05/title_case
* censor: profanity filter. Add additional comma-delimited profanities with --comparand.
Expand Down Expand Up @@ -387,7 +388,7 @@ use crate::{
CliResult,
};

#[derive(Clone, EnumString)]
#[derive(Clone, EnumString, PartialEq)]
#[strum(use_phf)]
#[strum(ascii_case_insensitive)]
#[allow(non_camel_case_types)]
Expand Down Expand Up @@ -477,6 +478,7 @@ static INDIANCOMMA_POLICY: SeparatorPolicy = SeparatorPolicy {
};

// valid subcommands
#[derive(PartialEq)]
enum ApplySubCmd {
Operations,
DateFmt,
Expand Down Expand Up @@ -582,6 +584,20 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
wtr.write_record(&headers)?;
}

// For the `operations` subcommand with a regex_replace operation: if --replacement is
// "<empty>" (compared case-insensitively), substitute an empty string so matches are removed.
let flag_replacement = if apply_cmd == ApplySubCmd::Operations
&& ops_vec.contains(&Operations::Regex_Replace)
&& args.flag_replacement.to_lowercase() == "<empty>"
{
String::new()
} else {
args.flag_replacement
};
let flag_comparand = args.flag_comparand;
let flag_formatstr = args.flag_formatstr;
let flag_new_column = args.flag_new_column;

// prep progress bar
let show_progress =
(args.flag_progressbar || util::get_envvar_flag("QSV_PROGRESSBAR")) && !rconfig.is_stdin();
Expand Down Expand Up @@ -645,11 +661,11 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
apply_operations(
&ops_vec,
&mut cell,
&args.flag_comparand,
&args.flag_replacement,
&args.flag_formatstr,
&flag_comparand,
&flag_replacement,
&flag_formatstr,
);
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -661,9 +677,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
for col_index in &*sel {
record[*col_index].clone_into(&mut cell);
if cell.trim().is_empty() {
cell = args.flag_replacement.clone();
cell = flag_replacement.clone();
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -678,7 +694,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let parsed_date = parse_with_preference(&cell, prefer_dmy);
if let Ok(format_date) = parsed_date {
let formatted_date =
format_date.format(&args.flag_formatstr).to_string();
format_date.format(&flag_formatstr).to_string();
if !args.flag_keep_zero_time
&& formatted_date.ends_with("T00:00:00+00:00")
{
Expand All @@ -688,7 +704,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -708,7 +724,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
cell = formatted.to_string();
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, column_index, &cell);
Expand Down Expand Up @@ -750,7 +766,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
};

if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&result);
} else {
record = replace_column_value(&record, column_index, &result);
Expand Down
34 changes: 25 additions & 9 deletions src/cmd/applydp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ It has 18 supported operations:
* replace: Replace all matches of a pattern (using --comparand)
with a string (using --replacement) (Rust replace)
* regex_replace: Replace all regex matches in --comparand w/ --replacement.
Specify <EMPTY> as --replacement to remove matches.
* round: Round numeric values to the specified number of decimal places using
Midpoint Nearest Even Rounding Strategy AKA "Bankers Rounding."
Specify the number of decimal places with --formatstr (default: 3).
Expand Down Expand Up @@ -264,7 +265,7 @@ use crate::{
CliResult,
};

#[derive(Clone, EnumString)]
#[derive(Clone, EnumString, PartialEq)]
#[strum(use_phf)]
#[strum(ascii_case_insensitive)]
#[allow(non_camel_case_types)]
Expand Down Expand Up @@ -382,6 +383,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
String::new()
};

#[derive(PartialEq)]
enum ApplydpSubCmd {
Operations,
DateFmt,
Expand Down Expand Up @@ -420,6 +422,20 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
wtr.write_record(&headers)?;
}

// For the `operations` subcommand with a regex_replace operation: if --replacement is
// "<empty>" (compared case-insensitively), substitute an empty string so matches are removed.
let flag_replacement = if applydp_cmd == ApplydpSubCmd::Operations
&& ops_vec.contains(&Operations::Regex_Replace)
&& args.flag_replacement.to_lowercase() == "<empty>"
{
String::new()
} else {
args.flag_replacement
};
let flag_comparand = args.flag_comparand;
let flag_formatstr = args.flag_formatstr;
let flag_new_column = args.flag_new_column;

let prefer_dmy = args.flag_prefer_dmy || rconfig.get_dmy_preference();

// amortize memory allocation by reusing record
Expand Down Expand Up @@ -472,10 +488,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
applydp_operations(
&ops_vec,
&mut cell,
&args.flag_comparand,
&args.flag_replacement,
&flag_comparand,
&flag_replacement,
);
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -487,9 +503,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
for col_index in sel.iter() {
record[*col_index].clone_into(&mut cell);
if cell.trim().is_empty() {
cell = args.flag_replacement.clone();
cell = flag_replacement.clone();
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -504,7 +520,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let parsed_date = parse_with_preference(&cell, prefer_dmy);
if let Ok(format_date) = parsed_date {
let formatted_date =
format_date.format(&args.flag_formatstr).to_string();
format_date.format(&flag_formatstr).to_string();
if !args.flag_keep_zero_time
&& formatted_date.ends_with("T00:00:00+00:00")
{
Expand All @@ -514,7 +530,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -534,7 +550,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
cell = formatted.to_string();
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, column_index, &cell);
Expand Down
32 changes: 32 additions & 0 deletions tests/test_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,38 @@ fn apply_dynfmt_issue1458() {
assert_eq!(got, expected);
}

/// Regression test for issue 1469: `apply operations regex_replace` with the
/// `<EMPTY>` replacement sentinel (given here in mixed case) removes every
/// match of the `--comparand` regex from the selected columns.
#[test]
fn apply_regex_replace_issue1469() {
    let workdir = Workdir::new("apply_regex_replace_issue1469");

    // Input rows with parenthesized fragments at the start, middle and end of cells.
    let input_rows = vec![
        svec!["col1", "col2", "col3",],
        svec!["(Adam)", "B", "Case(hello)Name "],
        svec!["Derek(foo)", "(bar)E", "Fos(this needs to go)ter"],
        svec!["Gordon", "H", "(cmon)Irvin"],
        svec!["Jack(ie)", "K", "Lynch(-Chan)"],
    ];
    workdir.create("data.csv", input_rows);

    // The same rows once every parenthesized group has been stripped out.
    let expected = vec![
        svec!["col1", "col2", "col3"],
        svec!["", "B", "CaseName "],
        svec!["Derek", "E", "Foster"],
        svec!["Gordon", "H", "Irvin"],
        svec!["Jack", "K", "Lynch"],
    ];

    let mut command = workdir.command("apply");
    command
        .arg("operations")
        .arg("regex_replace")
        .arg("col1,col2,col3")
        .args(["--comparand", r"\([^)]+\)"])
        // Mixed case on purpose: the sentinel is matched case-insensitively.
        .args(["--replacement", "<EmpTY>"])
        .arg("data.csv");

    let got: Vec<Vec<String>> = workdir.read_stdout(&mut command);
    assert_eq!(got, expected);
}

#[test]
fn apply_calcconv() {
let wrk = Workdir::new("apply");
Expand Down
32 changes: 32 additions & 0 deletions tests/test_applydp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,38 @@ fn applydp_ops_regex_replace() {
assert_eq!(got, expected);
}

/// Regression test for issue 1469: `applydp operations regex_replace` with the
/// `<EMPTY>` replacement sentinel removes every match of the `--comparand`
/// regex from the selected columns.
#[test]
fn applydp_regex_replace_issue1469() {
    let workdir = Workdir::new("applydp_regex_replace_issue1469");

    // Input rows with parenthesized fragments at the start, middle and end of cells.
    let input_rows = vec![
        svec!["col1", "col2", "col3",],
        svec!["(Adam)", "B", "Case(hello)Name "],
        svec!["Derek(foo)", "(bar)E", "Fos(this needs to go)ter"],
        svec!["Gordon", "H", "(cmon)Irvin"],
        svec!["Jack(ie)", "K", "Lynch(-Chan)"],
    ];
    workdir.create("data.csv", input_rows);

    // The same rows once every parenthesized group has been stripped out.
    let expected = vec![
        svec!["col1", "col2", "col3"],
        svec!["", "B", "CaseName "],
        svec!["Derek", "E", "Foster"],
        svec!["Gordon", "H", "Irvin"],
        svec!["Jack", "K", "Lynch"],
    ];

    let mut command = workdir.command("applydp");
    command
        .arg("operations")
        .arg("regex_replace")
        .arg("col1,col2,col3")
        .args(["--comparand", r"\([^)]+\)"])
        .args(["--replacement", "<EMPTY>"])
        .arg("data.csv");

    let got: Vec<Vec<String>> = workdir.read_stdout(&mut command);
    assert_eq!(got, expected);
}

#[test]
fn applydp_ops_regex_replace_validation_error() {
let wrk = Workdir::new("applydp");
Expand Down

0 comments on commit caf2c03

Please sign in to comment.