Skip to content

Commit

Permalink
apply & applydp: setting regex_replace --replacement to <EMPTY> r…
Browse files Browse the repository at this point in the history
…emoves matches
  • Loading branch information
jqnatividad committed Dec 11, 2023
1 parent d69798a commit facf4f9
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 20 deletions.
38 changes: 27 additions & 11 deletions src/cmd/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ It has 36 supported operations:
* replace: Replace all matches of a pattern (using --comparand)
with a string (using --replacement) (Rust replace)
* regex_replace: Replace all regex matches in --comparand w/ --replacement.
Specify <EMPTY> as --replacement to remove matches.
* titlecase - capitalizes English text using Daring Fireball titlecase style
https://daringfireball.net/2008/05/title_case
* censor: profanity filter. Add additional comma-delimited profanities with --comparand.
Expand Down Expand Up @@ -387,7 +388,7 @@ use crate::{
CliResult,
};

#[derive(Clone, EnumString)]
#[derive(Clone, EnumString, PartialEq)]
#[strum(use_phf)]
#[strum(ascii_case_insensitive)]
#[allow(non_camel_case_types)]
Expand Down Expand Up @@ -477,6 +478,7 @@ static INDIANCOMMA_POLICY: SeparatorPolicy = SeparatorPolicy {
};

// valid subcommands
#[derive(PartialEq)]
enum ApplySubCmd {
Operations,
DateFmt,
Expand Down Expand Up @@ -582,6 +584,20 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
wtr.write_record(&headers)?;
}

// if a regex_replace operation was requested and --replacement is "<EMPTY>"
// (compared case-insensitively), use an empty string so that matches are removed
let flag_replacement = if apply_cmd == ApplySubCmd::Operations
&& ops_vec.contains(&Operations::Regex_Replace)
&& args.flag_replacement.to_lowercase() == "<empty>"
{
String::new()
} else {
args.flag_replacement
};
let flag_comparand = args.flag_comparand;
let flag_formatstr = args.flag_formatstr;
let flag_new_column = args.flag_new_column;

// prep progress bar
let show_progress =
(args.flag_progressbar || util::get_envvar_flag("QSV_PROGRESSBAR")) && !rconfig.is_stdin();
Expand Down Expand Up @@ -645,11 +661,11 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
apply_operations(
&ops_vec,
&mut cell,
&args.flag_comparand,
&args.flag_replacement,
&args.flag_formatstr,
&flag_comparand,
&flag_replacement,
&flag_formatstr,
);
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -661,9 +677,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
for col_index in &*sel {
record[*col_index].clone_into(&mut cell);
if cell.trim().is_empty() {
cell = args.flag_replacement.clone();
cell = flag_replacement.clone();
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -678,7 +694,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let parsed_date = parse_with_preference(&cell, prefer_dmy);
if let Ok(format_date) = parsed_date {
let formatted_date =
format_date.format(&args.flag_formatstr).to_string();
format_date.format(&flag_formatstr).to_string();
if !args.flag_keep_zero_time
&& formatted_date.ends_with("T00:00:00+00:00")
{
Expand All @@ -688,7 +704,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -708,7 +724,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
cell = formatted.to_string();
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, column_index, &cell);
Expand Down Expand Up @@ -750,7 +766,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
};

if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&result);
} else {
record = replace_column_value(&record, column_index, &result);
Expand Down
34 changes: 25 additions & 9 deletions src/cmd/applydp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ It has 18 supported operations:
* replace: Replace all matches of a pattern (using --comparand)
with a string (using --replacement) (Rust replace)
* regex_replace: Replace all regex matches in --comparand w/ --replacement.
Specify <EMPTY> as --replacement to remove matches.
* round: Round numeric values to the specified number of decimal places using
Midpoint Nearest Even Rounding Strategy AKA "Bankers Rounding."
Specify the number of decimal places with --formatstr (default: 3).
Expand Down Expand Up @@ -264,7 +265,7 @@ use crate::{
CliResult,
};

#[derive(Clone, EnumString)]
#[derive(Clone, EnumString, PartialEq)]
#[strum(use_phf)]
#[strum(ascii_case_insensitive)]
#[allow(non_camel_case_types)]
Expand Down Expand Up @@ -382,6 +383,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
String::new()
};

#[derive(PartialEq)]
enum ApplydpSubCmd {
Operations,
DateFmt,
Expand Down Expand Up @@ -420,6 +422,20 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
wtr.write_record(&headers)?;
}

// if a regex_replace operation was requested and --replacement is "<EMPTY>"
// (compared case-insensitively), use an empty string so that matches are removed
let flag_replacement = if applydp_cmd == ApplydpSubCmd::Operations
&& ops_vec.contains(&Operations::Regex_Replace)
&& args.flag_replacement.to_lowercase() == "<empty>"
{
String::new()
} else {
args.flag_replacement
};
let flag_comparand = args.flag_comparand;
let flag_formatstr = args.flag_formatstr;
let flag_new_column = args.flag_new_column;

let prefer_dmy = args.flag_prefer_dmy || rconfig.get_dmy_preference();

// amortize memory allocation by reusing record
Expand Down Expand Up @@ -472,10 +488,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
applydp_operations(
&ops_vec,
&mut cell,
&args.flag_comparand,
&args.flag_replacement,
&flag_comparand,
&flag_replacement,
);
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -487,9 +503,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
for col_index in sel.iter() {
record[*col_index].clone_into(&mut cell);
if cell.trim().is_empty() {
cell = args.flag_replacement.clone();
cell = flag_replacement.clone();
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -504,7 +520,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let parsed_date = parse_with_preference(&cell, prefer_dmy);
if let Ok(format_date) = parsed_date {
let formatted_date =
format_date.format(&args.flag_formatstr).to_string();
format_date.format(&flag_formatstr).to_string();
if !args.flag_keep_zero_time
&& formatted_date.ends_with("T00:00:00+00:00")
{
Expand All @@ -514,7 +530,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, *col_index, &cell);
Expand All @@ -534,7 +550,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
cell = formatted.to_string();
}
}
if args.flag_new_column.is_some() {
if flag_new_column.is_some() {
record.push_field(&cell);
} else {
record = replace_column_value(&record, column_index, &cell);
Expand Down

0 comments on commit facf4f9

Please sign in to comment.