Skip to content

Commit

Permalink
sqlp: refactor automatic delimiter setting for TSV and TAB files
Browse files Browse the repository at this point in the history
- now autosets delimiting to tab when an input file has a TSV or TAB extension
- streamlined how output TSV/TAB files are processed
  • Loading branch information
jqnatividad committed Jan 28, 2024
1 parent c3c8732 commit 201dd15
Showing 1 changed file with 24 additions and 8 deletions.
32 changes: 24 additions & 8 deletions src/cmd/sqlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ sqlp options:
Only use this when you get query errors.
--truncate-ragged-lines Truncate ragged lines when parsing CSVs. If set, rows with more
columns than the header will be truncated. If not set, the query
will fail.
will fail. Use this only when you get an error about ragged lines.
--ignore-errors Ignore errors when parsing CSVs. If set, rows with errors
will be skipped. If not set, the query will fail.
Only use this when debugging queries, as polars does batched
Expand Down Expand Up @@ -280,13 +280,12 @@ impl OutputMode {
Some(x) => {
let path = Path::new(&x);
// if the output file ends with ".tsv" or ".tab", we use tab as the delimiter
let output_extension = path
.extension()
.map_or("", |ext| ext.to_str().unwrap_or_default());
delim = if *self == OutputMode::Csv
&& path
.extension()
.map_or(false, |ext| ext.eq_ignore_ascii_case("tsv"))
|| path
.extension()
.map_or(false, |ext| ext.eq_ignore_ascii_case("tab"))
&& output_extension.eq_ignore_ascii_case("tsv")
|| output_extension.eq_ignore_ascii_case("tab")
{
b'\t'
} else {
Expand Down Expand Up @@ -421,13 +420,14 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let output_mode: OutputMode = args.flag_format.parse().unwrap_or(OutputMode::Csv);
let no_output: OutputMode = OutputMode::None;

let delim = if let Some(delimiter) = args.flag_delimiter {
let mut delim = if let Some(delimiter) = args.flag_delimiter {
delimiter.as_byte()
} else if let Ok(delim) = env::var("QSV_DEFAULT_DELIMITER") {
Delimiter::decode_delimiter(&delim)?.as_byte()
} else {
b','
};
let orig_delim = delim;

let comment_char = if let Ok(comment_char) = env::var("QSV_COMMENT_CHAR") {
Some(comment_char)
Expand Down Expand Up @@ -477,6 +477,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let mut table_aliases = HashMap::with_capacity(args.arg_input.len());
let mut lossy_table_name = Cow::default();
let mut table_name;
let mut inputfile_extension;

for (idx, table) in args.arg_input.iter().enumerate() {
// as we are using the table name as alias, we need to make sure that the table name is a
Expand All @@ -488,6 +489,21 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
lossy_table_name = table.to_string_lossy();
&lossy_table_name
});

// if the file has a TSV or TAB extension, we automatically use tab as the delimiter
inputfile_extension = Path::new(table)
.extension()
.and_then(std::ffi::OsStr::to_str)
.unwrap_or_default();

if inputfile_extension.eq_ignore_ascii_case("tsv")
|| inputfile_extension.eq_ignore_ascii_case("tab")
{
delim = b'\t';
} else {
delim = orig_delim;
}

table_aliases.insert(table_name.to_string(), format!("_t_{}", idx + 1));

if debuglog_flag {
Expand Down

0 comments on commit 201dd15

Please sign in to comment.