Skip to content

Commit

Permalink
excel: minor refactor
Browse files Browse the repository at this point in the history
- set the parallel iterator chunk_size based on the available number of cores, instead of hardcoding it at 10k rows
- simplify range error handling — propagate the calamine error directly instead of wrapping it in a custom message
  • Loading branch information
jqnatividad committed Dec 13, 2023
1 parent 86f9da6 commit fb9350e
Showing 1 changed file with 8 additions and 12 deletions.
20 changes: 8 additions & 12 deletions src/cmd/excel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,6 @@ use crate::{
util, CliError, CliResult,
};

// number of rows to process in each core/thread
const CHUNK_SIZE: usize = 10_000;

#[derive(Deserialize)]
struct Args {
arg_input: String,
Expand Down Expand Up @@ -293,8 +290,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
match result {
Ok(result) => result,
Err(e) => {
let sheet_type = workbook.sheets_metadata()[i].typ;
if sheet_type == SheetType::ChartSheet {
if workbook.sheets_metadata()[i].typ == SheetType::ChartSheet {
// return an empty range for ChartSheet
Range::empty()
} else {
Expand Down Expand Up @@ -495,10 +491,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}

let mut range = if let Some(result) = workbook.worksheet_range_at(sheet_index) {
match result {
Ok(result) => result,
Err(e) => return fail_clierror!("Cannot retrieve range from {sheet}: {e}"),
}
result?
} else {
Range::empty()
};
Expand Down Expand Up @@ -583,10 +576,13 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}

// set RAYON_NUM_THREADS
util::njobs(args.flag_jobs);
let ncpus = util::njobs(args.flag_jobs);

// set chunk_size to number of rows per core/thread
let chunk_size = row_count.div_ceil(ncpus);

let processed_rows: Vec<Vec<csv::StringRecord>> = rows
.par_chunks(CHUNK_SIZE)
.par_chunks(chunk_size)
.map(|chunk| {
let mut record = csv::StringRecord::with_capacity(500, col_count);
let mut trimmed_record = csv::StringRecord::with_capacity(500, col_count);
Expand All @@ -598,7 +594,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
let mut itoa_buffer = itoa::Buffer::new();
let mut formatted_date = String::new();

let mut processed_chunk: Vec<csv::StringRecord> = Vec::with_capacity(CHUNK_SIZE);
let mut processed_chunk: Vec<csv::StringRecord> = Vec::with_capacity(chunk_size);

for row in chunk {
for cell in *row {
Expand Down

0 comments on commit fb9350e

Please sign in to comment.