Skip to content

Commit f3d3861

Browse files
committed
split: minor parallel_split optimizations; wordsmith output summary
parallel_split(): remove unnecessary var args and cloning of self; amortize the `write_row` ByteRecord allocation for the hot write loop. The output summary now accounts for a singular chunk.
1 parent 212140d commit f3d3861

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

src/cmd/split.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ impl Args {
147147

148148
if !self.flag_quiet {
149149
eprintln!(
150-
"Wrote {} chunks to '{}'. Rows/chunk: {} Num records: {}",
150+
"Wrote {} chunk/s to '{}'. Rows/chunk: {} Num records: {}",
151151
nchunks + 1,
152152
Path::new(&self.arg_outdir).canonicalize()?.display(),
153153
chunk_size,
@@ -159,16 +159,15 @@ impl Args {
159159
}
160160

161161
fn parallel_split(&self, idx: &Indexed<fs::File, fs::File>) -> CliResult<()> {
162-
let args = self.clone();
163162
let chunk_size;
164163
let idx_count = idx.count();
165164

166165
#[allow(clippy::cast_precision_loss)]
167-
let nchunks = if let Some(flag_chunks) = args.flag_chunks {
166+
let nchunks = if let Some(flag_chunks) = self.flag_chunks {
168167
chunk_size = (idx_count as f64 / flag_chunks as f64).ceil() as usize;
169168
flag_chunks
170169
} else {
171-
chunk_size = args.flag_size;
170+
chunk_size = self.flag_size;
172171
util::num_of_chunks(idx_count as usize, self.flag_size)
173172
};
174173
if nchunks == 1 {
@@ -177,38 +176,39 @@ impl Args {
177176
return self.sequential_split();
178177
}
179178

180-
util::njobs(args.flag_jobs);
179+
util::njobs(self.flag_jobs);
181180

182181
// safety: we cannot use ? here because we're in a closure
183182
(0..nchunks).into_par_iter().for_each(|i| {
184-
let conf = args.rconfig();
183+
let conf = self.rconfig();
185184
// safety: safe to unwrap because we know the file is indexed
186185
let mut idx = conf.indexed().unwrap().unwrap();
187186
// safety: the only way this can fail is if the file first row of the chunk
188187
// is not a valid CSV record, which is impossible because we're reading
189188
// from a file with a valid index
190189
let headers = idx.byte_headers().unwrap();
191190

192-
let mut wtr = args
191+
let mut wtr = self
193192
// safety: the only way this can fail is if we cannot create a file
194-
.new_writer(headers, i * chunk_size, args.flag_pad)
193+
.new_writer(headers, i * chunk_size, self.flag_pad)
195194
.unwrap();
196195

197196
// safety: we know that there is more than one chunk, so we can safely
198197
// seek to the start of the chunk
199198
idx.seek((i * chunk_size) as u64).unwrap();
199+
let mut write_row;
200200
for row in idx.byte_records().take(chunk_size) {
201-
let row = row.unwrap();
202-
wtr.write_byte_record(&row).unwrap();
201+
write_row = row.unwrap();
202+
wtr.write_byte_record(&write_row).unwrap();
203203
}
204204
// safety: safe to unwrap because we know the writer is a file
205205
// the only way this can fail is if we cannot write to the file
206206
wtr.flush().unwrap();
207207
});
208208

209-
if !args.flag_quiet {
209+
if !self.flag_quiet {
210210
eprintln!(
211-
"Wrote {} chunks to '{}'. Rows/chunk: {} Num records: {}",
211+
"Wrote {} chunk/s to '{}'. Rows/chunk: {} Num records: {}",
212212
nchunks,
213213
Path::new(&self.arg_outdir).canonicalize()?.display(),
214214
chunk_size,

0 commit comments

Comments
 (0)