Skip to content

Commit 1debdbd

Browse files
authored
Merge pull request #1 from scpike/master
Merge PR for nullcount in stats
2 parents 3de6c04 + 89639fb commit 1debdbd

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/stats.rs

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ stats options:
4747
This requires storing all CSV data in memory.
4848
--median Show the median.
4949
This requires storing all CSV data in memory.
50+
--nullcount Show the number of NULLs.
5051
--nulls Include NULLs in the population size for computing
5152
mean and standard deviation.
5253
-j, --jobs <arg> The number of jobs to run in parallel.
@@ -76,6 +77,7 @@ struct Args {
7677
flag_cardinality: bool,
7778
flag_median: bool,
7879
flag_nulls: bool,
80+
flag_nullcount: bool,
7981
flag_jobs: usize,
8082
flag_output: Option<String>,
8183
flag_no_headers: bool,
@@ -209,6 +211,7 @@ impl Args {
209211
range: true,
210212
dist: true,
211213
cardinality: self.flag_cardinality || self.flag_everything,
214+
nullcount: self.flag_nullcount || self.flag_everything,
212215
median: self.flag_median || self.flag_everything,
213216
mode: self.flag_mode || self.flag_everything,
214217
})).take(record_len).collect()
@@ -223,6 +226,7 @@ impl Args {
223226
if self.flag_median || all { fields.push("median"); }
224227
if self.flag_mode || all { fields.push("mode"); }
225228
if self.flag_cardinality || all { fields.push("cardinality"); }
229+
if self.flag_nullcount || all { fields.push("nullcount"); }
226230
csv::StringRecord::from(fields)
227231
}
228232
}
@@ -234,6 +238,7 @@ struct WhichStats {
234238
range: bool,
235239
dist: bool,
236240
cardinality: bool,
241+
nullcount: bool,
237242
median: bool,
238243
mode: bool,
239244
}
@@ -252,6 +257,7 @@ struct Stats {
252257
online: Option<OnlineStats>,
253258
mode: Option<Unsorted<Vec<u8>>>,
254259
median: Option<Unsorted<f64>>,
260+
nullcount: u64,
255261
which: WhichStats,
256262
}
257263

@@ -271,6 +277,7 @@ impl Stats {
271277
online: online,
272278
mode: mode,
273279
median: median,
280+
nullcount: 0,
274281
which: which,
275282
}
276283
}
@@ -283,6 +290,7 @@ impl Stats {
283290
self.sum.as_mut().map(|v| v.add(t, sample));
284291
self.minmax.as_mut().map(|v| v.add(t, sample));
285292
self.mode.as_mut().map(|v| v.add(sample.to_vec()));
293+
if sample_type.is_null() { self.nullcount += 1; }
286294
match self.typ {
287295
TUnknown => {}
288296
TNull => {
@@ -365,6 +373,9 @@ impl Stats {
365373
}
366374
}
367375
}
376+
if self.which.nullcount {
377+
pieces.push(self.nullcount.to_string());
378+
}
368379
csv::StringRecord::from(pieces)
369380
}
370381
}
@@ -377,6 +388,7 @@ impl Commute for Stats {
377388
self.online.merge(other.online);
378389
self.mode.merge(other.mode);
379390
self.median.merge(other.median);
391+
self.nullcount += other.nullcount;
380392
self.which.merge(other.which);
381393
}
382394
}

tests/test_stats.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ fn setup<S>(name: S, rows: &[&str], headers: bool,
9090

9191
fn get_field_value(wrk: &Workdir, cmd: &mut process::Command, field: &str)
9292
-> String {
93+
if field == "nullcount" { cmd.arg("--nullcount"); }
9394
if field == "median" { cmd.arg("--median"); }
9495
if field == "cardinality" { cmd.arg("--cardinality"); }
9596
if field == "mode" { cmd.arg("--mode"); }
@@ -181,6 +182,11 @@ stats_tests!(stats_median_even_null, "median",
181182
&["", "1", "2", "3", "4"], "2.5");
182183
stats_tests!(stats_median_mix, "median", &["1", "2.5", "3"], "2.5");
183184

185+
stats_tests!(stats_nullcount, "nullcount", &["", "1", "2"], "1");
186+
stats_tests!(stats_nullcount_none, "nullcount", &["a", "1", "2"], "0");
187+
stats_tests!(stats_nullcount_spacenotnull, "nullcount", &[" ", "1", "2"], "0");
188+
stats_tests!(stats_nullcount_all, "nullcount", &["", "", ""], "3");
189+
184190
mod stats_infer_nothing {
185191
// Only test CSV data with headers.
186192
// Empty CSV data with no headers won't produce any statistical analysis.

0 commit comments

Comments
 (0)