@@ -47,6 +47,7 @@ stats options:
4747 This requires storing all CSV data in memory.
4848 --median Show the median.
4949 This requires storing all CSV data in memory.
50+ --nullcount Show the number of NULLs.
5051 --nulls Include NULLs in the population size for computing
5152 mean and standard deviation.
5253 -j, --jobs <arg> The number of jobs to run in parallel.
@@ -76,6 +77,7 @@ struct Args {
7677 flag_cardinality : bool ,
7778 flag_median : bool ,
7879 flag_nulls : bool ,
80+ flag_nullcount : bool ,
7981 flag_jobs : usize ,
8082 flag_output : Option < String > ,
8183 flag_no_headers : bool ,
@@ -209,6 +211,7 @@ impl Args {
209211 range : true ,
210212 dist : true ,
211213 cardinality : self . flag_cardinality || self . flag_everything ,
214+ nullcount : self . flag_nullcount || self . flag_everything ,
212215 median : self . flag_median || self . flag_everything ,
213216 mode : self . flag_mode || self . flag_everything ,
214217 } ) ) . take ( record_len) . collect ( )
@@ -223,6 +226,7 @@ impl Args {
223226 if self . flag_median || all { fields. push ( "median" ) ; }
224227 if self . flag_mode || all { fields. push ( "mode" ) ; }
225228 if self . flag_cardinality || all { fields. push ( "cardinality" ) ; }
229+ if self . flag_nullcount || all { fields. push ( "nullcount" ) ; }
226230 csv:: StringRecord :: from ( fields)
227231 }
228232}
@@ -234,6 +238,7 @@ struct WhichStats {
234238 range : bool ,
235239 dist : bool ,
236240 cardinality : bool ,
241+ nullcount : bool ,
237242 median : bool ,
238243 mode : bool ,
239244}
@@ -252,6 +257,7 @@ struct Stats {
252257 online : Option < OnlineStats > ,
253258 mode : Option < Unsorted < Vec < u8 > > > ,
254259 median : Option < Unsorted < f64 > > ,
260+ nullcount : u64 ,
255261 which : WhichStats ,
256262}
257263
@@ -271,6 +277,7 @@ impl Stats {
271277 online : online,
272278 mode : mode,
273279 median : median,
280+ nullcount : 0 ,
274281 which : which,
275282 }
276283 }
@@ -283,6 +290,7 @@ impl Stats {
283290 self . sum . as_mut ( ) . map ( |v| v. add ( t, sample) ) ;
284291 self . minmax . as_mut ( ) . map ( |v| v. add ( t, sample) ) ;
285292 self . mode . as_mut ( ) . map ( |v| v. add ( sample. to_vec ( ) ) ) ;
293+ if sample_type. is_null ( ) { self . nullcount += 1 ; }
286294 match self . typ {
287295 TUnknown => { }
288296 TNull => {
@@ -365,6 +373,9 @@ impl Stats {
365373 }
366374 }
367375 }
376+ if self . which . nullcount {
377+ pieces. push ( self . nullcount . to_string ( ) ) ;
378+ }
368379 csv:: StringRecord :: from ( pieces)
369380 }
370381}
@@ -377,6 +388,7 @@ impl Commute for Stats {
377388 self . online . merge ( other. online ) ;
378389 self . mode . merge ( other. mode ) ;
379390 self . median . merge ( other. median ) ;
391+ self . nullcount += other. nullcount ;
380392 self . which . merge ( other. which ) ;
381393 }
382394}
0 commit comments