@@ -214,7 +214,7 @@ fn hash_dictionary<K: ArrowDictionaryKeyType>(
214214 // redundant hashing for large dictionary elements (e.g. strings)
215215 let dict_values = array. values ( ) ;
216216 let mut dict_hashes = vec ! [ 0 ; dict_values. len( ) ] ;
217- create_hashes_from_arrays ( & [ dict_values. as_ref ( ) ] , random_state, & mut dict_hashes) ?;
217+ create_hashes ( [ dict_values] , random_state, & mut dict_hashes) ?;
218218
219219 // combine hash for each index in values
220220 for ( hash, key) in hashes_buffer. iter_mut ( ) . zip ( array. keys ( ) . iter ( ) ) {
@@ -308,7 +308,7 @@ where
308308 let offsets = array. value_offsets ( ) ;
309309 let nulls = array. nulls ( ) ;
310310 let mut values_hashes = vec ! [ 0u64 ; values. len( ) ] ;
311- create_hashes_from_arrays ( & [ values. as_ref ( ) ] , random_state, & mut values_hashes) ?;
311+ create_hashes ( [ values] , random_state, & mut values_hashes) ?;
312312 if let Some ( nulls) = nulls {
313313 for ( i, ( start, stop) ) in offsets. iter ( ) . zip ( offsets. iter ( ) . skip ( 1 ) ) . enumerate ( ) {
314314 if nulls. is_valid ( i) {
@@ -339,7 +339,7 @@ fn hash_fixed_list_array(
339339 let value_length = array. value_length ( ) as usize ;
340340 let nulls = array. nulls ( ) ;
341341 let mut values_hashes = vec ! [ 0u64 ; values. len( ) ] ;
342- create_hashes_from_arrays ( & [ values. as_ref ( ) ] , random_state, & mut values_hashes) ?;
342+ create_hashes ( [ values] , random_state, & mut values_hashes) ?;
343343 if let Some ( nulls) = nulls {
344344 for i in 0 ..array. len ( ) {
345345 if nulls. is_valid ( i) {
@@ -434,41 +434,51 @@ fn hash_single_array(
434434 Ok ( ( ) )
435435}
436436
437- /// Creates hash values for every row, based on the values in the columns.
438- ///
439- /// The number of rows to hash is determined by `hashes_buffer.len()`.
440- /// `hashes_buffer` should be pre-sized appropriately
441- ///
442- /// This is the same as [`create_hashes`] but accepts `&dyn Array`s instead of requiring
443- /// `ArrayRef`s.
444- pub fn create_hashes_from_arrays < ' a > (
445- arrays : & [ & dyn Array ] ,
446- random_state : & RandomState ,
447- hashes_buffer : & ' a mut Vec < u64 > ,
448- ) -> Result < & ' a mut Vec < u64 > > {
449- for ( i, & array) in arrays. iter ( ) . enumerate ( ) {
450- // combine hashes with `combine_hashes` for all columns besides the first
451- let rehash = i >= 1 ;
452- hash_single_array ( array, random_state, hashes_buffer, rehash) ?;
437+ pub trait AsDynArray {
438+ fn as_dyn_array ( & self ) -> & dyn Array ;
439+ }
440+
441+ impl AsDynArray for dyn Array {
442+ fn as_dyn_array ( & self ) -> & dyn Array {
443+ self
444+ }
445+ }
446+
447+ impl AsDynArray for & dyn Array {
448+ fn as_dyn_array ( & self ) -> & dyn Array {
449+ * self
450+ }
451+ }
452+
453+ impl AsDynArray for ArrayRef {
454+ fn as_dyn_array ( & self ) -> & dyn Array {
455+ self . as_ref ( )
456+ }
457+ }
458+
459+ impl AsDynArray for & ArrayRef {
460+ fn as_dyn_array ( & self ) -> & dyn Array {
461+ self . as_ref ( )
453462 }
454- Ok ( hashes_buffer)
455463}
456464
457465/// Creates hash values for every row, based on the values in the columns.
458466///
459467/// The number of rows to hash is determined by `hashes_buffer.len()`.
460468/// `hashes_buffer` should be pre-sized appropriately.
461- ///
462- /// This is the same as [`create_hashes_from_arrays`] but accepts `ArrayRef`s.
463- pub fn create_hashes < ' a > (
464- arrays : & [ ArrayRef ] ,
469+ pub fn create_hashes < ' a , I , T > (
470+ arrays : I ,
465471 random_state : & RandomState ,
466472 hashes_buffer : & ' a mut Vec < u64 > ,
467- ) -> Result < & ' a mut Vec < u64 > > {
468- for ( i, array) in arrays. iter ( ) . enumerate ( ) {
473+ ) -> Result < & ' a mut Vec < u64 > >
474+ where
475+ I : IntoIterator < Item = T > ,
476+ T : AsDynArray ,
477+ {
478+ for ( i, array) in arrays. into_iter ( ) . enumerate ( ) {
469479 // combine hashes with `combine_hashes` for all columns besides the first
470480 let rehash = i >= 1 ;
471- hash_single_array ( array. as_ref ( ) , random_state, hashes_buffer, rehash) ?;
481+ hash_single_array ( array. as_dyn_array ( ) , random_state, hashes_buffer, rehash) ?;
472482 }
473483 Ok ( hashes_buffer)
474484}
@@ -491,7 +501,7 @@ mod tests {
491501 . collect :: < Decimal128Array > ( )
492502 . with_precision_and_scale ( 20 , 3 )
493503 . unwrap ( ) ;
494- let array_ref = Arc :: new ( array) ;
504+ let array_ref: ArrayRef = Arc :: new ( array) ;
495505 let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
496506 let hashes_buff = & mut vec ! [ 0 ; array_ref. len( ) ] ;
497507 let hashes = create_hashes ( & [ array_ref] , & random_state, hashes_buff) ?;
@@ -504,15 +514,21 @@ mod tests {
504514 let empty_array = FixedSizeListBuilder :: new ( StringBuilder :: new ( ) , 1 ) . finish ( ) ;
505515 let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
506516 let hashes_buff = & mut vec ! [ 0 ; 0 ] ;
507- let hashes = create_hashes ( & [ Arc :: new ( empty_array) ] , & random_state, hashes_buff) ?;
517+ let hashes = create_hashes (
518+ & [ Arc :: new ( empty_array) as ArrayRef ] ,
519+ & random_state,
520+ hashes_buff,
521+ ) ?;
508522 assert_eq ! ( hashes, & Vec :: <u64 >:: new( ) ) ;
509523 Ok ( ( ) )
510524 }
511525
512526 #[ test]
513527 fn create_hashes_for_float_arrays ( ) -> Result < ( ) > {
514- let f32_arr = Arc :: new ( Float32Array :: from ( vec ! [ 0.12 , 0.5 , 1f32 , 444.7 ] ) ) ;
515- let f64_arr = Arc :: new ( Float64Array :: from ( vec ! [ 0.12 , 0.5 , 1f64 , 444.7 ] ) ) ;
528+ let f32_arr: ArrayRef =
529+ Arc :: new ( Float32Array :: from ( vec ! [ 0.12 , 0.5 , 1f32 , 444.7 ] ) ) ;
530+ let f64_arr: ArrayRef =
531+ Arc :: new ( Float64Array :: from ( vec ! [ 0.12 , 0.5 , 1f64 , 444.7 ] ) ) ;
516532
517533 let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
518534 let hashes_buff = & mut vec ! [ 0 ; f32_arr. len( ) ] ;
@@ -540,8 +556,10 @@ mod tests {
540556 Some ( b"Longer than 12 bytes string" ) ,
541557 ] ;
542558
543- let binary_array = Arc :: new( binary. iter( ) . cloned( ) . collect:: <$ARRAY>( ) ) ;
544- let ref_array = Arc :: new( binary. iter( ) . cloned( ) . collect:: <BinaryArray >( ) ) ;
559+ let binary_array: ArrayRef =
560+ Arc :: new( binary. iter( ) . cloned( ) . collect:: <$ARRAY>( ) ) ;
561+ let ref_array: ArrayRef =
562+ Arc :: new( binary. iter( ) . cloned( ) . collect:: <BinaryArray >( ) ) ;
545563
546564 let random_state = RandomState :: with_seeds( 0 , 0 , 0 , 0 ) ;
547565
@@ -579,7 +597,7 @@ mod tests {
579597 #[ test]
580598 fn create_hashes_fixed_size_binary ( ) -> Result < ( ) > {
581599 let input_arg = vec ! [ vec![ 1 , 2 ] , vec![ 5 , 6 ] , vec![ 5 , 6 ] ] ;
582- let fixed_size_binary_array =
600+ let fixed_size_binary_array: ArrayRef =
583601 Arc :: new ( FixedSizeBinaryArray :: try_from_iter ( input_arg. into_iter ( ) ) . unwrap ( ) ) ;
584602
585603 let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
@@ -606,8 +624,9 @@ mod tests {
606624 Some ( "Longer than 12 bytes string" ) ,
607625 ] ;
608626
609- let string_array = Arc :: new( strings. iter( ) . cloned( ) . collect:: <$ARRAY>( ) ) ;
610- let dict_array = Arc :: new(
627+ let string_array: ArrayRef =
628+ Arc :: new( strings. iter( ) . cloned( ) . collect:: <$ARRAY>( ) ) ;
629+ let dict_array: ArrayRef = Arc :: new(
611630 strings
612631 . iter( )
613632 . cloned( )
@@ -655,8 +674,9 @@ mod tests {
655674 fn create_hashes_for_dict_arrays ( ) {
656675 let strings = [ Some ( "foo" ) , None , Some ( "bar" ) , Some ( "foo" ) , None ] ;
657676
658- let string_array = Arc :: new ( strings. iter ( ) . cloned ( ) . collect :: < StringArray > ( ) ) ;
659- let dict_array = Arc :: new (
677+ let string_array: ArrayRef =
678+ Arc :: new ( strings. iter ( ) . cloned ( ) . collect :: < StringArray > ( ) ) ;
679+ let dict_array: ArrayRef = Arc :: new (
660680 strings
661681 . iter ( )
662682 . cloned ( )
@@ -891,8 +911,9 @@ mod tests {
891911 let strings1 = [ Some ( "foo" ) , None , Some ( "bar" ) ] ;
892912 let strings2 = [ Some ( "blarg" ) , Some ( "blah" ) , None ] ;
893913
894- let string_array = Arc :: new ( strings1. iter ( ) . cloned ( ) . collect :: < StringArray > ( ) ) ;
895- let dict_array = Arc :: new (
914+ let string_array: ArrayRef =
915+ Arc :: new ( strings1. iter ( ) . cloned ( ) . collect :: < StringArray > ( ) ) ;
916+ let dict_array: ArrayRef = Arc :: new (
896917 strings2
897918 . iter ( )
898919 . cloned ( )
@@ -925,23 +946,36 @@ mod tests {
925946
926947 #[ test]
927948 fn test_create_hashes_from_arrays ( ) {
928- let int_array = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ) ;
929- let float_array = Arc :: new ( Float64Array :: from ( vec ! [ 1.0 , 2.0 , 3.0 , 4.0 ] ) ) ;
949+ let int_array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ) ;
950+ let float_array: ArrayRef =
951+ Arc :: new ( Float64Array :: from ( vec ! [ 1.0 , 2.0 , 3.0 , 4.0 ] ) ) ;
930952
931953 let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
932954 let hashes_buff = & mut vec ! [ 0 ; int_array. len( ) ] ;
933- let hashes = create_hashes_from_arrays (
934- & [ int_array. as_ref ( ) , float_array. as_ref ( ) ] ,
935- & random_state,
936- hashes_buff,
937- )
938- . unwrap ( ) ;
955+ let hashes =
956+ create_hashes ( & [ int_array, float_array] , & random_state, hashes_buff) . unwrap ( ) ;
939957 assert_eq ! ( hashes. len( ) , 4 , ) ;
940958 }
941959
960+ #[ test]
961+ fn test_create_hashes_from_dyn_arrays ( ) {
962+ let int_array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ) ;
963+ let float_array: ArrayRef =
964+ Arc :: new ( Float64Array :: from ( vec ! [ 1.0 , 2.0 , 3.0 , 4.0 ] ) ) ;
965+
966+ // Verify that we can call create_hashes with only &dyn Array
967+ fn test ( arr1 : & dyn Array , arr2 : & dyn Array ) {
968+ let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
969+ let hashes_buff = & mut vec ! [ 0 ; arr1. len( ) ] ;
970+ let hashes = create_hashes ( [ arr1, arr2] , & random_state, hashes_buff) . unwrap ( ) ;
971+ assert_eq ! ( hashes. len( ) , 4 , ) ;
972+ }
973+ test ( & * int_array, & * float_array) ;
974+ }
975+
942976 #[ test]
943977 fn test_create_hashes_equivalence ( ) {
944- let array = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ) ;
978+ let array: ArrayRef = Arc :: new ( Int32Array :: from ( vec ! [ 1 , 2 , 3 , 4 ] ) ) ;
945979 let random_state = RandomState :: with_seeds ( 0 , 0 , 0 , 0 ) ;
946980
947981 let mut hashes1 = vec ! [ 0 ; array. len( ) ] ;
@@ -953,8 +987,7 @@ mod tests {
953987 . unwrap ( ) ;
954988
955989 let mut hashes2 = vec ! [ 0 ; array. len( ) ] ;
956- create_hashes_from_arrays ( & [ array. as_ref ( ) ] , & random_state, & mut hashes2)
957- . unwrap ( ) ;
990+ create_hashes ( [ array] , & random_state, & mut hashes2) . unwrap ( ) ;
958991
959992 assert_eq ! ( hashes1, hashes2) ;
960993 }
0 commit comments