From dd1fa17aff5afb528012769529de95e2ae7502f0 Mon Sep 17 00:00:00 2001 From: Swoorup Joshi Date: Fri, 29 Mar 2024 14:11:29 +1100 Subject: [PATCH 1/2] Fix field accesses serializing when using names like `min` and `max` --- arrow_convert_derive/src/derive_struct.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow_convert_derive/src/derive_struct.rs b/arrow_convert_derive/src/derive_struct.rs index 82fd78d..d8cfede 100644 --- a/arrow_convert_derive/src/derive_struct.rs +++ b/arrow_convert_derive/src/derive_struct.rs @@ -204,7 +204,7 @@ pub fn expand_serialize(input: DeriveStruct) -> TokenStream { match item { Some(i) => { - let i = i.borrow(); + let i = i.borrow() as &#original_name; #( <#field_types as arrow_convert::serialize::ArrowSerialize>::arrow_serialize(i.#field_names.borrow(), &mut self.#field_idents)?; )*; From 6ae0e04ca86447f8197f679a67cdf8029a92f798 Mon Sep 17 00:00:00 2001 From: Swoorup Joshi Date: Fri, 29 Mar 2024 15:45:16 +1100 Subject: [PATCH 2/2] Added support for arrays --- Cargo.toml | 12 ++++ arrow_convert/Cargo.toml | 16 ++---- arrow_convert/src/deserialize/mod.rs | 24 +++++++- arrow_convert/src/field.rs | 14 +++++ arrow_convert/src/serialize/mod.rs | 25 +++++++++ arrow_convert/tests/test_array.rs | 68 +++++++++++++++++++++++ arrow_convert_derive/Cargo.toml | 16 ++---- arrow_convert_derive/src/derive_struct.rs | 11 ++-- 8 files changed, 160 insertions(+), 26 deletions(-) create mode 100644 arrow_convert/tests/test_array.rs diff --git a/Cargo.toml b/Cargo.toml index d82b277..e2cc550 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,15 @@ members = [ "arrow_convert_derive", "examples/simple" ] + +[workspace.package] +version = "0.6.1" +authors = [ + "Swoorup Joshi ", + "Jorge Leitao ", + "Chandra Penke ", +] +edition = "2021" +license = "Apache-2.0 OR MIT" +keywords = ["Arrow", "arrow"] +repository = "https://github.com/Swoorup/arrow-convert" \ No newline at end of file diff --git a/arrow_convert/Cargo.toml 
b/arrow_convert/Cargo.toml index 65cac97..0912332 100644 --- a/arrow_convert/Cargo.toml +++ b/arrow_convert/Cargo.toml @@ -1,15 +1,11 @@ [package] name = "arrow_convert" -version = "0.6.0" -authors = [ - "Swoorup Joshi ", - "Jorge Leitao ", - "Chandra Penke ", -] -edition = "2021" -license = "Apache-2.0 OR MIT" -keywords = ["Arrow", "arrow"] -repository = "https://github.com/Swoorup/arrow-convert" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +keywords.workspace = true +repository.workspace = true description = "Convert between nested rust types and Arrow with arrow" [dependencies] diff --git a/arrow_convert/src/deserialize/mod.rs b/arrow_convert/src/deserialize/mod.rs index 3229f43..89e2ed6 100644 --- a/arrow_convert/src/deserialize/mod.rs +++ b/arrow_convert/src/deserialize/mod.rs @@ -386,7 +386,6 @@ where arrow_deserialize_vec_helper::(v) } } - impl<'a, OffsetSize: OffsetSizeTrait> IntoArrowArrayIterator for &'a GenericListArray { type Item = Option>; @@ -432,6 +431,29 @@ where arrow_deserialize_vec_helper::(v) } } +impl ArrowDeserialize for [T; SIZE] +where + T: ArrowDeserialize + ArrowEnableVecForType + 'static, + ::ArrayType: 'static, + for<'b> &'b ::ArrayType: IntoArrowArrayIterator, +{ + type ArrayType = FixedSizeListArray; + + fn arrow_deserialize(v: Option>) -> Option<::Type> { + let result = arrow_deserialize_vec_helper::(v)?; + let length = result.len(); + + match <[::Type; SIZE]>::try_from(result).ok() { + None => panic!( + "Expected size of {} deserializing array of type `{}`, got {}", + SIZE, + std::any::type_name::(), + length + ), + array => array, + } + } +} impl_arrow_array!(BooleanArray); impl_arrow_array!(StringArray); diff --git a/arrow_convert/src/field.rs b/arrow_convert/src/field.rs index 7f6815e..6db4766 100644 --- a/arrow_convert/src/field.rs +++ b/arrow_convert/src/field.rs @@ -308,6 +308,20 @@ where } } +impl ArrowField for [T; SIZE] +where + T: ArrowField + 
ArrowEnableVecForType, +{ + type Type = [::Type; SIZE]; + type Native = [::Native; SIZE]; + + #[inline] + fn data_type() -> arrow::datatypes::DataType { + let field = Field::new("item", ::data_type(), true); + arrow::datatypes::DataType::FixedSizeList(Arc::new(field), SIZE as i32) + } +} + arrow_enable_vec_for_type!(String); arrow_enable_vec_for_type!(LargeString); arrow_enable_vec_for_type!(bool); diff --git a/arrow_convert/src/serialize/mod.rs b/arrow_convert/src/serialize/mod.rs index 9beac79..496f5c5 100644 --- a/arrow_convert/src/serialize/mod.rs +++ b/arrow_convert/src/serialize/mod.rs @@ -384,6 +384,31 @@ where } } +impl ArrowSerialize for [T; SIZE] +where + T: ArrowSerialize + ArrowEnableVecForType + 'static, + ::ArrayBuilderType: Default, +{ + type ArrayBuilderType = FixedSizeListBuilder<::ArrayBuilderType>; + + #[inline] + fn new_array() -> Self::ArrayBuilderType { + Self::ArrayBuilderType::new(::new_array(), SIZE as i32) + } + + fn arrow_serialize( + v: &::Type, + array: &mut Self::ArrayBuilderType, + ) -> arrow::error::Result<()> { + let values = array.values(); + for i in v.iter() { + ::arrow_serialize(i, values)?; + } + array.append(true); + Ok(()) + } +} + // internal helper method to extend a mutable array fn arrow_serialize_extend_internal< 'a, diff --git a/arrow_convert/tests/test_array.rs b/arrow_convert/tests/test_array.rs new file mode 100644 index 0000000..daa6e46 --- /dev/null +++ b/arrow_convert/tests/test_array.rs @@ -0,0 +1,68 @@ +use arrow::array::Array; +use arrow::array::ArrayRef; +use arrow_convert::deserialize::TryIntoCollection; +use arrow_convert::serialize::TryIntoArrow; +/// Simple example +use arrow_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; + +#[derive(Debug, PartialEq, Clone, Copy, ArrowField, ArrowSerialize, ArrowDeserialize)] +pub struct QuadPoints { + points: [Point; 4], +} + +#[derive(Clone, Copy, ArrowField, ArrowSerialize, ArrowDeserialize)] +pub struct AABB { + min: Point, + max: Point, +} + 
+#[derive(Debug, PartialEq, Clone, Copy, ArrowField, ArrowSerialize, ArrowDeserialize)] +pub struct Point { + x: f64, + y: f64, +} + +#[test] +fn test_simple_roundtrip() { + let original_array = vec![ + QuadPoints { + points: [ + Point { x: 0.0, y: 0.0 }, + Point { x: 1.0, y: 0.0 }, + Point { x: 1.0, y: 1.0 }, + Point { x: 0.0, y: 1.0 }, + ], + }, + QuadPoints { + points: [ + Point { x: 0.0, y: 0.0 }, + Point { x: 2.0, y: 0.0 }, + Point { x: 2.0, y: 2.0 }, + Point { x: 0.0, y: 2.0 }, + ], + }, + QuadPoints { + points: [ + Point { x: 0.0, y: 0.0 }, + Point { x: 3.0, y: 0.0 }, + Point { x: 3.0, y: 3.0 }, + Point { x: 0.0, y: 3.0 }, + ], + }, + ]; + + // serialize to an arrow array. try_into_arrow() is enabled by the TryIntoArrow trait + let arrow_array: ArrayRef = original_array.try_into_arrow().unwrap(); + + // which can be cast to an Arrow StructArray and be used for all kinds of IPC, FFI, etc. + // supported by `arrow` + let struct_array = arrow_array + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(struct_array.len(), 3); + + // deserialize back to our original vector via TryIntoCollection trait. 
+ let round_trip_array: Vec = arrow_array.try_into_collection().unwrap(); + assert_eq!(round_trip_array, original_array); +} diff --git a/arrow_convert_derive/Cargo.toml b/arrow_convert_derive/Cargo.toml index d443683..d928e58 100644 --- a/arrow_convert_derive/Cargo.toml +++ b/arrow_convert_derive/Cargo.toml @@ -1,15 +1,11 @@ [package] name = "arrow_convert_derive" -version = "0.6.0" -authors = [ - "Swoorup Joshi ", - "Jorge Leitao ", - "Chandra Penke " -] -edition = "2021" -license = "Apache-2.0 OR MIT" -keywords = ["Arrow", "arrow"] -repository = "https://github.com/Swoorup/arrow-convert" +version.workspace = true +authors.workspace = true +edition.workspace = true +license.workspace = true +keywords.workspace = true +repository.workspace = true description = "Proc macros for arrow_convert" [lib] diff --git a/arrow_convert_derive/src/derive_struct.rs b/arrow_convert_derive/src/derive_struct.rs index d8cfede..6427438 100644 --- a/arrow_convert_derive/src/derive_struct.rs +++ b/arrow_convert_derive/src/derive_struct.rs @@ -12,7 +12,7 @@ struct Common<'a> { field_idents: Vec, skipped_field_names: Vec, field_indices: Vec, - field_types: Vec<&'a syn::TypePath>, + field_types: Vec<&'a syn::Type>, } impl<'a> From<&'a DeriveStruct> for Common<'a> { @@ -73,13 +73,14 @@ impl<'a> From<&'a DeriveStruct> for Common<'a> { }) .collect::>(); - let field_types: Vec<&syn::TypePath> = fields + let field_types: Vec<&syn::Type> = fields .iter() .map(|field| match &field.field_type { - syn::Type::Path(path) => path, - _ => panic!("Only types are supported atm"), + syn::Type::Path(_) => &field.field_type, + syn::Type::Array(_) => &field.field_type, + x => panic!("Only types are supported atm: {:#?}", x), }) - .collect::>(); + .collect::>(); Self { original_name,