Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for tinystr, rust_decimal, and glam types #12

Merged
merged 1 commit into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ quote = "1"
syn = "2"
trybuild = "1.0"
pretty_assertions = "1.4"
tinystr = "0.7"
rust_decimal = "1.36"
glam = "0.29"
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ Default implementations of the above traits are provided for the following:
- Large Arrow types [`LargeBinary`], [`LargeString`], [`LargeList`] are supported via the `type` attribute. Please see the [complex_example.rs](./arrow_convert/tests/complex_example.rs) for usage.
- Fixed size types [`FixedSizeBinary`], [`FixedSizeList`] are supported via the `FixedSizeVec` type override.
- Note: nesting of [`FixedSizeList`] is not supported.
- `TinyAsciiStr` from the [tinystr](https://github.com/zbraniecki/tinystr) crate (with the `tinystr` feature enabled)
- `Decimal` from the [rust_decimal](https://github.com/paupino/rust-decimal) crate (with the `rust_decimal` feature enabled)
- Vector and matrix types from the [glam](https://github.com/bitshifter/glam-rs) crate (with the `glam` feature enabled):
- `Vec2`, `Vec3`, `Vec4`
- `DVec2`, `DVec3`, `DVec4`
- `BVec2`, `BVec3`, `BVec4`
- `Mat2`, `Mat3`, `Mat4`
- `DMat2`, `DMat3`, `DMat4`

### Enums

Expand Down
20 changes: 16 additions & 4 deletions arrow_convert/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,35 @@ keywords.workspace = true
repository.workspace = true
description = "Convert between nested rust types and Arrow with arrow"

[features]
default = ["derive"]

derive = ["arrow_convert_derive"]
tinystr = ["dep:tinystr"]
rust_decimal = ["dep:rust_decimal"]
glam = ["dep:glam"]

[dependencies]
arrow = { workspace = true }
arrow_convert_derive = { workspace = true, optional = true }
half = { workspace = true }
chrono = { workspace = true, features = ["std"] }
err-derive = { workspace = true }

# optional deps
tinystr = { workspace = true, optional = true }
rust_decimal = { workspace = true, optional = true }
glam = { workspace = true, optional = true }

[dev-dependencies]
arrow_convert_derive = { workspace = true }
glam = { workspace = true }
tinystr = { workspace = true }
rust_decimal = { workspace = true }
criterion = { workspace = true }
trybuild = { workspace = true }
pretty_assertions = { workspace = true }

[features]
default = ["derive"]
derive = ["arrow_convert_derive"]

[lib]
bench = false

Expand Down
172 changes: 172 additions & 0 deletions arrow_convert/src/features/glam.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
use arrow::datatypes::DataType;

use crate::arrow_enable_vec_for_type;
use crate::deserialize::ArrowDeserialize;
use crate::field::ArrowField;
use crate::serialize::ArrowSerialize;
use arrow::datatypes::Field;

use crate::deserialize::arrow_deserialize_vec_helper;
use arrow::array::ArrayRef;
use arrow::array::{BooleanBuilder, Float32Builder, Float64Builder};
use arrow::array::{FixedSizeListArray, FixedSizeListBuilder};
use std::sync::Arc;

/// Generates the `ArrowField`, `ArrowSerialize` and `ArrowDeserialize` impls for one `glam`
/// vector or matrix type, representing it as an Arrow `FixedSizeList` of non-nullable scalars.
///
/// Parameters:
/// - `$type`: the `glam` type that receives the impls.
/// - `$size`: the number of scalar elements stored per value; it becomes the fixed list length
///   (the invocations in this file pass e.g. 2 for `glam::Vec2` and 16 for `glam::Mat4`).
/// - `$dt`: the Rust element type (e.g. `bool`, `f32`); its own `ArrowSerialize::new_array`
///   supplies the child values builder.
/// - `$arrow_dt`: the Arrow `DataType` of a single element.
/// - `$array_builder`: the Arrow builder type used for the child values.
/// - `$se`: a closure turning `&$type` into a value whose `as_ref()` yields the element slice.
/// - `$de`: a closure rebuilding `$type` from a `Vec` of elements.
macro_rules! impl_glam_ty {
    ($type:ty, $size:expr, $dt:ident, $arrow_dt:expr, $array_builder:ident, $se:expr, $de:expr) => {
        impl ArrowField for $type {
            type Type = Self;

            fn data_type() -> DataType {
                DataType::FixedSizeList(Arc::new(Field::new("scalar", $arrow_dt, false)), $size)
            }
        }

        arrow_enable_vec_for_type!($type);

        impl ArrowSerialize for $type {
            type ArrayBuilderType = FixedSizeListBuilder<$array_builder>;

            fn new_array() -> Self::ArrayBuilderType {
                let values_builder = <$dt as ArrowSerialize>::new_array();
                // The child field must match the one advertised by `data_type()`.
                FixedSizeListBuilder::<$array_builder>::new(values_builder, $size)
                    .with_field(Field::new("scalar", $arrow_dt, false))
            }

            fn arrow_serialize(v: &Self::Type, array: &mut Self::ArrayBuilderType) -> arrow::error::Result<()> {
                let elements = $se(v);

                // Push the scalars into the child builder, then close the list slot as valid.
                array.values().append_slice(elements.as_ref());
                array.append(true);
                Ok(())
            }
        }

        impl ArrowDeserialize for $type {
            type ArrayType = FixedSizeListArray;

            fn arrow_deserialize(v: Option<ArrayRef>) -> Option<Self> {
                // A missing list yields `None`; otherwise the elements are handed to `$de`.
                arrow_deserialize_vec_helper::<$dt>(v).map($de)
            }
        }
    };
}

/// Implements the `ArrowField`, `ArrowSerialize` and `ArrowDeserialize` traits for a `glam`
/// boolean vector type (`glam::BVec2`, `glam::BVec3`, `glam::BVec4`).
///
/// - `$type`: the boolean vector type.
/// - `$size`: the number of lanes in the vector.
///
/// # Panics
/// The generated deserializer panics when the incoming list does not contain exactly `$size`
/// elements.
macro_rules! impl_glam_vec_bool {
    ($type:ty, $size:expr) => {
        impl_glam_ty!(
            $type,
            $size,
            bool,
            DataType::Boolean,
            BooleanBuilder,
            |v: &$type| <[bool; $size]>::from(*v),
            |v: Vec<bool>| {
                // Capture the length first: `try_from` consumes the vector.
                let length = v.len();

                match <[bool; $size]>::try_from(v) {
                    Ok(array) => Self::from_array(array),
                    // Arguments follow the placeholders: expected size, type name, actual length.
                    Err(_) => panic!(
                        "Expected size of {} deserializing array of type `{}`, got {}",
                        $size,
                        std::any::type_name::<$type>(),
                        length
                    ),
                }
            }
        );
    };
}

/// Wires up the Arrow traits for an `f32` vector type (`glam::Vec2`, `glam::Vec3`, `glam::Vec4`).
///
/// `$len` is the number of components. The vector is copied out for serialization and rebuilt
/// with `from_slice` when deserializing.
macro_rules! impl_glam_vec_f32 {
    ($vec:ty, $len:expr) => {
        impl_glam_ty!(
            $vec,
            $len,
            f32,
            DataType::Float32,
            Float32Builder,
            |value: &$vec| *value,
            |elements: Vec<f32>| Self::from_slice(elements.as_slice())
        );
    };
}

/// Wires up the Arrow traits for an `f32` matrix type (`glam::Mat2`, `glam::Mat3`, `glam::Mat4`).
///
/// `$len` is the total number of matrix elements. The matrix is copied out for serialization
/// and rebuilt with `from_cols_slice` when deserializing.
macro_rules! impl_glam_mat_f32 {
    ($mat:ty, $len:expr) => {
        impl_glam_ty!(
            $mat,
            $len,
            f32,
            DataType::Float32,
            Float32Builder,
            |value: &$mat| *value,
            |elements: Vec<f32>| Self::from_cols_slice(elements.as_slice())
        );
    };
}

/// Wires up the Arrow traits for an `f64` vector type (`glam::DVec2`, `glam::DVec3`,
/// `glam::DVec4`).
///
/// `$len` is the number of components. The vector is copied out for serialization and rebuilt
/// with `from_slice` when deserializing.
macro_rules! impl_glam_vec_f64 {
    ($vec:ty, $len:expr) => {
        impl_glam_ty!(
            $vec,
            $len,
            f64,
            DataType::Float64,
            Float64Builder,
            |value: &$vec| *value,
            |elements: Vec<f64>| Self::from_slice(elements.as_slice())
        );
    };
}

/// Wires up the Arrow traits for an `f64` matrix type (`glam::DMat2`, `glam::DMat3`,
/// `glam::DMat4`).
///
/// `$len` is the total number of matrix elements. The matrix is copied out for serialization
/// and rebuilt with `from_cols_slice` when deserializing.
macro_rules! impl_glam_mat_f64 {
    ($mat:ty, $len:expr) => {
        impl_glam_ty!(
            $mat,
            $len,
            f64,
            DataType::Float64,
            Float64Builder,
            |value: &$mat| *value,
            |elements: Vec<f64>| Self::from_cols_slice(elements.as_slice())
        );
    };
}

// Boolean vectors: the second argument is the number of lanes, which is also the
// fixed list length used for the Arrow representation.
impl_glam_vec_bool!(glam::BVec2, 2);
impl_glam_vec_bool!(glam::BVec3, 3);
impl_glam_vec_bool!(glam::BVec4, 4);

// Float32 vectors and matrices. For matrices the second argument is the total number
// of elements (4, 9, 16) passed through `from_cols_slice`, not the row/column dimension.
impl_glam_vec_f32!(glam::Vec2, 2);
impl_glam_vec_f32!(glam::Vec3, 3);
impl_glam_vec_f32!(glam::Vec4, 4);
impl_glam_mat_f32!(glam::Mat2, 4);
impl_glam_mat_f32!(glam::Mat3, 9);
impl_glam_mat_f32!(glam::Mat4, 16);

// Float64 vectors and matrices, using the same size convention as the Float32 group.
impl_glam_vec_f64!(glam::DVec2, 2);
impl_glam_vec_f64!(glam::DVec3, 3);
impl_glam_vec_f64!(glam::DVec4, 4);
impl_glam_mat_f64!(glam::DMat2, 4);
impl_glam_mat_f64!(glam::DMat3, 9);
impl_glam_mat_f64!(glam::DMat4, 16);
8 changes: 8 additions & 0 deletions arrow_convert/src/features/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Optional third-party integrations. Each module is compiled only when the Cargo
// feature of the same name is enabled, and is kept private to this crate.

// Arrow trait impls for `tinystr` types.
#[cfg(feature = "tinystr")]
mod tinystr;

// Arrow trait impls for `rust_decimal` types.
#[cfg(feature = "rust_decimal")]
mod rust_decimal;

// Arrow trait impls for `glam` vector and matrix types.
#[cfg(feature = "glam")]
mod glam;
55 changes: 55 additions & 0 deletions arrow_convert/src/features/rust_decimal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crate::arrow_enable_vec_for_type;
use crate::deserialize::ArrowDeserialize;
use crate::field::ArrowField;
use crate::serialize::ArrowSerialize;

use arrow::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE};
use rust_decimal::Decimal;

use arrow::array::{Decimal128Array, Decimal128Builder};

/// Maps `Decimal` to Arrow's `Decimal128`, using `DECIMAL128_MAX_PRECISION` as the precision
/// and `DECIMAL_DEFAULT_SCALE` as the scale.
impl ArrowField for Decimal {
    type Type = Self;

    #[inline]
    fn data_type() -> DataType {
        let (precision, scale) = (DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE);
        DataType::Decimal128(precision, scale)
    }
}

// Opt `Decimal` into the crate's `Vec<T>` support via `arrow_enable_vec_for_type!`.
arrow_enable_vec_for_type!(Decimal);

impl ArrowSerialize for Decimal {
type ArrayBuilderType = Decimal128Builder;

fn new_array() -> Self::ArrayBuilderType {
Decimal128Builder::new().with_data_type(Self::data_type())
}

fn arrow_serialize(v: &Self::Type, array: &mut Self::ArrayBuilderType) -> arrow::error::Result<()> {
array.append_value(decimal_to_scaled_i128(*v));
Ok(())
}
}

impl ArrowDeserialize for Decimal {
type ArrayType = Decimal128Array;

fn arrow_deserialize(v: Option<i128>) -> Option<Decimal> {
v.map(|d| Decimal::from_i128_with_scale(d, DECIMAL_DEFAULT_SCALE as _))
}
}

/// Converts a `Decimal` value to an `i128` representation, adjusting the scale to match the default scale.
fn decimal_to_scaled_i128(decimal: Decimal) -> i128 {
let m = decimal.mantissa();
let scale_diff = DECIMAL_DEFAULT_SCALE as i32 - decimal.scale() as i32;

if scale_diff == 0 {
m
} else if scale_diff < 0 {
m / 10_i128.pow(scale_diff.unsigned_abs())
} else {
m * 10_i128.pow(scale_diff as u32)
}
}
37 changes: 37 additions & 0 deletions arrow_convert/src/features/tinystr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use arrow::datatypes::DataType;
use tinystr::TinyAsciiStr;

use crate::deserialize::ArrowDeserialize;
use crate::field::ArrowField;
use crate::serialize::ArrowSerialize;

use arrow::array::{FixedSizeBinaryArray, FixedSizeBinaryBuilder};

/// A `TinyAsciiStr<N>` is stored as an Arrow `FixedSizeBinary` value that is `N` bytes wide.
impl<const N: usize> ArrowField for TinyAsciiStr<N> {
    type Type = Self;

    fn data_type() -> DataType {
        let byte_width = N as i32;
        DataType::FixedSizeBinary(byte_width)
    }
}

impl<const N: usize> ArrowSerialize for TinyAsciiStr<N> {
    type ArrayBuilderType = FixedSizeBinaryBuilder;

    fn new_array() -> Self::ArrayBuilderType {
        FixedSizeBinaryBuilder::new(N as i32)
    }

    /// Appends the string as exactly `N` bytes, NUL-padded on the right.
    ///
    /// # Errors
    /// Propagates any error reported by `FixedSizeBinaryBuilder::append_value`.
    fn arrow_serialize(v: &Self::Type, array: &mut Self::ArrayBuilderType) -> arrow::error::Result<()> {
        // `as_bytes()` yields only the `len()` bytes in use, which the fixed-width builder
        // rejects for any string shorter than `N`. `all_bytes()` is always `N` bytes long.
        array.append_value(v.all_bytes())
    }
}

impl<const N: usize> ArrowDeserialize for TinyAsciiStr<N> {
    type ArrayType = FixedSizeBinaryArray;

    /// Rebuilds the string from a fixed-width value, ignoring right-hand NUL padding.
    ///
    /// Returns `None` for a missing value or when `TinyAsciiStr::from_bytes` rejects the
    /// remaining bytes.
    fn arrow_deserialize(v: Option<&[u8]>) -> Option<Self> {
        let bytes = v?;
        // Drop the padding written by `arrow_serialize`; values that fill all `N` bytes
        // have no trailing NULs and pass through unchanged.
        let used = bytes.iter().rposition(|&b| b != 0).map_or(0, |last| last + 1);
        TinyAsciiStr::from_bytes(&bytes[..used]).ok()
    }
}
2 changes: 2 additions & 0 deletions arrow_convert/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ pub use arrow_convert_derive::{ArrowDeserialize, ArrowField, ArrowSerialize};
#[cfg_attr(not(target_os = "windows"), doc = include_str!("../README.md"))]
#[cfg(doctest)]
struct ReadmeDoctests;

// Feature-gated trait impls for optional third-party types; see `features/mod.rs`.
mod features;
Loading
Loading