diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml index 12da1af79fe0..f3309783fb38 100644 --- a/arrow-cast/Cargo.toml +++ b/arrow-cast/Cargo.toml @@ -43,6 +43,7 @@ force_validate = [] arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-data = { workspace = true } +arrow-ord = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } chrono = { workspace = true } diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index fe38298b017c..bb3247ca3c3c 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -41,11 +41,13 @@ mod decimal; mod dictionary; mod list; mod map; +mod run_array; mod string; use crate::cast::decimal::*; use crate::cast::dictionary::*; use crate::cast::list::*; use crate::cast::map::*; +use crate::cast::run_array::*; use crate::cast::string::*; use arrow_buffer::IntervalMonthDayNano; @@ -139,6 +141,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { can_cast_types(from_value_type, to_value_type) } (Dictionary(_, value_type), _) => can_cast_types(value_type, to_type), + (RunEndEncoded(_, value_type), _) => can_cast_types(value_type.data_type(), to_type), + (_, RunEndEncoded(_, value_type)) => can_cast_types(from_type, value_type.data_type()), (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type), (List(list_from) | LargeList(list_from), List(list_to) | LargeList(list_to)) => { can_cast_types(list_from.data_type(), list_to.data_type()) @@ -791,6 +795,37 @@ pub fn cast_with_options( | Map(_, _) | Dictionary(_, _), ) => Ok(new_null_array(to_type, array.len())), + (RunEndEncoded(index_type, _), _) => match index_type.data_type() { + Int16 => run_end_encoded_cast::(array, to_type, cast_options), + Int32 => run_end_encoded_cast::(array, to_type, cast_options), + Int64 => run_end_encoded_cast::(array, to_type, cast_options), + _ => Err(ArrowError::CastError(format!( + "Casting from run end encoded type {from_type:?} to {to_type:?} not supported", + ))), + }, + (_, RunEndEncoded(index_type, value_type)) => { + let array_ref = make_array(array.to_data()); + match index_type.data_type() { + Int16 => cast_to_run_end_encoded::( + &array_ref, + value_type.data_type(), + cast_options, + ), + Int32 => cast_to_run_end_encoded::( + &array_ref, + value_type.data_type(), + cast_options, + ), + Int64 => cast_to_run_end_encoded::( + &array_ref, + value_type.data_type(), + cast_options, + ), + _ => Err(ArrowError::CastError(format!( + "Casting from type {from_type:?} to run end encoded type {to_type:?} not supported", + ))), + } + } (Dictionary(index_type, _), _) => match **index_type { Int8 => dictionary_cast::(array, to_type, cast_options), Int16 => dictionary_cast::(array, to_type, cast_options), @@ -2640,10 +2675,14 @@ where #[cfg(test)] mod tests { use super::*; + use DataType::*; + use arrow_array::{Int64Array, RunArray, StringArray}; use arrow_buffer::i256; use arrow_buffer::{Buffer, IntervalDayTime, NullBuffer}; + use arrow_schema::{DataType, Field}; use chrono::NaiveDate; use half::f16; + use std::sync::Arc; #[derive(Clone)] struct DecimalCastTestConfig { @@ -7794,8 +7833,6 @@ mod tests { #[test] fn test_cast_utf8_dict() { // FROM a dictionary with of Utf8 values - use DataType::*; - let mut builder = StringDictionaryBuilder::::new(); builder.append("one").unwrap(); builder.append_null(); @@ -7850,7 +7887,6 @@ mod tests { #[test] fn test_cast_dict_to_dict_bad_index_value_primitive() { - use DataType::*; // test converting from an array that has indexes of a type // that are out of bounds for a particular other kind of // index. @@ -7878,7 +7914,6 @@ mod tests { #[test] fn test_cast_dict_to_dict_bad_index_value_utf8() { - use DataType::*; // Same test as test_cast_dict_to_dict_bad_index_value but use // string values (and encode the expected behavior here); @@ -7907,8 +7942,6 @@ mod tests { #[test] fn test_cast_primitive_dict() { // FROM a dictionary with of INT32 values - use DataType::*; - let mut builder = PrimitiveDictionaryBuilder::::new(); builder.append(1).unwrap(); builder.append_null(); @@ -7929,8 +7962,6 @@ mod tests { #[test] fn test_cast_primitive_array_to_dict() { - use DataType::*; - let mut builder = PrimitiveBuilder::::new(); builder.append_value(1); builder.append_null(); @@ -11417,4 +11448,422 @@ mod tests { "Invalid argument error: -1.0 is too small to store in a Decimal32 of precision 1. Min is -0.9" ); } + + #[test] + fn test_run_end_encoded_to_primitive() { + // Create a RunEndEncoded array: [1, 1, 2, 2, 2, 3] + let run_ends = Int32Array::from(vec![2, 5, 6]); + let values = Int32Array::from(vec![1, 2, 3]); + let run_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(run_array) as ArrayRef; + // Cast to Int64 + let cast_result = cast(&array_ref, &DataType::Int64).unwrap(); + // Verify the result is a RunArray with Int64 values + let result_run_array = cast_result.as_any().downcast_ref::().unwrap(); + assert_eq!( + result_run_array.values(), + &[1i64, 1i64, 2i64, 2i64, 2i64, 3i64] + ); + } + + #[test] + fn test_run_end_encoded_to_string() { + let run_ends = Int32Array::from(vec![2, 3, 5]); + let values = Int32Array::from(vec![10, 20, 30]); + let run_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(run_array) as ArrayRef; + + // Cast to String + let cast_result = cast(&array_ref, &DataType::Utf8).unwrap(); + + // Verify the result is a RunArray with String values + let result_array = cast_result.as_any().downcast_ref::().unwrap(); + // Check that values are correct + assert_eq!(result_array.value(0), "10"); + assert_eq!(result_array.value(1), "10"); + assert_eq!(result_array.value(2), "20"); + } + + #[test] + fn test_primitive_to_run_end_encoded() { + // Create an Int32 array with repeated values: [1, 1, 2, 2, 2, 3] + let source_array = Int32Array::from(vec![1, 1, 2, 2, 2, 3]); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is a RunArray + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check run structure: runs should end at positions [2, 5, 6] + assert_eq!(result_run_array.run_ends().values(), &[2, 5, 6]); + + // Check values: should be [1, 2, 3] + let values_array = result_run_array.values().as_primitive::(); + assert_eq!(values_array.values(), &[1, 2, 3]); + } + + #[test] + fn test_primitive_to_run_end_encoded_with_nulls() { + let source_array = Int32Array::from(vec![ + Some(1), + Some(1), + None, + None, + Some(2), + Some(2), + Some(3), + Some(3), + None, + None, + Some(4), + Some(4), + Some(5), + Some(5), + None, + None, + ]); + let array_ref = Arc::new(source_array) as ArrayRef; + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!( + result_run_array.run_ends().values(), + &[2, 4, 6, 8, 10, 12, 14, 16] + ); + assert_eq!( + result_run_array + .values() + .as_primitive::() + .values(), + &[1, 0, 2, 3, 0, 4, 5, 0] + ); + assert_eq!(result_run_array.values().null_count(), 3); + } + + #[test] + fn test_primitive_to_run_end_encoded_with_nulls_consecutive() { + let source_array = Int64Array::from(vec![ + Some(1), + Some(1), + None, + None, + None, + None, + None, + None, + None, + None, + Some(4), + Some(20), + Some(500), + Some(500), + None, + None, + ]); + let array_ref = Arc::new(source_array) as ArrayRef; + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Int64, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!( + result_run_array.run_ends().values(), + &[2, 10, 11, 12, 14, 16] + ); + assert_eq!( + result_run_array + .values() + .as_primitive::() + .values(), + &[1, 0, 4, 20, 500, 0] + ); + assert_eq!(result_run_array.values().null_count(), 2); + } + + #[test] + fn test_string_to_run_end_encoded() { + // Create a String array with repeated values: ["a", "a", "b", "c", "c"] + let source_array = StringArray::from(vec!["a", "a", "b", "c", "c"]); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is a RunArray + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check run structure: runs should end at positions [2, 3, 5] + assert_eq!(result_run_array.run_ends().values(), &[2, 3, 5]); + + // Check values: should be ["a", "b", "c"] + let values_array = result_run_array.values().as_string::(); + assert_eq!(values_array.value(0), "a"); + assert_eq!(values_array.value(1), "b"); + assert_eq!(values_array.value(2), "c"); + } + + #[test] + fn test_empty_array_to_run_end_encoded() { + // Create an empty Int32 array + let source_array = Int32Array::from(Vec::::new()); + let array_ref = Arc::new(source_array) as ArrayRef; + + // Cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int32, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + + // Verify the result is an empty RunArray + let result_run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + + // Check that both run_ends and values are empty + assert_eq!(result_run_array.run_ends().len(), 0); + assert_eq!(result_run_array.values().len(), 0); + } + + #[test] + fn test_run_end_encoded_with_nulls() { + // Create a RunEndEncoded array with nulls: [1, 1, null, 2, 2] + let run_ends = Int32Array::from(vec![2, 3, 5]); + let values = Int32Array::from(vec![Some(1), None, Some(2)]); + let run_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(run_array) as ArrayRef; + + // Cast to String + let cast_result = cast(&array_ref, &DataType::Utf8).unwrap(); + + // Verify the result preserves nulls + let result_run_array = cast_result.as_any().downcast_ref::().unwrap(); + assert_eq!(result_run_array.value(0), "1"); + assert!(result_run_array.is_null(2)); + assert_eq!(result_run_array.value(4), "2"); + } + + #[test] + fn test_different_index_types() { + // Test with Int16 index type + let source_array = Int32Array::from(vec![1, 1, 2, 3, 3]); + let array_ref = Arc::new(source_array) as ArrayRef; + + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + assert_eq!(cast_result.data_type(), &target_type); + + // Verify the cast worked correctly: values are [1, 2, 3] + // and run-ends are [2, 3, 5] + let run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!(run_array.values().as_primitive::().value(0), 1); + assert_eq!(run_array.values().as_primitive::().value(1), 2); + assert_eq!(run_array.values().as_primitive::().value(2), 3); + assert_eq!(run_array.run_ends().values(), &[2i16, 3i16, 5i16]); + + // Test again with Int64 index type + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int64, false)), + Arc::new(Field::new("values", DataType::Int32, true)), + ); + let cast_result = cast(&array_ref, &target_type).unwrap(); + assert_eq!(cast_result.data_type(), &target_type); + + // Verify the cast worked correctly: values are [1, 2, 3] + // and run-ends are [2, 3, 5] + let run_array = cast_result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!(run_array.values().as_primitive::().value(0), 1); + assert_eq!(run_array.values().as_primitive::().value(1), 2); + assert_eq!(run_array.values().as_primitive::().value(2), 3); + assert_eq!(run_array.run_ends().values(), &[2i64, 3i64, 5i64]); + } + + #[test] + fn test_unsupported_cast_to_run_end_encoded() { + // Create a Struct array - complex nested type that might not be supported + let field = Field::new("item", DataType::Int32, false); + let struct_array = StructArray::from(vec![( + Arc::new(field), + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + )]); + let array_ref = Arc::new(struct_array) as ArrayRef; + + // This should fail because: + // 1. The target type is not RunEndEncoded + // 2. The target type is not supported for casting from StructArray + let cast_result = cast(&array_ref, &DataType::FixedSizeBinary(10)); + + // Expect this to fail + assert!(cast_result.is_err()); + } + + /// Test casting RunEndEncoded to RunEndEncoded should fail + #[test] + fn test_cast_run_end_encoded_int64_to_int16_should_fail() { + // Construct a valid REE array with Int64 run-ends + let run_ends = Int64Array::from(vec![100_000, 400_000, 700_000]); // values too large for Int16 + let values = StringArray::from(vec!["a", "b", "c"]); + + let ree_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(ree_array) as ArrayRef; + + // Attempt to cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: false, // This should make it fail instead of returning nulls + format_options: FormatOptions::default(), + }; + + // This should fail due to run-end overflow + let result: Result, ArrowError> = + cast_with_options(&array_ref, &target_type, &cast_options); + + let e = result.expect_err("Cast should have failed but succeeded"); + assert!( + e.to_string() + .contains("Cast error: Can't cast value 100000 to type Int16") + ); + } + + #[test] + fn test_cast_run_end_encoded_int64_to_int16_with_safe_should_fail_with_null_invalid_error() { + // Construct a valid REE array with Int64 run-ends + let run_ends = Int64Array::from(vec![100_000, 400_000, 700_000]); // values too large for Int16 + let values = StringArray::from(vec!["a", "b", "c"]); + + let ree_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(ree_array) as ArrayRef; + + // Attempt to cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int16, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: true, + format_options: FormatOptions::default(), + }; + + // This fails even though safe is true because the run_ends array has null values + let result: Result, ArrowError> = + cast_with_options(&array_ref, &target_type, &cast_options); + let e = result.expect_err("Cast should have failed but succeeded"); + assert!( + e.to_string() + .contains("Invalid argument error: Found null values in run_ends array. The run_ends array should not have null values.") + ); + } + + /// Test casting RunEndEncoded to RunEndEncoded should succeed + #[test] + fn test_cast_run_end_encoded_int16_to_int64_should_succeed() { + // Construct a valid REE array with Int16 run-ends + let run_ends = Int16Array::from(vec![2, 5, 8]); // values that fit in Int16 + let values = StringArray::from(vec!["a", "b", "c"]); + + let ree_array = RunArray::::try_new(&run_ends, &values).unwrap(); + let array_ref = Arc::new(ree_array) as ArrayRef; + + // Attempt to cast to RunEndEncoded (upcast should succeed) + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int64, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: false, + format_options: FormatOptions::default(), + }; + + // This should succeed due to valid upcast + let result: Result, ArrowError> = + cast_with_options(&array_ref, &target_type, &cast_options); + + let array_ref = result.expect("Cast should have succeeded but failed"); + // Downcast to RunArray + let run_array = array_ref + .as_any() + .downcast_ref::>() + .unwrap(); + + // Verify the cast worked correctly + // Assert the values were cast correctly + assert_eq!(run_array.run_ends().values(), &[2i64, 5i64, 8i64]); + assert_eq!(run_array.values().as_string::().value(0), "a"); + assert_eq!(run_array.values().as_string::().value(1), "b"); + assert_eq!(run_array.values().as_string::().value(2), "c"); + } + + #[test] + fn test_cast_run_end_encoded_dictionary_to_run_end_encoded() { + // Construct a valid dictionary encoded array + let values = StringArray::from_iter([Some("a"), Some("b"), Some("c")]); + let keys = UInt64Array::from_iter(vec![1, 1, 1, 0, 0, 0, 2, 2, 2]); + let array_ref = Arc::new(DictionaryArray::new(keys, Arc::new(values))) as ArrayRef; + + // Attempt to cast to RunEndEncoded + let target_type = DataType::RunEndEncoded( + Arc::new(Field::new("run_ends", DataType::Int64, false)), + Arc::new(Field::new("values", DataType::Utf8, true)), + ); + let cast_options = CastOptions { + safe: false, + format_options: FormatOptions::default(), + }; + + // This should succeed + let result = cast_with_options(&array_ref, &target_type, &cast_options) + .expect("Cast should have succeeded but failed"); + + // Verify the cast worked correctly + // Assert the values were cast correctly + let run_array = result + .as_any() + .downcast_ref::>() + .unwrap(); + assert_eq!(run_array.values().as_string::().value(0), "b"); + assert_eq!(run_array.values().as_string::().value(1), "a"); + assert_eq!(run_array.values().as_string::().value(2), "c"); + + // Verify the run-ends were cast correctly (run ends at 3, 6, 9) + assert_eq!(run_array.run_ends().values(), &[3i64, 6i64, 9i64]); + } } diff --git a/arrow-cast/src/cast/run_array.rs b/arrow-cast/src/cast/run_array.rs new file mode 100644 index 000000000000..8d70afef3ab6 --- /dev/null +++ b/arrow-cast/src/cast/run_array.rs @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::cast::*; +use arrow_ord::partition::partition; + +/// Attempts to cast a `RunArray` with index type K into +/// `to_type` for supported types. +pub(crate) fn run_end_encoded_cast( + array: &dyn Array, + to_type: &DataType, + cast_options: &CastOptions, +) -> Result { + match array.data_type() { + DataType::RunEndEncoded(_, _) => { + let run_array = array + .as_any() + .downcast_ref::>() + .ok_or_else(|| ArrowError::CastError("Expected RunArray".to_string()))?; + + let values = run_array.values(); + + match to_type { + // Stay as RunEndEncoded, cast only the values + DataType::RunEndEncoded(target_index_field, target_value_field) => { + let cast_values = + cast_with_options(values, target_value_field.data_type(), cast_options)?; + + let run_ends_array = PrimitiveArray::::from_iter_values( + run_array.run_ends().values().iter().copied(), + ); + let cast_run_ends = cast_with_options( + &run_ends_array, + target_index_field.data_type(), + cast_options, + )?; + let new_run_array: ArrayRef = match target_index_field.data_type() { + DataType::Int16 => { + let re = cast_run_ends.as_primitive::(); + Arc::new(RunArray::::try_new(re, cast_values.as_ref())?) + } + DataType::Int32 => { + let re = cast_run_ends.as_primitive::(); + Arc::new(RunArray::::try_new(re, cast_values.as_ref())?) + } + DataType::Int64 => { + let re = cast_run_ends.as_primitive::(); + Arc::new(RunArray::::try_new(re, cast_values.as_ref())?) + } + _ => { + return Err(ArrowError::CastError( + "Run-end type must be i16, i32, or i64".to_string(), + )); + } + }; + Ok(Arc::new(new_run_array)) + } + + // Expand to logical form + _ => { + let run_ends = run_array.run_ends().values().to_vec(); + let mut indices = Vec::with_capacity(run_array.run_ends().len()); + let mut physical_idx: usize = 0; + for logical_idx in 0..run_array.run_ends().len() { + // If the logical index is equal to the (next) run end, increment the physical index, + // since we are at the end of a run. + if logical_idx == run_ends[physical_idx].as_usize() { + physical_idx += 1; + } + indices.push(physical_idx as i32); + } + + let taken = take(&values, &Int32Array::from_iter_values(indices), None)?; + if taken.data_type() != to_type { + cast_with_options(taken.as_ref(), to_type, cast_options) + } else { + Ok(taken) + } + } + } + } + + _ => Err(ArrowError::CastError(format!( + "Cannot cast array of type {:?} to RunEndEncodedArray", + array.data_type() + ))), + } +} + +/// Attempts to encode an array into a `RunArray` with index type K +/// and value type `value_type` +pub(crate) fn cast_to_run_end_encoded( + array: &ArrayRef, + value_type: &DataType, + cast_options: &CastOptions, +) -> Result { + let mut run_ends_builder = PrimitiveBuilder::::new(); + + // Cast the input array to the target value type if necessary + let cast_array = if array.data_type() == value_type { + array + } else { + &cast_with_options(array, value_type, cast_options)? + }; + + // Return early if the array to cast is empty + if cast_array.is_empty() { + let empty_run_ends = run_ends_builder.finish(); + let empty_values = make_array(ArrayData::new_empty(value_type)); + return Ok(Arc::new(RunArray::::try_new( + &empty_run_ends, + empty_values.as_ref(), + )?)); + } + + // REE arrays are handled by run_end_encoded_cast + if let DataType::RunEndEncoded(_, _) = array.data_type() { + return Err(ArrowError::CastError( + "Source array is already a RunEndEncoded array, should have been handled by run_end_encoded_cast".to_string() + )); + } + + // Partition the array to identify runs of consecutive equal values + let partitions = partition(&[Arc::clone(cast_array)])?; + let mut run_ends = Vec::new(); + let mut values_indexes = Vec::new(); + let mut last_partition_end = 0; + for partition in partitions.ranges() { + values_indexes.push(last_partition_end); + run_ends.push(partition.end); + last_partition_end = partition.end; + } + + // Build the run_ends array + for run_end in run_ends { + run_ends_builder.append_value(K::Native::from_usize(run_end).ok_or_else(|| { + ArrowError::CastError(format!("Run end index out of range: {}", run_end)) + })?); + } + let run_ends_array = run_ends_builder.finish(); + // Build the values array by taking elements at the run start positions + let indices = PrimitiveArray::::from_iter_values( + values_indexes.iter().map(|&idx| idx as u32), + ); + let values_array = take(&cast_array, &indices, None)?; + + // Create and return the RunArray + let run_array = RunArray::::try_new(&run_ends_array, values_array.as_ref())?; + Ok(Arc::new(run_array)) +}