|
19 | 19 |
|
20 | 20 | use crate::utils::make_scalar_function; |
21 | 21 | use arrow::array::{ |
22 | | - Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, MutableArrayData, |
23 | | - OffsetSizeTrait, |
| 22 | + Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, |
| 23 | + GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array, |
| 24 | +}; |
| 25 | +use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; |
| 26 | +use arrow::compute::take; |
| 27 | +use arrow::datatypes::DataType::{ |
| 28 | + FixedSizeList, LargeList, LargeListView, List, ListView, Null, |
24 | 29 | }; |
25 | | -use arrow::buffer::OffsetBuffer; |
26 | | -use arrow::datatypes::DataType::{FixedSizeList, LargeList, List, Null}; |
27 | 30 | use arrow::datatypes::{DataType, FieldRef}; |
28 | 31 | use datafusion_common::cast::{ |
29 | | - as_fixed_size_list_array, as_large_list_array, as_list_array, |
| 32 | + as_fixed_size_list_array, as_large_list_array, as_large_list_view_array, |
| 33 | + as_list_array, as_list_view_array, |
30 | 34 | }; |
31 | 35 | use datafusion_common::{exec_err, utils::take_function_args, Result}; |
32 | 36 | use datafusion_expr::{ |
33 | 37 | ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, |
34 | 38 | }; |
35 | 39 | use datafusion_macros::user_doc; |
36 | 40 | use std::any::Any; |
| 41 | +use std::mem::size_of; |
37 | 42 | use std::sync::Arc; |
38 | 43 |
|
39 | 44 | make_udf_expr_and_func!( |
@@ -133,6 +138,14 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> { |
133 | 138 | fixed_size_array_reverse(array, field) |
134 | 139 | } |
135 | 140 | Null => Ok(Arc::clone(input_array)), |
| 141 | + ListView(field) => { |
| 142 | + let array = as_list_view_array(input_array)?; |
| 143 | + list_view_reverse::<i32>(array, field) |
| 144 | + } |
| 145 | + LargeListView(field) => { |
| 146 | + let array = as_large_list_view_array(input_array)?; |
| 147 | + list_view_reverse::<i64>(array, field) |
| 148 | + } |
136 | 149 | array_type => exec_err!("array_reverse does not support type '{array_type}'."), |
137 | 150 | } |
138 | 151 | } |
@@ -183,6 +196,75 @@ fn general_array_reverse<O: OffsetSizeTrait + TryFrom<i64>>( |
183 | 196 | )?)) |
184 | 197 | } |
185 | 198 |
|
| 199 | +fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>( |
| 200 | + array: &GenericListViewArray<O>, |
| 201 | + field: &FieldRef, |
| 202 | +) -> Result<ArrayRef> { |
| 203 | + let (_, offsets, sizes, values, nulls) = array.clone().into_parts(); |
| 204 | + |
| 205 | + // Construct indices, sizes and offsets for the reversed array by iterating over |
| 206 | + // the list view array in the logical order, and reversing the order of the elements. |
| 207 | + // We end up with a list view array where the elements are in order, |
| 208 | + // even if the original array had elements out of order. |
| 209 | + let mut indices: Vec<O> = Vec::with_capacity(values.len()); |
| 210 | + let mut new_sizes = Vec::with_capacity(sizes.len()); |
| 211 | + let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len()); |
| 212 | + let mut new_nulls = |
| 213 | + Vec::with_capacity(nulls.clone().map(|nulls| nulls.len()).unwrap_or(0)); |
| 214 | + new_offsets.push(O::zero()); |
| 215 | + let has_nulls = nulls.is_some(); |
| 216 | + for (i, offset) in offsets.iter().enumerate().take(offsets.len()) { |
| 217 | + // If this array is null, we set the new array to null with size 0 and continue |
| 218 | + if let Some(ref nulls) = nulls { |
| 219 | + if nulls.is_null(i) { |
| 220 | + new_nulls.push(false); // null |
| 221 | + new_sizes.push(O::zero()); |
| 222 | + new_offsets.push(new_offsets[i]); |
| 223 | + continue; |
| 224 | + } else { |
| 225 | + new_nulls.push(true); // valid |
| 226 | + } |
| 227 | + } |
| 228 | + |
| 229 | + // Each array is located at [offset, offset + size), so we collect indices in the reverse order |
| 230 | + let array_start = offset.as_usize(); |
| 231 | + let array_end = array_start + sizes[i].as_usize(); |
| 232 | + for idx in (array_start..array_end).rev() { |
| 233 | + indices.push(O::usize_as(idx)); |
| 234 | + } |
| 235 | + new_sizes.push(sizes[i]); |
| 236 | + if i < sizes.len() - 1 { |
| 237 | + new_offsets.push(new_offsets[i] + sizes[i]); |
| 238 | + } |
| 239 | + } |
| 240 | + |
| 241 | + // Materialize values from underlying array with take |
| 242 | + let indices_array: ArrayRef = match size_of::<O>() { |
| 243 | + 4 => Arc::new(UInt32Array::from( |
| 244 | + indices |
| 245 | + .iter() |
| 246 | + .map(|i| i.as_usize() as u32) |
| 247 | + .collect::<Vec<_>>(), |
| 248 | + )), |
| 249 | + 8 => Arc::new(arrow::array::UInt64Array::from( |
| 250 | + indices |
| 251 | + .iter() |
| 252 | + .map(|i| i.as_usize() as u64) |
| 253 | + .collect::<Vec<_>>(), |
| 254 | + )), |
| 255 | + _ => exec_err!("OffsetSizeTrait only supports i32 and i64")?, |
| 256 | + }; |
| 257 | + let values_reversed = take(&values, &indices_array, None)?; |
| 258 | + |
| 259 | + Ok(Arc::new(GenericListViewArray::<O>::try_new( |
| 260 | + Arc::clone(field), |
| 261 | + ScalarBuffer::from(new_offsets), |
| 262 | + ScalarBuffer::from(new_sizes), |
| 263 | + values_reversed, |
| 264 | + has_nulls.then_some(NullBuffer::from(new_nulls)), |
| 265 | + )?)) |
| 266 | +} |
| 267 | + |
186 | 268 | fn fixed_size_array_reverse( |
187 | 269 | array: &FixedSizeListArray, |
188 | 270 | field: &FieldRef, |
@@ -219,3 +301,129 @@ fn fixed_size_array_reverse( |
219 | 301 | Some(nulls.into()), |
220 | 302 | )?)) |
221 | 303 | } |
| 304 | + |
| 305 | +#[cfg(test)] |
| 306 | +mod tests { |
| 307 | + use crate::reverse::list_view_reverse; |
| 308 | + use arrow::{ |
| 309 | + array::{AsArray, Int32Array, LargeListViewArray, ListViewArray}, |
| 310 | + buffer::{NullBuffer, ScalarBuffer}, |
| 311 | + datatypes::{DataType, Field, Int32Type}, |
| 312 | + }; |
| 313 | + use std::sync::Arc; |
| 314 | + |
| 315 | + #[test] |
| 316 | + fn test_reverse_list_view_and_large_list_view() { |
| 317 | + // ListView |
| 318 | + let list_view = ListViewArray::new( |
| 319 | + Arc::new(Field::new("a", DataType::Int32, false)), |
| 320 | + ScalarBuffer::from(vec![0, 1, 6, 6]), |
| 321 | + ScalarBuffer::from(vec![1, 5, 0, 3]), |
| 322 | + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])), |
| 323 | + Some(NullBuffer::from(vec![true, true, false, true])), |
| 324 | + ); |
| 325 | + let result = list_view_reverse( |
| 326 | + &list_view, |
| 327 | + &Arc::new(Field::new("test", DataType::Int32, true)), |
| 328 | + ) |
| 329 | + .unwrap(); |
| 330 | + let list_view_reversed: Vec<_> = result |
| 331 | + .as_list_view::<i32>() |
| 332 | + .iter() |
| 333 | + .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
| 334 | + .collect(); |
| 335 | + |
| 336 | + // LargeListView |
| 337 | + let large_list_view = LargeListViewArray::new( |
| 338 | + Arc::new(Field::new("a", DataType::Int32, false)), |
| 339 | + ScalarBuffer::from(vec![0, 1, 6, 6]), |
| 340 | + ScalarBuffer::from(vec![1, 5, 0, 3]), |
| 341 | + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])), |
| 342 | + Some(NullBuffer::from(vec![true, true, false, true])), |
| 343 | + ); |
| 344 | + let result = list_view_reverse( |
| 345 | + &large_list_view, |
| 346 | + &Arc::new(Field::new("test", DataType::Int32, true)), |
| 347 | + ) |
| 348 | + .unwrap(); |
| 349 | + let large_list_view_reversed: Vec<_> = result |
| 350 | + .as_list_view::<i64>() |
| 351 | + .iter() |
| 352 | + .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
| 353 | + .collect(); |
| 354 | + |
| 355 | + // Check results |
| 356 | + let expected = vec![ |
| 357 | + Some(vec![1]), |
| 358 | + Some(vec![6, 5, 4, 3, 2]), |
| 359 | + None, |
| 360 | + Some(vec![9, 8, 7]), |
| 361 | + ]; |
| 362 | + assert_eq!(expected, list_view_reversed); |
| 363 | + assert_eq!(expected, large_list_view_reversed); |
| 364 | + } |
| 365 | + |
| 366 | + #[test] |
| 367 | + fn test_reverse_list_view_out_of_order() { |
| 368 | + let list_view = ListViewArray::new( |
| 369 | + Arc::new(Field::new("a", DataType::Int32, false)), |
| 370 | + ScalarBuffer::from(vec![6, 1, 6, 0]), // out of order |
| 371 | + ScalarBuffer::from(vec![3, 5, 0, 1]), |
| 372 | + Arc::new(Int32Array::from(vec![ |
| 373 | + 1, // fourth array: offset 0, size 1 |
| 374 | + 2, 3, 4, 5, 6, // second array: offset 1, size 5 |
| 375 | + // third array null but size 0 |
| 376 | + 7, 8, 9, // first array: offset 6, size 3 |
| 377 | + ])), |
| 378 | + Some(NullBuffer::from(vec![true, true, false, true])), |
| 379 | + ); |
| 380 | + let list_view_reversed: Vec<_> = list_view_reverse( |
| 381 | + &list_view, |
| 382 | + &Arc::new(Field::new("test", DataType::Int32, true)), |
| 383 | + ) |
| 384 | + .unwrap() |
| 385 | + .as_list_view::<i32>() |
| 386 | + .iter() |
| 387 | + .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
| 388 | + .collect(); |
| 389 | + let expected = vec![ |
| 390 | + Some(vec![9, 8, 7]), |
| 391 | + Some(vec![6, 5, 4, 3, 2]), |
| 392 | + None, |
| 393 | + Some(vec![1]), |
| 394 | + ]; |
| 395 | + assert_eq!(expected, list_view_reversed); |
| 396 | + } |
| 397 | + |
| 398 | + #[test] |
| 399 | + fn test_reverse_list_view_with_nulls() { |
| 400 | + let list_view = ListViewArray::new( |
| 401 | + Arc::new(Field::new("a", DataType::Int32, false)), |
| 402 | + ScalarBuffer::from(vec![16, 1, 6, 0]), // out of order |
| 403 | + ScalarBuffer::from(vec![3, 5, 10, 1]), |
| 404 | + Arc::new(Int32Array::from(vec![ |
| 405 | + 1, // fourth array: offset 0, size 1 |
| 406 | + 2, 3, 4, 5, 6, // second array: offset 1, size 5 |
| 407 | + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // third array: offset 6, size 10 |
| 408 | + 7, 8, 9, // first array: offset 6, size 3 |
| 409 | + ])), |
| 410 | + Some(NullBuffer::from(vec![true, true, false, true])), |
| 411 | + ); |
| 412 | + let list_view_reversed: Vec<_> = list_view_reverse( |
| 413 | + &list_view, |
| 414 | + &Arc::new(Field::new("test", DataType::Int32, true)), |
| 415 | + ) |
| 416 | + .unwrap() |
| 417 | + .as_list_view::<i32>() |
| 418 | + .iter() |
| 419 | + .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
| 420 | + .collect(); |
| 421 | + let expected = vec![ |
| 422 | + Some(vec![9, 8, 7]), |
| 423 | + Some(vec![6, 5, 4, 3, 2]), |
| 424 | + None, |
| 425 | + Some(vec![1]), |
| 426 | + ]; |
| 427 | + assert_eq!(expected, list_view_reversed); |
| 428 | + } |
| 429 | +} |
0 commit comments