|
19 | 19 |
|
20 | 20 | use crate::utils::make_scalar_function; |
21 | 21 | use arrow::array::{ |
22 | | - Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, MutableArrayData, |
23 | | - OffsetSizeTrait, |
| 22 | + Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, |
| 23 | + GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array, |
| 24 | +}; |
| 25 | +use arrow::buffer::{OffsetBuffer, ScalarBuffer}; |
| 26 | +use arrow::compute::take; |
| 27 | +use arrow::datatypes::DataType::{ |
| 28 | + FixedSizeList, LargeList, LargeListView, List, ListView, Null, |
24 | 29 | }; |
25 | | -use arrow::buffer::OffsetBuffer; |
26 | | -use arrow::datatypes::DataType::{FixedSizeList, LargeList, List, Null}; |
27 | 30 | use arrow::datatypes::{DataType, FieldRef}; |
28 | 31 | use datafusion_common::cast::{ |
29 | | - as_fixed_size_list_array, as_large_list_array, as_list_array, |
| 32 | + as_fixed_size_list_array, as_large_list_array, as_large_list_view_array, |
| 33 | + as_list_array, as_list_view_array, |
30 | 34 | }; |
31 | 35 | use datafusion_common::{exec_err, utils::take_function_args, Result}; |
32 | 36 | use datafusion_expr::{ |
@@ -134,6 +138,14 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> { |
134 | 138 | fixed_size_array_reverse(array, field) |
135 | 139 | } |
136 | 140 | Null => Ok(Arc::clone(input_array)), |
| 141 | + ListView(field) => { |
| 142 | + let array = as_list_view_array(input_array)?; |
| 143 | + list_view_reverse::<i32>(array, field) |
| 144 | + } |
| 145 | + LargeListView(field) => { |
| 146 | + let array = as_large_list_view_array(input_array)?; |
| 147 | + list_view_reverse::<i64>(array, field) |
| 148 | + } |
137 | 149 | array_type => exec_err!("array_reverse does not support type '{array_type}'."), |
138 | 150 | } |
139 | 151 | } |
@@ -175,6 +187,75 @@ fn general_array_reverse<O: OffsetSizeTrait>( |
175 | 187 | )?)) |
176 | 188 | } |
177 | 189 |
|
| 190 | +/// Reverses a list view array. |
| 191 | +/// |
| 192 | +/// Construct indices, sizes and offsets for the reversed array by iterating over |
| 193 | +/// the list view array in the logical order, and reversing the order of the elements. |
| 194 | +/// We end up with a list view array where the elements are in order, |
| 195 | +/// even if the original array had elements out of order. |
| 196 | +fn list_view_reverse<O: OffsetSizeTrait>( |
| 197 | + array: &GenericListViewArray<O>, |
| 198 | + field: &FieldRef, |
| 199 | +) -> Result<ArrayRef> { |
| 200 | + let offsets = array.offsets(); |
| 201 | + let values = array.values(); |
| 202 | + let sizes = array.sizes(); |
| 203 | + |
| 204 | + let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len()); |
| 205 | + let mut indices: Vec<O> = Vec::with_capacity(values.len()); |
| 206 | + let mut new_sizes = Vec::with_capacity(sizes.len()); |
| 207 | + |
| 208 | + let mut current_offset = O::zero(); |
| 209 | + for (row_index, offset) in offsets.iter().enumerate() { |
| 210 | + new_offsets.push(current_offset); |
| 211 | + |
| 212 | + // If this array is null, we set its size to 0 and continue |
| 213 | + if array.is_null(row_index) { |
| 214 | + new_sizes.push(O::zero()); |
| 215 | + continue; |
| 216 | + } |
| 217 | + let size = sizes[row_index]; |
| 218 | + new_sizes.push(size); |
| 219 | + |
| 220 | + // Each array is located at [offset, offset + size), collect indices in the reverse order |
| 221 | + let array_start = *offset; |
| 222 | + let array_end = array_start + size; |
| 223 | + let mut idx = array_end - O::one(); |
| 224 | + while idx >= array_start { |
| 225 | + indices.push(idx); |
| 226 | + idx = idx - O::one(); |
| 227 | + } |
| 228 | + |
| 229 | + current_offset += size; |
| 230 | + } |
| 231 | + |
| 232 | + // Materialize values from underlying array with take |
| 233 | + let indices_array: ArrayRef = if O::IS_LARGE { |
| 234 | + Arc::new(arrow::array::UInt64Array::from( |
| 235 | + indices |
| 236 | + .iter() |
| 237 | + .map(|i| i.as_usize() as u64) |
| 238 | + .collect::<Vec<_>>(), |
| 239 | + )) |
| 240 | + } else { |
| 241 | + Arc::new(UInt32Array::from( |
| 242 | + indices |
| 243 | + .iter() |
| 244 | + .map(|i| i.as_usize() as u32) |
| 245 | + .collect::<Vec<_>>(), |
| 246 | + )) |
| 247 | + }; |
| 248 | + let values_reversed = take(&values, &indices_array, None)?; |
| 249 | + |
| 250 | + Ok(Arc::new(GenericListViewArray::<O>::try_new( |
| 251 | + Arc::clone(field), |
| 252 | + ScalarBuffer::from(new_offsets), |
| 253 | + ScalarBuffer::from(new_sizes), |
| 254 | + values_reversed, |
| 255 | + array.nulls().cloned(), |
| 256 | + )?)) |
| 257 | +} |
| 258 | + |
178 | 259 | fn fixed_size_array_reverse( |
179 | 260 | array: &FixedSizeListArray, |
180 | 261 | field: &FieldRef, |
@@ -207,3 +288,166 @@ fn fixed_size_array_reverse( |
207 | 288 | array.nulls().cloned(), |
208 | 289 | )?)) |
209 | 290 | } |
| 291 | + |
#[cfg(test)]
mod tests {
    use crate::reverse::list_view_reverse;
    use arrow::{
        array::{AsArray, GenericListViewArray, Int32Array, OffsetSizeTrait},
        buffer::{NullBuffer, ScalarBuffer},
        datatypes::{DataType, Field, Int32Type},
    };
    use datafusion_common::Result;
    use std::sync::Arc;

    /// Collects a list view array of `Int32` values into plain vectors,
    /// with `None` standing for a null row.
    fn list_view_values<O: OffsetSizeTrait>(
        array: &GenericListViewArray<O>,
    ) -> Vec<Option<Vec<i32>>> {
        array
            .iter()
            .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
            .collect()
    }

    /// Builds a list view array of `Int32` values from its raw parts, runs
    /// `list_view_reverse` on it and returns the reversed rows.
    ///
    /// `validity` holds one flag per row (`false` marks a null row); `None`
    /// builds the array without a null buffer.
    fn reverse_rows<O: OffsetSizeTrait>(
        offsets: Vec<O>,
        sizes: Vec<O>,
        values: Vec<i32>,
        validity: Option<Vec<bool>>,
    ) -> Result<Vec<Option<Vec<i32>>>> {
        let list_view = GenericListViewArray::<O>::new(
            Arc::new(Field::new("a", DataType::Int32, false)),
            ScalarBuffer::from(offsets),
            ScalarBuffer::from(sizes),
            Arc::new(Int32Array::from(values)),
            validity.map(NullBuffer::from),
        );
        let result = list_view_reverse(
            &list_view,
            &Arc::new(Field::new("test", DataType::Int32, true)),
        )?;
        Ok(list_view_values(result.as_list_view::<O>()))
    }

    #[test]
    fn test_reverse_list_view() -> Result<()> {
        let reversed = reverse_rows::<i32>(
            vec![0, 1, 6, 6],
            vec![1, 5, 0, 3],
            vec![1, 2, 3, 4, 5, 6, 7, 8, 9],
            Some(vec![true, true, false, true]),
        )?;
        let expected = vec![
            Some(vec![1]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![9, 8, 7]),
        ];
        assert_eq!(expected, reversed);
        Ok(())
    }

    #[test]
    fn test_reverse_large_list_view() -> Result<()> {
        let reversed = reverse_rows::<i64>(
            vec![0, 1, 6, 6],
            vec![1, 5, 0, 3],
            vec![1, 2, 3, 4, 5, 6, 7, 8, 9],
            Some(vec![true, true, false, true]),
        )?;
        let expected = vec![
            Some(vec![1]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![9, 8, 7]),
        ];
        assert_eq!(expected, reversed);
        Ok(())
    }

    #[test]
    fn test_reverse_list_view_out_of_order() -> Result<()> {
        let reversed = reverse_rows::<i32>(
            vec![6, 1, 6, 0], // out of order
            vec![3, 5, 0, 1],
            vec![
                1, // fourth array: offset 0, size 1
                2, 3, 4, 5, 6, // second array: offset 1, size 5
                // third array: offset 6, size 0 (and null)
                7, 8, 9, // first array: offset 6, size 3
            ],
            Some(vec![true, true, false, true]),
        )?;
        let expected = vec![
            Some(vec![9, 8, 7]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![1]),
        ];
        assert_eq!(expected, reversed);
        Ok(())
    }

    #[test]
    fn test_reverse_list_view_with_nulls() -> Result<()> {
        let reversed = reverse_rows::<i32>(
            vec![16, 1, 6, 0], // out of order
            vec![3, 5, 10, 1],
            vec![
                1, // fourth array: offset 0, size 1
                2, 3, 4, 5, 6, // second array: offset 1, size 5
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // third array: offset 6, size 10 (null)
                7, 8, 9, // first array: offset 16, size 3
            ],
            Some(vec![true, true, false, true]),
        )?;
        // The null row has a non-zero size in the input; its values must not
        // appear in the output.
        let expected = vec![
            Some(vec![9, 8, 7]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![1]),
        ];
        assert_eq!(expected, reversed);
        Ok(())
    }

    #[test]
    fn test_reverse_list_view_empty() -> Result<()> {
        let reversed = reverse_rows::<i32>(vec![], vec![], vec![], None)?;
        let expected: Vec<Option<Vec<i32>>> = vec![];
        assert_eq!(expected, reversed);
        Ok(())
    }

    #[test]
    fn test_reverse_list_view_all_nulls() -> Result<()> {
        let reversed = reverse_rows::<i32>(
            vec![0, 1, 2, 3],
            vec![0, 1, 1, 1],
            vec![1, 2, 3, 4],
            Some(vec![false, false, false, false]),
        )?;
        let expected: Vec<Option<Vec<i32>>> = vec![None, None, None, None];
        assert_eq!(expected, reversed);
        Ok(())
    }
}
0 commit comments