Skip to content

Commit 1c0343d

Browse files
committed
Support reverse for ListView
1 parent 73038f5 commit 1c0343d

File tree

2 files changed

+225
-7
lines changed

2 files changed

+225
-7
lines changed

datafusion/common/src/cast.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use crate::{downcast_value, Result};
2424
use arrow::array::{
2525
BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray,
2626
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
27-
Int16Array, Int8Array, LargeBinaryArray, LargeStringArray, StringViewArray,
28-
UInt16Array,
27+
Int16Array, Int8Array, LargeBinaryArray, LargeListViewArray, LargeStringArray,
28+
ListViewArray, StringViewArray, UInt16Array,
2929
};
3030
use arrow::{
3131
array::{
@@ -324,3 +324,13 @@ pub fn as_generic_string_array<T: OffsetSizeTrait>(
324324
) -> Result<&GenericStringArray<T>> {
325325
Ok(downcast_value!(array, GenericStringArray, T))
326326
}
327+
328+
// Downcast Array to ListViewArray
329+
pub fn as_list_view_array(array: &dyn Array) -> Result<&ListViewArray> {
330+
Ok(downcast_value!(array, ListViewArray))
331+
}
332+
333+
// Downcast Array to LargeListViewArray
334+
pub fn as_large_list_view_array(array: &dyn Array) -> Result<&LargeListViewArray> {
335+
Ok(downcast_value!(array, LargeListViewArray))
336+
}

datafusion/functions-nested/src/reverse.rs

Lines changed: 213 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,26 @@
1919
2020
use crate::utils::make_scalar_function;
2121
use arrow::array::{
22-
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, MutableArrayData,
23-
OffsetSizeTrait,
22+
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray,
23+
GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array,
24+
};
25+
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
26+
use arrow::compute::take;
27+
use arrow::datatypes::DataType::{
28+
FixedSizeList, LargeList, LargeListView, List, ListView, Null,
2429
};
25-
use arrow::buffer::OffsetBuffer;
26-
use arrow::datatypes::DataType::{FixedSizeList, LargeList, List, Null};
2730
use arrow::datatypes::{DataType, FieldRef};
2831
use datafusion_common::cast::{
29-
as_fixed_size_list_array, as_large_list_array, as_list_array,
32+
as_fixed_size_list_array, as_large_list_array, as_large_list_view_array,
33+
as_list_array, as_list_view_array,
3034
};
3135
use datafusion_common::{exec_err, utils::take_function_args, Result};
3236
use datafusion_expr::{
3337
ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
3438
};
3539
use datafusion_macros::user_doc;
3640
use std::any::Any;
41+
use std::mem::size_of;
3742
use std::sync::Arc;
3843

3944
make_udf_expr_and_func!(
@@ -133,6 +138,14 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
133138
fixed_size_array_reverse(array, field)
134139
}
135140
Null => Ok(Arc::clone(input_array)),
141+
ListView(field) => {
142+
let array = as_list_view_array(input_array)?;
143+
list_view_reverse::<i32>(array, field)
144+
}
145+
LargeListView(field) => {
146+
let array = as_large_list_view_array(input_array)?;
147+
list_view_reverse::<i64>(array, field)
148+
}
136149
array_type => exec_err!("array_reverse does not support type '{array_type}'."),
137150
}
138151
}
@@ -183,6 +196,75 @@ fn general_array_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
183196
)?))
184197
}
185198

199+
fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
200+
array: &GenericListViewArray<O>,
201+
field: &FieldRef,
202+
) -> Result<ArrayRef> {
203+
let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
204+
205+
// Construct indices, sizes and offsets for the reversed array by iterating over
206+
// the list view array in the logical order, and reversing the order of the elements.
207+
// We end up with a list view array where the elements are in order,
208+
// even if the original array had elements out of order.
209+
let mut indices: Vec<O> = Vec::with_capacity(values.len());
210+
let mut new_sizes = Vec::with_capacity(sizes.len());
211+
let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
212+
let mut new_nulls =
213+
Vec::with_capacity(nulls.clone().map(|nulls| nulls.len()).unwrap_or(0));
214+
new_offsets.push(O::zero());
215+
let has_nulls = nulls.is_some();
216+
for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
217+
// If this array is null, we set the new array to null with size 0 and continue
218+
if let Some(ref nulls) = nulls {
219+
if nulls.is_null(i) {
220+
new_nulls.push(false); // null
221+
new_sizes.push(O::zero());
222+
new_offsets.push(new_offsets[i]);
223+
continue;
224+
} else {
225+
new_nulls.push(true); // valid
226+
}
227+
}
228+
229+
// Each array is located at [offset, offset + size), so we collect indices in the reverse order
230+
let array_start = offset.as_usize();
231+
let array_end = array_start + sizes[i].as_usize();
232+
for idx in (array_start..array_end).rev() {
233+
indices.push(O::usize_as(idx));
234+
}
235+
new_sizes.push(sizes[i]);
236+
if i < sizes.len() - 1 {
237+
new_offsets.push(new_offsets[i] + sizes[i]);
238+
}
239+
}
240+
241+
// Materialize values from underlying array with take
242+
let indices_array: ArrayRef = match size_of::<O>() {
243+
4 => Arc::new(UInt32Array::from(
244+
indices
245+
.iter()
246+
.map(|i| i.as_usize() as u32)
247+
.collect::<Vec<_>>(),
248+
)),
249+
8 => Arc::new(arrow::array::UInt64Array::from(
250+
indices
251+
.iter()
252+
.map(|i| i.as_usize() as u64)
253+
.collect::<Vec<_>>(),
254+
)),
255+
_ => exec_err!("OffsetSizeTrait only supports i32 and i64")?,
256+
};
257+
let values_reversed = take(&values, &indices_array, None)?;
258+
259+
Ok(Arc::new(GenericListViewArray::<O>::try_new(
260+
Arc::clone(field),
261+
ScalarBuffer::from(new_offsets),
262+
ScalarBuffer::from(new_sizes),
263+
values_reversed,
264+
has_nulls.then_some(NullBuffer::from(new_nulls)),
265+
)?))
266+
}
267+
186268
fn fixed_size_array_reverse(
187269
array: &FixedSizeListArray,
188270
field: &FieldRef,
@@ -219,3 +301,129 @@ fn fixed_size_array_reverse(
219301
Some(nulls.into()),
220302
)?))
221303
}
304+
305+
#[cfg(test)]
306+
mod tests {
307+
use crate::reverse::list_view_reverse;
308+
use arrow::{
309+
array::{AsArray, Int32Array, LargeListViewArray, ListViewArray},
310+
buffer::{NullBuffer, ScalarBuffer},
311+
datatypes::{DataType, Field, Int32Type},
312+
};
313+
use std::sync::Arc;
314+
315+
#[test]
316+
fn test_reverse_list_view_and_large_list_view() {
317+
// ListView
318+
let list_view = ListViewArray::new(
319+
Arc::new(Field::new("a", DataType::Int32, false)),
320+
ScalarBuffer::from(vec![0, 1, 6, 6]),
321+
ScalarBuffer::from(vec![1, 5, 0, 3]),
322+
Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
323+
Some(NullBuffer::from(vec![true, true, false, true])),
324+
);
325+
let result = list_view_reverse(
326+
&list_view,
327+
&Arc::new(Field::new("test", DataType::Int32, true)),
328+
)
329+
.unwrap();
330+
let list_view_reversed: Vec<_> = result
331+
.as_list_view::<i32>()
332+
.iter()
333+
.map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
334+
.collect();
335+
336+
// LargeListView
337+
let large_list_view = LargeListViewArray::new(
338+
Arc::new(Field::new("a", DataType::Int32, false)),
339+
ScalarBuffer::from(vec![0, 1, 6, 6]),
340+
ScalarBuffer::from(vec![1, 5, 0, 3]),
341+
Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9])),
342+
Some(NullBuffer::from(vec![true, true, false, true])),
343+
);
344+
let result = list_view_reverse(
345+
&large_list_view,
346+
&Arc::new(Field::new("test", DataType::Int32, true)),
347+
)
348+
.unwrap();
349+
let large_list_view_reversed: Vec<_> = result
350+
.as_list_view::<i64>()
351+
.iter()
352+
.map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
353+
.collect();
354+
355+
// Check results
356+
let expected = vec![
357+
Some(vec![1]),
358+
Some(vec![6, 5, 4, 3, 2]),
359+
None,
360+
Some(vec![9, 8, 7]),
361+
];
362+
assert_eq!(expected, list_view_reversed);
363+
assert_eq!(expected, large_list_view_reversed);
364+
}
365+
366+
#[test]
367+
fn test_reverse_list_view_out_of_order() {
368+
let list_view = ListViewArray::new(
369+
Arc::new(Field::new("a", DataType::Int32, false)),
370+
ScalarBuffer::from(vec![6, 1, 6, 0]), // out of order
371+
ScalarBuffer::from(vec![3, 5, 0, 1]),
372+
Arc::new(Int32Array::from(vec![
373+
1, // fourth array: offset 0, size 1
374+
2, 3, 4, 5, 6, // second array: offset 1, size 5
375+
// third array null but size 0
376+
7, 8, 9, // first array: offset 6, size 3
377+
])),
378+
Some(NullBuffer::from(vec![true, true, false, true])),
379+
);
380+
let list_view_reversed: Vec<_> = list_view_reverse(
381+
&list_view,
382+
&Arc::new(Field::new("test", DataType::Int32, true)),
383+
)
384+
.unwrap()
385+
.as_list_view::<i32>()
386+
.iter()
387+
.map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
388+
.collect();
389+
let expected = vec![
390+
Some(vec![9, 8, 7]),
391+
Some(vec![6, 5, 4, 3, 2]),
392+
None,
393+
Some(vec![1]),
394+
];
395+
assert_eq!(expected, list_view_reversed);
396+
}
397+
398+
#[test]
399+
fn test_reverse_list_view_with_nulls() {
400+
let list_view = ListViewArray::new(
401+
Arc::new(Field::new("a", DataType::Int32, false)),
402+
ScalarBuffer::from(vec![16, 1, 6, 0]), // out of order
403+
ScalarBuffer::from(vec![3, 5, 10, 1]),
404+
Arc::new(Int32Array::from(vec![
405+
1, // fourth array: offset 0, size 1
406+
2, 3, 4, 5, 6, // second array: offset 1, size 5
407+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // third array: offset 6, size 10
408+
7, 8, 9, // first array: offset 6, size 3
409+
])),
410+
Some(NullBuffer::from(vec![true, true, false, true])),
411+
);
412+
let list_view_reversed: Vec<_> = list_view_reverse(
413+
&list_view,
414+
&Arc::new(Field::new("test", DataType::Int32, true)),
415+
)
416+
.unwrap()
417+
.as_list_view::<i32>()
418+
.iter()
419+
.map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
420+
.collect();
421+
let expected = vec![
422+
Some(vec![9, 8, 7]),
423+
Some(vec![6, 5, 4, 3, 2]),
424+
None,
425+
Some(vec![1]),
426+
];
427+
assert_eq!(expected, list_view_reversed);
428+
}
429+
}

0 commit comments

Comments
 (0)