Skip to content

Commit 0175167

Browse files
committed
Support reverse for ListView
1 parent 73038f5 commit 0175167

File tree

2 files changed

+224
-7
lines changed

2 files changed

+224
-7
lines changed

datafusion/common/src/cast.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use crate::{downcast_value, Result};
2424
use arrow::array::{
2525
BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray,
2626
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
27-
Int16Array, Int8Array, LargeBinaryArray, LargeStringArray, StringViewArray,
28-
UInt16Array,
27+
Int16Array, Int8Array, LargeBinaryArray, LargeListViewArray, LargeStringArray,
28+
ListViewArray, StringViewArray, UInt16Array,
2929
};
3030
use arrow::{
3131
array::{
@@ -324,3 +324,13 @@ pub fn as_generic_string_array<T: OffsetSizeTrait>(
324324
) -> Result<&GenericStringArray<T>> {
325325
Ok(downcast_value!(array, GenericStringArray, T))
326326
}
327+
328+
// Downcast Array to ListViewArray
329+
pub fn as_list_view_array(array: &dyn Array) -> Result<&ListViewArray> {
330+
Ok(downcast_value!(array, ListViewArray))
331+
}
332+
333+
// Downcast Array to LargeListViewArray
334+
pub fn as_large_list_view_array(array: &dyn Array) -> Result<&LargeListViewArray> {
335+
Ok(downcast_value!(array, LargeListViewArray))
336+
}

datafusion/functions-nested/src/reverse.rs

Lines changed: 212 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,18 @@
1919
2020
use crate::utils::make_scalar_function;
2121
use arrow::array::{
22-
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, MutableArrayData,
23-
OffsetSizeTrait,
22+
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray,
23+
GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array,
24+
};
25+
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
26+
use arrow::compute::take;
27+
use arrow::datatypes::DataType::{
28+
FixedSizeList, LargeList, LargeListView, List, ListView, Null,
2429
};
25-
use arrow::buffer::OffsetBuffer;
26-
use arrow::datatypes::DataType::{FixedSizeList, LargeList, List, Null};
2730
use arrow::datatypes::{DataType, FieldRef};
2831
use datafusion_common::cast::{
29-
as_fixed_size_list_array, as_large_list_array, as_list_array,
32+
as_fixed_size_list_array, as_large_list_array, as_large_list_view_array,
33+
as_list_array, as_list_view_array,
3034
};
3135
use datafusion_common::{exec_err, utils::take_function_args, Result};
3236
use datafusion_expr::{
@@ -133,6 +137,14 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
133137
fixed_size_array_reverse(array, field)
134138
}
135139
Null => Ok(Arc::clone(input_array)),
140+
ListView(field) => {
141+
let array = as_list_view_array(input_array)?;
142+
list_view_reverse::<i32>(array, field)
143+
}
144+
LargeListView(field) => {
145+
let array = as_large_list_view_array(input_array)?;
146+
list_view_reverse::<i64>(array, field)
147+
}
136148
array_type => exec_err!("array_reverse does not support type '{array_type}'."),
137149
}
138150
}
@@ -183,6 +195,75 @@ fn general_array_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
183195
)?))
184196
}
185197

198+
fn list_view_reverse<O: OffsetSizeTrait + TryFrom<i64>>(
199+
array: &GenericListViewArray<O>,
200+
field: &FieldRef,
201+
) -> Result<ArrayRef> {
202+
let (_, offsets, sizes, values, nulls) = array.clone().into_parts();
203+
204+
// Construct indices, sizes and offsets for the reversed array by iterating over
205+
// the list view array in the logical order, and reversing the order of the elements.
206+
// We end up with a list view array where the elements are in order,
207+
// even if the original array had elements out of order.
208+
let mut indices: Vec<O> = Vec::with_capacity(values.len());
209+
let mut new_sizes = Vec::with_capacity(sizes.len());
210+
let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
211+
let mut new_nulls =
212+
Vec::with_capacity(nulls.clone().map(|nulls| nulls.len()).unwrap_or(0));
213+
new_offsets.push(O::zero());
214+
let has_nulls = nulls.is_some();
215+
for (i, offset) in offsets.iter().enumerate().take(offsets.len()) {
216+
// If this array is null, we set the new array to null with size 0 and continue
217+
if let Some(ref nulls) = nulls {
218+
if nulls.is_null(i) {
219+
new_nulls.push(false); // null
220+
new_sizes.push(O::zero());
221+
new_offsets.push(new_offsets[i]);
222+
continue;
223+
} else {
224+
new_nulls.push(true); // valid
225+
}
226+
}
227+
228+
// Each array is located at [offset, offset + size), so we collect indices in the reverse order
229+
let array_start = offset.as_usize();
230+
let array_end = array_start + sizes[i].as_usize();
231+
for idx in (array_start..array_end).rev() {
232+
indices.push(O::usize_as(idx));
233+
}
234+
new_sizes.push(sizes[i]);
235+
if i < sizes.len() - 1 {
236+
new_offsets.push(new_offsets[i] + sizes[i]);
237+
}
238+
}
239+
240+
// Materialize values from underlying array with take
241+
let indices_array: ArrayRef = if O::IS_LARGE {
242+
Arc::new(arrow::array::UInt64Array::from(
243+
indices
244+
.iter()
245+
.map(|i| i.as_usize() as u64)
246+
.collect::<Vec<_>>(),
247+
))
248+
} else {
249+
Arc::new(UInt32Array::from(
250+
indices
251+
.iter()
252+
.map(|i| i.as_usize() as u32)
253+
.collect::<Vec<_>>(),
254+
))
255+
};
256+
let values_reversed = take(&values, &indices_array, None)?;
257+
258+
Ok(Arc::new(GenericListViewArray::<O>::try_new(
259+
Arc::clone(field),
260+
ScalarBuffer::from(new_offsets),
261+
ScalarBuffer::from(new_sizes),
262+
values_reversed,
263+
has_nulls.then_some(NullBuffer::from(new_nulls)),
264+
)?))
265+
}
266+
186267
fn fixed_size_array_reverse(
187268
array: &FixedSizeListArray,
188269
field: &FieldRef,
@@ -219,3 +300,129 @@ fn fixed_size_array_reverse(
219300
Some(nulls.into()),
220301
)?))
221302
}
303+
304+
#[cfg(test)]
mod tests {
    use crate::reverse::list_view_reverse;
    use arrow::{
        array::{AsArray, GenericListViewArray, Int32Array, OffsetSizeTrait},
        buffer::{NullBuffer, ScalarBuffer},
        datatypes::{DataType, Field, Int32Type},
    };
    use std::sync::Arc;

    /// Logical content of a list-view array of `i32`: one entry per row,
    /// `None` for null rows.
    type Rows = Vec<Option<Vec<i32>>>;

    /// Builds a list-view array over Int32 child values with a non-nullable
    /// child field named "a".
    fn build_list_view<O: OffsetSizeTrait>(
        offsets: Vec<O>,
        sizes: Vec<O>,
        values: Vec<i32>,
        validity: Vec<bool>,
    ) -> GenericListViewArray<O> {
        GenericListViewArray::<O>::new(
            Arc::new(Field::new("a", DataType::Int32, false)),
            ScalarBuffer::from(offsets),
            ScalarBuffer::from(sizes),
            Arc::new(Int32Array::from(values)),
            Some(NullBuffer::from(validity)),
        )
    }

    /// Runs `list_view_reverse` on `input` and extracts the rows of the result.
    fn reversed_rows<O: OffsetSizeTrait + TryFrom<i64>>(
        input: &GenericListViewArray<O>,
    ) -> Rows {
        let output_field = Arc::new(Field::new("test", DataType::Int32, true));
        let reversed = list_view_reverse(input, &output_field).unwrap();
        let rows: Rows = reversed
            .as_list_view::<O>()
            .iter()
            .map(|row| {
                row.map(|items| items.as_primitive::<Int32Type>().values().to_vec())
            })
            .collect();
        rows
    }

    #[test]
    fn test_reverse_list_view_and_large_list_view() {
        let values = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
        let validity = vec![true, true, false, true];

        // ListView
        let list_view = build_list_view::<i32>(
            vec![0, 1, 6, 6],
            vec![1, 5, 0, 3],
            values.clone(),
            validity.clone(),
        );
        let list_view_reversed = reversed_rows(&list_view);

        // LargeListView
        let large_list_view = build_list_view::<i64>(
            vec![0, 1, 6, 6],
            vec![1, 5, 0, 3],
            values,
            validity,
        );
        let large_list_view_reversed = reversed_rows(&large_list_view);

        // Both offset widths must produce the same rows.
        let expected = vec![
            Some(vec![1]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![9, 8, 7]),
        ];
        assert_eq!(expected, list_view_reversed);
        assert_eq!(expected, large_list_view_reversed);
    }

    #[test]
    fn test_reverse_list_view_out_of_order() {
        let list_view = build_list_view::<i32>(
            vec![6, 1, 6, 0], // rows do not follow child order
            vec![3, 5, 0, 1],
            vec![
                1, // fourth row: offset 0, size 1
                2, 3, 4, 5, 6, // second row: offset 1, size 5
                // third row: null, size 0
                7, 8, 9, // first row: offset 6, size 3
            ],
            vec![true, true, false, true],
        );
        let list_view_reversed = reversed_rows(&list_view);

        let expected = vec![
            Some(vec![9, 8, 7]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![1]),
        ];
        assert_eq!(expected, list_view_reversed);
    }

    #[test]
    fn test_reverse_list_view_with_nulls() {
        let list_view = build_list_view::<i32>(
            vec![16, 1, 6, 0], // rows do not follow child order
            vec![3, 5, 10, 1],
            vec![
                1, // fourth row: offset 0, size 1
                2, 3, 4, 5, 6, // second row: offset 1, size 5
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // third row (null): offset 6, size 10
                7, 8, 9, // first row: offset 16, size 3
            ],
            vec![true, true, false, true],
        );
        let list_view_reversed = reversed_rows(&list_view);

        // The null row keeps no values even though its size was non-zero.
        let expected = vec![
            Some(vec![9, 8, 7]),
            Some(vec![6, 5, 4, 3, 2]),
            None,
            Some(vec![1]),
        ];
        assert_eq!(expected, list_view_reversed);
    }
}

0 commit comments

Comments
 (0)