Skip to content

Commit d985475

Browse files
vegarsti authored and jizezhang committed
Support reverse for ListView (apache#18424)
## Which issue does this PR close?

- Closes apache#18350.

## Rationale for this change

We want to be able to reverse a ListView.

## What changes are included in this PR?

- Downcast `&dyn Array` to `ListView`: `as_list_view_array`
- Downcast `&dyn Array` to `LargeListView`: `as_large_list_view_array`
- Branches in `array_reverse_inner` to reverse `ListView` and `LargeListView`
- Main logic in `list_view_reverse` which materializes a new values array using `take`

## Are these changes tested?

Yes
1 parent 2151741 commit d985475

File tree

3 files changed

+268
-7
lines changed

3 files changed

+268
-7
lines changed

datafusion/common/src/cast.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use crate::{downcast_value, Result};
2424
use arrow::array::{
2525
BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray,
2626
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
27-
Int16Array, Int8Array, LargeBinaryArray, LargeStringArray, StringViewArray,
28-
UInt16Array,
27+
Int16Array, Int8Array, LargeBinaryArray, LargeListViewArray, LargeStringArray,
28+
ListViewArray, StringViewArray, UInt16Array,
2929
};
3030
use arrow::{
3131
array::{
@@ -324,3 +324,13 @@ pub fn as_generic_string_array<T: OffsetSizeTrait>(
324324
) -> Result<&GenericStringArray<T>> {
325325
Ok(downcast_value!(array, GenericStringArray, T))
326326
}
327+
328+
/// Downcasts a `&dyn Array` to a [`ListViewArray`] reference.
///
/// The downcast itself is delegated to the `downcast_value!` macro and its
/// result is wrapped in `Ok`.
// NOTE(review): `downcast_value!` is defined outside this view — presumably it
// returns early with an error when `array` is not a `ListViewArray`; confirm.
329+
pub fn as_list_view_array(array: &dyn Array) -> Result<&ListViewArray> {
330+
Ok(downcast_value!(array, ListViewArray))
331+
}
332+
333+
/// Downcasts a `&dyn Array` to a [`LargeListViewArray`] reference.
///
/// The downcast itself is delegated to the `downcast_value!` macro and its
/// result is wrapped in `Ok`.
// NOTE(review): `downcast_value!` is defined outside this view — presumably it
// returns early with an error when `array` is not a `LargeListViewArray`; confirm.
334+
pub fn as_large_list_view_array(array: &dyn Array) -> Result<&LargeListViewArray> {
335+
Ok(downcast_value!(array, LargeListViewArray))
336+
}

datafusion/functions-nested/src/reverse.rs

Lines changed: 249 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,18 @@
1919
2020
use crate::utils::make_scalar_function;
2121
use arrow::array::{
22-
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, MutableArrayData,
23-
OffsetSizeTrait,
22+
Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray,
23+
GenericListViewArray, MutableArrayData, OffsetSizeTrait, UInt32Array,
24+
};
25+
use arrow::buffer::{OffsetBuffer, ScalarBuffer};
26+
use arrow::compute::take;
27+
use arrow::datatypes::DataType::{
28+
FixedSizeList, LargeList, LargeListView, List, ListView, Null,
2429
};
25-
use arrow::buffer::OffsetBuffer;
26-
use arrow::datatypes::DataType::{FixedSizeList, LargeList, List, Null};
2730
use arrow::datatypes::{DataType, FieldRef};
2831
use datafusion_common::cast::{
29-
as_fixed_size_list_array, as_large_list_array, as_list_array,
32+
as_fixed_size_list_array, as_large_list_array, as_large_list_view_array,
33+
as_list_array, as_list_view_array,
3034
};
3135
use datafusion_common::{exec_err, utils::take_function_args, Result};
3236
use datafusion_expr::{
@@ -134,6 +138,14 @@ pub fn array_reverse_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
134138
fixed_size_array_reverse(array, field)
135139
}
136140
Null => Ok(Arc::clone(input_array)),
141+
ListView(field) => {
142+
let array = as_list_view_array(input_array)?;
143+
list_view_reverse::<i32>(array, field)
144+
}
145+
LargeListView(field) => {
146+
let array = as_large_list_view_array(input_array)?;
147+
list_view_reverse::<i64>(array, field)
148+
}
137149
array_type => exec_err!("array_reverse does not support type '{array_type}'."),
138150
}
139151
}
@@ -175,6 +187,75 @@ fn general_array_reverse<O: OffsetSizeTrait>(
175187
)?))
176188
}
177189

190+
/// Reverses the elements of every row of a list view array.
191+
///
192+
/// Construct indices, sizes and offsets for the reversed array by iterating over
193+
/// the list view array in the logical order, and reversing the order of the elements.
194+
/// We end up with a list view array where the elements are in order,
195+
/// even if the original array had elements out of order.
///
/// The returned array gets a freshly materialized values child: the new offsets
/// are a running sum of the sizes of the non-null rows, so row `i` of the result
/// starts exactly where row `i - 1` ended. Null rows keep their null bit (the
/// input null buffer is cloned) but are given size 0 and contribute no values,
/// even if their original size was non-zero.
///
/// # Arguments
///
/// * `array` - the list view array whose rows are reversed.
/// * `field` - the child field handed to `GenericListViewArray::try_new` for the
///   result.
///
/// # Errors
///
/// Propagates any error returned by `take` or by
/// `GenericListViewArray::try_new`.
196+
fn list_view_reverse<O: OffsetSizeTrait>(
197+
array: &GenericListViewArray<O>,
198+
field: &FieldRef,
199+
) -> Result<ArrayRef> {
200+
let offsets = array.offsets();
201+
let values = array.values();
202+
let sizes = array.sizes();
203+
204+
let mut new_offsets: Vec<O> = Vec::with_capacity(offsets.len());
205+
let mut indices: Vec<O> = Vec::with_capacity(values.len());
206+
let mut new_sizes = Vec::with_capacity(sizes.len());
207+
208+
// Start of the next row inside the new values child.
let mut current_offset = O::zero();
209+
for (row_index, offset) in offsets.iter().enumerate() {
210+
new_offsets.push(current_offset);
211+
212+
// If this array is null, we set its size to 0 and continue.
// No indices are collected and `current_offset` is left unchanged, so the
// values a null row pointed at are not copied into the result.
213+
if array.is_null(row_index) {
214+
new_sizes.push(O::zero());
215+
continue;
216+
}
217+
let size = sizes[row_index];
218+
new_sizes.push(size);
219+
220+
// Each array is located at [offset, offset + size), collect indices in the reverse order.
// For an empty row `idx` starts at `array_start - 1`, so the loop body never runs.
221+
let array_start = *offset;
222+
let array_end = array_start + size;
223+
let mut idx = array_end - O::one();
224+
while idx >= array_start {
225+
indices.push(idx);
226+
idx = idx - O::one();
227+
}
228+
229+
current_offset += size;
230+
}
231+
232+
// Materialize values from underlying array with take.
// The collected offsets are converted to an unsigned index array first:
// UInt64 when `O::IS_LARGE`, UInt32 otherwise.
233+
let indices_array: ArrayRef = if O::IS_LARGE {
234+
Arc::new(arrow::array::UInt64Array::from(
235+
indices
236+
.iter()
237+
.map(|i| i.as_usize() as u64)
238+
.collect::<Vec<_>>(),
239+
))
240+
} else {
241+
Arc::new(UInt32Array::from(
242+
indices
243+
.iter()
244+
.map(|i| i.as_usize() as u32)
245+
.collect::<Vec<_>>(),
246+
))
247+
};
248+
let values_reversed = take(&values, &indices_array, None)?;
249+
250+
// Reuse the input's null buffer unchanged: row validity is not affected by reversing.
Ok(Arc::new(GenericListViewArray::<O>::try_new(
251+
Arc::clone(field),
252+
ScalarBuffer::from(new_offsets),
253+
ScalarBuffer::from(new_sizes),
254+
values_reversed,
255+
array.nulls().cloned(),
256+
)?))
257+
}
178259
fn fixed_size_array_reverse(
179260
array: &FixedSizeListArray,
180261
field: &FieldRef,
@@ -207,3 +288,166 @@ fn fixed_size_array_reverse(
207288
array.nulls().cloned(),
208289
)?))
209290
}
291+
292+
#[cfg(test)]
293+
mod tests {
294+
// Unit tests for `list_view_reverse`, covering in-order and out-of-order
// offsets, null rows, an empty array and both offset widths (i32 / i64).
use crate::reverse::list_view_reverse;
295+
use arrow::{
296+
array::{
297+
AsArray, GenericListViewArray, Int32Array, LargeListViewArray, ListViewArray,
298+
OffsetSizeTrait,
299+
},
300+
buffer::{NullBuffer, ScalarBuffer},
301+
datatypes::{DataType, Field, Int32Type},
302+
};
303+
use datafusion_common::Result;
304+
use std::sync::Arc;
305+
306+
// Test helper: turns a list view array of Int32 values into one
// `Option<Vec<i32>>` per row (`None` for null rows) so results can be
// compared with `assert_eq!`.
fn list_view_values<O: OffsetSizeTrait>(
307+
array: &GenericListViewArray<O>,
308+
) -> Vec<Option<Vec<i32>>> {
309+
array
310+
.iter()
311+
.map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec()))
312+
.collect()
313+
}
314+
315+
// Offsets in ascending order; the third row is null with size 0.
#[test]
316+
fn test_reverse_list_view() -> Result<()> {
317+
let field = Arc::new(Field::new("a", DataType::Int32, false));
318+
let offsets = ScalarBuffer::from(vec![0, 1, 6, 6]);
319+
let sizes = ScalarBuffer::from(vec![1, 5, 0, 3]);
320+
let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]));
321+
let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
322+
let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
323+
let result = list_view_reverse(
324+
&list_view,
325+
&Arc::new(Field::new("test", DataType::Int32, true)),
326+
)?;
327+
let reversed = list_view_values(result.as_list_view::<i32>());
328+
let expected = vec![
329+
Some(vec![1]),
330+
Some(vec![6, 5, 4, 3, 2]),
331+
None,
332+
Some(vec![9, 8, 7]),
333+
];
334+
assert_eq!(expected, reversed);
335+
Ok(())
336+
}
337+
338+
// Same data as `test_reverse_list_view`, but through `LargeListViewArray`
// (read back with i64 offsets).
#[test]
339+
fn test_reverse_large_list_view() -> Result<()> {
340+
let field = Arc::new(Field::new("a", DataType::Int32, false));
341+
let offsets = ScalarBuffer::from(vec![0, 1, 6, 6]);
342+
let sizes = ScalarBuffer::from(vec![1, 5, 0, 3]);
343+
let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]));
344+
let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
345+
let list_view = LargeListViewArray::new(field, offsets, sizes, values, nulls);
346+
let result = list_view_reverse(
347+
&list_view,
348+
&Arc::new(Field::new("test", DataType::Int32, true)),
349+
)?;
350+
let reversed = list_view_values(result.as_list_view::<i64>());
351+
let expected = vec![
352+
Some(vec![1]),
353+
Some(vec![6, 5, 4, 3, 2]),
354+
None,
355+
Some(vec![9, 8, 7]),
356+
];
357+
assert_eq!(expected, reversed);
358+
Ok(())
359+
}
360+
361+
// Rows reference the values buffer in a non-ascending order; the result must
// still follow the logical row order.
#[test]
362+
fn test_reverse_list_view_out_of_order() -> Result<()> {
363+
let field = Arc::new(Field::new("a", DataType::Int32, false));
364+
let offsets = ScalarBuffer::from(vec![6, 1, 6, 0]); // out of order
365+
let sizes = ScalarBuffer::from(vec![3, 5, 0, 1]);
366+
let values = Arc::new(Int32Array::from(vec![
367+
1, // fourth array: offset 0, size 1
368+
2, 3, 4, 5, 6, // second array: offset 1, size 5
369+
// third array: offset 6, size 0 (and null)
370+
7, 8, 9, // first array: offset 6, size 3
371+
]));
372+
let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
373+
let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
374+
let result = list_view_reverse(
375+
&list_view,
376+
&Arc::new(Field::new("test", DataType::Int32, true)),
377+
)?;
378+
let reversed = list_view_values(result.as_list_view::<i32>());
379+
let expected = vec![
380+
Some(vec![9, 8, 7]),
381+
Some(vec![6, 5, 4, 3, 2]),
382+
None,
383+
Some(vec![1]),
384+
];
385+
assert_eq!(expected, reversed);
386+
Ok(())
387+
}
388+
389+
// The null third row has a non-zero size (10); it must still come back as
// `None` and must not disturb the other rows.
#[test]
390+
fn test_reverse_list_view_with_nulls() -> Result<()> {
391+
let field = Arc::new(Field::new("a", DataType::Int32, false));
392+
let offsets = ScalarBuffer::from(vec![16, 1, 6, 0]); // out of order
393+
let sizes = ScalarBuffer::from(vec![3, 5, 10, 1]);
394+
let values = Arc::new(Int32Array::from(vec![
395+
1, // fourth array: offset 0, size 1
396+
2, 3, 4, 5, 6, // second array: offset 1, size 5
397+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // third array: offset 6, size 10 (and null)
398+
7, 8, 9, // first array: offset 16, size 3
399+
]));
400+
let nulls = Some(NullBuffer::from(vec![true, true, false, true]));
401+
let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
402+
let result = list_view_reverse(
403+
&list_view,
404+
&Arc::new(Field::new("test", DataType::Int32, true)),
405+
)?;
406+
let reversed = list_view_values(result.as_list_view::<i32>());
407+
let expected = vec![
408+
Some(vec![9, 8, 7]),
409+
Some(vec![6, 5, 4, 3, 2]),
410+
None,
411+
Some(vec![1]),
412+
];
413+
assert_eq!(expected, reversed);
414+
Ok(())
415+
}
416+
417+
// Zero rows and zero values: the result must also have zero rows.
#[test]
418+
fn test_reverse_list_view_empty() -> Result<()> {
419+
let field = Arc::new(Field::new("a", DataType::Int32, false));
420+
let offsets = ScalarBuffer::from(vec![]);
421+
let sizes = ScalarBuffer::from(vec![]);
422+
let empty_array: Vec<i32> = vec![];
423+
let values = Arc::new(Int32Array::from(empty_array));
424+
let nulls = None;
425+
let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
426+
let result = list_view_reverse(
427+
&list_view,
428+
&Arc::new(Field::new("test", DataType::Int32, true)),
429+
)?;
430+
let reversed = list_view_values(result.as_list_view::<i32>());
431+
let expected: Vec<Option<Vec<i32>>> = vec![];
432+
assert_eq!(expected, reversed);
433+
Ok(())
434+
}
435+
436+
// Every row is null (some with size 1); every result row must be `None`.
#[test]
437+
fn test_reverse_list_view_all_nulls() -> Result<()> {
438+
let field = Arc::new(Field::new("a", DataType::Int32, false));
439+
let offsets = ScalarBuffer::from(vec![0, 1, 2, 3]);
440+
let sizes = ScalarBuffer::from(vec![0, 1, 1, 1]);
441+
let values = Arc::new(Int32Array::from(vec![1, 2, 3, 4]));
442+
let nulls = Some(NullBuffer::from(vec![false, false, false, false]));
443+
let list_view = ListViewArray::new(field, offsets, sizes, values, nulls);
444+
let result = list_view_reverse(
445+
&list_view,
446+
&Arc::new(Field::new("test", DataType::Int32, true)),
447+
)?;
448+
let reversed = list_view_values(result.as_list_view::<i32>());
449+
let expected: Vec<Option<Vec<i32>>> = vec![None, None, None, None];
450+
assert_eq!(expected, reversed);
451+
Ok(())
452+
}
453+
}

datafusion/sqllogictest/test_files/array.slt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8384,6 +8384,13 @@ select array_contains(a, b) from array_has order by 1 nulls last;
83848384
true
83858385
NULL
83868386

8387+
# TODO: Enable once arrow_cast supports ListView types.
8388+
# Expected output (once supported):
8389+
# ----
8390+
# [5, 4, 3, 2, 1]
8391+
query error
8392+
select array_reverse(arrow_cast(make_array(1, 2, 3, 4, 5), 'ListView(Int64)'));
8393+
83878394
### Delete tables
83888395

83898396
statement ok

0 commit comments

Comments
 (0)