Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions rust/arrow/src/array/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,48 @@ mod tests {
assert_eq!(array, expected)
}

#[test]
fn test_struct_offset() {
let strings: ArrayRef = Arc::new(StringArray::from(vec![
Some("joe"),
None,
None,
Some("mark"),
Some("doe"),
]));
let ints: ArrayRef = Arc::new(Int32Array::from(vec![
Some(1),
Some(2),
Some(3),
Some(4),
Some(5),
]));

let array =
StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())])
.unwrap()
.slice(1, 3)
.data();
let arrays = vec![array.as_ref()];
let mut mutable = MutableArrayData::new(arrays, false, 0);

mutable.extend(0, 1, 3);
let data = mutable.freeze();
let array = StructArray::from(Arc::new(data));

// Struct equality doesn't seem to work when using slices?
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has me suspicious of something not working. Specifically, using equality on the struct or the strings doesn't seem to work as I would expect.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Related to #9211 . When we slice an array, we increase its offset. However, for StructArrays, I think that we also need to increase the offset of the childs (or something like that). :(

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More specifically with @nevi-me 's analysis here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense. It seems like it may also affect the string array. When I create the expected string directly instead of slicing it this test passes (see newest commit).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am sorry, I was not very specific. There are two different issues: for string arrays, binary, etc., I think that the issue is solved with #9211, via https://github.com/apache/arrow/pull/9211/files#diff-74d8df3798e724950c2eb5aae1a838fc2f2b9e35d89ace2627e8e7a64584c71fR91

For the Struct, we need another patch.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. That makes sense. Thanks for the clarification!

It's unrelated to this PR but I was also wondering about the implementation of extend for structs. For other types, it seems like it is relatively efficient (extending with an entire slice) but for the struct case it seems like it iterates over each row and extends a single row at a time. Is that an actual issue? Would addressing that require the changes to struct you mention?


// let expected = StructArray::try_from(vec![
// ("f1", strings.slice(2, 2)),
// ("f2", ints.slice(2, 2)),
// ])
// .unwrap();
//
// assert_eq!(array, expected)
// assert_eq!(array.column(0), &strings.slice(2, 2));
assert_eq!(array.column(1), &ints.slice(2, 2));
}

#[test]
fn test_struct_nulls() {
let strings: ArrayRef = Arc::new(StringArray::from(vec![
Expand Down
13 changes: 8 additions & 5 deletions rust/arrow/src/array/transform/structure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
index: usize,
start: usize,
len: usize| {
mutable
.child_data
.iter_mut()
.for_each(|child| child.extend(index, start, start + len))
mutable.child_data.iter_mut().for_each(|child| {
child.extend(
index,
array.offset() + start,
array.offset() + start + len,
)
})
},
)
} else {
Expand All @@ -38,7 +41,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
index: usize,
start: usize,
len: usize| {
(start..start + len).for_each(|i| {
(array.offset() + start..array.offset() + start + len).for_each(|i| {
if array.is_valid(i) {
mutable
.child_data
Expand Down
136 changes: 136 additions & 0 deletions rust/arrow/src/compute/kernels/concat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,40 @@ mod tests {
Ok(())
}

#[test]
fn test_concat_primitive_array_slices() -> Result<()> {
let input_1 = PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-1),
Some(2),
None,
None,
])
.slice(1, 3);

let input_2 = PrimitiveArray::<Int64Type>::from(vec![
Some(101),
Some(102),
Some(103),
None,
])
.slice(1, 3);
let arr = concat(&[input_1.as_ref(), input_2.as_ref()])?;

let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(2),
None,
Some(102),
Some(103),
None,
])) as ArrayRef;

assert_eq!(&arr, &expected_output);

Ok(())
}

#[test]
fn test_concat_boolean_primitive_arrays() -> Result<()> {
let arr = concat(&[
Expand Down Expand Up @@ -254,4 +288,106 @@ mod tests {

Ok(())
}

#[test]
fn test_concat_struct_arrays() -> Result<()> {
let field = Field::new("field", DataType::Int64, true);
let input_primitive_1: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-1),
Some(2),
None,
None,
]));
let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);

let input_primitive_2: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(101),
Some(102),
Some(103),
None,
]));
let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);

let input_primitive_3: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(256),
Some(512),
Some(1024),
]));
let input_struct_3 = StructArray::from(vec![(field.clone(), input_primitive_3)]);

let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3])?;

let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-1),
Some(2),
None,
None,
Some(101),
Some(102),
Some(103),
None,
Some(256),
Some(512),
Some(1024),
])) as ArrayRef;

let actual_primitive = arr
.as_any()
.downcast_ref::<StructArray>()
.unwrap()
.column(0);
assert_eq!(actual_primitive, &expected_primitive_output);

Ok(())
}

#[test]
fn test_concat_struct_array_slices() -> Result<()> {
let field = Field::new("field", DataType::Int64, true);
let input_primitive_1: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(-1),
Some(2),
None,
None,
]));
let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);

let input_primitive_2: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(101),
Some(102),
Some(103),
None,
]));
let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);

let arr = concat(&[
input_struct_1.slice(1, 3).as_ref(),
input_struct_2.slice(1, 2).as_ref(),
])?;

let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
Some(2),
None,
Some(102),
Some(103),
])) as ArrayRef;

let actual_primitive = arr
.as_any()
.downcast_ref::<StructArray>()
.unwrap()
.column(0);
assert_eq!(actual_primitive, &expected_primitive_output);

Ok(())
}
}