Skip to content

Commit 96d3d95

Browse files
authored
Zip Fragmentation on take (#4747)
Signed-off-by: blaginin <[email protected]>
1 parent f7d88fa commit 96d3d95

File tree

3 files changed

+49
-3
lines changed

3 files changed

+49
-3
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-array/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ futures = { workspace = true, features = ["alloc", "async-await", "std"] }
3939
getrandom_v03 = { workspace = true }
4040
goldenfile = { workspace = true, optional = true }
4141
humansize = { workspace = true }
42+
insta = { workspace = true }
4243
inventory = { workspace = true }
4344
itertools = { workspace = true }
4445
log = { workspace = true }

vortex-array/src/compute/zip.rs

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,15 @@ impl ComputeFnVTable for Zip {
7575
// TODO(os): add invert_mask opt and check if if_false has a kernel like:
7676
// kernel.invoke(Args(if_false, if_true, mask, invert_mask = true))
7777

78+
if !if_true.is_canonical() || !if_false.is_canonical() {
79+
return zip(
80+
if_true.to_canonical().as_ref(),
81+
if_false.to_canonical().as_ref(),
82+
mask,
83+
)
84+
.map(Into::into);
85+
}
86+
7887
Ok(zip_impl(
7988
if_true.to_canonical().as_ref(),
8089
if_false.to_canonical().as_ref(),
@@ -212,11 +221,14 @@ pub(crate) fn zip_impl_with_builder(
212221

213222
#[cfg(test)]
214223
mod tests {
215-
use vortex_array::arrays::PrimitiveArray;
216-
use vortex_array::compute::zip;
217-
use vortex_array::{IntoArray, ToCanonical};
218224
use vortex_buffer::buffer;
225+
use vortex_dtype::Nullability;
219226
use vortex_mask::Mask;
227+
use vortex_scalar::Scalar;
228+
229+
use crate::arrays::{ConstantArray, PrimitiveArray};
230+
use crate::compute::zip;
231+
use crate::{Array, IntoArray, ToCanonical};
220232

221233
#[test]
222234
fn test_zip_basic() {
@@ -260,4 +272,36 @@ mod tests {
260272

261273
zip(&if_true, &if_false, &mask).unwrap();
262274
}
275+
276+
#[test]
277+
fn test_fragmentation() {
278+
let len = 100;
279+
280+
let const1 = ConstantArray::new(
281+
Scalar::utf8("hello_this_is_a_longer_string", Nullability::Nullable),
282+
len,
283+
)
284+
.to_array();
285+
286+
let const2 = ConstantArray::new(
287+
Scalar::utf8("world_this_is_another_string", Nullability::Nullable),
288+
len,
289+
)
290+
.to_array();
291+
292+
// Create a mask that alternates frequently to cause fragmentation
293+
// Pattern: take from const1 at even indices, const2 at odd indices
294+
let indices: Vec<usize> = (0..len).step_by(2).collect();
295+
let mask = Mask::from_indices(len, indices);
296+
297+
let result = zip(&const1, &const2, &mask).unwrap();
298+
299+
insta::assert_snapshot!(result.display_tree(), @r"
300+
root: vortex.varbinview(utf8?, len=100) nbytes=1.66 kB (100.00%)
301+
metadata: EmptyMetadata
302+
buffer (align=1): 29 B (1.75%)
303+
buffer (align=1): 28 B (1.69%)
304+
buffer (align=16): 1.60 kB (96.56%)
305+
");
306+
}
263307
}

0 commit comments

Comments
 (0)