|
15 | 15 | use bumpalo::Bump; |
16 | 16 | use databend_common_base::hints::assume; |
17 | 17 | use databend_common_column::bitmap::Bitmap; |
| 18 | +use databend_common_io::deserialize_bitmap; |
18 | 19 | use databend_common_io::prelude::bincode_deserialize_from_slice; |
19 | 20 | use databend_common_io::prelude::bincode_serialize_into_buf; |
20 | 21 |
|
@@ -42,6 +43,7 @@ use crate::types::TimestampType; |
42 | 43 | use crate::types::decimal::Decimal; |
43 | 44 | use crate::types::decimal::DecimalColumn; |
44 | 45 | use crate::types::i256; |
| 46 | +use crate::utils::bitmap::is_hybrid_encoding; |
45 | 47 | use crate::with_decimal_mapped_type; |
46 | 48 | use crate::with_number_mapped_type; |
47 | 49 |
|
@@ -128,11 +130,33 @@ pub(super) unsafe fn serialize_column_to_rowformat( |
128 | 130 | } |
129 | 131 | } |
130 | 132 | } |
131 | | - Column::Binary(v) | Column::Bitmap(v) | Column::Variant(v) | Column::Geometry(v) => { |
132 | | - for row in select_vector { |
133 | | - let data = arena.alloc_slice_copy(unsafe { v.index_unchecked(row.to_usize()) }); |
| 133 | + Column::Bitmap(v) => { |
| 134 | + for &index in select_vector { |
| 135 | + let value = unsafe { v.index_unchecked(index.to_usize()) }; |
| 136 | + let normalized = if is_hybrid_encoding(value) { |
| 137 | + value |
| 138 | + } else { |
| 139 | + match deserialize_bitmap(value) { |
| 140 | + Ok(bitmap) => { |
| 141 | + scratch.clear(); |
| 142 | + // Safe unwrap: serialize_into writes into an in-memory Vec<u8>, so no I/O error is expected here. |
| 143 | + bitmap.serialize_into(&mut *scratch).unwrap(); |
| 144 | + scratch.as_slice() |
| 145 | + } |
| 146 | + Err(_) => value, |
| 147 | + } |
| 148 | + }; |
| 149 | + let data = arena.alloc_slice_copy(normalized); |
134 | 150 | unsafe { |
135 | | - address[*row].write_bytes(offset, data); |
| 151 | + address[index].write_bytes(offset, data); |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + Column::Binary(v) | Column::Variant(v) | Column::Geometry(v) => { |
| 156 | + for &index in select_vector { |
| 157 | + let data = arena.alloc_slice_copy(unsafe { v.index_unchecked(index.to_usize()) }); |
| 158 | + unsafe { |
| 159 | + address[index].write_bytes(offset, data); |
136 | 160 | } |
137 | 161 | } |
138 | 162 | } |
@@ -570,3 +594,71 @@ impl<'s> CompareState<'s> { |
570 | 594 | } |
571 | 595 | } |
572 | 596 | } |
| 597 | + |
| 598 | +#[cfg(test)] |
| 599 | +mod tests { |
| 600 | + use databend_common_column::binary::BinaryColumnBuilder; |
| 601 | + use databend_common_io::HybridBitmap; |
| 602 | + use databend_common_io::deserialize_bitmap; |
| 603 | + use roaring::RoaringTreemap; |
| 604 | + |
| 605 | + use super::*; |
| 606 | + |
| 607 | + #[test] |
| 608 | + fn serialize_bitmap_rowformat_normalizes_legacy_bytes() { |
| 609 | + let values = [1_u64, 5, 42]; |
| 610 | + |
| 611 | + let mut hybrid = HybridBitmap::new(); |
| 612 | + for v in values { |
| 613 | + hybrid.insert(v); |
| 614 | + } |
| 615 | + let mut hybrid_bytes = Vec::new(); |
| 616 | + hybrid.serialize_into(&mut hybrid_bytes).unwrap(); |
| 617 | + |
| 618 | + let mut tree = RoaringTreemap::new(); |
| 619 | + for v in values { |
| 620 | + tree.insert(v); |
| 621 | + } |
| 622 | + let mut legacy_bytes = Vec::new(); |
| 623 | + tree.serialize_into(&mut legacy_bytes).unwrap(); |
| 624 | + |
| 625 | + let mut builder = |
| 626 | + BinaryColumnBuilder::with_capacity(2, hybrid_bytes.len() + legacy_bytes.len()); |
| 627 | + builder.put_slice(&hybrid_bytes); |
| 628 | + builder.commit_row(); |
| 629 | + builder.put_slice(&legacy_bytes); |
| 630 | + builder.commit_row(); |
| 631 | + let column = Column::Bitmap(builder.build()); |
| 632 | + |
| 633 | + let arena = Bump::new(); |
| 634 | + let row_size = rowformat_size(&DataType::Bitmap); |
| 635 | + |
| 636 | + let mut row0 = vec![0u8; row_size]; |
| 637 | + let mut row1 = vec![0u8; row_size]; |
| 638 | + let mut addresses = [RowPtr::null(); BATCH_SIZE]; |
| 639 | + addresses[0] = RowPtr::new(row0.as_mut_ptr()); |
| 640 | + addresses[1] = RowPtr::new(row1.as_mut_ptr()); |
| 641 | + |
| 642 | + let select_vector = [RowID::from(0), RowID::from(1)]; |
| 643 | + let mut scratch = Vec::new(); |
| 644 | + unsafe { |
| 645 | + serialize_column_to_rowformat( |
| 646 | + &arena, |
| 647 | + &column, |
| 648 | + &select_vector, |
| 649 | + &mut addresses, |
| 650 | + 0, |
| 651 | + &mut scratch, |
| 652 | + ); |
| 653 | + } |
| 654 | + |
| 655 | + let bytes0 = unsafe { addresses[0].read_bytes(0) }; |
| 656 | + let bytes1 = unsafe { addresses[1].read_bytes(0) }; |
| 657 | + |
| 658 | + assert_eq!(bytes0, bytes1); |
| 659 | + assert!(bytes0.starts_with(b"HB")); |
| 660 | + |
| 661 | + let decoded = deserialize_bitmap(bytes0).unwrap(); |
| 662 | + assert_eq!(decoded.iter().collect::<Vec<_>>(), values); |
| 663 | + } |
| 664 | +} |
0 commit comments