Skip to content

Commit 58d07ea

Browse files
authored
Merge branch 'main' into fix-aarch64-release-profile
2 parents 30eb66f + b855648 commit 58d07ea

File tree

18 files changed

+438
-46
lines changed

18 files changed

+438
-46
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/building/src/lib.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ pub fn add_building_env_vars() {
6161
set_env_config().expect("Unable to generate build envs");
6262
add_env_credits_info();
6363
add_target_features();
64+
add_build_profile();
65+
add_opt_level();
6466
add_env_version();
6567
add_env_license();
6668
add_license_public_key();
@@ -185,3 +187,35 @@ pub fn add_target_features() {
185187
}
186188
};
187189
}
190+
191+
/// Forwards the `PROFILE` environment variable to the compiled crate as
/// `DATABEND_BUILD_PROFILE`.
///
/// The value is emitted through a `cargo:rustc-env=` directive on stdout. When
/// `PROFILE` is missing or is not valid UTF-8, a `cargo:warning=` line is
/// printed first and the literal `unknown` is exported instead, so the
/// variable is always defined for `env!` consumers.
pub fn add_build_profile() {
    let profile = match env::var("PROFILE") {
        Ok(value) => value,
        Err(env::VarError::NotUnicode(_)) => {
            println!("cargo:warning=PROFILE was not valid utf-8");
            "unknown".to_owned()
        }
        Err(env::VarError::NotPresent) => {
            println!("cargo:warning=PROFILE was not set");
            "unknown".to_owned()
        }
    };

    // Always emitted last, after any warning, matching the original output order.
    println!("cargo:rustc-env=DATABEND_BUILD_PROFILE={}", profile);
}
206+
207+
/// Forwards the `OPT_LEVEL` environment variable to the compiled crate as
/// `DATABEND_OPT_LEVEL`.
///
/// The value is emitted through a `cargo:rustc-env=` directive on stdout. When
/// `OPT_LEVEL` is missing or is not valid UTF-8, a `cargo:warning=` line is
/// printed first and the literal `unknown` is exported instead, so the
/// variable is always defined for `env!` consumers.
pub fn add_opt_level() {
    let opt_level = match env::var("OPT_LEVEL") {
        Ok(value) => value,
        Err(env::VarError::NotUnicode(_)) => {
            println!("cargo:warning=OPT_LEVEL was not valid utf-8");
            "unknown".to_owned()
        }
        Err(env::VarError::NotPresent) => {
            println!("cargo:warning=OPT_LEVEL was not set");
            "unknown".to_owned()
        }
    };

    // Always emitted last, after any warning, matching the original output order.
    println!("cargo:rustc-env=DATABEND_OPT_LEVEL={}", opt_level);
}

src/common/io/src/bitmap.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ use roaring::treemap::Iter;
2929
use smallvec::SmallVec;
3030

3131
// https://github.com/ClickHouse/ClickHouse/blob/516a6ed6f8bd8c5f6eed3a10e9037580b2fb6152/src/AggregateFunctions/AggregateFunctionGroupBitmapData.h#L914
32-
const LARGE_THRESHOLD: usize = 32;
33-
const HYBRID_MAGIC: [u8; 2] = *b"HB";
34-
const HYBRID_VERSION: u8 = 1;
35-
const HYBRID_KIND_SMALL: u8 = 0;
36-
const HYBRID_KIND_LARGE: u8 = 1;
37-
const HYBRID_HEADER_LEN: usize = 4;
32+
pub const LARGE_THRESHOLD: usize = 32;
33+
pub const HYBRID_MAGIC: [u8; 2] = *b"HB";
34+
pub const HYBRID_VERSION: u8 = 1;
35+
pub const HYBRID_KIND_SMALL: u8 = 0;
36+
pub const HYBRID_KIND_LARGE: u8 = 1;
37+
pub const HYBRID_HEADER_LEN: usize = 4;
3838

3939
type SmallBitmap = SmallVec<[u64; LARGE_THRESHOLD]>;
4040

@@ -43,7 +43,7 @@ type SmallBitmap = SmallVec<[u64; LARGE_THRESHOLD]>;
4343
/// - Calculations may frequently create new Bitmaps; reusing them as much as possible can effectively improve performance.
4444
/// - do not use Box to construct HybridBitmap
4545
#[allow(clippy::large_enum_variant)]
46-
#[derive(Clone)]
46+
#[derive(Clone, PartialEq)]
4747
pub enum HybridBitmap {
4848
Small(SmallBitmap),
4949
Large(RoaringTreemap),

src/common/io/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,13 @@ mod stat_buffer;
4646
pub mod interval;
4747
pub mod wkb;
4848

49+
pub use bitmap::HYBRID_HEADER_LEN;
50+
pub use bitmap::HYBRID_KIND_LARGE;
51+
pub use bitmap::HYBRID_KIND_SMALL;
52+
pub use bitmap::HYBRID_MAGIC;
53+
pub use bitmap::HYBRID_VERSION;
4954
pub use bitmap::HybridBitmap;
55+
pub use bitmap::LARGE_THRESHOLD;
5056
pub use bitmap::deserialize_bitmap;
5157
pub use bitmap::parse_bitmap;
5258
pub use decimal::display_decimal_128;

src/common/version/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ pub const DATABEND_ENTERPRISE_LICENSE_PUBLIC_KEY: &str =
4545

4646
pub const DATABEND_CARGO_CFG_TARGET_FEATURE: &str = env!("DATABEND_CARGO_CFG_TARGET_FEATURE");
4747

48+
/// Value of the `DATABEND_BUILD_PROFILE` environment variable captured at compile time.
///
/// The build helper `add_build_profile` exports it from `PROFILE`, falling back to
/// `unknown` when that variable is missing or not valid UTF-8.
pub const DATABEND_BUILD_PROFILE: &str = env!("DATABEND_BUILD_PROFILE");
49+
50+
/// Value of the `DATABEND_OPT_LEVEL` environment variable captured at compile time.
///
/// The build helper `add_opt_level` exports it from `OPT_LEVEL`, falling back to
/// `unknown` when that variable is missing or not valid UTF-8.
pub const DATABEND_OPT_LEVEL: &str = env!("DATABEND_OPT_LEVEL");
51+
4852
pub const DATABEND_TELEMETRY_ENDPOINT: &str = env!("DATABEND_TELEMETRY_ENDPOINT");
4953

5054
pub const DATABEND_TELEMETRY_API_KEY: &str = env!("DATABEND_TELEMETRY_API_KEY");

src/query/expression/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ goldenfile = { workspace = true }
7070
pretty_assertions = { workspace = true }
7171
proptest = { workspace = true }
7272
rand = { workspace = true }
73+
roaring = { workspace = true }
7374

7475
[[bench]]
7576
name = "bench"

src/query/expression/src/aggregate/group_hash.rs

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@ use crate::Scalar;
2626
use crate::ScalarRef;
2727
use crate::Value;
2828
use crate::types::decimal::Decimal;
29+
use crate::types::i256;
30+
use crate::types::number::Number;
2931
use crate::types::*;
32+
use crate::utils::bitmap::normalize_bitmap_column;
3033
use crate::visitor::ValueVisitor;
3134
use crate::with_decimal_mapped_type;
3235
use crate::with_number_mapped_type;
@@ -229,7 +232,8 @@ impl<const IS_FIRST: bool> ValueVisitor for HashVisitor<'_, IS_FIRST> {
229232
}
230233

231234
fn visit_bitmap(&mut self, column: BinaryColumn) -> Result<()> {
232-
self.combine_group_hash_string_column::<BitmapType>(&column);
235+
let column = normalize_bitmap_column(&column);
236+
self.combine_group_hash_string_column::<BitmapType>(column.as_ref());
233237
Ok(())
234238
}
235239

@@ -411,7 +415,11 @@ where I: Index
411415
}
412416

413417
fn visit_bitmap(&mut self, column: crate::types::BinaryColumn) -> Result<()> {
414-
self.visit_binary(column)
418+
let column = normalize_bitmap_column(&column);
419+
self.visit_indices(|i| {
420+
let value = column.as_ref().index(i.to_usize()).unwrap();
421+
value.agg_hash()
422+
})
415423
}
416424

417425
fn visit_string(&mut self, column: crate::types::StringColumn) -> Result<()> {
@@ -628,6 +636,8 @@ mod tests {
628636
use databend_common_column::bitmap::Bitmap;
629637
use databend_common_column::types::months_days_micros;
630638
use databend_common_column::types::timestamp_tz;
639+
use databend_common_io::HybridBitmap;
640+
use roaring::RoaringTreemap;
631641

632642
use super::*;
633643
use crate::BlockEntry;
@@ -636,8 +646,11 @@ mod tests {
636646
use crate::ProjectedBlock;
637647
use crate::Value;
638648
use crate::types::ArgType;
649+
use crate::types::BitmapType;
639650
use crate::types::DecimalSize;
651+
use crate::types::Int32Type;
640652
use crate::types::NullableColumn;
653+
use crate::types::NullableType;
641654
use crate::types::NumberScalar;
642655
use crate::types::OpaqueScalar;
643656
use crate::types::VectorDataType;
@@ -864,4 +877,33 @@ mod tests {
864877
}
865878
Ok(())
866879
}
880+
881+
/// A bitmap serialized with the hybrid encoding and the same bitmap serialized
/// directly from a `RoaringTreemap` (legacy bytes) must produce the same group hash.
#[test]
fn test_bitmap_group_hash_legacy_bytes_normalized() -> Result<()> {
    let values = [1_u64, 5, 42];

    // Row 0: the values in the hybrid encoding.
    let mut hybrid = HybridBitmap::new();
    values.iter().copied().for_each(|v| {
        hybrid.insert(v);
    });
    let mut hybrid_bytes = Vec::new();
    hybrid.serialize_into(&mut hybrid_bytes).unwrap();

    // Row 1: the same values serialized straight from a RoaringTreemap.
    let mut tree = RoaringTreemap::new();
    values.iter().copied().for_each(|v| {
        tree.insert(v);
    });
    let mut legacy_bytes = Vec::new();
    tree.serialize_into(&mut legacy_bytes).unwrap();

    let rows = vec![hybrid_bytes, legacy_bytes];
    let bitmap_column = BitmapType::from_data(rows);
    let block = DataBlock::new(vec![bitmap_column.into()], 2);

    let row_count = block.num_rows();
    let mut hashes = vec![0_u64; row_count];
    let entries = ProjectedBlock::from(block.columns());
    group_hash_entries(entries, &mut hashes);

    // Legacy-encoded bitmap should hash identically to hybrid-encoded bitmap.
    assert_eq!(hashes[0], hashes[1]);
    Ok(())
}
867909
}

src/query/expression/src/aggregate/payload_row.rs

Lines changed: 96 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
use bumpalo::Bump;
1616
use databend_common_base::hints::assume;
1717
use databend_common_column::bitmap::Bitmap;
18+
use databend_common_io::deserialize_bitmap;
1819
use databend_common_io::prelude::bincode_deserialize_from_slice;
1920
use databend_common_io::prelude::bincode_serialize_into_buf;
2021

@@ -42,6 +43,7 @@ use crate::types::TimestampType;
4243
use crate::types::decimal::Decimal;
4344
use crate::types::decimal::DecimalColumn;
4445
use crate::types::i256;
46+
use crate::utils::bitmap::is_hybrid_encoding;
4547
use crate::with_decimal_mapped_type;
4648
use crate::with_number_mapped_type;
4749

@@ -128,11 +130,33 @@ pub(super) unsafe fn serialize_column_to_rowformat(
128130
}
129131
}
130132
}
131-
Column::Binary(v) | Column::Bitmap(v) | Column::Variant(v) | Column::Geometry(v) => {
132-
for row in select_vector {
133-
let data = arena.alloc_slice_copy(unsafe { v.index_unchecked(row.to_usize()) });
133+
Column::Bitmap(v) => {
134+
for &index in select_vector {
135+
let value = unsafe { v.index_unchecked(index.to_usize()) };
136+
let normalized = if is_hybrid_encoding(value) {
137+
value
138+
} else {
139+
match deserialize_bitmap(value) {
140+
Ok(bitmap) => {
141+
scratch.clear();
142+
// Safe unwrap: serialize_into writes into Vec<u8>.
143+
bitmap.serialize_into(&mut *scratch).unwrap();
144+
scratch.as_slice()
145+
}
146+
Err(_) => value,
147+
}
148+
};
149+
let data = arena.alloc_slice_copy(normalized);
134150
unsafe {
135-
address[*row].write_bytes(offset, data);
151+
address[index].write_bytes(offset, data);
152+
}
153+
}
154+
}
155+
Column::Binary(v) | Column::Variant(v) | Column::Geometry(v) => {
156+
for &index in select_vector {
157+
let data = arena.alloc_slice_copy(unsafe { v.index_unchecked(index.to_usize()) });
158+
unsafe {
159+
address[index].write_bytes(offset, data);
136160
}
137161
}
138162
}
@@ -570,3 +594,71 @@ impl<'s> CompareState<'s> {
570594
}
571595
}
572596
}
597+
598+
#[cfg(test)]
mod tests {
    use databend_common_column::binary::BinaryColumnBuilder;
    use databend_common_io::HybridBitmap;
    use databend_common_io::deserialize_bitmap;
    use roaring::RoaringTreemap;

    use super::*;

    /// Serializing a bitmap column to the row format must write identical,
    /// `HB`-prefixed bytes for a hybrid-encoded row and for a row holding the
    /// same values as raw `RoaringTreemap` (legacy) bytes.
    #[test]
    fn serialize_bitmap_rowformat_normalizes_legacy_bytes() {
        let values = [1_u64, 5, 42];

        // Hybrid-encoded input row.
        let mut hybrid = HybridBitmap::new();
        values.iter().copied().for_each(|v| {
            hybrid.insert(v);
        });
        let mut hybrid_bytes = Vec::<u8>::new();
        hybrid.serialize_into(&mut hybrid_bytes).unwrap();

        // Legacy input row: the same values serialized straight from a RoaringTreemap.
        let mut tree = RoaringTreemap::new();
        values.iter().copied().for_each(|v| {
            tree.insert(v);
        });
        let mut legacy_bytes = Vec::<u8>::new();
        tree.serialize_into(&mut legacy_bytes).unwrap();

        // Two-row bitmap column: row 0 is hybrid, row 1 is legacy.
        let mut builder =
            BinaryColumnBuilder::with_capacity(2, hybrid_bytes.len() + legacy_bytes.len());
        for encoded in [&hybrid_bytes, &legacy_bytes] {
            builder.put_slice(encoded);
            builder.commit_row();
        }
        let column = Column::Bitmap(builder.build());

        let arena = Bump::new();
        let row_size = rowformat_size(&DataType::Bitmap);

        // One zeroed row buffer per input row; the buffers stay alive until the
        // end of the test so the row pointers below remain valid.
        let mut row_buffers = [vec![0u8; row_size], vec![0u8; row_size]];
        let mut addresses = [RowPtr::null(); BATCH_SIZE];
        for (slot, buffer) in addresses.iter_mut().zip(row_buffers.iter_mut()) {
            *slot = RowPtr::new(buffer.as_mut_ptr());
        }

        let select_vector = [RowID::from(0), RowID::from(1)];
        let mut scratch = Vec::new();
        unsafe {
            serialize_column_to_rowformat(
                &arena,
                &column,
                &select_vector,
                &mut addresses,
                0,
                &mut scratch,
            );
        }

        let hybrid_row = unsafe { addresses[0].read_bytes(0) };
        let legacy_row = unsafe { addresses[1].read_bytes(0) };

        assert_eq!(hybrid_row, legacy_row);
        assert!(hybrid_row.starts_with(b"HB"));

        let decoded = deserialize_bitmap(hybrid_row).unwrap();
        assert_eq!(decoded.iter().collect::<Vec<_>>(), values);
    }
}

src/query/expression/src/kernels/group_by_hash/method_single_string.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use crate::KeysState;
2222
use crate::ProjectedBlock;
2323
use crate::types::BinaryColumn;
2424
use crate::types::binary::BinaryColumnIter;
25+
use crate::utils::bitmap::normalize_bitmap_column;
2526

2627
#[derive(Debug, Clone, Default, PartialEq, Eq)]
2728
pub struct HashMethodSingleBinary {}
@@ -36,7 +37,13 @@ impl HashMethod for HashMethodSingleBinary {
3637
}
3738

3839
fn build_keys_state(&self, group_columns: ProjectedBlock, _rows: usize) -> Result<KeysState> {
39-
Ok(KeysState::Column(group_columns[0].to_column()))
40+
Ok(KeysState::Column(match group_columns[0].to_column() {
41+
Column::Bitmap(col) => match normalize_bitmap_column(&col) {
42+
std::borrow::Cow::Borrowed(_) => Column::Bitmap(col),
43+
std::borrow::Cow::Owned(col) => Column::Bitmap(col),
44+
},
45+
column => column,
46+
}))
4047
}
4148

4249
fn build_keys_iter<'a>(&self, keys_state: &'a KeysState) -> Result<Self::HashKeyIter<'a>> {

0 commit comments

Comments
 (0)