Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
b1d7282
refactor: experimental parquet reader
dantengsky Jul 3, 2025
98b96be
revert string iterator (performance degrades)
dantengsky Aug 7, 2025
2a3a68b
fix missing license header
dantengsky Aug 7, 2025
973bccb
revert configs
dantengsky Aug 7, 2025
62eab82
taplo fmt
dantengsky Aug 7, 2025
c4fb49f
toml format
dantengsky Aug 7, 2025
fce45a4
fix: revert arrow converters
dantengsky Aug 8, 2025
380f7f8
fix bitmap len mismatch
dantengsky Aug 8, 2025
0cbbd29
tweak parquet physical type mappings
dantengsky Aug 8, 2025
cdbcd5e
align lz4_flex version
dantengsky Aug 8, 2025
bc24bbc
rename switch settings name to use_experimental_parquet_reader
Aug 11, 2025
9b07356
re-enable dictionary decoding for string column
Aug 11, 2025
c1e3339
optimize plain encoding
dantengsky Aug 13, 2025
c62abb6
try optimize small string rle path
dantengsky Aug 14, 2025
4d6077c
optimize rle decode
dantengsky Aug 14, 2025
90c07b1
optimize string view
dantengsky Aug 14, 2025
6efffe4
optimize ptr access
dantengsky Aug 14, 2025
28e15df
use u8 for small string len
dantengsky Aug 14, 2025
33b349a
try unfold loop
dantengsky Aug 14, 2025
034eee1
try simds
dantengsky Aug 15, 2025
e279c79
tweak logic test
dantengsky Aug 19, 2025
5bdcf2a
settings for dict encoding
dantengsky Aug 19, 2025
a07c9b6
dict for numbers
dantengsky Aug 19, 2025
cba5437
opt batch dict
dantengsky Aug 19, 2025
834bf4a
optimize batch rle(hybrid) decoding
dantengsky Aug 19, 2025
4a827a9
disable bound check
dantengsky Aug 19, 2025
690f848
use un-inited buffer
dantengsky Aug 19, 2025
38cec88
fix avx loading views
dantengsky Aug 20, 2025
18a3f57
resolve rebase conflicts
dantengsky Oct 30, 2025
3afd978
fix: should use Bytes::chunk for PageReader after rebased
dantengsky Nov 4, 2025
887443b
resolve rebase conflicts
dantengsky Dec 13, 2025
fac2336
refactor simd code
dantengsky Dec 13, 2025
0fe8a72
fix incorrect uint32 mapping
dantengsky Dec 13, 2025
8f9c617
tweak release profile opt-level
dantengsky Dec 13, 2025
c375365
adapts to v2 page
dantengsky Dec 14, 2025
de1aaab
cargo fmt
dantengsky Dec 14, 2025
77f7e53
revert release profile opt level, ci build timed out
dantengsky Dec 14, 2025
143e7a0
re-enable opt-level 3
dantengsky Dec 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 153 additions & 43 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ databend-common-meta-store = { path = "src/meta/store" }
databend-common-meta-types = { path = "src/meta/types" }
databend-common-metrics = { path = "src/common/metrics" }
databend-common-native = { path = "src/common/native" }
databend-common-parquet-reader-experimental = { path = "src/common/experimental_parquet_reader" }
databend-common-pipeline = { path = "src/query/pipeline" }
databend-common-pipeline-transforms = { path = "src/query/pipeline/transforms" }
databend-common-proto-conv = { path = "src/meta/proto-conv" }
Expand Down Expand Up @@ -370,6 +371,7 @@ logforth = { git = "https://github.com/datafuse-extras/logforth", branch = "main
'fastrace',
] }
lz4 = "1.24.0"
lz4_flex = { version = "^0.9" }
map-api = { version = "0.4.2" }
maplit = "1.0.2"
match-template = "0.0.1"
Expand Down Expand Up @@ -418,6 +420,8 @@ ordq = "0.2.0"
p256 = "0.13"
parking_lot = "0.12.1"
parquet = { version = "56", features = ["async"] }
parquet-format-safe = "0.2.0"
parquet2 = { version = "0.17.0", default-features = false, features = ["serde_types", "async", "zstd", "snappy", "lz4"] }
passwords = { version = "3.1.16" }
paste = "1.0.15"
percent-encoding = "2.3.1"
Expand Down Expand Up @@ -497,6 +501,7 @@ stacker = "0.1"
state = "0.6.0"
state-machine-api = { version = "0.3.4" }
stream-more = "0.1.3"
streaming-decompression = "0.1.2"
strength_reduce = "0.2.4"
stringslice = "0.2.0"
strum = "0.24.1"
Expand Down Expand Up @@ -600,7 +605,8 @@ map_entry = "allow"
debug = 1
lto = "thin"
overflow-checks = false
opt-level = "s" # defaults to be 3
#opt-level = "s" # defaults to be 3
opt-level = 3
incremental = false
codegen-units = 1 ## better performance see below comment
## DON'T DELETE THIS: If we want best performance, we should use this profile but it will take longer time to compile.
Expand Down
4 changes: 2 additions & 2 deletions src/common/column/src/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,9 @@ impl<T: ViewType + ?Sized> Clone for BinaryViewColumnGeneric<T> {
}
}

unsafe impl<T: ViewType + ?Sized> Send for BinaryViewColumnGeneric<T> {}
// unsafe impl<T: ViewType + ?Sized> Send for BinaryViewColumnGeneric<T> {}

unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewColumnGeneric<T> {}
// unsafe impl<T: ViewType + ?Sized> Sync for BinaryViewColumnGeneric<T> {}

impl<T: ViewType + ?Sized> BinaryViewColumnGeneric<T> {
fn init_cache(value: Option<usize>) -> OnceLock<usize> {
Expand Down
32 changes: 32 additions & 0 deletions src/common/experimental_parquet_reader/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[package]
name = "databend-common-parquet-reader-experimental"
version = { workspace = true }
authors = { workspace = true }
license = { workspace = true }
publish = { workspace = true }
edition = { workspace = true }

[features]

[dependencies]
databend-common-column = { workspace = true }
databend-common-exception = { workspace = true }
databend-common-expression = { workspace = true }
databend-storages-common-table-meta = { workspace = true }

bytes = { workspace = true }
lz4_flex = { workspace = true }
parquet = { workspace = true, features = ["experimental"] }
parquet-format-safe = { workspace = true }
parquet2 = { workspace = true }
streaming-decompression = { workspace = true }
zstd = { workspace = true }

[dev-dependencies]
# used to test async readers

[package.metadata.cargo-machete]
ignored = ["match-template"]

[lints]
workspace = true
Loading
Loading