From e9a2561ef427dc6c6fcf33d4deb2a774ae150030 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 24 Dec 2025 12:02:38 +0800 Subject: [PATCH 1/3] revert --- Cargo.lock | 441 +++--------------- Cargo.toml | 16 +- src/bendsave/src/storage.rs | 12 +- src/binaries/tool/table_meta_inspector.rs | 8 +- src/common/exception/src/exception_into.rs | 17 +- src/common/native/src/read/reader.rs | 8 +- src/common/storage/Cargo.toml | 2 - src/common/storage/src/http_client.rs | 6 +- src/common/storage/src/lib.rs | 1 - src/common/storage/src/metrics.rs | 10 +- src/common/storage/src/metrics_layer.rs | 8 +- src/common/storage/src/operator.rs | 94 +--- src/common/storage/src/parquet.rs | 6 +- src/common/storage/src/runtime_layer.rs | 30 +- src/common/storage/src/stage.rs | 9 +- src/common/tracing/src/init.rs | 10 +- src/common/tracing/tests/it/remote_log.rs | 6 +- .../fuse/operations/vacuum_table_v2.rs | 36 +- .../fuse/operations/vacuum_temporary_files.rs | 2 +- .../it/storages/fuse/operations/vacuum.rs | 29 +- .../service/src/history_tables/external.rs | 2 +- .../src/history_tables/global_history_log.rs | 29 +- ...transform_exchange_aggregate_serializer.rs | 22 +- src/query/service/src/spillers/adapter.rs | 8 +- .../service/src/spillers/async_buffer.rs | 16 +- .../service/src/spillers/block_reader.rs | 2 +- src/query/service/src/spillers/inner.rs | 8 +- .../service/src/spillers/row_group_encoder.rs | 24 +- src/query/service/src/spillers/serialize.rs | 10 +- .../infer_schema/infer_schema_table.rs | 8 +- .../service/src/test_kits/block_writer.rs | 8 +- src/query/service/src/test_kits/fuse.rs | 16 +- .../it/storages/fuse/bloom_index_meta_size.rs | 12 +- .../it/storages/fuse/meta/column_oriented.rs | 20 +- .../mutation/block_compact_mutator.rs | 4 +- .../tests/it/storages/fuse/statistics.rs | 40 +- .../src/planner/binder/copy_into_location.rs | 6 +- src/query/sql/src/planner/binder/ddl/table.rs | 74 ++- src/query/sql/src/planner/binder/location.rs | 10 +- .../basic/src/result_cache/read/reader.rs | 2 +- .../storages/common/io/src/merge_io_reader.rs | 2 +- .../storages/common/session/src/temp_table.rs | 12 +- src/query/storages/delta/src/table.rs | 2 +- src/query/storages/fuse/src/fuse_table.rs | 71 +-- .../src/io/read/agg_index/agg_index_reader.rs | 4 +- .../src/io/read/block/block_reader_native.rs | 4 +- .../block/block_reader_native_deserialize.rs | 4 +- .../src/io/read/bloom/column_filter_reader.rs | 2 +- .../inverted_index/inverted_index_loader.rs | 6 +- .../inverted_index/inverted_index_reader.rs | 2 +- .../fuse/src/io/read/segment_reader.rs | 8 +- .../read/vector_index/vector_index_loader.rs | 6 +- .../fuse/src/io/write/block_writer.rs | 18 +- .../fuse/src/io/write/bloom_index_writer.rs | 6 +- .../operations/analyze/collect_ndv_source.rs | 18 +- .../storages/fuse/src/operations/commit.rs | 12 +- .../common/processors/sink_commit.rs | 22 +- .../processors/transform_block_writer.rs | 2 +- .../transform_mutation_aggregator.rs | 10 +- .../processors/transform_serialize_block.rs | 4 +- .../processors/transform_serialize_segment.rs | 6 +- .../fuse/src/operations/inverted_index.rs | 8 +- .../merge_into/mutator/matched_mutator.rs | 16 +- .../mutation/mutator/block_compact_mutator.rs | 8 +- .../mutation/mutator/recluster_mutator.rs | 22 +- .../mutator/segment_compact_mutator.rs | 6 +- .../storages/fuse/src/operations/navigate.rs | 12 +- .../fuse/src/operations/read_partitions.rs | 18 +- .../storages/fuse/src/operations/recluster.rs | 10 +- .../mutator/replace_into_operation_agg.rs | 20 +- .../fuse/src/operations/snapshot_hint.rs | 2 +- .../fuse/src/operations/table_index.rs | 18 +- .../storages/fuse/src/pruning/bloom_pruner.rs | 6 +- .../table_functions/fuse_time_travel_size.rs | 14 +- .../storages/hive/hive/src/hive_table.rs | 9 +- .../storages/orc/src/chunk_reader_impl.rs | 2 +- src/query/storages/orc/src/table.rs | 6 +- .../parquet/src/copy_into_table/reader.rs | 6 +- .../parquet/src/copy_into_table/source.rs | 2 +- .../src/parquet_reader/reader/full_reader.rs | 14 +- .../parquet_reader/reader/row_group_reader.rs | 26 +- .../reader/streaming_load_reader.rs | 6 +- .../parquet/src/parquet_reader/row_group.rs | 6 +- .../parquet/src/parquet_table/table.rs | 8 +- .../append/row_based_file/writer_processor.rs | 2 +- .../storages/system/src/temp_files_table.rs | 100 ++-- 86 files changed, 626 insertions(+), 1014 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d1eb1005a7abc..75fa0f0269ef0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -144,6 +144,12 @@ version = "0.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9d4ee0d472d1cd2e28c97dfa124b3d8d992e10eb0a035f33f5d12e3a177ba3b" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -214,9 +220,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" dependencies = [ "backtrace", ] @@ -1526,9 +1532,9 @@ dependencies = [ [[package]] name = "backon" -version = "1.6.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" +checksum = "fd0b50b1b78dbadd44ab18b3c794e496f3a139abb9fbc27d9c94c4eebbb96496" dependencies = [ "fastrand", "gloo-timers", @@ -1596,7 +1602,7 @@ version = "0.1.0" dependencies = [ "arrow 56.2.0", "arrow-schema 56.2.0", - "ctor 0.2.9", + "ctor", "databend-common-base", "databend-common-catalog", "databend-common-config", @@ -1683,7 +1689,7 @@ dependencies = [ "bitflags 2.9.0", "cexpr", "clang-sys", - "itertools 0.11.0", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -2305,16 +2311,17 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ + "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.1", + "windows-link 0.1.1", ] [[package]] @@ -3120,22 +3127,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "ctor" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "424e0138278faeb2b401f174ad17e715c829512d74f3d1e81eb43365c2e0590e" -dependencies = [ - "ctor-proc-macro", - "dtor", -] - -[[package]] -name = "ctor-proc-macro" -version = "0.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" - [[package]] name = "ctr" version = "0.9.2" @@ -3738,7 +3729,7 @@ dependencies = [ "bumpalo", "comfy-table", "crc32fast", - "ctor 0.2.9", + "ctor", "databend-common-ast", "databend-common-base", "databend-common-column", @@ -4467,7 +4458,7 @@ dependencies = [ "chrono-tz 0.8.6", "cidr", "cron", - "ctor 0.2.9", + "ctor", "dashmap 6.1.0", "databend-common-ast", "databend-common-base", @@ -4548,8 +4539,6 @@ dependencies = [ "log", "lru", "opendal", - "opendal-layer-immutable-index", - "opendal-layer-observe-metrics-common", "parquet 56.2.0", "prometheus-client 0.22.3", "regex", @@ -5492,7 +5481,7 @@ dependencies = [ "chrono-tz 0.8.6", "concurrent-queue", "cron", - "ctor 0.2.9", + "ctor", "dashmap 6.1.0", "databend-common-ast", "databend-common-base", @@ -6351,21 +6340,6 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" -[[package]] -name = "dtor" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" -dependencies = [ - "dtor-proc-macro", -] - -[[package]] -name = "dtor-proc-macro" -version = "0.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" - [[package]] name = "dtparse" version = "2.0.0" @@ -9087,7 +9061,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.57.0", + "windows-core 0.61.0", ] [[package]] @@ -9102,7 +9076,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=6536f9c#6536f9ccac1ce56a05ad2e738a2fa6760dea8cb5" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "apache-avro", @@ -9151,7 +9125,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=6536f9c#6536f9ccac1ce56a05ad2e738a2fa6760dea8cb5" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "async-trait", @@ -9168,7 +9142,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-hms" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=6536f9c#6536f9ccac1ce56a05ad2e738a2fa6760dea8cb5" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "async-trait", @@ -9192,7 +9166,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=6536f9c#6536f9ccac1ce56a05ad2e738a2fa6760dea8cb5" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "async-trait", "chrono", @@ -9212,7 +9186,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-s3tables" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=6536f9c#6536f9ccac1ce56a05ad2e738a2fa6760dea8cb5" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "async-trait", @@ -9768,12 +9742,10 @@ dependencies = [ "jiff-static", "jiff-tzdb", "jiff-tzdb-platform", - "js-sys", "log", "portable-atomic", "portable-atomic-util", "serde_core", - "wasm-bindgen", "windows-sys 0.61.2", ] @@ -10543,15 +10515,6 @@ dependencies = [ "digest", ] -[[package]] -name = "mea" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1a78f54a189049e2f554d43d2021e3010036ed65a8f5376ab12cc0432d9a341" -dependencies = [ - "slab", -] - [[package]] name = "measure_time" version = "0.9.0" @@ -11306,14 +11269,13 @@ dependencies = [ [[package]] name = "object_store_opendal" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" +version = "0.54.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b88fc0e0c4890c1d99e2b8c519c5db40f7d9b69a0f562ff1ad4967a4c8bbc6" dependencies = [ "async-trait", "bytes", - "chrono", "futures", - "mea", "object_store", "opendal", "pin-project", @@ -11344,244 +11306,37 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "opendal" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "opendal-core", - "opendal-layer-async-backtrace", - "opendal-layer-fastrace", - "opendal-layer-prometheus-client", - "opendal-service-azblob", - "opendal-service-azdls", - "opendal-service-gcs", - "opendal-service-ipfs", - "opendal-service-moka", - "opendal-service-obs", - "opendal-service-oss", - "opendal-service-s3", -] - -[[package]] -name = "opendal-core" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" +version = "0.54.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" dependencies = [ "anyhow", + "async-backtrace", "backon", "base64 0.22.1", "bytes", - "ctor 0.6.3", + "chrono", + "crc32c", + "fastrace", "futures", "getrandom 0.2.16", "hdrs", "http 1.3.1", "http-body 1.0.1", - "jiff", "log", "md-5", - "mea", + "moka", "percent-encoding", + "prometheus-client 0.23.1", + "prost", "quick-xml 0.38.4", "reqsign", "reqwest", "serde", "serde_json", - "tokio", - "url", - "uuid", - "web-time", -] - -[[package]] -name = "opendal-layer-async-backtrace" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "async-backtrace", - "opendal-core", -] - -[[package]] -name = "opendal-layer-fastrace" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "fastrace", - "opendal-core", -] - -[[package]] -name = "opendal-layer-immutable-index" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "opendal-core", -] - -[[package]] -name = "opendal-layer-observe-metrics-common" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "futures", - "http 1.3.1", - "opendal-core", -] - -[[package]] -name = "opendal-layer-prometheus-client" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "opendal-core", - "opendal-layer-observe-metrics-common", - "prometheus-client 0.24.0", -] - -[[package]] -name = "opendal-service-azblob" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "base64 0.22.1", - "bytes", - "ctor 0.6.3", - "http 1.3.1", - "log", - "opendal-core", - "opendal-service-azure-common", - "quick-xml 0.38.4", - "reqsign", - "serde", "sha2", - "uuid", -] - -[[package]] -name = "opendal-service-azdls" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "bytes", - "ctor 0.6.3", - "http 1.3.1", - "log", - "opendal-core", - "opendal-service-azure-common", - "quick-xml 0.38.4", - "reqsign", - "serde", - "serde_json", -] - -[[package]] -name = "opendal-service-azure-common" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "http 1.3.1", - "opendal-core", - "reqsign", -] - -[[package]] -name = "opendal-service-gcs" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "backon", - "base64 0.22.1", - "bytes", - "ctor 0.6.3", - "http 1.3.1", - "log", - "opendal-core", - "percent-encoding", - "quick-xml 0.38.4", - "reqsign", - "reqwest", - "serde", - "serde_json", "tokio", -] - -[[package]] -name = "opendal-service-ipfs" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "bytes", - "ctor 0.6.3", - "http 1.3.1", - "log", - "opendal-core", - "prost", - "serde", -] - -[[package]] -name = "opendal-service-moka" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "ctor 0.6.3", - "log", - "moka", - "opendal-core", - "serde", -] - -[[package]] -name = "opendal-service-obs" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "bytes", - "ctor 0.6.3", - "http 1.3.1", - "log", - "opendal-core", - "quick-xml 0.38.4", - "reqsign", - "serde", -] - -[[package]] -name = "opendal-service-oss" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "bytes", - "ctor 0.6.3", - "http 1.3.1", - "log", - "opendal-core", - "quick-xml 0.38.4", - "reqsign", - "reqwest", - "serde", -] - -[[package]] -name = "opendal-service-s3" -version = "0.55.0" -source = "git+https://github.com/apache/opendal.git?rev=02953ef#02953ef90c475eb592596f1a0b68370188a80128" -dependencies = [ - "base64 0.22.1", - "bytes", - "crc32c", - "ctor 0.6.3", - "http 1.3.1", - "log", - "md-5", - "opendal-core", - "quick-xml 0.38.4", - "reqsign-aws-v4", - "reqsign-core", - "reqsign-file-read-tokio", - "reqsign-http-send-reqwest", - "reqwest", - "serde", + "uuid", ] [[package]] @@ -12763,19 +12518,19 @@ dependencies = [ "dtoa", "itoa", "parking_lot 0.12.3", - "prometheus-client-derive-encode 0.4.2", + "prometheus-client-derive-encode", ] [[package]] name = "prometheus-client" -version = "0.24.0" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4500adecd7af8e0e9f4dbce15cfee07ce913fbf6ad605cc468b83f2d531ee94" +checksum = "cf41c1a7c32ed72abe5082fb19505b969095c12da9f5732a4bc9878757fd087c" dependencies = [ "dtoa", "itoa", "parking_lot 0.12.3", - "prometheus-client-derive-encode 0.5.0", + "prometheus-client-derive-encode", ] [[package]] @@ -12789,17 +12544,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "prometheus-client-derive-encode" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9adf1691c04c0a5ff46ff8f262b58beb07b0dbb61f96f9f54f6cbd82106ed87f" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "prometheus-parse" version = "0.2.5" @@ -12845,7 +12589,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.11.0", + "itertools 0.14.0", "log", "multimap", "once_cell", @@ -12865,7 +12609,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.14.0", "proc-macro2", "quote", "syn 2.0.106", @@ -13137,9 +12881,9 @@ dependencies = [ [[package]] name = "python3-dll-a" -version = "0.2.14" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d381ef313ae70b4da5f95f8a4de773c6aa5cd28f73adec4b4a31df70b66780d8" +checksum = "49fe4227a288cf9493942ad0220ea3f185f4d1f2a14f197f7344d6d02f4ed4ed" dependencies = [ "cc", ] @@ -13159,6 +12903,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quick-xml" version = "0.38.4" @@ -13637,88 +13391,18 @@ dependencies = [ "log", "once_cell", "percent-encoding", + "quick-xml 0.37.5", "rand 0.8.5", "reqwest", "rsa", - "serde", - "serde_json", - "sha1", - "sha2", -] - -[[package]] -name = "reqsign-aws-v4" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4510c2a3e42b653cf788d560a3d54b0ae4cc315a62aaba773554f18319c0db0b" -dependencies = [ - "anyhow", - "async-trait", - "bytes", - "form_urlencoded", - "http 1.3.1", - "log", - "percent-encoding", - "quick-xml 0.38.4", - "reqsign-core", "rust-ini", "serde", "serde_json", - "serde_urlencoded", - "sha1", -] - -[[package]] -name = "reqsign-core" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39da118ccf3bdb067ac6cc40136fec99bc5ba418cbd388dc88e4ce0e5d0b1423" -dependencies = [ - "anyhow", - "async-trait", - "base64 0.22.1", - "bytes", - "form_urlencoded", - "hex", - "hmac", - "http 1.3.1", - "jiff", - "log", - "percent-encoding", "sha1", "sha2", - "windows-sys 0.61.2", -] - -[[package]] -name = "reqsign-file-read-tokio" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "669ea66036266a9ac371d2e63cc7d345e69994da0168b4e6f3487fe21e126f76" -dependencies = [ - "anyhow", - "async-trait", - "reqsign-core", "tokio", ] -[[package]] -name = "reqsign-http-send-reqwest" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46186bce769674f9200ad01af6f2ca42de3e819ddc002fff1edae135bfb6cd9c" -dependencies = [ - "anyhow", - "async-trait", - "bytes", - "futures-channel", - "http 1.3.1", - "http-body-util", - "reqsign-core", - "reqwest", - "wasm-bindgen-futures", -] - [[package]] name = "reqwest" version = "0.12.24" @@ -14879,9 +14563,12 @@ dependencies = [ [[package]] name = "slab" -version = "0.4.11" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] [[package]] name = "sled" @@ -17820,7 +17507,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e16e8641180a5..d92dd1f9bed60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -312,13 +312,13 @@ hyper-util = { version = "0.1.9", features = ["client", "client-legacy", "tokio" lru = "0.12" ## in branch dev -iceberg = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "6536f9c", features = [ +iceberg = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403", features = [ "storage-all", ] } -iceberg-catalog-glue = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "6536f9c" } -iceberg-catalog-hms = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "6536f9c" } -iceberg-catalog-rest = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "6536f9c" } -iceberg-catalog-s3tables = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "6536f9c" } +iceberg-catalog-glue = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } +iceberg-catalog-hms = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } +iceberg-catalog-rest = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } +iceberg-catalog-s3tables = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } # Explicitly specify compatible AWS SDK versions aws-config = "1.5.18" @@ -367,9 +367,9 @@ num-derive = "0.4.2" num-traits = "0.2.19" num_cpus = "1.17" object = "0.36.5" -object_store_opendal = { git = "https://github.com/apache/opendal.git", rev = "02953ef" } +object_store_opendal = { version = "0.54.1" } once_cell = "1.15.0" -opendal = { git = "https://github.com/apache/opendal.git", rev = "02953ef", features = [ +opendal = { version = "0.54.1", features = [ "layers-fastrace", "layers-prometheus-client", "layers-async-backtrace", @@ -387,8 +387,6 @@ opendal = { git = "https://github.com/apache/opendal.git", rev = "02953ef", feat "services-webhdfs", "services-huggingface", ] } -opendal-layer-immutable-index = { git = "https://github.com/apache/opendal.git", rev = "02953ef" } -opendal-layer-observe-metrics-common = { git = "https://github.com/apache/opendal.git", rev = "02953ef" } openraft = { version = "0.10.0", features = [ "serde", "tracing-log", diff --git a/src/bendsave/src/storage.rs b/src/bendsave/src/storage.rs index d9affd2871989..217d6535c7a25 100644 --- a/src/bendsave/src/storage.rs +++ b/src/bendsave/src/storage.rs @@ -16,9 +16,9 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; +use anyhow::anyhow; use anyhow::Ok; use anyhow::Result; -use anyhow::anyhow; use bytes::BufMut; use bytes::Bytes; use bytes::BytesMut; @@ -36,8 +36,8 @@ use databend_common_meta_client::ClientHandle; use databend_common_meta_client::MetaGrpcClient; use databend_common_meta_types::protobuf::ExportRequest; use databend_common_storage::init_operator; -use databend_common_users::UserApiProvider; use databend_common_users::builtin::BuiltIn; +use databend_common_users::UserApiProvider; use databend_common_version::BUILD_INFO; use databend_enterprise_query::license::RealLicenseManager; use databend_query::sessions::BuildInfoRef; @@ -45,9 +45,9 @@ use databend_query::sessions::SessionManager; use futures::TryStream; use futures::TryStreamExt; use log::debug; -use opendal::Operator; use opendal::layers::LoggingLayer; use opendal::layers::RetryLayer; +use opendal::Operator; /// Load the configuration file of databend query. /// @@ -219,7 +219,7 @@ mod tests { use std::path::Path; use databend_common_base::base::tokio; - use databend_common_storage::Scheme; + use opendal::Scheme; use super::*; @@ -237,12 +237,12 @@ mod tests { #[tokio::test] async fn test_load_epochfs_storage() -> Result<()> { let op = load_bendsave_storage("s3://bendsave/tmp?region=us-east-1").await?; - assert_eq!(op.info().scheme(), Scheme::S3.to_string()); + assert_eq!(op.info().scheme(), Scheme::S3); assert_eq!(op.info().name(), "bendsave"); assert_eq!(op.info().root(), "/tmp/"); let op = load_bendsave_storage("fs://opt").await?; - assert_eq!(op.info().scheme(), Scheme::Fs.to_string()); + assert_eq!(op.info().scheme(), Scheme::Fs); assert_eq!(op.info().root(), "/opt"); Ok(()) } diff --git a/src/binaries/tool/table_meta_inspector.rs b/src/binaries/tool/table_meta_inspector.rs index 902a0f8f4ad96..1e953dda3caa9 100644 --- a/src/binaries/tool/table_meta_inspector.rs +++ b/src/binaries/tool/table_meta_inspector.rs @@ -16,27 +16,27 @@ use std::collections::BTreeMap; use std::env; use std::fs::File; use std::io; +use std::io::stdout; use std::io::BufWriter; use std::io::Read; use std::io::Write; -use std::io::stdout; use clap::Parser; use databend_common_config::Config; use databend_common_config::InnerConfig; use databend_common_exception::Result; -use databend_common_storage::StorageConfig; use databend_common_storage::init_operator; -use databend_common_tracing::Config as LogConfig; +use databend_common_storage::StorageConfig; use databend_common_tracing::init_logging; +use databend_common_tracing::Config as LogConfig; use databend_common_version::BUILD_INFO; use databend_common_version::DATABEND_COMMIT_VERSION; use databend_query::GlobalServices; use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::TableSnapshot; use log::info; -use opendal::Operator; use opendal::services::Fs; +use opendal::Operator; use serde::Deserialize; use serde::Serialize; use serfig::collectors::from_file; diff --git a/src/common/exception/src/exception_into.rs b/src/common/exception/src/exception_into.rs index 4945fb7786a2e..df84a23c7ee79 100644 --- a/src/common/exception/src/exception_into.rs +++ b/src/common/exception/src/exception_into.rs @@ -19,12 +19,12 @@ use std::fmt::Formatter; use geozero::error::GeozeroError; +use crate::exception_backtrace::capture; +use crate::span::Span; use crate::ErrorCode; use crate::ErrorFrame; use crate::ParseError; use crate::StackTrace; -use crate::exception_backtrace::capture; -use crate::span::Span; #[derive(thiserror::Error)] enum OtherErrors { @@ -183,19 +183,12 @@ impl From for ErrorCode { impl From for ErrorCode { fn from(error: opendal::Error) -> Self { - let msg = error.message(); - let detail = error.to_string(); - let detail = detail - .strip_suffix(msg) - .and_then(|err| err.strip_suffix(" => ")) - .unwrap_or(&detail); - match error.kind() { - opendal::ErrorKind::NotFound => ErrorCode::StorageNotFound(msg).add_detail(detail), + opendal::ErrorKind::NotFound => ErrorCode::StorageNotFound(error.to_string()), opendal::ErrorKind::PermissionDenied => { - ErrorCode::StoragePermissionDenied(msg).add_detail(detail) + ErrorCode::StoragePermissionDenied(error.to_string()) } - _ => ErrorCode::StorageOther(msg).add_detail(detail), + _ => ErrorCode::StorageOther(format!("{error:?}")), } } } diff --git a/src/common/native/src/read/reader.rs b/src/common/native/src/read/reader.rs index 33bfa612fedeb..e407e26f90038 100644 --- a/src/common/native/src/read/reader.rs +++ b/src/common/native/src/read/reader.rs @@ -19,14 +19,14 @@ use std::io::SeekFrom; use databend_common_expression::TableSchema; use opendal::Reader; -use super::NativeReadBuf; -use super::PageIterator; use super::read_basic::read_u32; use super::read_basic::read_u64; -use crate::ColumnMeta; -use crate::PageMeta; +use super::NativeReadBuf; +use super::PageIterator; use crate::error::Error; use crate::error::Result; +use crate::ColumnMeta; +use crate::PageMeta; const DEFAULT_FOOTER_SIZE: u64 = 64 * 1024; diff --git a/src/common/storage/Cargo.toml b/src/common/storage/Cargo.toml index 8f4a9854e01d3..4f06485f6796b 100644 --- a/src/common/storage/Cargo.toml +++ b/src/common/storage/Cargo.toml @@ -34,8 +34,6 @@ iceberg = { workspace = true } log = { workspace = true } lru = { workspace = true } opendal = { workspace = true } -opendal-layer-immutable-index = { workspace = true } -opendal-layer-observe-metrics-common = { workspace = true } parquet = { workspace = true } prometheus-client = { workspace = true } regex = { workspace = true } diff --git a/src/common/storage/src/http_client.rs b/src/common/storage/src/http_client.rs index 05d4debaef2cb..fc712aef06559 100644 --- a/src/common/storage/src/http_client.rs +++ b/src/common/storage/src/http_client.rs @@ -22,11 +22,11 @@ use databend_common_metrics::storage::metrics_inc_storage_http_requests_count; use futures::TryStreamExt; use http::Request; use http::Response; -use opendal::Buffer; -use opendal::raw::HttpBody; -use opendal::raw::HttpFetch; use opendal::raw::parse_content_encoding; use opendal::raw::parse_content_length; +use opendal::raw::HttpBody; +use opendal::raw::HttpFetch; +use opendal::Buffer; use url::Url; pub struct StorageHttpClient { diff --git a/src/common/storage/src/lib.rs b/src/common/storage/src/lib.rs index d04d0dd66e9db..04fcd82ca6647 100644 --- a/src/common/storage/src/lib.rs +++ b/src/common/storage/src/lib.rs @@ -43,7 +43,6 @@ pub use http_client::StorageHttpClient; mod operator; pub use operator::DataOperator; pub use operator::OperatorRegistry; -pub use operator::Scheme; pub use operator::check_operator; pub use operator::init_operator; diff --git a/src/common/storage/src/metrics.rs b/src/common/storage/src/metrics.rs index 6c858cf955a33..894c83ff1b968 100644 --- a/src/common/storage/src/metrics.rs +++ b/src/common/storage/src/metrics.rs @@ -12,15 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; use std::sync::atomic::AtomicU64; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; +use std::sync::Arc; use std::time::Instant; -use opendal::Buffer; -use opendal::Metadata; -use opendal::Result; +use opendal::raw::oio; use opendal::raw::Access; use opendal::raw::Layer; use opendal::raw::LayeredAccess; @@ -31,7 +29,9 @@ use opendal::raw::RpDelete; use opendal::raw::RpList; use opendal::raw::RpRead; use opendal::raw::RpWrite; -use opendal::raw::oio; +use opendal::Buffer; +use opendal::Metadata; +use opendal::Result; /// StorageMetrics represents the metrics of storage (all bytes metrics are compressed size). #[derive(Debug, Default)] diff --git a/src/common/storage/src/metrics_layer.rs b/src/common/storage/src/metrics_layer.rs index 78d4e3831583a..90cd024e9a6e0 100644 --- a/src/common/storage/src/metrics_layer.rs +++ b/src/common/storage/src/metrics_layer.rs @@ -17,15 +17,15 @@ use std::fmt::Debug; use std::sync::LazyLock; use std::time::Duration; -use databend_common_base::runtime::metrics::FamilyCounter; -use databend_common_base::runtime::metrics::FamilyGauge; -use databend_common_base::runtime::metrics::FamilyHistogram; use databend_common_base::runtime::metrics::register_counter_family; use databend_common_base::runtime::metrics::register_gauge_family; use databend_common_base::runtime::metrics::register_histogram_family; +use databend_common_base::runtime::metrics::FamilyCounter; +use databend_common_base::runtime::metrics::FamilyGauge; +use databend_common_base::runtime::metrics::FamilyHistogram; +use opendal::layers::observe; use opendal::raw::Access; use opendal::raw::Layer; -use opendal_layer_observe_metrics_common as observe; use prometheus_client::encoding::EncodeLabel; use prometheus_client::encoding::EncodeLabelSet; use prometheus_client::encoding::LabelSetEncoder; diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index 5f9834696fb8c..d7c99b8c0a3f8 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -16,17 +16,17 @@ use std::env; use std::io::Error; use std::io::ErrorKind; use std::io::Result; -use std::str::FromStr; use std::sync::LazyLock; use std::time::Duration; use anyhow::anyhow; use databend_common_base::base::GlobalInstance; +use databend_common_base::runtime::metrics::register_counter_family; +use databend_common_base::runtime::metrics::FamilyCounter; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; -use databend_common_base::runtime::metrics::FamilyCounter; -use databend_common_base::runtime::metrics::register_counter_family; use databend_common_exception::ErrorCode; +use databend_common_meta_app::storage::set_s3_storage_class; use databend_common_meta_app::storage::S3StorageClass; use databend_common_meta_app::storage::StorageAzblobConfig; use databend_common_meta_app::storage::StorageCosConfig; @@ -44,29 +44,28 @@ use databend_common_meta_app::storage::StorageOssConfig; use databend_common_meta_app::storage::StorageParams; use databend_common_meta_app::storage::StorageS3Config; use databend_common_meta_app::storage::StorageWebhdfsConfig; -use databend_common_meta_app::storage::set_s3_storage_class; use databend_enterprise_storage_encryption::get_storage_encryption_handler; use log::warn; -use opendal::Builder; -use opendal::Operator; use opendal::layers::AsyncBacktraceLayer; use opendal::layers::ConcurrentLimitLayer; use opendal::layers::FastraceLayer; use opendal::layers::HttpClientLayer; +use opendal::layers::ImmutableIndexLayer; use opendal::layers::LoggingLayer; use opendal::layers::RetryInterceptor; use opendal::layers::RetryLayer; use opendal::layers::TimeoutLayer; use opendal::raw::HttpClient; use opendal::services; -use opendal_layer_immutable_index::ImmutableIndexLayer; +use opendal::Builder; +use opendal::Operator; -use crate::StorageConfig; -use crate::StorageHttpClient; use crate::http_client::get_storage_http_client; use crate::metrics_layer::METRICS_LAYER; use crate::operator_cache::get_operator_cache; use crate::runtime_layer::RuntimeLayer; +use crate::StorageConfig; +use crate::StorageHttpClient; static METRIC_OPENDAL_RETRIES_COUNT: LazyLock>> = LazyLock::new(|| register_counter_family("opendal_retries_count")); @@ -404,8 +403,8 @@ fn init_s3_operator(cfg: &StorageS3Config) -> Result { .session_token(&cfg.security_token) .role_arn(&cfg.role_arn) .external_id(&cfg.external_id) - // Don't enable it otherwise we will get Permission in stat unknown files - // .allow_anonymous() + // It's safe to allow anonymous since opendal will perform the check first. + .allow_anonymous() // Root. .root(&cfg.root); @@ -655,76 +654,3 @@ impl OperatorRegistry for iceberg::io::FileIO { Ok((file_io.get_operator().clone(), &location[pos..])) } } - -#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] -pub enum Scheme { - Azblob, - Gcs, - Hdfs, - Ipfs, - S3, - Oss, - Obs, - Cos, - Http, - Fs, - Webhdfs, - Huggingface, - Custom(&'static str), -} - -impl Scheme { - /// Convert self into static str. - pub fn into_static(self) -> &'static str { - self.into() - } -} - -impl From for &'static str { - fn from(v: Scheme) -> Self { - match v { - Scheme::Azblob => "azblob", - Scheme::Gcs => "gcs", - Scheme::Hdfs => "hdfs", - Scheme::Ipfs => "ipfs", - Scheme::S3 => "s3", - Scheme::Oss => "oss", - Scheme::Obs => "obs", - Scheme::Cos => "cos", - Scheme::Http => "http", - Scheme::Fs => "fs", - Scheme::Webhdfs => "webhdfs", - Scheme::Huggingface => "huggingface", - Scheme::Custom(s) => s, - } - } -} - -impl FromStr for Scheme { - type Err = Error; - - fn from_str(s: &str) -> Result { - let s = s.to_lowercase(); - match s.as_str() { - "azblob" => Ok(Scheme::Azblob), - "gcs" => Ok(Scheme::Gcs), - "hdfs" => Ok(Scheme::Hdfs), - "ipfs" => Ok(Scheme::Ipfs), - "s3" | "s3a" => Ok(Scheme::S3), - "oss" => Ok(Scheme::Oss), - "obs" => Ok(Scheme::Obs), - "cos" => Ok(Scheme::Cos), - "http" | "https" => Ok(Scheme::Http), - "fs" => Ok(Scheme::Fs), - "webhdfs" => Ok(Scheme::Webhdfs), - "huggingface" | "hf" => Ok(Scheme::Huggingface), - _ => Ok(Scheme::Custom(Box::leak(s.into_boxed_str()))), - } - } -} - -impl std::fmt::Display for Scheme { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.into_static()) - } -} diff --git a/src/common/storage/src/parquet.rs b/src/common/storage/src/parquet.rs index dcf640c839dab..f07c3fe2c7e1c 100644 --- a/src/common/storage/src/parquet.rs +++ b/src/common/storage/src/parquet.rs @@ -18,8 +18,8 @@ use std::sync::Arc; use arrow_schema::Schema as ArrowSchema; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::FieldIndex; use databend_common_expression::converts::arrow::EXTENSION_KEY; +use databend_common_expression::FieldIndex; use opendal::Operator; use parquet::arrow::parquet_to_arrow_schema; use parquet::errors::ParquetError; @@ -213,14 +213,14 @@ pub fn traverse_parquet_schema_tree( #[cfg(test)] mod tests { + use databend_common_expression::types::NumberDataType; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; - use databend_common_expression::types::NumberDataType; use parquet::arrow::ArrowSchemaConverter; - use crate::parquet::ParquetSchemaTreeNode; use crate::parquet::build_parquet_schema_tree; + use crate::parquet::ParquetSchemaTreeNode; #[test] fn test_build_parquet_schema_tree() { diff --git a/src/common/storage/src/runtime_layer.rs b/src/common/storage/src/runtime_layer.rs index 64b4b025a074d..2ce25bafeb78d 100644 --- a/src/common/storage/src/runtime_layer.rs +++ b/src/common/storage/src/runtime_layer.rs @@ -19,9 +19,7 @@ use std::sync::Arc; use databend_common_base::runtime::Runtime; use databend_common_base::runtime::ThreadTracker; use databend_common_base::runtime::TrySpawn; -use opendal::Buffer; -use opendal::Metadata; -use opendal::Result; +use opendal::raw::oio; use opendal::raw::Access; use opendal::raw::Layer; use opendal::raw::LayeredAccess; @@ -39,7 +37,9 @@ use opendal::raw::RpPresign; use opendal::raw::RpRead; use opendal::raw::RpStat; use opendal::raw::RpWrite; -use opendal::raw::oio; +use opendal::Buffer; +use opendal::Metadata; +use opendal::Result; /// # TODO /// @@ -307,18 +307,26 @@ impl oio::List for RuntimeIO { } impl oio::Delete for RuntimeIO { - async fn delete(&mut self, path: &str, args: OpDelete) -> Result<()> { - self.inner.as_mut().unwrap().delete(path, args).await + fn delete(&mut self, path: &str, args: OpDelete) -> Result<()> { + self.inner.as_mut().unwrap().delete(path, args) } - async fn close(&mut self) -> Result<()> { + async fn flush(&mut self) -> Result { let mut r = self.inner.take().expect("deleter must be valid"); let runtime = self.runtime.clone(); - let _ = runtime - .spawn(async move { r.close().await }) + let (r, res) = runtime + .try_spawn( + async move { + let res = r.flush().await; + (r, res) + }, + Some(self.spawn_task_name.clone()), + ) + .expect("spawn must success") .await - .expect("join must success")?; - Ok(()) + .expect("join must success"); + self.inner = Some(r); + res } } diff --git a/src/common/storage/src/stage.rs b/src/common/storage/src/stage.rs index 6249648de87b9..dc99d215532fe 100644 --- a/src/common/storage/src/stage.rs +++ b/src/common/storage/src/stage.rs @@ -24,17 +24,17 @@ use databend_common_exception::Result; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::StageType; use databend_common_meta_app::principal::UserIdentity; +use futures::stream; use futures::Stream; use futures::StreamExt; use futures::TryStreamExt; -use futures::stream; use opendal::EntryMode; use opendal::Metadata; use opendal::Operator; use regex::Regex; -use crate::DataOperator; use crate::init_operator; +use crate::DataOperator; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub enum StageFileStatus { @@ -59,10 +59,7 @@ impl StageFileInfo { path, size: meta.content_length(), md5: meta.content_md5().map(str::to_string), - last_modified: meta.last_modified().map(|m| { - let ns = m.into_inner().as_nanosecond(); - DateTime::from_timestamp_nanos(ns as i64) - }), + last_modified: meta.last_modified(), etag: meta.etag().map(str::to_string), status: StageFileStatus::NeedCopy, creator: None, diff --git a/src/common/tracing/src/init.rs b/src/common/tracing/src/init.rs index cd3f45a787a16..8749fe710ab79 100644 --- a/src/common/tracing/src/init.rs +++ b/src/common/tracing/src/init.rs @@ -14,35 +14,35 @@ use std::borrow::Cow; use std::collections::BTreeMap; -use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; +use std::sync::Arc; -use databend_common_base::base::GlobalInstance; use databend_common_base::base::tokio; use databend_common_base::base::tokio::sync::RwLock; +use databend_common_base::base::GlobalInstance; use databend_common_base::runtime::Thread; use fastrace::prelude::*; use log::LevelFilter; -use logforth::filter::EnvFilter; use logforth::filter::env_filter::EnvFilterBuilder; +use logforth::filter::EnvFilter; use opendal::Operator; use opentelemetry_otlp::Compression; use opentelemetry_otlp::WithExportConfig; use opentelemetry_otlp::WithTonicConfig; -use crate::Config; use crate::config::LogFormat; use crate::config::OTLPProtocol; use crate::filter::ThreadTrackerFilter; +use crate::loggers::new_rolling_file_appender; use crate::loggers::IdenticalLayout; use crate::loggers::JsonLayout; use crate::loggers::TextLayout; -use crate::loggers::new_rolling_file_appender; use crate::predefined_tables::table_to_target; use crate::query_log_collector::QueryLogCollector; use crate::remote_log::RemoteLog; use crate::structlog::StructLogReporter; +use crate::Config; const HEADER_TRACE_PARENT: &str = "traceparent"; diff --git a/src/common/tracing/tests/it/remote_log.rs b/src/common/tracing/tests/it/remote_log.rs index f064a106bc0c5..532bdd467ed2e 100644 --- a/src/common/tracing/tests/it/remote_log.rs +++ b/src/common/tracing/tests/it/remote_log.rs @@ -17,9 +17,10 @@ use std::sync::Arc; use std::time::Duration; use async_channel::bounded; -use databend_common_base::base::GlobalInstance; use databend_common_base::base::tokio; +use databend_common_base::base::GlobalInstance; use databend_common_exception::Result; +use databend_common_tracing::convert_to_batch; use databend_common_tracing::Config; use databend_common_tracing::GlobalLogger; use databend_common_tracing::LogMessage; @@ -27,11 +28,10 @@ use databend_common_tracing::RemoteLog; use databend_common_tracing::RemoteLogBuffer; use databend_common_tracing::RemoteLogElement; use databend_common_tracing::RemoteLogGuard; -use databend_common_tracing::convert_to_batch; use log::Level; use log::Record; -use opendal::Operator; use opendal::services; +use opendal::Operator; fn setup() -> Result<(RemoteLog, Box)> { let mut labels = BTreeMap::new(); diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs index c3c659c720724..27ad1abe8fcc0 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs @@ -27,27 +27,28 @@ use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_meta_app::schema::least_visible_time_ident::LeastVisibleTimeIdent; use databend_common_meta_app::schema::LeastVisibleTime; use databend_common_meta_app::schema::ListIndexesByIdReq; -use databend_common_meta_app::schema::least_visible_time_ident::LeastVisibleTimeIdent; -use databend_common_storages_fuse::FuseTable; -use databend_common_storages_fuse::RetentionPolicy; use databend_common_storages_fuse::io::MetaReaders; use databend_common_storages_fuse::io::SegmentsIO; use databend_common_storages_fuse::io::TableMetaLocationGenerator; +use databend_common_storages_fuse::FuseTable; +use databend_common_storages_fuse::RetentionPolicy; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; use databend_storages_common_cache::LoadParams; use databend_storages_common_io::Files; +use databend_storages_common_table_meta::meta::uuid_from_date_time; use databend_storages_common_table_meta::meta::CompactSegmentInfo; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::VACUUM2_OBJECT_KEY_PREFIX; -use databend_storages_common_table_meta::meta::uuid_from_date_time; use futures_util::TryStreamExt; use log::info; use opendal::Entry; use opendal::ErrorKind; use opendal::Operator; +use opendal::Scheme; use uuid::Version; /// An assumption of the maximum duration from the time the first block is written to the time the @@ -485,7 +486,7 @@ async fn list_until_prefix( let dal = fuse_table.get_operator_ref(); match dal.info().scheme() { - "fs" => fs_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await, + Scheme::Fs => fs_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await, _ => general_list_until_prefix(dal, path, until, need_one_more, gc_root_meta_ts).await, } } @@ -586,9 +587,6 @@ async fn is_gc_candidate_segment_block( })? }; - let last_modified = - DateTime::from_timestamp_nanos(last_modified.into_inner().as_nanosecond() as i64); - Ok(last_modified + ASSUMPTION_MAX_TXN_DURATION < gc_root_meta_ts) } @@ -676,15 +674,12 @@ async fn select_gc_root( let gc_root = read_snapshot_from_location(fuse_table, &gc_root_path).await; let gc_root_meta_ts = match dal.stat(&gc_root_path).await { - Ok(v) => v - .last_modified() - .ok_or_else(|| { - ErrorCode::StorageOther(format!( - "Failed to get `last_modified` metadata of the gc root object '{}'", - gc_root_path - )) - }) - .map(|v| DateTime::from_timestamp_nanos(v.into_inner().as_nanosecond() as i64))?, + Ok(v) => v.last_modified().ok_or_else(|| { + ErrorCode::StorageOther(format!( + "Failed to get `last_modified` metadata of the gc root object '{}'", + gc_root_path + )) + })?, Err(e) => { return if e.kind() == ErrorKind::NotFound { // Concurrent vacuum, ignore it @@ -716,13 +711,8 @@ async fn select_gc_root( gc_root_path )) })?, - Some(v) => v + Some(v) => v, }; - - let last_modified = DateTime::from_timestamp_nanos( - last_modified.into_inner().as_nanosecond() as i64, - ); - if last_modified + ASSUMPTION_MAX_TXN_DURATION < gc_root_meta_ts { gc_candidates.push(path.to_owned()); } diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_temporary_files.rs b/src/query/ee/src/storages/fuse/operations/vacuum_temporary_files.rs index b595a2bb2c326..7458db51ee331 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_temporary_files.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_temporary_files.rs @@ -105,7 +105,7 @@ async fn vacuum_by_duration( let meta = meta.unwrap(); if let Some(modified) = meta.last_modified() { - if timestamp - modified.into_inner().as_millisecond() < expire_time { + if timestamp - modified.timestamp_millis() < expire_time { continue; } } diff --git a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs index e6b54943945ff..eec3b1edbed7d 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![allow(clippy::let_and_return)] use std::fmt::Debug; use std::sync::Arc; use std::time::Duration; @@ -51,13 +50,13 @@ use databend_enterprise_vacuum_handler::vacuum_handler::VacuumTempOptions; use databend_query::test_kits::*; use databend_storages_common_io::Files; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; -use opendal::EntryMode; -use opendal::Metadata; -use opendal::OperatorBuilder; use opendal::raw::Access; use opendal::raw::AccessorInfo; use opendal::raw::OpStat; use opendal::raw::RpStat; +use opendal::EntryMode; +use opendal::Metadata; +use opendal::OperatorBuilder; #[tokio::test(flavor = "multi_thread")] async fn test_fuse_do_vacuum_drop_tables() -> Result<()> { @@ -227,13 +226,13 @@ mod test_accessor { use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; + use opendal::raw::oio; + use opendal::raw::oio::Entry; use opendal::raw::MaybeSend; use opendal::raw::OpDelete; use opendal::raw::OpList; use opendal::raw::RpDelete; use opendal::raw::RpList; - use opendal::raw::oio; - use opendal::raw::oio::Entry; use super::*; @@ -298,15 +297,17 @@ mod test_accessor { } impl oio::Delete for MockDeleter { - async fn delete(&mut self, _path: &str, _args: OpDelete) -> opendal::Result<()> { + fn delete(&mut self, _path: &str, _args: OpDelete) -> opendal::Result<()> { self.size += 1; Ok(()) } - async fn close(&mut self) -> opendal::Result<()> { + async fn flush(&mut self) -> opendal::Result { self.hit_batch.store(true, Ordering::Release); + + let n = self.size; self.size = 0; - Ok(()) + Ok(n) } } @@ -888,12 +889,10 @@ async fn test_vacuum_drop_create_or_replace_impl(vacuum_stmts: &[&str]) -> Resul // db1.t1 should still be accessible fixture.execute_command("select * from db1.t1").await?; // db2.t1 should not exist - assert!( - fixture - .execute_command("select * from db2.t1") - .await - .is_err() - ); + assert!(fixture + .execute_command("select * from db2.t1") + .await + .is_err()); Ok(()) } diff --git a/src/query/service/src/history_tables/external.rs b/src/query/service/src/history_tables/external.rs index 8e1014a98b8c6..bd349547cea56 100644 --- a/src/query/service/src/history_tables/external.rs +++ b/src/query/service/src/history_tables/external.rs @@ -15,8 +15,8 @@ use std::collections::BTreeMap; use databend_common_meta_app::storage::StorageParams; -use databend_common_storage::Scheme; use opendal::raw::normalize_root; +use opendal::Scheme; #[derive(Debug)] pub struct ExternalStorageConnection { diff --git a/src/query/service/src/history_tables/global_history_log.rs b/src/query/service/src/history_tables/global_history_log.rs index e1761db2de5be..1dcd5b4d2de76 100644 --- a/src/query/service/src/history_tables/global_history_log.rs +++ b/src/query/service/src/history_tables/global_history_log.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; +use std::sync::Arc; use std::time::Duration; use databend_common_base::base::GlobalInstance; +use databend_common_base::runtime::spawn; use databend_common_base::runtime::CaptureLogSettings; use databend_common_base::runtime::MemStat; use databend_common_base::runtime::Runtime; use databend_common_base::runtime::ThreadTracker; use databend_common_base::runtime::TrySpawn; -use databend_common_base::runtime::spawn; use databend_common_catalog::table_context::TableContext; use databend_common_config::GlobalConfig; use databend_common_config::InnerConfig; @@ -35,14 +35,14 @@ use databend_common_license::license_manager::LicenseManagerSwitch; use databend_common_meta_app::storage::StorageParams; use databend_common_meta_client::MetaGrpcClient; use databend_common_sql::Planner; -use databend_common_storage::DataOperator; use databend_common_storage::init_operator; -use databend_common_tracing::GlobalLogger; -use databend_common_tracing::HistoryTable; +use databend_common_storage::DataOperator; use databend_common_tracing::get_all_history_table_names; use databend_common_tracing::init_history_tables; -use futures_util::TryStreamExt; +use databend_common_tracing::GlobalLogger; +use databend_common_tracing::HistoryTable; use futures_util::future::join_all; +use futures_util::TryStreamExt; use log::debug; use log::error; use log::info; @@ -50,18 +50,18 @@ use log::warn; use opendal::raw::normalize_root; use parking_lot::Mutex; use rand::random; -use tokio::time::Instant; use tokio::time::sleep; +use tokio::time::Instant; use uuid::Uuid; use crate::clusters::ClusterDiscovery; use crate::history_tables::alter_table::get_alter_table_sql; use crate::history_tables::alter_table::get_log_table; use crate::history_tables::alter_table::should_reset; -use crate::history_tables::error_handling::ErrorCounters; use crate::history_tables::error_handling::is_temp_error; -use crate::history_tables::external::ExternalStorageConnection; +use crate::history_tables::error_handling::ErrorCounters; use crate::history_tables::external::get_external_storage_connection; +use crate::history_tables::external::ExternalStorageConnection; use crate::history_tables::meta::HistoryMetaHandle; use crate::history_tables::session::create_session; use crate::interpreters::InterpreterFactory; @@ -361,12 +361,11 @@ impl GlobalHistoryLog { let vacuum = format!("VACUUM TABLE system_history.{}", table.name); self.execute_sql(&vacuum).await?; } - info!( - "periodic retention operation on history log table '{}' completed successfully (delete {} secs, vacuum {} secs)", - table.name, - delete_elapsed, - start.elapsed().as_secs() - delete_elapsed - ); + info!("periodic retention operation on history log table '{}' completed successfully (delete {} secs, vacuum {} secs)", + table.name, + delete_elapsed, + start.elapsed().as_secs() - delete_elapsed + ); return Ok(true); } Ok(false) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs index 86727c1f27ca7..480d075562008 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs @@ -15,46 +15,46 @@ use std::sync::Arc; use std::time::Instant; -use arrow_ipc::CompressionType; use arrow_ipc::writer::IpcWriteOptions; +use arrow_ipc::CompressionType; use databend_common_base::base::ProgressValues; use databend_common_base::runtime::profile::Profile; use databend_common_base::runtime::profile::ProfileStatisticsName; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::BlockMetaInfoDowncast; -use databend_common_expression::DataBlock; -use databend_common_expression::FromData; -use databend_common_expression::PartitionedPayload; use databend_common_expression::arrow::serialize_column; use databend_common_expression::types::ArgType; use databend_common_expression::types::ArrayType; use databend_common_expression::types::Int64Type; use databend_common_expression::types::ReturnType; use databend_common_expression::types::UInt64Type; +use databend_common_expression::BlockMetaInfoDowncast; +use databend_common_expression::DataBlock; +use databend_common_expression::FromData; +use databend_common_expression::PartitionedPayload; use databend_common_pipeline::core::InputPort; use databend_common_pipeline::core::OutputPort; use databend_common_pipeline::core::Processor; -use databend_common_pipeline_transforms::UnknownMode; use databend_common_pipeline_transforms::processors::BlockMetaTransform; use databend_common_pipeline_transforms::processors::BlockMetaTransformer; +use databend_common_pipeline_transforms::UnknownMode; use databend_common_settings::FlightCompression; use futures_util::future::BoxFuture; use log::info; use opendal::Operator; use super::SerializePayload; +use crate::pipelines::processors::transforms::aggregator::agg_spilling_aggregate_payload as local_agg_spilling_aggregate_payload; +use crate::pipelines::processors::transforms::aggregator::aggregate_exchange_injector::compute_block_number; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::exchange_defines; use crate::pipelines::processors::transforms::aggregator::AggregateSerdeMeta; use crate::pipelines::processors::transforms::aggregator::AggregatorParams; use crate::pipelines::processors::transforms::aggregator::FlightSerialized; use crate::pipelines::processors::transforms::aggregator::FlightSerializedMeta; use crate::pipelines::processors::transforms::aggregator::SerializeAggregateStream; -use crate::pipelines::processors::transforms::aggregator::agg_spilling_aggregate_payload as local_agg_spilling_aggregate_payload; -use crate::pipelines::processors::transforms::aggregator::aggregate_exchange_injector::compute_block_number; -use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; -use crate::pipelines::processors::transforms::aggregator::exchange_defines; -use crate::servers::flight::v1::exchange::ExchangeShuffleMeta; use crate::servers::flight::v1::exchange::serde::serialize_block; +use crate::servers::flight::v1::exchange::ExchangeShuffleMeta; use crate::sessions::QueryContext; use crate::spillers::Spiller; use crate::spillers::SpillerConfig; diff --git a/src/query/service/src/spillers/adapter.rs b/src/query/service/src/spillers/adapter.rs index c33877f6dba9a..b6aab5bfe38e0 100644 --- a/src/query/service/src/spillers/adapter.rs +++ b/src/query/service/src/spillers/adapter.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; -use std::collections::hash_map::Entry; use std::ops::DerefMut; use std::ops::Range; use std::sync::Arc; use std::sync::RwLock; use std::time::Instant; -use databend_common_base::base::ProgressValues; use databend_common_base::base::dma_buffer_to_bytes; use databend_common_base::base::dma_read_file_range; +use databend_common_base::base::ProgressValues; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::DataBlock; @@ -36,14 +36,14 @@ use opendal::Buffer; use opendal::Operator; use parquet::file::metadata::RowGroupMetaDataPtr; -use super::Location; -use super::SpillsBufferPool; use super::async_buffer::SpillTarget; use super::block_reader::BlocksReader; use super::block_writer::BlocksWriter; use super::inner::*; use super::row_group_encoder::*; use super::serialize::*; +use super::Location; +use super::SpillsBufferPool; use crate::sessions::QueryContext; #[derive(Clone)] diff --git a/src/query/service/src/spillers/async_buffer.rs b/src/query/service/src/spillers/async_buffer.rs index f7f58afe1b71c..976ba679b0c53 100644 --- a/src/query/service/src/spillers/async_buffer.rs +++ b/src/query/service/src/spillers/async_buffer.rs @@ -32,27 +32,27 @@ use databend_common_base::runtime::Runtime; use databend_common_base::runtime::TrySpawn; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::infer_table_schema; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_expression::TableSchemaRef; -use databend_common_expression::infer_table_schema; use databend_common_meta_app::storage::StorageParams; -use databend_common_storages_parquet::ReadSettings; -use databend_common_storages_parquet::parquet_reader::RowGroupCore; use databend_common_storages_parquet::parquet_reader::row_group::get_ranges; -use fastrace::Span; +use databend_common_storages_parquet::parquet_reader::RowGroupCore; +use databend_common_storages_parquet::ReadSettings; use fastrace::future::FutureExt; +use fastrace::Span; use opendal::Metadata; use opendal::Operator; use opendal::Writer; -use parquet::arrow::ArrowWriter; -use parquet::arrow::FieldLevels; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::arrow_reader::RowGroups; use parquet::arrow::parquet_to_arrow_field_levels; use parquet::arrow::parquet_to_arrow_schema; +use parquet::arrow::ArrowWriter; +use parquet::arrow::FieldLevels; +use parquet::arrow::ProjectionMask; use parquet::basic::Compression; use parquet::file::metadata::RowGroupMetaData; use parquet::file::properties::EnabledStatistics; @@ -832,9 +832,9 @@ impl Background { #[cfg(test)] mod tests { use std::io::Write; - use std::sync::Arc; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; + use std::sync::Arc; use databend_common_base::runtime::spawn; use opendal::Operator; diff --git a/src/query/service/src/spillers/block_reader.rs b/src/query/service/src/spillers/block_reader.rs index 98154ebb6b713..2883c74ef2d9d 100644 --- a/src/query/service/src/spillers/block_reader.rs +++ b/src/query/service/src/spillers/block_reader.rs @@ -22,8 +22,8 @@ use databend_common_expression::DataBlock; use databend_common_pipeline_transforms::traits::Location; use opendal::Operator; -use super::Layout; use super::serialize::deserialize_block; +use super::Layout; pub struct BlocksReader<'a> { read_bytes: u64, diff --git a/src/query/service/src/spillers/inner.rs b/src/query/service/src/spillers/inner.rs index 4fa84bb4b899f..9b4fe414424e7 100644 --- a/src/query/service/src/spillers/inner.rs +++ b/src/query/service/src/spillers/inner.rs @@ -17,23 +17,23 @@ use std::fmt::Formatter; use std::sync::Arc; use std::time::Instant; +use databend_common_base::base::dma_buffer_to_bytes; +use databend_common_base::base::dma_read_file_range; use databend_common_base::base::Alignment; use databend_common_base::base::DmaWriteBuf; use databend_common_base::base::GlobalUniqName; -use databend_common_base::base::dma_buffer_to_bytes; -use databend_common_base::base::dma_read_file_range; use databend_common_base::runtime::profile::Profile; use databend_common_base::runtime::profile::ProfileStatisticsName; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_storages_common_cache::TempDir; +use opendal::services::Fs; use opendal::Buffer; use opendal::Operator; -use opendal::services::Fs; -use super::Location; use super::async_buffer::SpillTarget; use super::serialize::*; +use super::Location; /// Spiller type, currently only supports HashJoin #[derive(Clone, Debug, Eq, PartialEq)] diff --git a/src/query/service/src/spillers/row_group_encoder.rs b/src/query/service/src/spillers/row_group_encoder.rs index 6f7b7c6b3dc20..f832c69e5865d 100644 --- a/src/query/service/src/spillers/row_group_encoder.rs +++ b/src/query/service/src/spillers/row_group_encoder.rs @@ -19,30 +19,30 @@ use std::sync::Arc; use arrow_schema::Schema; use bytes::Bytes; +use databend_common_base::base::dma_buffer_to_bytes; use databend_common_base::base::DmaWriteBuf; use databend_common_base::base::SyncDmaFile; -use databend_common_base::base::dma_buffer_to_bytes; use databend_common_base::rangemap::RangeMerger; use databend_common_exception::Result; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_expression::Value; -use databend_common_storages_parquet::ReadSettings; use databend_common_storages_parquet::parquet_reader::RowGroupCore; +use databend_common_storages_parquet::ReadSettings; use databend_storages_common_cache::ParquetMetaData; use databend_storages_common_cache::TempDir; use databend_storages_common_cache::TempPath; use either::Either; use opendal::Operator; -use parquet::arrow::ArrowSchemaConverter; -use parquet::arrow::FieldLevels; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; -use parquet::arrow::arrow_writer::ArrowColumnWriter; use parquet::arrow::arrow_writer::compute_leaves; use parquet::arrow::arrow_writer::get_column_writers; +use parquet::arrow::arrow_writer::ArrowColumnWriter; use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::ArrowSchemaConverter; +use parquet::arrow::FieldLevels; +use parquet::arrow::ProjectionMask; use parquet::errors; use parquet::file::metadata::RowGroupMetaData; use parquet::file::metadata::RowGroupMetaDataPtr; @@ -53,11 +53,11 @@ use parquet::file::writer::SerializedFileWriter; use parquet::file::writer::SerializedRowGroupWriter; use parquet::schema::types::SchemaDescriptor; +use super::async_buffer::BufferWriter; +use super::async_buffer::SpillTarget; use super::Location; use super::SpillerInner; use super::SpillsBufferPool; -use super::async_buffer::BufferWriter; -use super::async_buffer::SpillTarget; pub struct Properties { schema: Arc, @@ -534,13 +534,13 @@ impl SpillerInner { #[cfg(test)] mod tests { use databend_common_exception::Result; - use databend_common_expression::Column; - use databend_common_expression::FromData; + use databend_common_expression::types::array::ArrayColumnBuilder; + use databend_common_expression::types::number::Int32Type; use databend_common_expression::types::ArgType; use databend_common_expression::types::DataType; use databend_common_expression::types::StringType; - use databend_common_expression::types::array::ArrayColumnBuilder; - use databend_common_expression::types::number::Int32Type; + use databend_common_expression::Column; + use databend_common_expression::FromData; use super::*; diff --git a/src/query/service/src/spillers/serialize.rs b/src/query/service/src/spillers/serialize.rs index 386bb731c60d9..681e6e217367c 100644 --- a/src/query/service/src/spillers/serialize.rs +++ b/src/query/service/src/spillers/serialize.rs @@ -25,18 +25,18 @@ use databend_common_base::base::Alignment; use databend_common_base::base::DmaWriteBuf; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::arrow::write_column; +use databend_common_expression::infer_table_schema; +use databend_common_expression::types::DataType; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::DataField; use databend_common_expression::DataSchema; use databend_common_expression::Value; -use databend_common_expression::arrow::write_column; -use databend_common_expression::infer_table_schema; -use databend_common_expression::types::DataType; use databend_storages_common_io::BufferReader; use opendal::Buffer; -use parquet::arrow::ArrowWriter; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::ArrowWriter; use parquet::basic::Compression; use parquet::file::properties::EnabledStatistics; use parquet::file::properties::WriterProperties; @@ -199,10 +199,10 @@ fn bare_blocks_to_parquet( #[cfg(test)] mod tests { use bytes::Bytes; - use databend_common_expression::FromData; use databend_common_expression::block_debug::assert_block_value_eq; use databend_common_expression::types::Int64Type; use databend_common_expression::types::StringType; + use databend_common_expression::FromData; use super::*; diff --git a/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs b/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs index 1c9113707d35d..7642693c97693 100644 --- a/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs +++ b/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs @@ -30,12 +30,12 @@ use databend_common_catalog::table_args::TableArgs; use databend_common_compress::CompressAlgorithm; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::NumberDataType; use databend_common_expression::BlockThresholds; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRefExt; -use databend_common_expression::types::NumberDataType; use databend_common_meta_app::principal::FileFormatParams; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::StageType; @@ -46,21 +46,21 @@ use databend_common_pipeline::core::Pipeline; use databend_common_pipeline::sources::PrefetchAsyncSourcer; use databend_common_pipeline_transforms::TransformPipelineHelper; use databend_common_sql::binder::resolve_file_location; -use databend_common_storage::Scheme; -use databend_common_storage::StageFilesInfo; use databend_common_storage::init_stage_operator; +use databend_common_storage::StageFilesInfo; use databend_common_storages_stage::BytesReader; use databend_common_storages_stage::Decompressor; use databend_common_storages_stage::InferSchemaPartInfo; use databend_common_storages_stage::LoadContext; use databend_common_users::Object; use databend_storages_common_stage::SingleFilePartition; +use opendal::Scheme; use super::parquet::ParquetInferSchemaSource; use crate::sessions::TableContext; -use crate::table_functions::TableFunction; use crate::table_functions::infer_schema::separator::InferSchemaSeparator; use crate::table_functions::infer_schema::table_args::InferSchemaArgsParsed; +use crate::table_functions::TableFunction; pub(crate) const INFER_SCHEMA: &str = "infer_schema"; diff --git a/src/query/service/src/test_kits/block_writer.rs b/src/query/service/src/test_kits/block_writer.rs index a11e8e9d55b36..f9a7ee9f12918 100644 --- a/src/query/service/src/test_kits/block_writer.rs +++ b/src/query/service/src/test_kits/block_writer.rs @@ -21,15 +21,16 @@ use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; use databend_common_sql::ApproxDistinctColumns; use databend_common_sql::BloomIndexColumns; -use databend_common_storages_fuse::FuseStorageFormat; -use databend_common_storages_fuse::io::TableMetaLocationGenerator; -use databend_common_storages_fuse::io::WriteSettings; use databend_common_storages_fuse::io::build_column_hlls; use databend_common_storages_fuse::io::serialize_block; +use databend_common_storages_fuse::io::TableMetaLocationGenerator; +use databend_common_storages_fuse::io::WriteSettings; +use databend_common_storages_fuse::FuseStorageFormat; use databend_storages_common_blocks::blocks_to_parquet; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; use databend_storages_common_index::RangeIndex; +use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; use databend_storages_common_table_meta::meta::Compression; @@ -37,7 +38,6 @@ use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::RawBlockHLL; use databend_storages_common_table_meta::meta::StatisticsOfColumns; use databend_storages_common_table_meta::meta::TableMetaTimestamps; -use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; use parquet::format::FileMetaData; diff --git a/src/query/service/src/test_kits/fuse.rs b/src/query/service/src/test_kits/fuse.rs index f78714a52a077..9ec4d6eb57827 100644 --- a/src/query/service/src/test_kits/fuse.rs +++ b/src/query/service/src/test_kits/fuse.rs @@ -20,24 +20,27 @@ use chrono::DateTime; use chrono::Duration; use chrono::Utc; use databend_common_exception::Result; +use databend_common_expression::types::NumberScalar; use databend_common_expression::BlockThresholds; use databend_common_expression::DataBlock; use databend_common_expression::DataSchemaRef; use databend_common_expression::ScalarRef; use databend_common_expression::SendableDataBlockStream; -use databend_common_expression::types::NumberScalar; use databend_common_sql::optimizer::ir::SExpr; use databend_common_sql::plans::Mutation; use databend_common_storages_factory::Table; -use databend_common_storages_fuse::FUSE_TBL_SEGMENT_PREFIX; -use databend_common_storages_fuse::FuseStorageFormat; -use databend_common_storages_fuse::FuseTable; use databend_common_storages_fuse::io::MetaWriter; use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::statistics::gen_columns_statistics; use databend_common_storages_fuse::statistics::merge_statistics; use databend_common_storages_fuse::statistics::reducers::reduce_block_metas; +use databend_common_storages_fuse::FuseStorageFormat; +use databend_common_storages_fuse::FuseTable; +use databend_common_storages_fuse::FUSE_TBL_SEGMENT_PREFIX; use databend_storages_common_cache::SegmentStatistics; +use databend_storages_common_table_meta::meta::testing::SegmentInfoV2; +use databend_storages_common_table_meta::meta::testing::TableSnapshotV2; +use databend_storages_common_table_meta::meta::testing::TableSnapshotV4; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::Location; @@ -46,17 +49,14 @@ use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::Versioned; -use databend_storages_common_table_meta::meta::testing::SegmentInfoV2; -use databend_storages_common_table_meta::meta::testing::TableSnapshotV2; -use databend_storages_common_table_meta::meta::testing::TableSnapshotV4; use futures_util::TryStreamExt; use opendal::Operator; use serde::Serialize; use uuid::Uuid; -use super::TestFixture; use super::block_writer::BlockWriter; use super::old_version_generator; +use super::TestFixture; use crate::interpreters::Interpreter; use crate::interpreters::MutationInterpreter; use crate::sessions::QueryContext; diff --git a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs index ec26082b6d230..a6ef2e8d06600 100644 --- a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs +++ b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs @@ -18,6 +18,9 @@ use std::sync::Arc; use chrono::Utc; use databend_common_base::base::tokio; +use databend_common_expression::types::Int32Type; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberScalar; use databend_common_expression::ColumnId; use databend_common_expression::DataBlock; use databend_common_expression::FromData; @@ -25,13 +28,10 @@ use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRefExt; -use databend_common_expression::types::Int32Type; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::NumberScalar; -use databend_common_storages_fuse::FuseStorageFormat; use databend_common_storages_fuse::io::TableMetaLocationGenerator; -use databend_common_storages_fuse::statistics::STATS_STRING_PREFIX_LEN; use databend_common_storages_fuse::statistics::gen_columns_statistics; +use databend_common_storages_fuse::statistics::STATS_STRING_PREFIX_LEN; +use databend_common_storages_fuse::FuseStorageFormat; use databend_query::test_kits::*; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheValue; @@ -48,8 +48,8 @@ use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::Versioned; use opendal::Operator; use parquet::format::FileMetaData; -use sysinfo::System; use sysinfo::get_current_pid; +use sysinfo::System; use uuid::Uuid; // NOTE: diff --git a/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs b/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs index d566bbf6c4088..28dd954cafcaf 100644 --- a/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs +++ b/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs @@ -15,6 +15,9 @@ use std::sync::Arc; use databend_common_exception::Result; +use databend_common_expression::types::BinaryType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::UInt64Type; use databend_common_expression::Column; use databend_common_expression::DataBlock; use databend_common_expression::FromData; @@ -23,28 +26,25 @@ use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; -use databend_common_expression::types::BinaryType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::UInt64Type; -use databend_common_storages_fuse::FuseStorageFormat; -use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::io::read::read_column_oriented_segment; +use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::statistics::gen_columns_statistics; use databend_common_storages_fuse::statistics::reduce_block_metas; +use databend_common_storages_fuse::FuseStorageFormat; use databend_query::test_kits::BlockWriter; use databend_query::test_kits::TestFixture; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; -use databend_storages_common_table_meta::meta::BlockMeta; -use databend_storages_common_table_meta::meta::ClusterStatistics; -use databend_storages_common_table_meta::meta::Compression; use databend_storages_common_table_meta::meta::column_oriented_segment::*; use databend_storages_common_table_meta::meta::decode; use databend_storages_common_table_meta::meta::testing::MetaEncoding; +use databend_storages_common_table_meta::meta::BlockMeta; +use databend_storages_common_table_meta::meta::ClusterStatistics; +use databend_storages_common_table_meta::meta::Compression; use opendal::Operator; -async fn generate_column_oriented_segment() --> Result<(ColumnOrientedSegment, Vec, TableSchemaRef)> { +async fn generate_column_oriented_segment( +) -> Result<(ColumnOrientedSegment, Vec, TableSchemaRef)> { let field_1 = TableField::new("u64", TableDataType::Number(NumberDataType::UInt64)); let field_2 = TableField::new( "nullable_u64", diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs index a150932d32003..4b518baa9e50f 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#![allow(clippy::manual_is_multiple_of)] - use std::collections::HashSet; use std::sync::Arc; @@ -46,8 +44,8 @@ use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableSnapshot; use opendal::Operator; -use rand::Rng; use rand::thread_rng; +use rand::Rng; use crate::storages::fuse::operations::mutation::segments_compact_mutator::CompactSegmentTestFixture; diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index a533ff7314ac3..83fc87e974878 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -18,6 +18,12 @@ use std::sync::Arc; use chrono::Utc; use databend_common_base::base::tokio; use databend_common_exception::Result; +use databend_common_expression::type_check::check; +use databend_common_expression::types::number::Int32Type; +use databend_common_expression::types::number::NumberScalar; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::StringType; use databend_common_expression::BlockThresholds; use databend_common_expression::Column; use databend_common_expression::DataBlock; @@ -30,38 +36,32 @@ use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; -use databend_common_expression::type_check::check; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::number::Int32Type; -use databend_common_expression::types::number::NumberScalar; -use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_functions::aggregates::eval_aggr; -use databend_common_sql::ApproxDistinctColumns; +use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_sql::evaluator::BlockOperator; -use databend_common_storages_fuse::FuseStorageFormat; +use databend_common_sql::ApproxDistinctColumns; use databend_common_storages_fuse::io::build_column_hlls; +use databend_common_storages_fuse::statistics::reducers::reduce_block_metas; +use databend_common_storages_fuse::statistics::Trim; use databend_common_storages_fuse::statistics::END_OF_UNICODE_RANGE; use databend_common_storages_fuse::statistics::STATS_STRING_PREFIX_LEN; -use databend_common_storages_fuse::statistics::Trim; -use databend_common_storages_fuse::statistics::reducers::reduce_block_metas; +use databend_common_storages_fuse::FuseStorageFormat; use databend_query::storages::fuse::io::TableMetaLocationGenerator; -use databend_query::storages::fuse::statistics::ClusterStatsGenerator; -use databend_query::storages::fuse::statistics::RowOrientedSegmentBuilder; use databend_query::storages::fuse::statistics::gen_columns_statistics; use databend_query::storages::fuse::statistics::reducers; +use databend_query::storages::fuse::statistics::ClusterStatsGenerator; +use databend_query::storages::fuse::statistics::RowOrientedSegmentBuilder; use databend_query::test_kits::*; use databend_storages_common_index::RangeIndex; +use databend_storages_common_table_meta::meta::column_oriented_segment::SegmentBuilder; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; use databend_storages_common_table_meta::meta::ColumnStatistics; use databend_storages_common_table_meta::meta::Compression; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::VirtualColumnMeta; -use databend_storages_common_table_meta::meta::column_oriented_segment::SegmentBuilder; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::encode_column_hll; use opendal::Operator; use rand::Rng; @@ -822,10 +822,10 @@ fn test_reduce_block_meta() -> Result<()> { let mut acc_file_size = 0; let mut acc_bloom_filter_index_size = 0; for _ in 0..size { - let row_count = rng.r#gen::() / size; - let block_size = rng.r#gen::() / size; - let file_size = rng.r#gen::() / size; - let bloom_filter_index_size = rng.r#gen::() / size; + let row_count = rng.gen::() / size; + let block_size = rng.gen::() / size; + let file_size = rng.gen::() / size; + let bloom_filter_index_size = rng.gen::() / size; acc_row_count += row_count; acc_block_size += block_size; acc_file_size += file_size; diff --git a/src/query/sql/src/planner/binder/copy_into_location.rs b/src/query/sql/src/planner/binder/copy_into_location.rs index 0d91877404b9a..a6dd9e8d83c7a 100644 --- a/src/query/sql/src/planner/binder/copy_into_location.rs +++ b/src/query/sql/src/planner/binder/copy_into_location.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +use databend_common_ast::ast::quote::display_ident; use databend_common_ast::ast::CopyIntoLocationSource; use databend_common_ast::ast::CopyIntoLocationStmt; use databend_common_ast::ast::Statement; -use databend_common_ast::ast::quote::display_ident; use databend_common_ast::parser::parse_sql; use databend_common_ast::parser::tokenize_sql; use databend_common_exception::ErrorCode; @@ -24,11 +24,11 @@ use databend_common_storage::init_stage_operator; use databend_storages_common_stage::CopyIntoLocationInfo; use opendal::ErrorKind; -use crate::BindContext; -use crate::binder::Binder; use crate::binder::copy_into_table::resolve_file_location; +use crate::binder::Binder; use crate::plans::CopyIntoLocationPlan; use crate::plans::Plan; +use crate::BindContext; impl Binder { #[async_backtrace::framed] diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs index ddff4c2fc65d9..88bb1707a82a8 100644 --- a/src/query/sql/src/planner/binder/ddl/table.rs +++ b/src/query/sql/src/planner/binder/ddl/table.rs @@ -70,6 +70,9 @@ use databend_common_catalog::table::CompactionLimits; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::infer_schema_type; +use databend_common_expression::infer_table_schema; +use databend_common_expression::types::DataType; use databend_common_expression::AutoIncrementExpr; use databend_common_expression::ComputedExpr; use databend_common_expression::DataField; @@ -79,9 +82,6 @@ use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::TableSchemaRefExt; -use databend_common_expression::infer_schema_type; -use databend_common_expression::infer_table_schema; -use databend_common_expression::types::DataType; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_license::license::Feature; use databend_common_license::license_manager::LicenseManagerSwitch; @@ -95,6 +95,8 @@ use databend_common_storage::check_operator; use databend_common_storage::init_operator; use databend_common_storages_basic::view_table::QUERY; use databend_common_storages_basic::view_table::VIEW_ENGINE; +use databend_storages_common_table_meta::table::is_reserved_opt_key; +use databend_storages_common_table_meta::table::TableCompression; use databend_storages_common_table_meta::table::OPT_KEY_CLUSTER_TYPE; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; use databend_storages_common_table_meta::table::OPT_KEY_ENGINE_META; @@ -103,28 +105,22 @@ use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_PREFIX; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_ATTACHED_DATA_URI; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_COMPRESSION; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; -use databend_storages_common_table_meta::table::TableCompression; -use databend_storages_common_table_meta::table::is_reserved_opt_key; use derive_visitor::DriveMut; use log::debug; use opendal::Operator; -use crate::BindContext; -use crate::DefaultExprBinder; -use crate::Planner; -use crate::SelectBuilder; +use crate::binder::get_storage_params_from_options; +use crate::binder::parse_storage_params_from_uri; +use crate::binder::scalar::ScalarBinder; use crate::binder::Binder; use crate::binder::ColumnBindingBuilder; use crate::binder::ConstraintExprBinder; use crate::binder::Visibility; -use crate::binder::get_storage_params_from_options; -use crate::binder::parse_storage_params_from_uri; -use crate::binder::scalar::ScalarBinder; use crate::optimizer::ir::SExpr; use crate::parse_computed_expr_to_string; -use crate::planner::semantic::IdentifierNormalizer; use crate::planner::semantic::normalize_identifier; use crate::planner::semantic::resolve_type_name; +use crate::planner::semantic::IdentifierNormalizer; use crate::plans::AddColumnOption; use crate::plans::AddTableColumnPlan; use crate::plans::AddTableConstraintPlan; @@ -166,6 +162,10 @@ use crate::plans::VacuumDropTablePlan; use crate::plans::VacuumTableOption; use crate::plans::VacuumTablePlan; use crate::plans::VacuumTemporaryFilesPlan; +use crate::BindContext; +use crate::DefaultExprBinder; +use crate::Planner; +use crate::SelectBuilder; pub(in crate::planner::binder) struct AnalyzeCreateTableResult { pub(in crate::planner::binder) schema: TableSchemaRef, @@ -604,7 +604,7 @@ impl Binder { Some(self.ctx.as_ref()), "when create TABLE with external location", ) - .await?; + .await?; // create a temporary op to check if params is correct let op = init_operator(&sp)?; @@ -738,15 +738,12 @@ impl Binder { // since we get it from table options location and connection when load table each time. // we do this in case we change this idea. storage_params = Some(sp); - ( - AnalyzeCreateTableResult { - schema: Arc::new(table_schema), - field_comments: vec![], - table_indexes: None, - table_constraints: None, - }, - as_query_plan, - ) + (AnalyzeCreateTableResult { + schema: Arc::new(table_schema), + field_comments: vec![], + table_indexes: None, + table_constraints: None, + }, as_query_plan) } Engine::Delta => { let sp = @@ -758,15 +755,12 @@ impl Binder { // we do this in case we change this idea. storage_params = Some(sp); engine_options.insert(OPT_KEY_ENGINE_META.to_lowercase().to_string(), meta); - ( - AnalyzeCreateTableResult { - schema: Arc::new(table_schema), - field_comments: vec![], - table_indexes: None, - table_constraints: None, - }, - as_query_plan, - ) + (AnalyzeCreateTableResult { + schema: Arc::new(table_schema), + field_comments: vec![], + table_indexes: None, + table_constraints: None, + }, as_query_plan) } _ => Err(ErrorCode::BadArguments( "Incorrect CREATE query: required list of column descriptions or AS section or SELECT or ICEBERG/DELTA table engine", @@ -1180,9 +1174,7 @@ impl Binder { "Invalid number of arguments for attaching policy '{}' to '{}': \ expected at least 2 arguments (masked column + condition columns), \ got {} argument(s)", - name, - table, - columns.len() + name, table, columns.len() ))); } @@ -1353,9 +1345,7 @@ impl Binder { .get_settings() .get_enable_experimental_row_access_policy()? { - return Err(ErrorCode::Unimplemented( - "Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1", - )); + return Err(ErrorCode::Unimplemented("Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1")); } let columns = columns .iter() @@ -1379,9 +1369,7 @@ impl Binder { .get_settings() .get_enable_experimental_row_access_policy()? { - return Err(ErrorCode::Unimplemented( - "Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1", - )); + return Err(ErrorCode::Unimplemented("Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1")); } let policy = self.normalize_identifier(policy).name; Ok(Plan::DropTableRowAccessPolicy(Box::new( @@ -1400,9 +1388,7 @@ impl Binder { .get_settings() .get_enable_experimental_row_access_policy()? { - return Err(ErrorCode::Unimplemented( - "Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1", - )); + return Err(ErrorCode::Unimplemented("Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1")); } Ok(Plan::DropAllTableRowAccessPolicies(Box::new( DropAllTableRowAccessPoliciesPlan { diff --git a/src/query/sql/src/planner/binder/location.rs b/src/query/sql/src/planner/binder/location.rs index 13b82aaaf04d4..db0bb4a3c6f90 100644 --- a/src/query/sql/src/planner/binder/location.rs +++ b/src/query/sql/src/planner/binder/location.rs @@ -28,9 +28,6 @@ use databend_common_catalog::table_context::TableContext; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_meta_app::storage::S3StorageClass; -use databend_common_meta_app::storage::STORAGE_GCS_DEFAULT_ENDPOINT; -use databend_common_meta_app::storage::STORAGE_IPFS_DEFAULT_ENDPOINT; -use databend_common_meta_app::storage::STORAGE_S3_DEFAULT_ENDPOINT; use databend_common_meta_app::storage::StorageAzblobConfig; use databend_common_meta_app::storage::StorageCosConfig; use databend_common_meta_app::storage::StorageFsConfig; @@ -43,12 +40,15 @@ use databend_common_meta_app::storage::StorageOssConfig; use databend_common_meta_app::storage::StorageParams; use databend_common_meta_app::storage::StorageS3Config; use databend_common_meta_app::storage::StorageWebhdfsConfig; +use databend_common_meta_app::storage::STORAGE_GCS_DEFAULT_ENDPOINT; +use databend_common_meta_app::storage::STORAGE_IPFS_DEFAULT_ENDPOINT; +use databend_common_meta_app::storage::STORAGE_S3_DEFAULT_ENDPOINT; use databend_common_storage::STDIN_FD; -use databend_common_storage::Scheme; -use log::LevelFilter; use log::info; +use log::LevelFilter; use opendal::raw::normalize_path; use opendal::raw::normalize_root; +use opendal::Scheme; /// secure_omission will fix omitted endpoint url schemes into 'https://' #[inline] diff --git a/src/query/storages/basic/src/result_cache/read/reader.rs b/src/query/storages/basic/src/result_cache/read/reader.rs index a6a45fc0c811d..30e84fb79758b 100644 --- a/src/query/storages/basic/src/result_cache/read/reader.rs +++ b/src/query/storages/basic/src/result_cache/read/reader.rs @@ -23,8 +23,8 @@ use databend_common_storage::DataOperator; use opendal::Operator; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; -use crate::result_cache::common::ResultCacheValue; use crate::result_cache::common::gen_result_cache_meta_key; +use crate::result_cache::common::ResultCacheValue; use crate::result_cache::meta_manager::ResultCacheMetaManager; pub struct ResultCacheReader { diff --git a/src/query/storages/common/io/src/merge_io_reader.rs b/src/query/storages/common/io/src/merge_io_reader.rs index 9c9caf4e63dc6..13f5a0353938e 100644 --- a/src/query/storages/common/io/src/merge_io_reader.rs +++ b/src/query/storages/common/io/src/merge_io_reader.rs @@ -25,9 +25,9 @@ use databend_common_metrics::storage::*; use futures::future::try_join_all; use opendal::Operator; +use crate::merge_io_result::OwnerMemory; use crate::MergeIOReadResult; use crate::ReadSettings; -use crate::merge_io_result::OwnerMemory; pub struct MergeIOReader {} diff --git a/src/query/storages/common/session/src/temp_table.rs b/src/query/storages/common/session/src/temp_table.rs index ec8989d0ce154..4c4948994fd83 100644 --- a/src/query/storages/common/session/src/temp_table.rs +++ b/src/query/storages/common/session/src/temp_table.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::collections::HashMap; -use std::collections::hash_map::Entry; use std::sync::Arc; use databend_common_exception::ErrorCode; @@ -41,14 +41,14 @@ use databend_common_meta_app::schema::UpdateTempTableReq; use databend_common_meta_app::schema::UpsertTableOptionReply; use databend_common_meta_app::schema::UpsertTableOptionReq; use databend_common_meta_types::SeqV; -use databend_common_storage::DataOperator; use databend_common_storage::init_operator; -use databend_storages_common_blocks::memory::IN_MEMORY_DATA; +use databend_common_storage::DataOperator; use databend_storages_common_blocks::memory::InMemoryDataKey; +use databend_storages_common_blocks::memory::IN_MEMORY_DATA; use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; -use databend_storages_common_table_meta::table_id_ranges::TEMP_TBL_ID_BEGIN; use databend_storages_common_table_meta::table_id_ranges::is_temp_table_id; +use databend_storages_common_table_meta::table_id_ranges::TEMP_TBL_ID_BEGIN; use log::info; use opendal::Operator; use parking_lot::Mutex; @@ -467,8 +467,8 @@ pub async fn drop_all_temp_tables( let num_mem_table = mem_tbl_ids.len(); info!( - "[TEMP TABLE] session={user_name_session_id} starting cleanup, reason = {reason}, {} fuse table, {} mem table.", - num_fuse_table, num_mem_table + "[TEMP TABLE] session={user_name_session_id} starting cleanup, reason = {reason}, {} fuse table, {} mem table." + , num_fuse_table, num_mem_table ); // Clean up each fuse table directory individually with the correct operator diff --git a/src/query/storages/delta/src/table.rs b/src/query/storages/delta/src/table.rs index 1f1ca5306c05b..af8f0421422ae 100644 --- a/src/query/storages/delta/src/table.rs +++ b/src/query/storages/delta/src/table.rs @@ -50,8 +50,8 @@ use databend_common_storages_parquet::ParquetSourceType; use databend_storages_common_pruner::partition_prunner::FetchPartitionScalars; use databend_storages_common_pruner::partition_prunner::PartitionPruner; use databend_storages_common_table_meta::table::OPT_KEY_ENGINE_META; -use deltalake::DeltaTableBuilder; use deltalake::kernel::Add; +use deltalake::DeltaTableBuilder; use object_store_opendal::OpendalStore; use serde::Deserialize; use serde::Serialize; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index 36a0a0ede9bec..dc8c9ea11d75b 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -34,6 +34,7 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::ReclusterParts; use databend_common_catalog::plan::StreamColumn; +use databend_common_catalog::table::is_temp_table_by_table_info; use databend_common_catalog::table::Bound; use databend_common_catalog::table::ColumnRange; use databend_common_catalog::table::ColumnStatisticsProvider; @@ -41,21 +42,20 @@ use databend_common_catalog::table::CompactionLimits; use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::NavigationDescriptor; use databend_common_catalog::table::TimeNavigation; -use databend_common_catalog::table::is_temp_table_by_table_info; use databend_common_catalog::table_context::TableContext; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::DataType; use databend_common_expression::BlockThresholds; use databend_common_expression::ColumnId; -use databend_common_expression::ORIGIN_BLOCK_ID_COL_NAME; -use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COL_NAME; -use databend_common_expression::ORIGIN_VERSION_COL_NAME; use databend_common_expression::RemoteExpr; use databend_common_expression::TableField; use databend_common_expression::TableSchema; +use databend_common_expression::ORIGIN_BLOCK_ID_COL_NAME; +use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COL_NAME; +use databend_common_expression::ORIGIN_VERSION_COL_NAME; use databend_common_expression::VECTOR_SCORE_COLUMN_ID; -use databend_common_expression::types::DataType; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_COMPRESSED_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_PER_SEGMENT; @@ -66,20 +66,22 @@ use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_meta_app::schema::UpdateStreamMetaReq; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; +use databend_common_meta_app::storage::set_s3_storage_class; use databend_common_meta_app::storage::S3StorageClass; use databend_common_meta_app::storage::StorageParams; -use databend_common_meta_app::storage::set_s3_storage_class; use databend_common_pipeline::core::Pipeline; -use databend_common_sql::ApproxDistinctColumns; -use databend_common_sql::BloomIndexColumns; use databend_common_sql::binder::STREAM_COLUMN_FACTORY; use databend_common_sql::parse_cluster_keys; use databend_common_sql::plans::TruncateMode; +use databend_common_sql::ApproxDistinctColumns; +use databend_common_sql::BloomIndexColumns; +use databend_common_storage::init_operator; use databend_common_storage::StorageMetrics; use databend_common_storage::StorageMetricsLayer; -use databend_common_storage::init_operator; use databend_storages_common_cache::LoadParams; use databend_storages_common_io::Files; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::meta::ClusterKey; use databend_storages_common_table_meta::meta::CompactSegmentInfo; use databend_storages_common_table_meta::meta::SnapshotId; @@ -87,10 +89,9 @@ use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; use databend_storages_common_table_meta::meta::Versioned; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::table::ChangeType; use databend_storages_common_table_meta::table::ClusterType; +use databend_storages_common_table_meta::table::TableCompression; use databend_storages_common_table_meta::table::OPT_KEY_APPROX_DISTINCT_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_CHANGE_TRACKING; @@ -102,7 +103,6 @@ use databend_storages_common_table_meta::table::OPT_KEY_SNAPSHOT_LOCATION_FIXED_ use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_FORMAT; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_ATTACHED_DATA_URI; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_COMPRESSION; -use databend_storages_common_table_meta::table::TableCompression; use futures_util::TryStreamExt; use itertools::Itertools; use log::info; @@ -110,21 +110,6 @@ use log::warn; use opendal::Operator; use parking_lot::Mutex; -use crate::DEFAULT_ROW_PER_PAGE; -use crate::FUSE_OPT_KEY_ATTACH_COLUMN_IDS; -use crate::FUSE_OPT_KEY_BLOCK_IN_MEM_SIZE_THRESHOLD; -use crate::FUSE_OPT_KEY_BLOCK_PER_SEGMENT; -use crate::FUSE_OPT_KEY_DATA_RETENTION_NUM_SNAPSHOTS_TO_KEEP; -use crate::FUSE_OPT_KEY_DATA_RETENTION_PERIOD_IN_HOURS; -use crate::FUSE_OPT_KEY_ENABLE_PARQUET_DICTIONARY; -use crate::FUSE_OPT_KEY_FILE_SIZE; -use crate::FUSE_OPT_KEY_ROW_PER_BLOCK; -use crate::FUSE_OPT_KEY_ROW_PER_PAGE; -use crate::FuseSegmentFormat; -use crate::FuseStorageFormat; -use crate::NavigationPoint; -use crate::Table; -use crate::TableStatistics; use crate::fuse_column::FuseTableColumnStatisticsProvider; use crate::fuse_type::FuseTableType; use crate::io::MetaReaders; @@ -132,11 +117,26 @@ use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; use crate::io::TableSnapshotReader; use crate::io::WriteSettings; +use crate::operations::load_last_snapshot_hint; use crate::operations::ChangesDesc; use crate::operations::SnapshotHint; -use crate::operations::load_last_snapshot_hint; -use crate::statistics::Trim; use crate::statistics::reduce_block_statistics; +use crate::statistics::Trim; +use crate::FuseSegmentFormat; +use crate::FuseStorageFormat; +use crate::NavigationPoint; +use crate::Table; +use crate::TableStatistics; +use crate::DEFAULT_ROW_PER_PAGE; +use crate::FUSE_OPT_KEY_ATTACH_COLUMN_IDS; +use crate::FUSE_OPT_KEY_BLOCK_IN_MEM_SIZE_THRESHOLD; +use crate::FUSE_OPT_KEY_BLOCK_PER_SEGMENT; +use crate::FUSE_OPT_KEY_DATA_RETENTION_NUM_SNAPSHOTS_TO_KEEP; +use crate::FUSE_OPT_KEY_DATA_RETENTION_PERIOD_IN_HOURS; +use crate::FUSE_OPT_KEY_ENABLE_PARQUET_DICTIONARY; +use crate::FUSE_OPT_KEY_FILE_SIZE; +use crate::FUSE_OPT_KEY_ROW_PER_BLOCK; +use crate::FUSE_OPT_KEY_ROW_PER_PAGE; #[derive(Clone)] pub struct FuseTable { @@ -680,16 +680,15 @@ impl FuseTable { info!( "extracting snapshot location of table {} with id {:?} from the last snapshot hint file.", - table_info.desc, table_info.ident + table_info.desc, + table_info.ident ); let snapshot_hint = Self::refresh_schema_from_hint(operator, storage_prefix)?; info!( "extracted snapshot location [{:?}] of table {}, with id {:?} from the last snapshot hint file.", - snapshot_hint - .as_ref() - .map(|(hint, _)| &hint.snapshot_full_path), + snapshot_hint.as_ref().map(|(hint, _)| &hint.snapshot_full_path), table_info.desc, table_info.ident ); @@ -954,7 +953,11 @@ impl Table for FuseTable { } Err(e) if e.code() == ErrorCode::TABLE_HISTORICAL_DATA_NOT_FOUND => { warn!("navigate failed: {:?}", e); - if dry_run { Ok(Some(vec![])) } else { Ok(None) } + if dry_run { + Ok(Some(vec![])) + } else { + Ok(None) + } } Err(e) => Err(e), } diff --git a/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs b/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs index fb9f24122a24d..ead67dd81e7fe 100644 --- a/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs +++ b/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs @@ -18,14 +18,14 @@ use databend_common_catalog::plan::AggIndexInfo; use databend_common_catalog::plan::AggIndexMeta; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::types::BooleanType; +use databend_common_expression::types::DataType; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::Evaluator; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; use databend_common_expression::Scalar; -use databend_common_expression::types::BooleanType; -use databend_common_expression::types::DataType; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_native.rs b/src/query/storages/fuse/src/io/read/block/block_reader_native.rs index 5d8b4c5c2a77c..b528ebf0ee1b1 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_native.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_native.rs @@ -27,9 +27,9 @@ use databend_common_expression::ColumnId; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_metrics::storage::*; -use databend_common_native::read::NativeReadBuf; -use databend_common_native::read::reader::NativeReader; use databend_common_native::read::reader::read_meta_async; +use databend_common_native::read::reader::NativeReader; +use databend_common_native::read::NativeReadBuf; use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::meta::ColumnMeta; use opendal::Operator; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs index 1f8ca52c19590..f583ae24802fd 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs @@ -24,8 +24,8 @@ use databend_common_expression::DataBlock; use databend_common_expression::TableField; use databend_common_expression::Value; use databend_common_metrics::storage::*; -use databend_common_native::read::ColumnIter; use databend_common_native::read::reader::NativeReader; +use databend_common_native::read::ColumnIter; use databend_common_storage::ColumnNode; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; @@ -35,9 +35,9 @@ use opendal::Buffer; use super::block_reader_deserialize::DeserializedArray; use super::block_reader_deserialize::FieldDeserializationContext; +use crate::io::read::block::block_reader_merge_io::DataItem; use crate::io::BlockReader; use crate::io::NativeReaderExt; -use crate::io::read::block::block_reader_merge_io::DataItem; impl BlockReader { /// Deserialize column chunks data from native format to DataBlock. diff --git a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs index afce67c444fc1..2920fe1c9548d 100644 --- a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs +++ b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs @@ -27,9 +27,9 @@ use databend_storages_common_index::filters::Filter; use databend_storages_common_index::filters::FilterImpl; use databend_storages_common_table_meta::meta::SingleColumnMeta; use opendal::Operator; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::ProjectionMask; use parquet::basic::Compression as ParquetCompression; use parquet::schema::types::SchemaDescPtr; diff --git a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs index 1cebb5bbb04fd..4904da1fbe069 100644 --- a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs +++ b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs @@ -40,14 +40,14 @@ use databend_storages_common_table_meta::meta::SingleColumnMeta; use databend_storages_common_table_meta::table::TableCompression; use log::info; use opendal::Operator; -use parquet::arrow::ArrowSchemaConverter; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::ArrowSchemaConverter; +use parquet::arrow::ProjectionMask; use crate::index::InvertedIndexFile; -use crate::io::MetaReaders; use crate::io::read::block::parquet::RowGroupImplBuilder; +use crate::io::MetaReaders; const INDEX_COLUMN_NAMES: [&str; 8] = [ "fast", diff --git a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs index ef0514dd1bc9d..38a17e30fa2cd 100644 --- a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs +++ b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs @@ -24,7 +24,6 @@ use databend_common_metrics::storage::metrics_inc_block_inverted_index_search_mi use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::meta::SingleColumnMeta; use opendal::Operator; -use tantivy::Index; use tantivy::collector::DocSetCollector; use tantivy::collector::TopDocs; use tantivy::directory::FileSlice; @@ -37,6 +36,7 @@ use tantivy::query::QueryClone; use tantivy::schema::IndexRecordOption; use tantivy::termdict::TermInfoStore; use tantivy::tokenizer::TokenizerManager; +use tantivy::Index; use tantivy_common::BinarySerializable; use tantivy_fst::raw::Fst; diff --git a/src/query/storages/fuse/src/io/read/segment_reader.rs b/src/query/storages/fuse/src/io/read/segment_reader.rs index a193de7ef2750..d024b267e5903 100644 --- a/src/query/storages/fuse/src/io/read/segment_reader.rs +++ b/src/query/storages/fuse/src/io/read/segment_reader.rs @@ -19,14 +19,14 @@ use databend_common_exception::Result; use databend_common_expression::TableSchemaRef; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; -use databend_storages_common_table_meta::meta::CompactSegmentInfo; -use databend_storages_common_table_meta::meta::Location; -use databend_storages_common_table_meta::meta::SegmentInfo; +use databend_storages_common_table_meta::meta::column_oriented_segment::deserialize_column_oriented_segment; use databend_storages_common_table_meta::meta::column_oriented_segment::AbstractSegment; use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment; use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegmentBuilder; use databend_storages_common_table_meta::meta::column_oriented_segment::SegmentBuilder; -use databend_storages_common_table_meta::meta::column_oriented_segment::deserialize_column_oriented_segment; +use databend_storages_common_table_meta::meta::CompactSegmentInfo; +use databend_storages_common_table_meta::meta::Location; +use databend_storages_common_table_meta::meta::SegmentInfo; use opendal::Operator; use super::meta::bytes_reader; diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs index 6545866053bc4..ea8378f38a130 100644 --- a/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs @@ -36,14 +36,14 @@ use databend_storages_common_io::MergeIOReader; use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; -use parquet::arrow::ArrowSchemaConverter; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::parquet_to_arrow_field_levels; +use parquet::arrow::ArrowSchemaConverter; +use parquet::arrow::ProjectionMask; use crate::index::VectorIndexFile; -use crate::io::MetaReaders; use crate::io::read::block::parquet::RowGroupImplBuilder; +use crate::io::MetaReaders; #[async_trait::async_trait] trait InRuntime diff --git a/src/query/storages/fuse/src/io/write/block_writer.rs b/src/query/storages/fuse/src/io/write/block_writer.rs index dfb7a73e8d897..5a03e64978c34 100644 --- a/src/query/storages/fuse/src/io/write/block_writer.rs +++ b/src/query/storages/fuse/src/io/write/block_writer.rs @@ -12,15 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::collections::HashMap; -use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Instant; use chrono::Utc; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::local_block_meta_serde; use databend_common_expression::BlockMetaInfo; use databend_common_expression::Column; use databend_common_expression::ColumnId; @@ -28,7 +29,6 @@ use databend_common_expression::DataBlock; use databend_common_expression::FieldIndex; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; -use databend_common_expression::local_block_meta_serde; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_metrics::storage::metrics_inc_block_index_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_index_write_nums; @@ -46,6 +46,7 @@ use databend_common_metrics::storage::metrics_inc_block_write_nums; use databend_common_native::write::NativeWriter; use databend_storages_common_blocks::blocks_to_parquet_with_stats; use databend_storages_common_index::NgramArgs; +use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::BlockHLLState; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; @@ -53,24 +54,23 @@ use databend_storages_common_table_meta::meta::ColumnMeta; use databend_storages_common_table_meta::meta::ExtendedBlockMeta; use databend_storages_common_table_meta::meta::StatisticsOfColumns; use databend_storages_common_table_meta::meta::TableMetaTimestamps; -use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; -use crate::FuseStorageFormat; -use crate::io::BloomIndexState; -use crate::io::TableMetaLocationGenerator; use crate::io::build_column_hlls; +use crate::io::write::virtual_column_builder::VirtualColumnBuilder; +use crate::io::write::virtual_column_builder::VirtualColumnState; use crate::io::write::InvertedIndexBuilder; use crate::io::write::InvertedIndexState; use crate::io::write::VectorIndexBuilder; use crate::io::write::VectorIndexState; use crate::io::write::WriteSettings; -use crate::io::write::virtual_column_builder::VirtualColumnBuilder; -use crate::io::write::virtual_column_builder::VirtualColumnState; +use crate::io::BloomIndexState; +use crate::io::TableMetaLocationGenerator; use crate::operations::column_parquet_metas; -use crate::statistics::ClusterStatsGenerator; use crate::statistics::gen_columns_statistics; +use crate::statistics::ClusterStatsGenerator; +use crate::FuseStorageFormat; pub fn serialize_block( write_settings: &WriteSettings, diff --git a/src/query/storages/fuse/src/io/write/bloom_index_writer.rs b/src/query/storages/fuse/src/io/write/bloom_index_writer.rs index a7cc4642d7ce6..34f9e192143f4 100644 --- a/src/query/storages/fuse/src/io/write/bloom_index_writer.rs +++ b/src/query/storages/fuse/src/io/write/bloom_index_writer.rs @@ -26,19 +26,19 @@ use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; use databend_storages_common_blocks::blocks_to_parquet; +use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; use databend_storages_common_index::NgramArgs; -use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_io::ReadSettings; +use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::Versioned; -use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; -use crate::FuseStorageFormat; use crate::io::BlockReader; +use crate::FuseStorageFormat; #[derive(Debug)] pub struct BloomIndexState { diff --git a/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs b/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs index 252f89133597e..3e58be6651aa9 100644 --- a/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs +++ b/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs @@ -43,6 +43,8 @@ use databend_storages_common_cache::LoadParams; use databend_storages_common_cache::SegmentStatistics; use databend_storages_common_index::RangeIndex; use databend_storages_common_io::ReadSettings; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::Location; @@ -50,22 +52,20 @@ use databend_storages_common_table_meta::meta::RawBlockHLL; use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::Versioned; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::encode_column_hll; use opendal::Operator; -use crate::FuseLazyPartInfo; -use crate::FuseStorageFormat; -use crate::FuseTable; +use crate::io::build_column_hlls; +use crate::io::read::meta::SegmentStatsReader; use crate::io::BlockReader; use crate::io::CachedMetaWriter; use crate::io::CompactSegmentInfoReader; use crate::io::MetaReaders; use crate::io::TableMetaLocationGenerator; -use crate::io::build_column_hlls; -use crate::io::read::meta::SegmentStatsReader; use crate::operations::acquire_task_permit; use crate::operations::analyze::AnalyzeNDVMeta; +use crate::FuseLazyPartInfo; +use crate::FuseStorageFormat; +use crate::FuseTable; struct SegmentWithHLL { segment_location: Location, @@ -296,7 +296,7 @@ impl Processor for AnalyzeCollectNDVSource { return Err(ErrorCode::Internal(format!( "Invalid state reached in sync process: {:?}. This is a bug.", state - ))); + ))) } } Ok(()) @@ -416,7 +416,7 @@ impl Processor for AnalyzeCollectNDVSource { return Err(ErrorCode::Internal(format!( "Invalid state reached in async process: {:?}. This is a bug.", state - ))); + ))) } } Ok(()) diff --git a/src/query/storages/fuse/src/operations/commit.rs b/src/query/storages/fuse/src/operations/commit.rs index 9a648611ce1f1..ac3b386254071 100644 --- a/src/query/storages/fuse/src/operations/commit.rs +++ b/src/query/storages/fuse/src/operations/commit.rs @@ -42,6 +42,8 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CachedObject; use databend_storages_common_cache::LoadParams; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::SegmentInfo; @@ -51,8 +53,6 @@ use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; use databend_storages_common_table_meta::meta::Versioned; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::readers::snapshot_reader::TableSnapshotAccessor; use databend_storages_common_table_meta::table::OPT_KEY_LEGACY_SNAPSHOT_LOC; use databend_storages_common_table_meta::table::OPT_KEY_SNAPSHOT_LOCATION; @@ -61,21 +61,21 @@ use log::debug; use log::info; use opendal::Operator; -use super::TableMutationAggregator; use super::decorate_snapshot; use super::new_serialize_segment_processor; -use crate::FuseTable; +use super::TableMutationAggregator; use crate::io::MetaReaders; use crate::io::MetaWriter; use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; -use crate::operations::SnapshotHintWriter; use crate::operations::common::AppendGenerator; use crate::operations::common::CommitSink; use crate::operations::common::ConflictResolveContext; use crate::operations::set_backoff; -use crate::statistics::TableStatsGenerator; +use crate::operations::SnapshotHintWriter; use crate::statistics::merge_statistics; +use crate::statistics::TableStatsGenerator; +use crate::FuseTable; impl FuseTable { #[async_backtrace::framed] diff --git a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs index b87f4b70f07eb..afa4c66c34ba6 100644 --- a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs +++ b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs @@ -21,8 +21,8 @@ use std::sync::Arc; use std::time::Duration; use std::time::Instant; -use backoff::ExponentialBackoff; use backoff::backoff::Backoff; +use backoff::ExponentialBackoff; use databend_common_base::base::GlobalInstance; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableExt; @@ -56,20 +56,20 @@ use log::error; use log::info; use opendal::Operator; -use crate::FUSE_OPT_KEY_ENABLE_AUTO_ANALYZE; -use crate::FUSE_OPT_KEY_ENABLE_AUTO_VACUUM; -use crate::FuseTable; use crate::io::TableMetaLocationGenerator; +use crate::operations::set_backoff; +use crate::operations::set_compaction_num_block_hint; +use crate::operations::vacuum::vacuum_table; use crate::operations::AppendGenerator; use crate::operations::CommitMeta; use crate::operations::MutationGenerator; use crate::operations::SnapshotGenerator; use crate::operations::TransformMergeCommitMeta; use crate::operations::TruncateGenerator; -use crate::operations::set_backoff; -use crate::operations::set_compaction_num_block_hint; -use crate::operations::vacuum::vacuum_table; use crate::statistics::TableStatsGenerator; +use crate::FuseTable; +use crate::FUSE_OPT_KEY_ENABLE_AUTO_ANALYZE; +use crate::FUSE_OPT_KEY_ENABLE_AUTO_VACUUM; enum State { None, @@ -222,9 +222,9 @@ where F: SnapshotGenerator + Send + Sync + 'static snapshot_gen .as_any() .downcast_ref::() - .is_some_and(|g| { + .is_some_and(|generator| { matches!( - g.mutation_kind, + generator.mutation_kind, MutationKind::Update | MutationKind::Delete | MutationKind::MergeInto @@ -315,14 +315,14 @@ where F: SnapshotGenerator + Send + Sync + 'static snapshot_gen .as_any() .downcast_ref::() - .is_some_and(|g| matches!(g.mode(), TruncateMode::DropAll)) + .is_some_and(|generator| matches!(generator.mode(), TruncateMode::DropAll)) } fn need_truncate(&self) -> bool { self.snapshot_gen .as_any() .downcast_ref::() - .is_some_and(|g| !matches!(g.mode(), TruncateMode::Delete)) + .is_some_and(|generator| !matches!(generator.mode(), TruncateMode::Delete)) } fn is_append_only_txn(&self) -> bool { diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs b/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs index ce67c53bc9bf1..28c0be2933a65 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs @@ -35,13 +35,13 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_storage::MutationStatus; use opendal::Operator; -use crate::FuseTable; use crate::io::BlockSerialization; use crate::io::BlockWriter; use crate::io::StreamBlockBuilder; use crate::io::StreamBlockProperties; use crate::operations::MutationLogEntry; use crate::operations::MutationLogs; +use crate::FuseTable; enum State { Consume, diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs b/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs index 767a4a429a412..26aee7148135e 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::collections::HashMap; -use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Instant; @@ -31,6 +31,7 @@ use databend_common_expression::VirtualDataSchema; use databend_common_pipeline_transforms::processors::AsyncAccumulatingTransform; use databend_common_sql::executor::physical_plans::MutationKind; use databend_storages_common_cache::SegmentStatistics; +use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::BlockHLLState; @@ -44,7 +45,6 @@ use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::Versioned; use databend_storages_common_table_meta::meta::VirtualBlockMeta; -use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::table::ClusterType; use itertools::Itertools; use log::debug; @@ -52,11 +52,10 @@ use log::info; use log::warn; use opendal::Operator; -use crate::FuseTable; +use crate::io::read::read_segment_stats; use crate::io::CachedMetaWriter; use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; -use crate::io::read::read_segment_stats; use crate::operations::common::CommitMeta; use crate::operations::common::ConflictResolveContext; use crate::operations::common::MutationLogEntry; @@ -65,10 +64,11 @@ use crate::operations::common::SnapshotChanges; use crate::operations::common::SnapshotMerged; use crate::operations::mutation::BlockIndex; use crate::operations::mutation::SegmentIndex; -use crate::statistics::VirtualColumnAccumulator; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::reducers::reduce_block_metas; use crate::statistics::sort_by_cluster_stats; +use crate::statistics::VirtualColumnAccumulator; +use crate::FuseTable; pub struct TableMutationAggregator { ctx: Arc, diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs index 926954639263a..b4d59326a1803 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs @@ -38,19 +38,19 @@ use databend_storages_common_index::RangeIndex; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use opendal::Operator; -use crate::FuseTable; +use crate::io::create_inverted_index_builders; use crate::io::BlockBuilder; use crate::io::BlockSerialization; use crate::io::BlockWriter; use crate::io::VectorIndexBuilder; use crate::io::VirtualColumnBuilder; -use crate::io::create_inverted_index_builders; use crate::operations::common::BlockMetaIndex; use crate::operations::common::MutationLogEntry; use crate::operations::common::MutationLogs; use crate::operations::mutation::ClusterStatsGenType; use crate::operations::mutation::SerializeDataMeta; use crate::statistics::ClusterStatsGenerator; +use crate::FuseTable; #[allow(clippy::large_enum_variant)] enum State { diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs index 5a62c459d28c6..fe10c06ea0c1d 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs @@ -29,6 +29,7 @@ use databend_common_pipeline::core::InputPort; use databend_common_pipeline::core::OutputPort; use databend_common_pipeline::core::Processor; use databend_common_pipeline::core::ProcessorPtr; +use databend_storages_common_table_meta::meta::column_oriented_segment::*; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::ExtendedBlockMeta; @@ -37,18 +38,17 @@ use databend_storages_common_table_meta::meta::SegmentStatistics; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::Versioned; use databend_storages_common_table_meta::meta::VirtualBlockMeta; -use databend_storages_common_table_meta::meta::column_oriented_segment::*; use log::info; use opendal::Operator; -use crate::FuseSegmentFormat; -use crate::FuseTable; use crate::io::TableMetaLocationGenerator; use crate::operations::common::MutationLogEntry; use crate::operations::common::MutationLogs; use crate::statistics::ColumnHLLAccumulator; use crate::statistics::RowOrientedSegmentBuilder; use crate::statistics::VirtualColumnAccumulator; +use crate::FuseSegmentFormat; +use crate::FuseTable; enum State { None, diff --git a/src/query/storages/fuse/src/operations/inverted_index.rs b/src/query/storages/fuse/src/operations/inverted_index.rs index 30bcacd5edae6..33a1924c01fd3 100644 --- a/src/query/storages/fuse/src/operations/inverted_index.rs +++ b/src/query/storages/fuse/src/operations/inverted_index.rs @@ -14,9 +14,9 @@ use std::collections::BTreeMap; use std::collections::VecDeque; -use std::sync::Arc; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; +use std::sync::Arc; use std::time::Instant; use async_trait::async_trait; @@ -47,13 +47,13 @@ use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::Location; use opendal::Operator; -use crate::FuseStorageFormat; -use crate::FuseTable; +use crate::io::write_data; use crate::io::BlockReader; use crate::io::InvertedIndexWriter; use crate::io::MetaReaders; use crate::io::TableMetaLocationGenerator; -use crate::io::write_data; +use crate::FuseStorageFormat; +use crate::FuseTable; impl FuseTable { // The big picture of refresh inverted index into pipeline: diff --git a/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs b/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs index f38f9616de528..3bbc2e873264d 100644 --- a/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs +++ b/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; -use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Instant; @@ -22,20 +22,20 @@ use ahash::AHashMap; use databend_common_base::base::tokio::sync::Semaphore; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; -use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::build_origin_block_row_num; use databend_common_catalog::plan::gen_mutation_stream_meta; use databend_common_catalog::plan::split_prefix; use databend_common_catalog::plan::split_row_id; +use databend_common_catalog::plan::Projection; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::BlockMetaInfoDowncast; -use databend_common_expression::DataBlock; use databend_common_expression::types::DataType; use databend_common_expression::types::MutableBitmap; use databend_common_expression::types::NumberDataType; +use databend_common_expression::BlockMetaInfoDowncast; +use databend_common_expression::DataBlock; use databend_common_metrics::storage::*; use databend_common_sql::StreamContext; use databend_common_storage::MutationStatus; @@ -48,14 +48,12 @@ use itertools::Itertools; use log::info; use opendal::Operator; -use crate::FuseTable; use crate::io::BlockBuilder; use crate::io::BlockReader; use crate::io::BlockWriter; use crate::io::CompactSegmentInfoReader; use crate::io::MetaReaders; use crate::io::WriteSettings; -use crate::operations::BlockMetaIndex; use crate::operations::acquire_task_permit; use crate::operations::common::MutationLogEntry; use crate::operations::common::MutationLogs; @@ -63,6 +61,8 @@ use crate::operations::merge_into::processors::RowIdKind; use crate::operations::mutation::BlockIndex; use crate::operations::mutation::SegmentIndex; use crate::operations::read_block; +use crate::operations::BlockMetaIndex; +use crate::FuseTable; struct AggregationContext { data_accessor: Operator, @@ -322,8 +322,8 @@ impl MatchedAggregator { // the row_id is generated by block_id, not block_idx,reference to fill_internal_column_meta() let block_meta = segment_info.blocks[block_idx].clone(); - let update_modified_offsets = &item.1.0; - let delete_modified_offsets = &item.1.1; + let update_modified_offsets = &item.1 .0; + let delete_modified_offsets = &item.1 .1; let modified_offsets: HashSet = update_modified_offsets .union(delete_modified_offsets) .cloned() diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs index 91c9fc4e1e9f3..ab029f2ed6208 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs @@ -27,9 +27,9 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::is_stream_column_id; use databend_common_expression::BlockThresholds; use databend_common_expression::ColumnId; -use databend_common_expression::is_stream_column_id; use databend_common_metrics::storage::*; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::CompactSegmentInfo; @@ -38,10 +38,8 @@ use databend_storages_common_table_meta::meta::Statistics; use log::info; use opendal::Operator; -use crate::TableContext; -use crate::io::SegmentsIO; use crate::io::read::read_segment_stats; -use crate::operations::CompactOptions; +use crate::io::SegmentsIO; use crate::operations::acquire_task_permit; use crate::operations::common::BlockMetaIndex; use crate::operations::mutation::BlockIndex; @@ -50,8 +48,10 @@ use crate::operations::mutation::CompactExtraInfo; use crate::operations::mutation::CompactLazyPartInfo; use crate::operations::mutation::CompactTaskInfo; use crate::operations::mutation::SegmentIndex; +use crate::operations::CompactOptions; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::sort_by_cluster_stats; +use crate::TableContext; #[derive(Clone)] pub struct BlockCompactMutator { diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs index 64ab11f415a51..4422a29bb2d6f 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs @@ -19,8 +19,8 @@ use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; -use databend_common_base::runtime::GLOBAL_MEM_STAT; use databend_common_base::runtime::execute_futures_in_parallel; +use databend_common_base::runtime::GLOBAL_MEM_STAT; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::ReclusterParts; @@ -28,11 +28,11 @@ use databend_common_catalog::plan::ReclusterTask; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::compare_scalars; +use databend_common_expression::types::DataType; use databend_common_expression::BlockThresholds; use databend_common_expression::Scalar; use databend_common_expression::TableSchemaRef; -use databend_common_expression::compare_scalars; -use databend_common_expression::types::DataType; use databend_common_storage::ColumnNodes; use databend_storages_common_cache::LoadParams; use databend_storages_common_pruner::BlockMetaIndex; @@ -42,26 +42,26 @@ use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::RawBlockHLL; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableSnapshot; -use fastrace::Span; use fastrace::func_path; use fastrace::future::FutureExt; +use fastrace::Span; use indexmap::IndexSet; use log::debug; use log::warn; use opendal::Operator; -use crate::DEFAULT_AVG_DEPTH_THRESHOLD; -use crate::FUSE_OPT_KEY_ROW_AVG_DEPTH_THRESHOLD; -use crate::FuseTable; -use crate::SegmentLocation; use crate::io::MetaReaders; -use crate::operations::BlockCompactMutator; -use crate::operations::CompactLazyPartInfo; use crate::operations::common::BlockMetaIndex as BlockIndex; -use crate::operations::mutation::SegmentCompactChecker; use crate::operations::mutation::mutator::block_compact_mutator::CompactLimitState; +use crate::operations::mutation::SegmentCompactChecker; +use crate::operations::BlockCompactMutator; +use crate::operations::CompactLazyPartInfo; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::sort_by_cluster_stats; +use crate::FuseTable; +use crate::SegmentLocation; +use crate::DEFAULT_AVG_DEPTH_THRESHOLD; +use crate::FUSE_OPT_KEY_ROW_AVG_DEPTH_THRESHOLD; pub enum ReclusterMode { Recluster, diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs index e3b22cd244860..6daa79bbe20d7 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs @@ -27,15 +27,15 @@ use databend_storages_common_table_meta::meta::Versioned; use log::info; use opendal::Operator; -use crate::FuseTable; -use crate::TableContext; +use crate::io::read::read_segment_stats_in_parallel; use crate::io::CachedMetaWriter; use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; -use crate::io::read::read_segment_stats_in_parallel; use crate::operations::CompactOptions; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::sort_by_cluster_stats; +use crate::FuseTable; +use crate::TableContext; #[derive(Default)] pub struct SegmentCompactionState { diff --git a/src/query/storages/fuse/src/operations/navigate.rs b/src/query/storages/fuse/src/operations/navigate.rs index 20b973b4d3357..0eba172650d63 100644 --- a/src/query/storages/fuse/src/operations/navigate.rs +++ b/src/query/storages/fuse/src/operations/navigate.rs @@ -32,13 +32,13 @@ use futures::TryStreamExt; use log::info; use opendal::EntryMode; -use crate::FUSE_TBL_SNAPSHOT_PREFIX; -use crate::FuseTable; use crate::fuse_table::RetentionPolicy; use crate::io::MetaReaders; use crate::io::SnapshotHistoryReader; use crate::io::SnapshotsIO; use crate::io::TableMetaLocationGenerator; +use crate::FuseTable; +use crate::FUSE_TBL_SNAPSHOT_PREFIX; impl FuseTable { #[fastrace::trace] @@ -437,13 +437,11 @@ impl FuseTable { let meta = op.stat(de.path()).await?; meta.last_modified() }; + let location = de.path().to_string(); if let Some(modified) = modified { - let utc_modified = DateTime::from_timestamp_nanos( - modified.into_inner().as_nanosecond() as i64, - ); - if f(location.clone(), utc_modified) { - file_list.push((location, utc_modified)); + if f(location.clone(), modified) { + file_list.push((location, modified)); } } } diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index 76648c5e3da66..a1ee6558fc0bb 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -55,8 +55,8 @@ use databend_storages_common_index::BloomIndex; use databend_storages_common_index::NgramArgs; use databend_storages_common_pruner::BlockMetaIndex; use databend_storages_common_pruner::TopNPruner; -use databend_storages_common_table_meta::meta::BlockMeta; -use databend_storages_common_table_meta::meta::ColumnStatistics; +use databend_storages_common_table_meta::meta::column_oriented_segment::meta_name; +use databend_storages_common_table_meta::meta::column_oriented_segment::stat_name; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOCK_SIZE; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOOM_FILTER_INDEX_LOCATION; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOOM_FILTER_INDEX_SIZE; @@ -68,8 +68,8 @@ use databend_storages_common_table_meta::meta::column_oriented_segment::INVERTED use databend_storages_common_table_meta::meta::column_oriented_segment::LOCATION; use databend_storages_common_table_meta::meta::column_oriented_segment::NGRAM_FILTER_INDEX_SIZE; use databend_storages_common_table_meta::meta::column_oriented_segment::ROW_COUNT; -use databend_storages_common_table_meta::meta::column_oriented_segment::meta_name; -use databend_storages_common_table_meta::meta::column_oriented_segment::stat_name; +use databend_storages_common_table_meta::meta::BlockMeta; +use databend_storages_common_table_meta::meta::ColumnStatistics; use databend_storages_common_table_meta::table::ChangeType; use databend_storages_common_table_meta::table::ClusterType; use itertools::Itertools; @@ -78,18 +78,15 @@ use opendal::Operator; use sha2::Digest; use sha2::Sha256; -use crate::FuseLazyPartInfo; -use crate::FuseSegmentFormat; -use crate::FuseTable; use crate::fuse_part::FuseBlockPartInfo; use crate::io::BloomIndexRebuilder; +use crate::pruning::create_segment_location_vector; +use crate::pruning::table_sample; use crate::pruning::BlockPruner; use crate::pruning::FusePruner; use crate::pruning::SegmentLocation; use crate::pruning::SegmentPruner; use crate::pruning::VectorIndexPruner; -use crate::pruning::create_segment_location_vector; -use crate::pruning::table_sample; use crate::pruning_pipeline::AsyncBlockPruneTransform; use crate::pruning_pipeline::ColumnOrientedBlockPruneSink; use crate::pruning_pipeline::ExtractSegmentTransform; @@ -104,6 +101,9 @@ use crate::pruning_pipeline::SyncBlockPruneTransform; use crate::pruning_pipeline::TopNPruneTransform; use crate::pruning_pipeline::VectorIndexPruneTransform; use crate::segment_format_from_location; +use crate::FuseLazyPartInfo; +use crate::FuseSegmentFormat; +use crate::FuseTable; const DEFAULT_GRAM_SIZE: usize = 3; const DEFAULT_BLOOM_SIZE: u64 = 1024 * 1024; diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 79f1c5c2fbfdf..4ce55ee9b9052 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -16,8 +16,8 @@ use std::sync::Arc; use std::time::Instant; use databend_common_base::base::tokio::select; -use databend_common_base::base::tokio::sync::Semaphore; use databend_common_base::base::tokio::sync::mpsc; +use databend_common_base::base::tokio::sync::Semaphore; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; use databend_common_catalog::plan::PushDownInfo; @@ -36,14 +36,14 @@ use databend_storages_common_table_meta::table::ClusterType; use log::warn; use opendal::Operator; -use crate::FuseTable; -use crate::SegmentLocation; -use crate::operations::ReclusterMutator; use crate::operations::acquire_task_permit; use crate::operations::mutation::ReclusterMode; +use crate::operations::ReclusterMutator; +use crate::pruning::create_segment_location_vector; use crate::pruning::PruningContext; use crate::pruning::SegmentPruner; -use crate::pruning::create_segment_location_vector; +use crate::FuseTable; +use crate::SegmentLocation; impl FuseTable { #[async_backtrace::framed] diff --git a/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs b/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs index 7829194f4a737..6b735e3b4c1d7 100644 --- a/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs +++ b/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs @@ -20,12 +20,14 @@ use ahash::AHashMap; use databend_common_base::base::tokio::sync::Semaphore; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; -use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::gen_mutation_stream_meta; +use databend_common_catalog::plan::Projection; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::MutableBitmap; +use databend_common_expression::types::UInt64Type; use databend_common_expression::Column; use databend_common_expression::ColumnId; use databend_common_expression::ComputedExpr; @@ -33,19 +35,17 @@ use databend_common_expression::DataBlock; use databend_common_expression::FieldIndex; use databend_common_expression::FromData; use databend_common_expression::Scalar; -use databend_common_expression::types::MutableBitmap; -use databend_common_expression::types::UInt64Type; use databend_common_metrics::storage::*; -use databend_common_sql::StreamContext; use databend_common_sql::evaluator::BlockOperator; use databend_common_sql::executor::physical_plans::OnConflictField; +use databend_common_sql::StreamContext; use databend_storages_common_cache::BlockMetaCache; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; use databend_storages_common_cache::LoadParams; -use databend_storages_common_index::BloomIndex; use databend_storages_common_index::filters::Filter; use databend_storages_common_index::filters::FilterImpl; +use databend_storages_common_index::BloomIndex; use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::BlockSlotDescription; @@ -56,14 +56,13 @@ use log::info; use log::warn; use opendal::Operator; -use crate::FuseTable; +use crate::io::read::bloom::block_filter_reader::BloomBlockFilterReader; use crate::io::BlockBuilder; use crate::io::BlockReader; use crate::io::BlockWriter; use crate::io::CompactSegmentInfoReader; use crate::io::MetaReaders; use crate::io::WriteSettings; -use crate::io::read::bloom::block_filter_reader::BloomBlockFilterReader; use crate::operations::acquire_task_permit; use crate::operations::common::BlockMetaIndex; use crate::operations::common::MutationLogEntry; @@ -74,8 +73,9 @@ use crate::operations::read_block; use crate::operations::replace_into::meta::DeletionByColumn; use crate::operations::replace_into::meta::ReplaceIntoOperation; use crate::operations::replace_into::meta::UniqueKeyDigest; -use crate::operations::replace_into::mutator::DeletionAccumulator; use crate::operations::replace_into::mutator::row_hash_of_columns; +use crate::operations::replace_into::mutator::DeletionAccumulator; +use crate::FuseTable; struct AggregationContext { segment_locations: AHashMap, @@ -788,11 +788,11 @@ impl AggregationContext { #[cfg(test)] mod tests { + use databend_common_expression::types::NumberDataType; + use databend_common_expression::types::NumberScalar; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; - use databend_common_expression::types::NumberDataType; - use databend_common_expression::types::NumberScalar; use super::*; diff --git a/src/query/storages/fuse/src/operations/snapshot_hint.rs b/src/query/storages/fuse/src/operations/snapshot_hint.rs index 71eedf1d54aba..a41d93d2f90a1 100644 --- a/src/query/storages/fuse/src/operations/snapshot_hint.rs +++ b/src/query/storages/fuse/src/operations/snapshot_hint.rs @@ -29,9 +29,9 @@ use opendal::Operator; use serde::Deserialize; use serde::Serialize; +use crate::io::TableMetaLocationGenerator; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT_V2; -use crate::io::TableMetaLocationGenerator; pub struct SnapshotHintWriter<'a> { ctx: &'a dyn TableContext, diff --git a/src/query/storages/fuse/src/operations/table_index.rs b/src/query/storages/fuse/src/operations/table_index.rs index 32d0f5ae81e45..3009fb32017cd 100644 --- a/src/query/storages/fuse/src/operations/table_index.rs +++ b/src/query/storages/fuse/src/operations/table_index.rs @@ -23,6 +23,7 @@ use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::local_block_meta_serde; use databend_common_expression::BlockMetaInfo; use databend_common_expression::BlockMetaInfoDowncast; use databend_common_expression::DataBlock; @@ -30,7 +31,6 @@ use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; -use databend_common_expression::local_block_meta_serde; use databend_common_meta_app::schema::TableIndex; use databend_common_meta_app::schema::TableIndexType; use databend_common_meta_app::schema::TableMeta; @@ -56,29 +56,29 @@ use databend_storages_common_table_meta::meta::Versioned; use log::info; use opendal::Operator; -use crate::FuseStorageFormat; -use crate::FuseTable; +use crate::index::filters::BlockFilter; +use crate::index::filters::Filter; use crate::index::BloomIndex; use crate::index::BloomIndexBuilder; use crate::index::NgramArgs; -use crate::index::filters::BlockFilter; -use crate::index::filters::Filter; +use crate::io::read::bloom::block_filter_reader::load_bloom_filter_by_columns; +use crate::io::read::bloom::block_filter_reader::load_index_meta; +use crate::io::read::load_vector_index_meta; +use crate::io::read::read_segment_stats; use crate::io::BlockReader; use crate::io::BlockWriter; use crate::io::BloomIndexState; use crate::io::MetaReaders; use crate::io::TableMetaLocationGenerator; use crate::io::VectorIndexBuilder; -use crate::io::read::bloom::block_filter_reader::load_bloom_filter_by_columns; -use crate::io::read::bloom::block_filter_reader::load_index_meta; -use crate::io::read::load_vector_index_meta; -use crate::io::read::read_segment_stats; use crate::operations::BlockMetaIndex; use crate::operations::CommitSink; use crate::operations::MutationGenerator; use crate::operations::MutationLogEntry; use crate::operations::MutationLogs; use crate::operations::TableMutationAggregator; +use crate::FuseStorageFormat; +use crate::FuseTable; pub async fn do_refresh_table_index( fuse_table: &FuseTable, diff --git a/src/query/storages/fuse/src/pruning/bloom_pruner.rs b/src/query/storages/fuse/src/pruning/bloom_pruner.rs index 71807f9db971a..b2563748e2d3c 100644 --- a/src/query/storages/fuse/src/pruning/bloom_pruner.rs +++ b/src/query/storages/fuse/src/pruning/bloom_pruner.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; -use std::collections::hash_map::Entry; use std::sync::Arc; use databend_common_exception::ErrorCode; @@ -28,13 +28,13 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::Value; use databend_common_sql::BloomIndexColumns; +use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::FilterEvalResult; use databend_storages_common_index::NgramArgs; -use databend_storages_common_index::filters::BlockFilter; +use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::StatisticsOfColumns; -use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use log::info; use log::warn; use opendal::Operator; diff --git a/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs b/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs index 5a242960f453d..31c3d6fca4f67 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs @@ -21,27 +21,27 @@ use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::BooleanType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::UInt64Type; use databend_common_expression::DataBlock; use databend_common_expression::FromData; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; use databend_common_expression::TableSchemaRefExt; -use databend_common_expression::types::BooleanType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::UInt64Type; use futures_util::TryStreamExt; use log::info; use opendal::Operator; use super::parse_opt_opt_args; -use crate::FUSE_OPT_KEY_DATA_RETENTION_PERIOD_IN_HOURS; -use crate::FuseTable; use crate::io::SnapshotsIO; +use crate::table_functions::string_literal; use crate::table_functions::SimpleArgFunc; use crate::table_functions::SimpleArgFuncTemplate; -use crate::table_functions::string_literal; +use crate::FuseTable; +use crate::FUSE_OPT_KEY_DATA_RETENTION_PERIOD_IN_HOURS; pub struct FuseTimeTravelSizeArgs { pub database_name: Option, diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index 21b2841307b52..1603787f85fdc 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -49,8 +49,8 @@ use databend_common_pipeline::core::Pipeline; use databend_common_pipeline::core::ProcessorPtr; use databend_common_pipeline::sources::SyncSource; use databend_common_pipeline::sources::SyncSourcer; -use databend_common_storage::DataOperator; use databend_common_storage::init_operator; +use databend_common_storage::DataOperator; use databend_common_storages_parquet::ParquetPruner; use databend_common_storages_parquet::ParquetReaderBuilder; use databend_common_storages_parquet::ParquetSourceType; @@ -66,10 +66,10 @@ use opendal::Operator; use super::hive_catalog::HiveCatalog; use super::hive_table_options::HiveTableOptions; -use crate::HivePartInfo; -use crate::HivePartitionFiller; use crate::hive_table_source::HiveTableSource; use crate::utils::HiveFetchPartitionScalars; +use crate::HivePartInfo; +use crate::HivePartitionFiller; pub const HIVE_TABLE_ENGINE: &str = "hive"; pub const HIVE_DEFAULT_PARTITION: &str = "__HIVE_DEFAULT_PARTITION__"; @@ -245,7 +245,8 @@ impl HiveTable { if partition_num < 100000 { trace!( "get {} partitions from hive metastore:{:?}", - partition_num, partition_names + partition_num, + partition_names ); } else { trace!("get {} partitions from hive metastore", partition_num); diff --git a/src/query/storages/orc/src/chunk_reader_impl.rs b/src/query/storages/orc/src/chunk_reader_impl.rs index a5307184a4bb5..4e53b8b8938e1 100644 --- a/src/query/storages/orc/src/chunk_reader_impl.rs +++ b/src/query/storages/orc/src/chunk_reader_impl.rs @@ -13,10 +13,10 @@ // limitations under the License. use bytes::Bytes; +use futures_util::future::BoxFuture; use futures_util::AsyncRead; use futures_util::AsyncReadExt; use futures_util::FutureExt; -use futures_util::future::BoxFuture; use opendal::Operator; use orc_rust::reader::AsyncChunkReader; diff --git a/src/query/storages/orc/src/table.rs b/src/query/storages/orc/src/table.rs index d75f889eaf34d..d3172bc13111b 100644 --- a/src/query/storages/orc/src/table.rs +++ b/src/query/storages/orc/src/table.rs @@ -31,18 +31,18 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; -use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; -use databend_common_expression::FILENAME_COLUMN_ID; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; +use databend_common_expression::FILENAME_COLUMN_ID; +use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_pipeline::core::Pipeline; -use databend_common_storage::StageFileInfo; use databend_common_storage::init_stage_operator; +use databend_common_storage::StageFileInfo; use databend_storages_common_table_meta::table::ChangeType; use opendal::Operator; use orc_rust::ArrowReaderBuilder; diff --git a/src/query/storages/parquet/src/copy_into_table/reader.rs b/src/query/storages/parquet/src/copy_into_table/reader.rs index f350c5e4a31f9..a006076e17d11 100644 --- a/src/query/storages/parquet/src/copy_into_table/reader.rs +++ b/src/query/storages/parquet/src/copy_into_table/reader.rs @@ -19,9 +19,9 @@ use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::expr::*; use databend_common_expression::RemoteDefaultExpr; use databend_common_expression::TableSchemaRef; -use databend_common_expression::expr::*; use databend_common_meta_app::principal::NullAs; use databend_common_meta_app::principal::StageFileFormatType; use databend_common_storage::parquet::infer_schema_with_extension; @@ -29,10 +29,10 @@ use databend_storages_common_stage::project_columnar; use opendal::Operator; use parquet::file::metadata::FileMetaData; -use crate::parquet_reader::InMemoryRowGroup; -use crate::parquet_reader::ParquetReaderBuilder; use crate::parquet_reader::policy::ReadPolicyBuilder; use crate::parquet_reader::policy::ReadPolicyImpl; +use crate::parquet_reader::InMemoryRowGroup; +use crate::parquet_reader::ParquetReaderBuilder; use crate::partition::ParquetRowGroupPart; use crate::read_settings::ReadSettings; use crate::schema::arrow_to_table_schema; diff --git a/src/query/storages/parquet/src/copy_into_table/source.rs b/src/query/storages/parquet/src/copy_into_table/source.rs index 516ba916fb960..8776f9df12a1f 100644 --- a/src/query/storages/parquet/src/copy_into_table/source.rs +++ b/src/query/storages/parquet/src/copy_into_table/source.rs @@ -31,11 +31,11 @@ use databend_common_pipeline::core::Processor; use databend_common_pipeline::core::ProcessorPtr; use opendal::Operator; -use crate::ParquetPart; use crate::copy_into_table::projection::CopyProjectionEvaluator; use crate::copy_into_table::reader::RowGroupReaderForCopy; use crate::parquet_reader::policy::ReadPolicyImpl; use crate::read_settings::ReadSettings; +use crate::ParquetPart; enum State { Init, diff --git a/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs index 09cdc87cd9d39..2b9ad8719e2e2 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs @@ -19,39 +19,39 @@ use arrow_schema::ArrowError; use bytes::Bytes; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::DataType; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_expression::Scalar; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; -use databend_common_expression::types::DataType; use databend_common_metrics::storage::metrics_inc_omit_filter_rowgroups; use databend_common_metrics::storage::metrics_inc_omit_filter_rows; use databend_common_storage::OperatorRegistry; +use futures::future::BoxFuture; use futures::StreamExt; use futures::TryFutureExt; -use futures::future::BoxFuture; use opendal::Reader; -use parquet::arrow::ParquetRecordBatchStreamBuilder; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ArrowPredicateFn; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::arrow_reader::RowFilter; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::async_reader::ParquetRecordBatchStream; +use parquet::arrow::ParquetRecordBatchStreamBuilder; +use parquet::arrow::ProjectionMask; use parquet::file::metadata::ParquetMetaData; use parquet::file::metadata::ParquetMetaDataReader; use parquet::schema::types::SchemaDescPtr; -use crate::ParquetPruner; use crate::meta::check_parquet_schema; -use crate::parquet_reader::DataBlockIterator; use crate::parquet_reader::predicate::ParquetPredicate; -use crate::parquet_reader::utils::FieldPaths; use crate::parquet_reader::utils::transform_record_batch; use crate::parquet_reader::utils::transform_record_batch_by_field_paths; +use crate::parquet_reader::utils::FieldPaths; +use crate::parquet_reader::DataBlockIterator; use crate::transformer::RecordBatchTransformer; +use crate::ParquetPruner; /// The reader to read a whole parquet file. pub struct ParquetWholeFileReader { diff --git a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs index 2a518426d4d19..fb6bf2fdbb228 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs @@ -23,6 +23,10 @@ use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::type_check::check_function; +use databend_common_expression::types::DataType; +use databend_common_expression::types::Int64Type; +use databend_common_expression::types::NumberDataType; use databend_common_expression::ColumnRef; use databend_common_expression::Constant; use databend_common_expression::Expr; @@ -34,43 +38,39 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::TopKSorter; use databend_common_expression::Value; -use databend_common_expression::type_check::check_function; -use databend_common_expression::types::DataType; -use databend_common_expression::types::Int64Type; -use databend_common_expression::types::NumberDataType; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_metrics::storage::metrics_inc_omit_filter_rowgroups; use databend_common_metrics::storage::metrics_inc_omit_filter_rows; use databend_common_storage::OperatorRegistry; -use futures::StreamExt; use futures::future::try_join_all; +use futures::StreamExt; use opendal::Operator; use opendal::Reader; -use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use parquet::arrow::ParquetRecordBatchStreamBuilder; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::arrow_reader::RowSelection; use parquet::arrow::arrow_reader::RowSelector; +use parquet::arrow::ParquetRecordBatchStreamBuilder; +use parquet::arrow::PARQUET_FIELD_ID_META_KEY; use parquet::file::metadata::ParquetMetaData; use parquet::file::metadata::RowGroupMetaData; use parquet::format::PageLocation; use parquet::schema::types::SchemaDescPtr; -use crate::DeleteType; -use crate::ParquetFileReader; -use crate::ParquetReaderBuilder; -use crate::ParquetSourceType; use crate::parquet_part::DeleteTask; -use crate::parquet_reader::policy::POLICY_PREDICATE_ONLY; use crate::parquet_reader::policy::PolicyBuilders; use crate::parquet_reader::policy::PolicyType; use crate::parquet_reader::policy::ReadPolicyImpl; -use crate::parquet_reader::predicate::ParquetPredicate; +use crate::parquet_reader::policy::POLICY_PREDICATE_ONLY; use crate::parquet_reader::predicate::build_predicate; +use crate::parquet_reader::predicate::ParquetPredicate; use crate::parquet_reader::row_group::InMemoryRowGroup; use crate::partition::ParquetRowGroupPart; use crate::read_settings::ReadSettings; use crate::transformer::RecordBatchTransformer; +use crate::DeleteType; +use crate::ParquetFileReader; +use crate::ParquetReaderBuilder; +use crate::ParquetSourceType; static DELETES_FILE_SCHEMA: LazyLock = LazyLock::new(|| { arrow_schema::Schema::new(vec![ diff --git a/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs index e8278b9f6ba8a..3e25b6854533e 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs @@ -20,24 +20,24 @@ use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::expr::*; use databend_common_expression::DataSchemaRef; use databend_common_expression::FunctionContext; use databend_common_expression::RemoteDefaultExpr; use databend_common_expression::TableSchemaRef; -use databend_common_expression::expr::*; use databend_common_meta_app::principal::NullAs; use databend_common_meta_app::principal::StageFileFormatType; use databend_common_storage::parquet::infer_schema_with_extension; use databend_storages_common_stage::project_columnar; -use opendal::Operator; use opendal::services::Memory; +use opendal::Operator; use parquet::file::metadata::ParquetMetaDataReader; -use crate::ParquetSourceType; use crate::copy_into_table::CopyProjectionEvaluator; use crate::parquet_reader::DataBlockIterator; use crate::parquet_reader::ParquetReaderBuilder; use crate::schema::arrow_to_table_schema; +use crate::ParquetSourceType; pub struct InmMemoryFile { file_data: Bytes, diff --git a/src/query/storages/parquet/src/parquet_reader/row_group.rs b/src/query/storages/parquet/src/parquet_reader/row_group.rs index 5dbe38aa946cb..c3506e0e3bf30 100644 --- a/src/query/storages/parquet/src/parquet_reader/row_group.rs +++ b/src/query/storages/parquet/src/parquet_reader/row_group.rs @@ -25,9 +25,9 @@ use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; use databend_storages_common_cache::ColumnData; use opendal::Operator; -use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::RowGroups; use parquet::arrow::arrow_reader::RowSelection; +use parquet::arrow::ProjectionMask; use parquet::column::page::PageIterator; use parquet::column::page::PageReader; use parquet::errors::ParquetError; @@ -322,7 +322,7 @@ impl RowGroupCore { self.column_chunks .iter() .enumerate() - .filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx)) + .filter(|&(idx, chunk)| (chunk.is_none() && projection.leaf_included(idx))) .map(|(idx, _chunk)| { let column = self.metadata.meta().column(idx); let (start, length) = column.byte_range(); @@ -497,8 +497,8 @@ mod test { use arrow_schema::Schema; use bytes::Bytes; use databend_common_base::base::tokio; - use opendal::Operator; use opendal::services::Memory; + use opendal::Operator; use parquet::arrow::ArrowWriter; use parquet::basic::Repetition; use parquet::file::metadata::RowGroupMetaData; diff --git a/src/query/storages/parquet/src/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_table/table.rs index f834b4bd36eb5..f5971b264bf09 100644 --- a/src/query/storages/parquet/src/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_table/table.rs @@ -35,10 +35,10 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; -use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; -use databend_common_expression::FILENAME_COLUMN_ID; use databend_common_expression::TableField; use databend_common_expression::TableSchema; +use databend_common_expression::FILENAME_COLUMN_ID; +use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; use databend_common_meta_app::principal::ParquetFileFormatParams; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableIdent; @@ -46,11 +46,11 @@ use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_pipeline::core::Pipeline; use databend_common_settings::Settings; -use databend_common_storage::StageFileInfo; -use databend_common_storage::StageFilesInfo; use databend_common_storage::init_stage_operator; use databend_common_storage::parquet::infer_schema_with_extension; use databend_common_storage::read_metadata_async; +use databend_common_storage::StageFileInfo; +use databend_common_storage::StageFilesInfo; use databend_storages_common_table_meta::table::ChangeType; use log::info; use opendal::Operator; diff --git a/src/query/storages/stage/src/append/row_based_file/writer_processor.rs b/src/query/storages/stage/src/append/row_based_file/writer_processor.rs index beef171ce4a0a..e0c96f436def2 100644 --- a/src/query/storages/stage/src/append/row_based_file/writer_processor.rs +++ b/src/query/storages/stage/src/append/row_based_file/writer_processor.rs @@ -32,9 +32,9 @@ use databend_storages_common_stage::CopyIntoLocationInfo; use opendal::Operator; use super::buffers::FileOutputBuffers; -use crate::append::UnloadOutput; use crate::append::output::DataSummary; use crate::append::path::unload_path; +use crate::append::UnloadOutput; pub struct RowBasedFileWriter { input: Arc, diff --git a/src/query/storages/system/src/temp_files_table.rs b/src/query/storages/system/src/temp_files_table.rs index ff6f6f3726394..70cbd5f2cd383 100644 --- a/src/query/storages/system/src/temp_files_table.rs +++ b/src/query/storages/system/src/temp_files_table.rs @@ -25,6 +25,10 @@ use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::TimestampType; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::FromData; @@ -32,10 +36,6 @@ use databend_common_expression::SendableDataBlockStream; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRefExt; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::NumberType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::TimestampType; use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; @@ -45,14 +45,14 @@ use databend_common_pipeline::core::ProcessorPtr; use databend_common_pipeline::sources::EmptySource; use databend_common_pipeline::sources::StreamSource; use databend_common_storage::DataOperator; -use futures::StreamExt; use futures::stream; use futures::stream::Chunks; use futures::stream::Take; +use futures::StreamExt; +use opendal::operator_futures::FutureLister; use opendal::Lister; use opendal::Metadata; use opendal::Operator; -use opendal::options::ListOptions; use crate::table::SystemTablePart; @@ -152,35 +152,22 @@ impl TempFilesTable { let limit = push_downs.as_ref().and_then(|x| x.limit); let operator = DataOperator::instance().spill_operator(); - let lister = { - let op = operator.clone(); - let path = location_prefix.clone(); - async move { - op.lister_options(&path, ListOptions { - recursive: true, - ..Default::default() - }) - .await - } - }; + let lister = operator.lister_with(&location_prefix).recursive(true); let stream = { let prefix = location_prefix.clone(); let mut counter = 0; let ctx = ctx.clone(); - - stream_source_from_entry_lister_with_chunk_size( - operator.clone(), - lister, - limit, - MAX_BATCH_SIZE, - move |entries| { + let builder = ListerStreamSourceBuilder::with_lister_fut(operator, lister); + builder + .limit_opt(limit) + .chunk_size(MAX_BATCH_SIZE) + .build(move |entries| { counter += entries.len(); let block = Self::block_from_entries(&prefix, entries)?; ctx.set_status_info(format!("{} entries processed", counter).as_str()); Ok(block) - }, - )? + })? }; StreamSource::create(ctx.get_scan_progress(), Some(stream), output) @@ -215,11 +202,8 @@ impl TempFilesTable { if metadata.is_file() { temp_files_name.push(path.trim_start_matches(location_prefix).to_string()); - temp_files_last_modified.push( - metadata - .last_modified() - .map(|x| x.into_inner().as_microsecond()), - ); + temp_files_last_modified + .push(metadata.last_modified().map(|x| x.timestamp_micros())); temp_files_content_length.push(metadata.content_length()); } } @@ -235,18 +219,66 @@ impl TempFilesTable { const MAX_BATCH_SIZE: usize = 1000; +pub struct ListerStreamSourceBuilder +where T: Future> + Send + 'static +{ + op: Operator, + lister_fut: FutureLister, + limit: Option, + chunk_size: usize, +} + +impl ListerStreamSourceBuilder +where T: Future> + Send + 'static +{ + pub fn with_lister_fut(op: Operator, lister_fut: FutureLister) -> Self { + Self { + op, + lister_fut, + limit: None, + chunk_size: MAX_BATCH_SIZE, + } + } + + pub fn limit_opt(mut self, limit: Option) -> Self { + self.limit = limit; + self + } + + pub fn chunk_size(mut self, chunk_size: usize) -> Self { + self.chunk_size = chunk_size; + self + } + + pub fn build( + self, + block_builder: (impl FnMut(Vec<(String, Metadata)>) -> Result + + Sync + + Send + + 'static), + ) -> Result { + stream_source_from_entry_lister_with_chunk_size( + self.op.clone(), + self.lister_fut, + self.limit, + self.chunk_size, + block_builder, + ) + } +} + fn stream_source_from_entry_lister_with_chunk_size( op: Operator, - lister_fut: T, + lister_fut: FutureLister, limit: Option, chunk_size: usize, - block_builder: impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static, + block_builder: (impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static), ) -> Result where T: Future> + Send + 'static, { enum ListerState> + Send + 'static> { - Uninitialized(U), + Uninitialized(FutureLister), Initialized(Chunks>), } From 7b51f0a7df77f881441ba40603f64409f7a58449 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 24 Dec 2025 18:30:54 +0800 Subject: [PATCH 2/3] revert --- src/bendsave/src/storage.rs | 6 +- src/binaries/tool/table_meta_inspector.rs | 8 +- src/common/exception/src/exception_into.rs | 4 +- src/common/native/src/read/reader.rs | 8 +- src/common/storage/src/http_client.rs | 6 +- src/common/storage/src/metrics.rs | 10 +-- src/common/storage/src/metrics_layer.rs | 6 +- src/common/storage/src/operator.rs | 14 ++-- src/common/storage/src/parquet.rs | 6 +- src/common/storage/src/runtime_layer.rs | 8 +- src/common/storage/src/stage.rs | 4 +- src/common/tracing/src/init.rs | 10 +-- src/common/tracing/tests/it/remote_log.rs | 6 +- .../fuse/operations/vacuum_table_v2.rs | 8 +- .../it/storages/fuse/operations/vacuum.rs | 20 ++--- .../service/src/history_tables/external.rs | 2 +- .../src/history_tables/global_history_log.rs | 29 ++++---- ...transform_exchange_aggregate_serializer.rs | 22 +++--- src/query/service/src/spillers/adapter.rs | 8 +- .../service/src/spillers/async_buffer.rs | 16 ++-- .../service/src/spillers/block_reader.rs | 2 +- src/query/service/src/spillers/inner.rs | 8 +- .../service/src/spillers/row_group_encoder.rs | 24 +++--- src/query/service/src/spillers/serialize.rs | 10 +-- .../infer_schema/infer_schema_table.rs | 6 +- .../service/src/test_kits/block_writer.rs | 8 +- src/query/service/src/test_kits/fuse.rs | 16 ++-- .../it/storages/fuse/bloom_index_meta_size.rs | 12 +-- .../it/storages/fuse/meta/column_oriented.rs | 20 ++--- .../mutation/block_compact_mutator.rs | 2 +- .../tests/it/storages/fuse/statistics.rs | 40 +++++----- .../src/planner/binder/copy_into_location.rs | 6 +- src/query/sql/src/planner/binder/ddl/table.rs | 74 +++++++++++-------- src/query/sql/src/planner/binder/location.rs | 10 +-- .../basic/src/result_cache/read/reader.rs | 2 +- .../storages/common/io/src/merge_io_reader.rs | 2 +- .../storages/common/session/src/temp_table.rs | 12 +-- src/query/storages/delta/src/table.rs | 2 +- src/query/storages/fuse/src/fuse_table.rs | 71 +++++++++--------- .../src/io/read/agg_index/agg_index_reader.rs | 4 +- .../src/io/read/block/block_reader_native.rs | 4 +- .../block/block_reader_native_deserialize.rs | 4 +- .../src/io/read/bloom/column_filter_reader.rs | 2 +- .../inverted_index/inverted_index_loader.rs | 6 +- .../inverted_index/inverted_index_reader.rs | 2 +- .../fuse/src/io/read/segment_reader.rs | 8 +- .../read/vector_index/vector_index_loader.rs | 6 +- .../fuse/src/io/write/block_writer.rs | 18 ++--- .../fuse/src/io/write/bloom_index_writer.rs | 6 +- .../operations/analyze/collect_ndv_source.rs | 18 ++--- .../storages/fuse/src/operations/commit.rs | 12 +-- .../common/processors/sink_commit.rs | 14 ++-- .../processors/transform_block_writer.rs | 2 +- .../transform_mutation_aggregator.rs | 10 +-- .../processors/transform_serialize_block.rs | 4 +- .../processors/transform_serialize_segment.rs | 6 +- .../fuse/src/operations/inverted_index.rs | 8 +- .../merge_into/mutator/matched_mutator.rs | 16 ++-- .../mutation/mutator/block_compact_mutator.rs | 8 +- .../mutation/mutator/recluster_mutator.rs | 22 +++--- .../mutator/segment_compact_mutator.rs | 6 +- .../storages/fuse/src/operations/navigate.rs | 4 +- .../fuse/src/operations/read_partitions.rs | 18 ++--- .../storages/fuse/src/operations/recluster.rs | 10 +-- .../mutator/replace_into_operation_agg.rs | 20 ++--- .../fuse/src/operations/snapshot_hint.rs | 2 +- .../fuse/src/operations/table_index.rs | 18 ++--- .../storages/fuse/src/pruning/bloom_pruner.rs | 6 +- .../table_functions/fuse_time_travel_size.rs | 14 ++-- .../storages/hive/hive/src/hive_table.rs | 9 +-- .../storages/orc/src/chunk_reader_impl.rs | 2 +- src/query/storages/orc/src/table.rs | 6 +- .../parquet/src/copy_into_table/reader.rs | 6 +- .../parquet/src/copy_into_table/source.rs | 2 +- .../src/parquet_reader/reader/full_reader.rs | 14 ++-- .../parquet_reader/reader/row_group_reader.rs | 26 +++---- .../reader/streaming_load_reader.rs | 6 +- .../parquet/src/parquet_reader/row_group.rs | 4 +- .../parquet/src/parquet_table/table.rs | 8 +- .../append/row_based_file/writer_processor.rs | 2 +- .../storages/system/src/temp_files_table.rs | 19 +++-- 81 files changed, 461 insertions(+), 449 deletions(-) diff --git a/src/bendsave/src/storage.rs b/src/bendsave/src/storage.rs index 217d6535c7a25..059aa1e4d60fe 100644 --- a/src/bendsave/src/storage.rs +++ b/src/bendsave/src/storage.rs @@ -16,9 +16,9 @@ use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; -use anyhow::anyhow; use anyhow::Ok; use anyhow::Result; +use anyhow::anyhow; use bytes::BufMut; use bytes::Bytes; use bytes::BytesMut; @@ -36,8 +36,8 @@ use databend_common_meta_client::ClientHandle; use databend_common_meta_client::MetaGrpcClient; use databend_common_meta_types::protobuf::ExportRequest; use databend_common_storage::init_operator; -use databend_common_users::builtin::BuiltIn; use databend_common_users::UserApiProvider; +use databend_common_users::builtin::BuiltIn; use databend_common_version::BUILD_INFO; use databend_enterprise_query::license::RealLicenseManager; use databend_query::sessions::BuildInfoRef; @@ -45,9 +45,9 @@ use databend_query::sessions::SessionManager; use futures::TryStream; use futures::TryStreamExt; use log::debug; +use opendal::Operator; use opendal::layers::LoggingLayer; use opendal::layers::RetryLayer; -use opendal::Operator; /// Load the configuration file of databend query. /// diff --git a/src/binaries/tool/table_meta_inspector.rs b/src/binaries/tool/table_meta_inspector.rs index 1e953dda3caa9..902a0f8f4ad96 100644 --- a/src/binaries/tool/table_meta_inspector.rs +++ b/src/binaries/tool/table_meta_inspector.rs @@ -16,27 +16,27 @@ use std::collections::BTreeMap; use std::env; use std::fs::File; use std::io; -use std::io::stdout; use std::io::BufWriter; use std::io::Read; use std::io::Write; +use std::io::stdout; use clap::Parser; use databend_common_config::Config; use databend_common_config::InnerConfig; use databend_common_exception::Result; -use databend_common_storage::init_operator; use databend_common_storage::StorageConfig; -use databend_common_tracing::init_logging; +use databend_common_storage::init_operator; use databend_common_tracing::Config as LogConfig; +use databend_common_tracing::init_logging; use databend_common_version::BUILD_INFO; use databend_common_version::DATABEND_COMMIT_VERSION; use databend_query::GlobalServices; use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::TableSnapshot; use log::info; -use opendal::services::Fs; use opendal::Operator; +use opendal::services::Fs; use serde::Deserialize; use serde::Serialize; use serfig::collectors::from_file; diff --git a/src/common/exception/src/exception_into.rs b/src/common/exception/src/exception_into.rs index df84a23c7ee79..daf06e7a9ecb1 100644 --- a/src/common/exception/src/exception_into.rs +++ b/src/common/exception/src/exception_into.rs @@ -19,12 +19,12 @@ use std::fmt::Formatter; use geozero::error::GeozeroError; -use crate::exception_backtrace::capture; -use crate::span::Span; use crate::ErrorCode; use crate::ErrorFrame; use crate::ParseError; use crate::StackTrace; +use crate::exception_backtrace::capture; +use crate::span::Span; #[derive(thiserror::Error)] enum OtherErrors { diff --git a/src/common/native/src/read/reader.rs b/src/common/native/src/read/reader.rs index e407e26f90038..33bfa612fedeb 100644 --- a/src/common/native/src/read/reader.rs +++ b/src/common/native/src/read/reader.rs @@ -19,14 +19,14 @@ use std::io::SeekFrom; use databend_common_expression::TableSchema; use opendal::Reader; -use super::read_basic::read_u32; -use super::read_basic::read_u64; use super::NativeReadBuf; use super::PageIterator; -use crate::error::Error; -use crate::error::Result; +use super::read_basic::read_u32; +use super::read_basic::read_u64; use crate::ColumnMeta; use crate::PageMeta; +use crate::error::Error; +use crate::error::Result; const DEFAULT_FOOTER_SIZE: u64 = 64 * 1024; diff --git a/src/common/storage/src/http_client.rs b/src/common/storage/src/http_client.rs index fc712aef06559..05d4debaef2cb 100644 --- a/src/common/storage/src/http_client.rs +++ b/src/common/storage/src/http_client.rs @@ -22,11 +22,11 @@ use databend_common_metrics::storage::metrics_inc_storage_http_requests_count; use futures::TryStreamExt; use http::Request; use http::Response; -use opendal::raw::parse_content_encoding; -use opendal::raw::parse_content_length; +use opendal::Buffer; use opendal::raw::HttpBody; use opendal::raw::HttpFetch; -use opendal::Buffer; +use opendal::raw::parse_content_encoding; +use opendal::raw::parse_content_length; use url::Url; pub struct StorageHttpClient { diff --git a/src/common/storage/src/metrics.rs b/src/common/storage/src/metrics.rs index 894c83ff1b968..6c858cf955a33 100644 --- a/src/common/storage/src/metrics.rs +++ b/src/common/storage/src/metrics.rs @@ -12,13 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; use std::sync::atomic::AtomicU64; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; -use std::sync::Arc; use std::time::Instant; -use opendal::raw::oio; +use opendal::Buffer; +use opendal::Metadata; +use opendal::Result; use opendal::raw::Access; use opendal::raw::Layer; use opendal::raw::LayeredAccess; @@ -29,9 +31,7 @@ use opendal::raw::RpDelete; use opendal::raw::RpList; use opendal::raw::RpRead; use opendal::raw::RpWrite; -use opendal::Buffer; -use opendal::Metadata; -use opendal::Result; +use opendal::raw::oio; /// StorageMetrics represents the metrics of storage (all bytes metrics are compressed size). #[derive(Debug, Default)] diff --git a/src/common/storage/src/metrics_layer.rs b/src/common/storage/src/metrics_layer.rs index 90cd024e9a6e0..1719bea9beb4a 100644 --- a/src/common/storage/src/metrics_layer.rs +++ b/src/common/storage/src/metrics_layer.rs @@ -17,12 +17,12 @@ use std::fmt::Debug; use std::sync::LazyLock; use std::time::Duration; -use databend_common_base::runtime::metrics::register_counter_family; -use databend_common_base::runtime::metrics::register_gauge_family; -use databend_common_base::runtime::metrics::register_histogram_family; use databend_common_base::runtime::metrics::FamilyCounter; use databend_common_base::runtime::metrics::FamilyGauge; use databend_common_base::runtime::metrics::FamilyHistogram; +use databend_common_base::runtime::metrics::register_counter_family; +use databend_common_base::runtime::metrics::register_gauge_family; +use databend_common_base::runtime::metrics::register_histogram_family; use opendal::layers::observe; use opendal::raw::Access; use opendal::raw::Layer; diff --git a/src/common/storage/src/operator.rs b/src/common/storage/src/operator.rs index 4317e625401be..301eaabd06c46 100644 --- a/src/common/storage/src/operator.rs +++ b/src/common/storage/src/operator.rs @@ -21,12 +21,11 @@ use std::time::Duration; use anyhow::anyhow; use databend_common_base::base::GlobalInstance; -use databend_common_base::runtime::metrics::register_counter_family; -use databend_common_base::runtime::metrics::FamilyCounter; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; +use databend_common_base::runtime::metrics::FamilyCounter; +use databend_common_base::runtime::metrics::register_counter_family; use databend_common_exception::ErrorCode; -use databend_common_meta_app::storage::set_s3_storage_class; use databend_common_meta_app::storage::S3StorageClass; use databend_common_meta_app::storage::StorageAzblobConfig; use databend_common_meta_app::storage::StorageCosConfig; @@ -44,8 +43,11 @@ use databend_common_meta_app::storage::StorageOssConfig; use databend_common_meta_app::storage::StorageParams; use databend_common_meta_app::storage::StorageS3Config; use databend_common_meta_app::storage::StorageWebhdfsConfig; +use databend_common_meta_app::storage::set_s3_storage_class; use databend_enterprise_storage_encryption::get_storage_encryption_handler; use log::warn; +use opendal::Builder; +use opendal::Operator; use opendal::layers::AsyncBacktraceLayer; use opendal::layers::ConcurrentLimitLayer; use opendal::layers::FastraceLayer; @@ -57,15 +59,13 @@ use opendal::layers::RetryLayer; use opendal::layers::TimeoutLayer; use opendal::raw::HttpClient; use opendal::services; -use opendal::Builder; -use opendal::Operator; +use crate::StorageConfig; +use crate::StorageHttpClient; use crate::http_client::get_storage_http_client; use crate::metrics_layer::METRICS_LAYER; use crate::operator_cache::get_operator_cache; use crate::runtime_layer::RuntimeLayer; -use crate::StorageConfig; -use crate::StorageHttpClient; static METRIC_OPENDAL_RETRIES_COUNT: LazyLock>> = LazyLock::new(|| register_counter_family("opendal_retries_count")); diff --git a/src/common/storage/src/parquet.rs b/src/common/storage/src/parquet.rs index f07c3fe2c7e1c..dcf640c839dab 100644 --- a/src/common/storage/src/parquet.rs +++ b/src/common/storage/src/parquet.rs @@ -18,8 +18,8 @@ use std::sync::Arc; use arrow_schema::Schema as ArrowSchema; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::converts::arrow::EXTENSION_KEY; use databend_common_expression::FieldIndex; +use databend_common_expression::converts::arrow::EXTENSION_KEY; use opendal::Operator; use parquet::arrow::parquet_to_arrow_schema; use parquet::errors::ParquetError; @@ -213,14 +213,14 @@ pub fn traverse_parquet_schema_tree( #[cfg(test)] mod tests { - use databend_common_expression::types::NumberDataType; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; + use databend_common_expression::types::NumberDataType; use parquet::arrow::ArrowSchemaConverter; - use crate::parquet::build_parquet_schema_tree; use crate::parquet::ParquetSchemaTreeNode; + use crate::parquet::build_parquet_schema_tree; #[test] fn test_build_parquet_schema_tree() { diff --git a/src/common/storage/src/runtime_layer.rs b/src/common/storage/src/runtime_layer.rs index 2ce25bafeb78d..578006e41a768 100644 --- a/src/common/storage/src/runtime_layer.rs +++ b/src/common/storage/src/runtime_layer.rs @@ -19,7 +19,9 @@ use std::sync::Arc; use databend_common_base::runtime::Runtime; use databend_common_base::runtime::ThreadTracker; use databend_common_base::runtime::TrySpawn; -use opendal::raw::oio; +use opendal::Buffer; +use opendal::Metadata; +use opendal::Result; use opendal::raw::Access; use opendal::raw::Layer; use opendal::raw::LayeredAccess; @@ -37,9 +39,7 @@ use opendal::raw::RpPresign; use opendal::raw::RpRead; use opendal::raw::RpStat; use opendal::raw::RpWrite; -use opendal::Buffer; -use opendal::Metadata; -use opendal::Result; +use opendal::raw::oio; /// # TODO /// diff --git a/src/common/storage/src/stage.rs b/src/common/storage/src/stage.rs index 2bd8772122cbc..02a1a3e29e323 100644 --- a/src/common/storage/src/stage.rs +++ b/src/common/storage/src/stage.rs @@ -24,18 +24,18 @@ use databend_common_exception::Result; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::StageType; use databend_common_meta_app::principal::UserIdentity; -use futures::stream; use databend_common_meta_app::storage::StorageParams; use futures::Stream; use futures::StreamExt; use futures::TryStreamExt; +use futures::stream; use opendal::EntryMode; use opendal::Metadata; use opendal::Operator; use regex::Regex; -use crate::init_operator; use crate::DataOperator; +use crate::init_operator; #[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq)] pub enum StageFileStatus { diff --git a/src/common/tracing/src/init.rs b/src/common/tracing/src/init.rs index 8749fe710ab79..cd3f45a787a16 100644 --- a/src/common/tracing/src/init.rs +++ b/src/common/tracing/src/init.rs @@ -14,35 +14,35 @@ use std::borrow::Cow; use std::collections::BTreeMap; +use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; -use std::sync::Arc; +use databend_common_base::base::GlobalInstance; use databend_common_base::base::tokio; use databend_common_base::base::tokio::sync::RwLock; -use databend_common_base::base::GlobalInstance; use databend_common_base::runtime::Thread; use fastrace::prelude::*; use log::LevelFilter; -use logforth::filter::env_filter::EnvFilterBuilder; use logforth::filter::EnvFilter; +use logforth::filter::env_filter::EnvFilterBuilder; use opendal::Operator; use opentelemetry_otlp::Compression; use opentelemetry_otlp::WithExportConfig; use opentelemetry_otlp::WithTonicConfig; +use crate::Config; use crate::config::LogFormat; use crate::config::OTLPProtocol; use crate::filter::ThreadTrackerFilter; -use crate::loggers::new_rolling_file_appender; use crate::loggers::IdenticalLayout; use crate::loggers::JsonLayout; use crate::loggers::TextLayout; +use crate::loggers::new_rolling_file_appender; use crate::predefined_tables::table_to_target; use crate::query_log_collector::QueryLogCollector; use crate::remote_log::RemoteLog; use crate::structlog::StructLogReporter; -use crate::Config; const HEADER_TRACE_PARENT: &str = "traceparent"; diff --git a/src/common/tracing/tests/it/remote_log.rs b/src/common/tracing/tests/it/remote_log.rs index 532bdd467ed2e..f064a106bc0c5 100644 --- a/src/common/tracing/tests/it/remote_log.rs +++ b/src/common/tracing/tests/it/remote_log.rs @@ -17,10 +17,9 @@ use std::sync::Arc; use std::time::Duration; use async_channel::bounded; -use databend_common_base::base::tokio; use databend_common_base::base::GlobalInstance; +use databend_common_base::base::tokio; use databend_common_exception::Result; -use databend_common_tracing::convert_to_batch; use databend_common_tracing::Config; use databend_common_tracing::GlobalLogger; use databend_common_tracing::LogMessage; @@ -28,10 +27,11 @@ use databend_common_tracing::RemoteLog; use databend_common_tracing::RemoteLogBuffer; use databend_common_tracing::RemoteLogElement; use databend_common_tracing::RemoteLogGuard; +use databend_common_tracing::convert_to_batch; use log::Level; use log::Record; -use opendal::services; use opendal::Operator; +use opendal::services; fn setup() -> Result<(RemoteLog, Box)> { let mut labels = BTreeMap::new(); diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs index 27ad1abe8fcc0..510db7c43161f 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs @@ -27,22 +27,22 @@ use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_meta_app::schema::least_visible_time_ident::LeastVisibleTimeIdent; use databend_common_meta_app::schema::LeastVisibleTime; use databend_common_meta_app::schema::ListIndexesByIdReq; +use databend_common_meta_app::schema::least_visible_time_ident::LeastVisibleTimeIdent; +use databend_common_storages_fuse::FuseTable; +use databend_common_storages_fuse::RetentionPolicy; use databend_common_storages_fuse::io::MetaReaders; use databend_common_storages_fuse::io::SegmentsIO; use databend_common_storages_fuse::io::TableMetaLocationGenerator; -use databend_common_storages_fuse::FuseTable; -use databend_common_storages_fuse::RetentionPolicy; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; use databend_storages_common_cache::LoadParams; use databend_storages_common_io::Files; -use databend_storages_common_table_meta::meta::uuid_from_date_time; use databend_storages_common_table_meta::meta::CompactSegmentInfo; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::VACUUM2_OBJECT_KEY_PREFIX; +use databend_storages_common_table_meta::meta::uuid_from_date_time; use futures_util::TryStreamExt; use log::info; use opendal::Entry; diff --git a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs index eec3b1edbed7d..47ae1a8e363d2 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs @@ -50,13 +50,13 @@ use databend_enterprise_vacuum_handler::vacuum_handler::VacuumTempOptions; use databend_query::test_kits::*; use databend_storages_common_io::Files; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; +use opendal::EntryMode; +use opendal::Metadata; +use opendal::OperatorBuilder; use opendal::raw::Access; use opendal::raw::AccessorInfo; use opendal::raw::OpStat; use opendal::raw::RpStat; -use opendal::EntryMode; -use opendal::Metadata; -use opendal::OperatorBuilder; #[tokio::test(flavor = "multi_thread")] async fn test_fuse_do_vacuum_drop_tables() -> Result<()> { @@ -226,13 +226,13 @@ mod test_accessor { use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; - use opendal::raw::oio; - use opendal::raw::oio::Entry; use opendal::raw::MaybeSend; use opendal::raw::OpDelete; use opendal::raw::OpList; use opendal::raw::RpDelete; use opendal::raw::RpList; + use opendal::raw::oio; + use opendal::raw::oio::Entry; use super::*; @@ -889,10 +889,12 @@ async fn test_vacuum_drop_create_or_replace_impl(vacuum_stmts: &[&str]) -> Resul // db1.t1 should still be accessible fixture.execute_command("select * from db1.t1").await?; // db2.t1 should not exist - assert!(fixture - .execute_command("select * from db2.t1") - .await - .is_err()); + assert!( + fixture + .execute_command("select * from db2.t1") + .await + .is_err() + ); Ok(()) } diff --git a/src/query/service/src/history_tables/external.rs b/src/query/service/src/history_tables/external.rs index bd349547cea56..ca7e6b3a5e341 100644 --- a/src/query/service/src/history_tables/external.rs +++ b/src/query/service/src/history_tables/external.rs @@ -15,8 +15,8 @@ use std::collections::BTreeMap; use databend_common_meta_app::storage::StorageParams; -use opendal::raw::normalize_root; use opendal::Scheme; +use opendal::raw::normalize_root; #[derive(Debug)] pub struct ExternalStorageConnection { diff --git a/src/query/service/src/history_tables/global_history_log.rs b/src/query/service/src/history_tables/global_history_log.rs index 1dcd5b4d2de76..e1761db2de5be 100644 --- a/src/query/service/src/history_tables/global_history_log.rs +++ b/src/query/service/src/history_tables/global_history_log.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::sync::Arc; use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; -use std::sync::Arc; use std::time::Duration; use databend_common_base::base::GlobalInstance; -use databend_common_base::runtime::spawn; use databend_common_base::runtime::CaptureLogSettings; use databend_common_base::runtime::MemStat; use databend_common_base::runtime::Runtime; use databend_common_base::runtime::ThreadTracker; use databend_common_base::runtime::TrySpawn; +use databend_common_base::runtime::spawn; use databend_common_catalog::table_context::TableContext; use databend_common_config::GlobalConfig; use databend_common_config::InnerConfig; @@ -35,14 +35,14 @@ use databend_common_license::license_manager::LicenseManagerSwitch; use databend_common_meta_app::storage::StorageParams; use databend_common_meta_client::MetaGrpcClient; use databend_common_sql::Planner; -use databend_common_storage::init_operator; use databend_common_storage::DataOperator; -use databend_common_tracing::get_all_history_table_names; -use databend_common_tracing::init_history_tables; +use databend_common_storage::init_operator; use databend_common_tracing::GlobalLogger; use databend_common_tracing::HistoryTable; -use futures_util::future::join_all; +use databend_common_tracing::get_all_history_table_names; +use databend_common_tracing::init_history_tables; use futures_util::TryStreamExt; +use futures_util::future::join_all; use log::debug; use log::error; use log::info; @@ -50,18 +50,18 @@ use log::warn; use opendal::raw::normalize_root; use parking_lot::Mutex; use rand::random; -use tokio::time::sleep; use tokio::time::Instant; +use tokio::time::sleep; use uuid::Uuid; use crate::clusters::ClusterDiscovery; use crate::history_tables::alter_table::get_alter_table_sql; use crate::history_tables::alter_table::get_log_table; use crate::history_tables::alter_table::should_reset; -use crate::history_tables::error_handling::is_temp_error; use crate::history_tables::error_handling::ErrorCounters; -use crate::history_tables::external::get_external_storage_connection; +use crate::history_tables::error_handling::is_temp_error; use crate::history_tables::external::ExternalStorageConnection; +use crate::history_tables::external::get_external_storage_connection; use crate::history_tables::meta::HistoryMetaHandle; use crate::history_tables::session::create_session; use crate::interpreters::InterpreterFactory; @@ -361,11 +361,12 @@ impl GlobalHistoryLog { let vacuum = format!("VACUUM TABLE system_history.{}", table.name); self.execute_sql(&vacuum).await?; } - info!("periodic retention operation on history log table '{}' completed successfully (delete {} secs, vacuum {} secs)", - table.name, - delete_elapsed, - start.elapsed().as_secs() - delete_elapsed - ); + info!( + "periodic retention operation on history log table '{}' completed successfully (delete {} secs, vacuum {} secs)", + table.name, + delete_elapsed, + start.elapsed().as_secs() - delete_elapsed + ); return Ok(true); } Ok(false) diff --git a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs index 480d075562008..86727c1f27ca7 100644 --- a/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs +++ b/src/query/service/src/pipelines/processors/transforms/aggregator/serde/transform_exchange_aggregate_serializer.rs @@ -15,46 +15,46 @@ use std::sync::Arc; use std::time::Instant; -use arrow_ipc::writer::IpcWriteOptions; use arrow_ipc::CompressionType; +use arrow_ipc::writer::IpcWriteOptions; use databend_common_base::base::ProgressValues; use databend_common_base::runtime::profile::Profile; use databend_common_base::runtime::profile::ProfileStatisticsName; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::BlockMetaInfoDowncast; +use databend_common_expression::DataBlock; +use databend_common_expression::FromData; +use databend_common_expression::PartitionedPayload; use databend_common_expression::arrow::serialize_column; use databend_common_expression::types::ArgType; use databend_common_expression::types::ArrayType; use databend_common_expression::types::Int64Type; use databend_common_expression::types::ReturnType; use databend_common_expression::types::UInt64Type; -use databend_common_expression::BlockMetaInfoDowncast; -use databend_common_expression::DataBlock; -use databend_common_expression::FromData; -use databend_common_expression::PartitionedPayload; use databend_common_pipeline::core::InputPort; use databend_common_pipeline::core::OutputPort; use databend_common_pipeline::core::Processor; +use databend_common_pipeline_transforms::UnknownMode; use databend_common_pipeline_transforms::processors::BlockMetaTransform; use databend_common_pipeline_transforms::processors::BlockMetaTransformer; -use databend_common_pipeline_transforms::UnknownMode; use databend_common_settings::FlightCompression; use futures_util::future::BoxFuture; use log::info; use opendal::Operator; use super::SerializePayload; -use crate::pipelines::processors::transforms::aggregator::agg_spilling_aggregate_payload as local_agg_spilling_aggregate_payload; -use crate::pipelines::processors::transforms::aggregator::aggregate_exchange_injector::compute_block_number; -use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; -use crate::pipelines::processors::transforms::aggregator::exchange_defines; use crate::pipelines::processors::transforms::aggregator::AggregateSerdeMeta; use crate::pipelines::processors::transforms::aggregator::AggregatorParams; use crate::pipelines::processors::transforms::aggregator::FlightSerialized; use crate::pipelines::processors::transforms::aggregator::FlightSerializedMeta; use crate::pipelines::processors::transforms::aggregator::SerializeAggregateStream; -use crate::servers::flight::v1::exchange::serde::serialize_block; +use crate::pipelines::processors::transforms::aggregator::agg_spilling_aggregate_payload as local_agg_spilling_aggregate_payload; +use crate::pipelines::processors::transforms::aggregator::aggregate_exchange_injector::compute_block_number; +use crate::pipelines::processors::transforms::aggregator::aggregate_meta::AggregateMeta; +use crate::pipelines::processors::transforms::aggregator::exchange_defines; use crate::servers::flight::v1::exchange::ExchangeShuffleMeta; +use crate::servers::flight::v1::exchange::serde::serialize_block; use crate::sessions::QueryContext; use crate::spillers::Spiller; use crate::spillers::SpillerConfig; diff --git a/src/query/service/src/spillers/adapter.rs b/src/query/service/src/spillers/adapter.rs index b6aab5bfe38e0..c33877f6dba9a 100644 --- a/src/query/service/src/spillers/adapter.rs +++ b/src/query/service/src/spillers/adapter.rs @@ -12,18 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; +use std::collections::hash_map::Entry; use std::ops::DerefMut; use std::ops::Range; use std::sync::Arc; use std::sync::RwLock; use std::time::Instant; +use databend_common_base::base::ProgressValues; use databend_common_base::base::dma_buffer_to_bytes; use databend_common_base::base::dma_read_file_range; -use databend_common_base::base::ProgressValues; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; use databend_common_expression::DataBlock; @@ -36,14 +36,14 @@ use opendal::Buffer; use opendal::Operator; use parquet::file::metadata::RowGroupMetaDataPtr; +use super::Location; +use super::SpillsBufferPool; use super::async_buffer::SpillTarget; use super::block_reader::BlocksReader; use super::block_writer::BlocksWriter; use super::inner::*; use super::row_group_encoder::*; use super::serialize::*; -use super::Location; -use super::SpillsBufferPool; use crate::sessions::QueryContext; #[derive(Clone)] diff --git a/src/query/service/src/spillers/async_buffer.rs b/src/query/service/src/spillers/async_buffer.rs index 976ba679b0c53..f7f58afe1b71c 100644 --- a/src/query/service/src/spillers/async_buffer.rs +++ b/src/query/service/src/spillers/async_buffer.rs @@ -32,27 +32,27 @@ use databend_common_base::runtime::Runtime; use databend_common_base::runtime::TrySpawn; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::infer_table_schema; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; use databend_common_expression::TableSchemaRef; +use databend_common_expression::infer_table_schema; use databend_common_meta_app::storage::StorageParams; -use databend_common_storages_parquet::parquet_reader::row_group::get_ranges; -use databend_common_storages_parquet::parquet_reader::RowGroupCore; use databend_common_storages_parquet::ReadSettings; -use fastrace::future::FutureExt; +use databend_common_storages_parquet::parquet_reader::RowGroupCore; +use databend_common_storages_parquet::parquet_reader::row_group::get_ranges; use fastrace::Span; +use fastrace::future::FutureExt; use opendal::Metadata; use opendal::Operator; use opendal::Writer; +use parquet::arrow::ArrowWriter; +use parquet::arrow::FieldLevels; +use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::arrow_reader::RowGroups; use parquet::arrow::parquet_to_arrow_field_levels; use parquet::arrow::parquet_to_arrow_schema; -use parquet::arrow::ArrowWriter; -use parquet::arrow::FieldLevels; -use parquet::arrow::ProjectionMask; use parquet::basic::Compression; use parquet::file::metadata::RowGroupMetaData; use parquet::file::properties::EnabledStatistics; @@ -832,9 +832,9 @@ impl Background { #[cfg(test)] mod tests { use std::io::Write; + use std::sync::Arc; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; - use std::sync::Arc; use databend_common_base::runtime::spawn; use opendal::Operator; diff --git a/src/query/service/src/spillers/block_reader.rs b/src/query/service/src/spillers/block_reader.rs index 2883c74ef2d9d..98154ebb6b713 100644 --- a/src/query/service/src/spillers/block_reader.rs +++ b/src/query/service/src/spillers/block_reader.rs @@ -22,8 +22,8 @@ use databend_common_expression::DataBlock; use databend_common_pipeline_transforms::traits::Location; use opendal::Operator; -use super::serialize::deserialize_block; use super::Layout; +use super::serialize::deserialize_block; pub struct BlocksReader<'a> { read_bytes: u64, diff --git a/src/query/service/src/spillers/inner.rs b/src/query/service/src/spillers/inner.rs index 9b4fe414424e7..4fa84bb4b899f 100644 --- a/src/query/service/src/spillers/inner.rs +++ b/src/query/service/src/spillers/inner.rs @@ -17,23 +17,23 @@ use std::fmt::Formatter; use std::sync::Arc; use std::time::Instant; -use databend_common_base::base::dma_buffer_to_bytes; -use databend_common_base::base::dma_read_file_range; use databend_common_base::base::Alignment; use databend_common_base::base::DmaWriteBuf; use databend_common_base::base::GlobalUniqName; +use databend_common_base::base::dma_buffer_to_bytes; +use databend_common_base::base::dma_read_file_range; use databend_common_base::runtime::profile::Profile; use databend_common_base::runtime::profile::ProfileStatisticsName; use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_storages_common_cache::TempDir; -use opendal::services::Fs; use opendal::Buffer; use opendal::Operator; +use opendal::services::Fs; +use super::Location; use super::async_buffer::SpillTarget; use super::serialize::*; -use super::Location; /// Spiller type, currently only supports HashJoin #[derive(Clone, Debug, Eq, PartialEq)] diff --git a/src/query/service/src/spillers/row_group_encoder.rs b/src/query/service/src/spillers/row_group_encoder.rs index f832c69e5865d..6f7b7c6b3dc20 100644 --- a/src/query/service/src/spillers/row_group_encoder.rs +++ b/src/query/service/src/spillers/row_group_encoder.rs @@ -19,30 +19,30 @@ use std::sync::Arc; use arrow_schema::Schema; use bytes::Bytes; -use databend_common_base::base::dma_buffer_to_bytes; use databend_common_base::base::DmaWriteBuf; use databend_common_base::base::SyncDmaFile; +use databend_common_base::base::dma_buffer_to_bytes; use databend_common_base::rangemap::RangeMerger; use databend_common_exception::Result; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_expression::Value; -use databend_common_storages_parquet::parquet_reader::RowGroupCore; use databend_common_storages_parquet::ReadSettings; +use databend_common_storages_parquet::parquet_reader::RowGroupCore; use databend_storages_common_cache::ParquetMetaData; use databend_storages_common_cache::TempDir; use databend_storages_common_cache::TempPath; use either::Either; use opendal::Operator; +use parquet::arrow::ArrowSchemaConverter; +use parquet::arrow::FieldLevels; +use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::arrow_writer::ArrowColumnWriter; use parquet::arrow::arrow_writer::compute_leaves; use parquet::arrow::arrow_writer::get_column_writers; -use parquet::arrow::arrow_writer::ArrowColumnWriter; use parquet::arrow::parquet_to_arrow_field_levels; -use parquet::arrow::ArrowSchemaConverter; -use parquet::arrow::FieldLevels; -use parquet::arrow::ProjectionMask; use parquet::errors; use parquet::file::metadata::RowGroupMetaData; use parquet::file::metadata::RowGroupMetaDataPtr; @@ -53,11 +53,11 @@ use parquet::file::writer::SerializedFileWriter; use parquet::file::writer::SerializedRowGroupWriter; use parquet::schema::types::SchemaDescriptor; -use super::async_buffer::BufferWriter; -use super::async_buffer::SpillTarget; use super::Location; use super::SpillerInner; use super::SpillsBufferPool; +use super::async_buffer::BufferWriter; +use super::async_buffer::SpillTarget; pub struct Properties { schema: Arc, @@ -534,13 +534,13 @@ impl SpillerInner { #[cfg(test)] mod tests { use databend_common_exception::Result; - use databend_common_expression::types::array::ArrayColumnBuilder; - use databend_common_expression::types::number::Int32Type; + use databend_common_expression::Column; + use databend_common_expression::FromData; use databend_common_expression::types::ArgType; use databend_common_expression::types::DataType; use databend_common_expression::types::StringType; - use databend_common_expression::Column; - use databend_common_expression::FromData; + use databend_common_expression::types::array::ArrayColumnBuilder; + use databend_common_expression::types::number::Int32Type; use super::*; diff --git a/src/query/service/src/spillers/serialize.rs b/src/query/service/src/spillers/serialize.rs index 681e6e217367c..386bb731c60d9 100644 --- a/src/query/service/src/spillers/serialize.rs +++ b/src/query/service/src/spillers/serialize.rs @@ -25,18 +25,18 @@ use databend_common_base::base::Alignment; use databend_common_base::base::DmaWriteBuf; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::arrow::write_column; -use databend_common_expression::infer_table_schema; -use databend_common_expression::types::DataType; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::DataField; use databend_common_expression::DataSchema; use databend_common_expression::Value; +use databend_common_expression::arrow::write_column; +use databend_common_expression::infer_table_schema; +use databend_common_expression::types::DataType; use databend_storages_common_io::BufferReader; use opendal::Buffer; -use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::ArrowWriter; +use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::basic::Compression; use parquet::file::properties::EnabledStatistics; use parquet::file::properties::WriterProperties; @@ -199,10 +199,10 @@ fn bare_blocks_to_parquet( #[cfg(test)] mod tests { use bytes::Bytes; + use databend_common_expression::FromData; use databend_common_expression::block_debug::assert_block_value_eq; use databend_common_expression::types::Int64Type; use databend_common_expression::types::StringType; - use databend_common_expression::FromData; use super::*; diff --git a/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs b/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs index 7642693c97693..6fc71ed831bab 100644 --- a/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs +++ b/src/query/service/src/table_functions/infer_schema/infer_schema_table.rs @@ -30,12 +30,12 @@ use databend_common_catalog::table_args::TableArgs; use databend_common_compress::CompressAlgorithm; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::types::NumberDataType; use databend_common_expression::BlockThresholds; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRefExt; +use databend_common_expression::types::NumberDataType; use databend_common_meta_app::principal::FileFormatParams; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::principal::StageType; @@ -46,8 +46,8 @@ use databend_common_pipeline::core::Pipeline; use databend_common_pipeline::sources::PrefetchAsyncSourcer; use databend_common_pipeline_transforms::TransformPipelineHelper; use databend_common_sql::binder::resolve_file_location; -use databend_common_storage::init_stage_operator; use databend_common_storage::StageFilesInfo; +use databend_common_storage::init_stage_operator; use databend_common_storages_stage::BytesReader; use databend_common_storages_stage::Decompressor; use databend_common_storages_stage::InferSchemaPartInfo; @@ -58,9 +58,9 @@ use opendal::Scheme; use super::parquet::ParquetInferSchemaSource; use crate::sessions::TableContext; +use crate::table_functions::TableFunction; use crate::table_functions::infer_schema::separator::InferSchemaSeparator; use crate::table_functions::infer_schema::table_args::InferSchemaArgsParsed; -use crate::table_functions::TableFunction; pub(crate) const INFER_SCHEMA: &str = "infer_schema"; diff --git a/src/query/service/src/test_kits/block_writer.rs b/src/query/service/src/test_kits/block_writer.rs index f9a7ee9f12918..a11e8e9d55b36 100644 --- a/src/query/service/src/test_kits/block_writer.rs +++ b/src/query/service/src/test_kits/block_writer.rs @@ -21,16 +21,15 @@ use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; use databend_common_sql::ApproxDistinctColumns; use databend_common_sql::BloomIndexColumns; -use databend_common_storages_fuse::io::build_column_hlls; -use databend_common_storages_fuse::io::serialize_block; +use databend_common_storages_fuse::FuseStorageFormat; use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::io::WriteSettings; -use databend_common_storages_fuse::FuseStorageFormat; +use databend_common_storages_fuse::io::build_column_hlls; +use databend_common_storages_fuse::io::serialize_block; use databend_storages_common_blocks::blocks_to_parquet; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; use databend_storages_common_index::RangeIndex; -use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; use databend_storages_common_table_meta::meta::Compression; @@ -38,6 +37,7 @@ use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::RawBlockHLL; use databend_storages_common_table_meta::meta::StatisticsOfColumns; use databend_storages_common_table_meta::meta::TableMetaTimestamps; +use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; use parquet::format::FileMetaData; diff --git a/src/query/service/src/test_kits/fuse.rs b/src/query/service/src/test_kits/fuse.rs index 9ec4d6eb57827..f78714a52a077 100644 --- a/src/query/service/src/test_kits/fuse.rs +++ b/src/query/service/src/test_kits/fuse.rs @@ -20,27 +20,24 @@ use chrono::DateTime; use chrono::Duration; use chrono::Utc; use databend_common_exception::Result; -use databend_common_expression::types::NumberScalar; use databend_common_expression::BlockThresholds; use databend_common_expression::DataBlock; use databend_common_expression::DataSchemaRef; use databend_common_expression::ScalarRef; use databend_common_expression::SendableDataBlockStream; +use databend_common_expression::types::NumberScalar; use databend_common_sql::optimizer::ir::SExpr; use databend_common_sql::plans::Mutation; use databend_common_storages_factory::Table; +use databend_common_storages_fuse::FUSE_TBL_SEGMENT_PREFIX; +use databend_common_storages_fuse::FuseStorageFormat; +use databend_common_storages_fuse::FuseTable; use databend_common_storages_fuse::io::MetaWriter; use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::statistics::gen_columns_statistics; use databend_common_storages_fuse::statistics::merge_statistics; use databend_common_storages_fuse::statistics::reducers::reduce_block_metas; -use databend_common_storages_fuse::FuseStorageFormat; -use databend_common_storages_fuse::FuseTable; -use databend_common_storages_fuse::FUSE_TBL_SEGMENT_PREFIX; use databend_storages_common_cache::SegmentStatistics; -use databend_storages_common_table_meta::meta::testing::SegmentInfoV2; -use databend_storages_common_table_meta::meta::testing::TableSnapshotV2; -use databend_storages_common_table_meta::meta::testing::TableSnapshotV4; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::Location; @@ -49,14 +46,17 @@ use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::Versioned; +use databend_storages_common_table_meta::meta::testing::SegmentInfoV2; +use databend_storages_common_table_meta::meta::testing::TableSnapshotV2; +use databend_storages_common_table_meta::meta::testing::TableSnapshotV4; use futures_util::TryStreamExt; use opendal::Operator; use serde::Serialize; use uuid::Uuid; +use super::TestFixture; use super::block_writer::BlockWriter; use super::old_version_generator; -use super::TestFixture; use crate::interpreters::Interpreter; use crate::interpreters::MutationInterpreter; use crate::sessions::QueryContext; diff --git a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs index a6ef2e8d06600..ec26082b6d230 100644 --- a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs +++ b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs @@ -18,9 +18,6 @@ use std::sync::Arc; use chrono::Utc; use databend_common_base::base::tokio; -use databend_common_expression::types::Int32Type; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::NumberScalar; use databend_common_expression::ColumnId; use databend_common_expression::DataBlock; use databend_common_expression::FromData; @@ -28,10 +25,13 @@ use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRefExt; +use databend_common_expression::types::Int32Type; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberScalar; +use databend_common_storages_fuse::FuseStorageFormat; use databend_common_storages_fuse::io::TableMetaLocationGenerator; -use databend_common_storages_fuse::statistics::gen_columns_statistics; use databend_common_storages_fuse::statistics::STATS_STRING_PREFIX_LEN; -use databend_common_storages_fuse::FuseStorageFormat; +use databend_common_storages_fuse::statistics::gen_columns_statistics; use databend_query::test_kits::*; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheValue; @@ -48,8 +48,8 @@ use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::Versioned; use opendal::Operator; use parquet::format::FileMetaData; -use sysinfo::get_current_pid; use sysinfo::System; +use sysinfo::get_current_pid; use uuid::Uuid; // NOTE: diff --git a/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs b/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs index 28dd954cafcaf..d566bbf6c4088 100644 --- a/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs +++ b/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs @@ -15,9 +15,6 @@ use std::sync::Arc; use databend_common_exception::Result; -use databend_common_expression::types::BinaryType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::UInt64Type; use databend_common_expression::Column; use databend_common_expression::DataBlock; use databend_common_expression::FromData; @@ -26,25 +23,28 @@ use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; -use databend_common_storages_fuse::io::read::read_column_oriented_segment; +use databend_common_expression::types::BinaryType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::UInt64Type; +use databend_common_storages_fuse::FuseStorageFormat; use databend_common_storages_fuse::io::TableMetaLocationGenerator; +use databend_common_storages_fuse::io::read::read_column_oriented_segment; use databend_common_storages_fuse::statistics::gen_columns_statistics; use databend_common_storages_fuse::statistics::reduce_block_metas; -use databend_common_storages_fuse::FuseStorageFormat; use databend_query::test_kits::BlockWriter; use databend_query::test_kits::TestFixture; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; -use databend_storages_common_table_meta::meta::column_oriented_segment::*; -use databend_storages_common_table_meta::meta::decode; -use databend_storages_common_table_meta::meta::testing::MetaEncoding; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; use databend_storages_common_table_meta::meta::Compression; +use databend_storages_common_table_meta::meta::column_oriented_segment::*; +use databend_storages_common_table_meta::meta::decode; +use databend_storages_common_table_meta::meta::testing::MetaEncoding; use opendal::Operator; -async fn generate_column_oriented_segment( -) -> Result<(ColumnOrientedSegment, Vec, TableSchemaRef)> { +async fn generate_column_oriented_segment() +-> Result<(ColumnOrientedSegment, Vec, TableSchemaRef)> { let field_1 = TableField::new("u64", TableDataType::Number(NumberDataType::UInt64)); let field_2 = TableField::new( "nullable_u64", diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs index 4b518baa9e50f..1f0b8ef510bca 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs @@ -44,8 +44,8 @@ use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableSnapshot; use opendal::Operator; -use rand::thread_rng; use rand::Rng; +use rand::thread_rng; use crate::storages::fuse::operations::mutation::segments_compact_mutator::CompactSegmentTestFixture; diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index 83fc87e974878..a533ff7314ac3 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -18,12 +18,6 @@ use std::sync::Arc; use chrono::Utc; use databend_common_base::base::tokio; use databend_common_exception::Result; -use databend_common_expression::type_check::check; -use databend_common_expression::types::number::Int32Type; -use databend_common_expression::types::number::NumberScalar; -use databend_common_expression::types::DataType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; use databend_common_expression::BlockThresholds; use databend_common_expression::Column; use databend_common_expression::DataBlock; @@ -36,32 +30,38 @@ use databend_common_expression::Scalar; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; -use databend_common_functions::aggregates::eval_aggr; +use databend_common_expression::type_check::check; +use databend_common_expression::types::DataType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::number::Int32Type; +use databend_common_expression::types::number::NumberScalar; use databend_common_functions::BUILTIN_FUNCTIONS; -use databend_common_sql::evaluator::BlockOperator; +use databend_common_functions::aggregates::eval_aggr; use databend_common_sql::ApproxDistinctColumns; +use databend_common_sql::evaluator::BlockOperator; +use databend_common_storages_fuse::FuseStorageFormat; use databend_common_storages_fuse::io::build_column_hlls; -use databend_common_storages_fuse::statistics::reducers::reduce_block_metas; -use databend_common_storages_fuse::statistics::Trim; use databend_common_storages_fuse::statistics::END_OF_UNICODE_RANGE; use databend_common_storages_fuse::statistics::STATS_STRING_PREFIX_LEN; -use databend_common_storages_fuse::FuseStorageFormat; +use databend_common_storages_fuse::statistics::Trim; +use databend_common_storages_fuse::statistics::reducers::reduce_block_metas; use databend_query::storages::fuse::io::TableMetaLocationGenerator; -use databend_query::storages::fuse::statistics::gen_columns_statistics; -use databend_query::storages::fuse::statistics::reducers; use databend_query::storages::fuse::statistics::ClusterStatsGenerator; use databend_query::storages::fuse::statistics::RowOrientedSegmentBuilder; +use databend_query::storages::fuse::statistics::gen_columns_statistics; +use databend_query::storages::fuse::statistics::reducers; use databend_query::test_kits::*; use databend_storages_common_index::RangeIndex; -use databend_storages_common_table_meta::meta::column_oriented_segment::SegmentBuilder; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; use databend_storages_common_table_meta::meta::ColumnStatistics; use databend_storages_common_table_meta::meta::Compression; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::VirtualColumnMeta; +use databend_storages_common_table_meta::meta::column_oriented_segment::SegmentBuilder; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::encode_column_hll; use opendal::Operator; use rand::Rng; @@ -822,10 +822,10 @@ fn test_reduce_block_meta() -> Result<()> { let mut acc_file_size = 0; let mut acc_bloom_filter_index_size = 0; for _ in 0..size { - let row_count = rng.gen::() / size; - let block_size = rng.gen::() / size; - let file_size = rng.gen::() / size; - let bloom_filter_index_size = rng.gen::() / size; + let row_count = rng.r#gen::() / size; + let block_size = rng.r#gen::() / size; + let file_size = rng.r#gen::() / size; + let bloom_filter_index_size = rng.r#gen::() / size; acc_row_count += row_count; acc_block_size += block_size; acc_file_size += file_size; diff --git a/src/query/sql/src/planner/binder/copy_into_location.rs b/src/query/sql/src/planner/binder/copy_into_location.rs index a6dd9e8d83c7a..0d91877404b9a 100644 --- a/src/query/sql/src/planner/binder/copy_into_location.rs +++ b/src/query/sql/src/planner/binder/copy_into_location.rs @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -use databend_common_ast::ast::quote::display_ident; use databend_common_ast::ast::CopyIntoLocationSource; use databend_common_ast::ast::CopyIntoLocationStmt; use databend_common_ast::ast::Statement; +use databend_common_ast::ast::quote::display_ident; use databend_common_ast::parser::parse_sql; use databend_common_ast::parser::tokenize_sql; use databend_common_exception::ErrorCode; @@ -24,11 +24,11 @@ use databend_common_storage::init_stage_operator; use databend_storages_common_stage::CopyIntoLocationInfo; use opendal::ErrorKind; -use crate::binder::copy_into_table::resolve_file_location; +use crate::BindContext; use crate::binder::Binder; +use crate::binder::copy_into_table::resolve_file_location; use crate::plans::CopyIntoLocationPlan; use crate::plans::Plan; -use crate::BindContext; impl Binder { #[async_backtrace::framed] diff --git a/src/query/sql/src/planner/binder/ddl/table.rs b/src/query/sql/src/planner/binder/ddl/table.rs index 88bb1707a82a8..ddff4c2fc65d9 100644 --- a/src/query/sql/src/planner/binder/ddl/table.rs +++ b/src/query/sql/src/planner/binder/ddl/table.rs @@ -70,9 +70,6 @@ use databend_common_catalog::table::CompactionLimits; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::infer_schema_type; -use databend_common_expression::infer_table_schema; -use databend_common_expression::types::DataType; use databend_common_expression::AutoIncrementExpr; use databend_common_expression::ComputedExpr; use databend_common_expression::DataField; @@ -82,6 +79,9 @@ use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::TableSchemaRefExt; +use databend_common_expression::infer_schema_type; +use databend_common_expression::infer_table_schema; +use databend_common_expression::types::DataType; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_license::license::Feature; use databend_common_license::license_manager::LicenseManagerSwitch; @@ -95,8 +95,6 @@ use databend_common_storage::check_operator; use databend_common_storage::init_operator; use databend_common_storages_basic::view_table::QUERY; use databend_common_storages_basic::view_table::VIEW_ENGINE; -use databend_storages_common_table_meta::table::is_reserved_opt_key; -use databend_storages_common_table_meta::table::TableCompression; use databend_storages_common_table_meta::table::OPT_KEY_CLUSTER_TYPE; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; use databend_storages_common_table_meta::table::OPT_KEY_ENGINE_META; @@ -105,22 +103,28 @@ use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_PREFIX; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_ATTACHED_DATA_URI; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_COMPRESSION; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; +use databend_storages_common_table_meta::table::TableCompression; +use databend_storages_common_table_meta::table::is_reserved_opt_key; use derive_visitor::DriveMut; use log::debug; use opendal::Operator; -use crate::binder::get_storage_params_from_options; -use crate::binder::parse_storage_params_from_uri; -use crate::binder::scalar::ScalarBinder; +use crate::BindContext; +use crate::DefaultExprBinder; +use crate::Planner; +use crate::SelectBuilder; use crate::binder::Binder; use crate::binder::ColumnBindingBuilder; use crate::binder::ConstraintExprBinder; use crate::binder::Visibility; +use crate::binder::get_storage_params_from_options; +use crate::binder::parse_storage_params_from_uri; +use crate::binder::scalar::ScalarBinder; use crate::optimizer::ir::SExpr; use crate::parse_computed_expr_to_string; +use crate::planner::semantic::IdentifierNormalizer; use crate::planner::semantic::normalize_identifier; use crate::planner::semantic::resolve_type_name; -use crate::planner::semantic::IdentifierNormalizer; use crate::plans::AddColumnOption; use crate::plans::AddTableColumnPlan; use crate::plans::AddTableConstraintPlan; @@ -162,10 +166,6 @@ use crate::plans::VacuumDropTablePlan; use crate::plans::VacuumTableOption; use crate::plans::VacuumTablePlan; use crate::plans::VacuumTemporaryFilesPlan; -use crate::BindContext; -use crate::DefaultExprBinder; -use crate::Planner; -use crate::SelectBuilder; pub(in crate::planner::binder) struct AnalyzeCreateTableResult { pub(in crate::planner::binder) schema: TableSchemaRef, @@ -604,7 +604,7 @@ impl Binder { Some(self.ctx.as_ref()), "when create TABLE with external location", ) - .await?; + .await?; // create a temporary op to check if params is correct let op = init_operator(&sp)?; @@ -738,12 +738,15 @@ impl Binder { // since we get it from table options location and connection when load table each time. // we do this in case we change this idea. storage_params = Some(sp); - (AnalyzeCreateTableResult { - schema: Arc::new(table_schema), - field_comments: vec![], - table_indexes: None, - table_constraints: None, - }, as_query_plan) + ( + AnalyzeCreateTableResult { + schema: Arc::new(table_schema), + field_comments: vec![], + table_indexes: None, + table_constraints: None, + }, + as_query_plan, + ) } Engine::Delta => { let sp = @@ -755,12 +758,15 @@ impl Binder { // we do this in case we change this idea. storage_params = Some(sp); engine_options.insert(OPT_KEY_ENGINE_META.to_lowercase().to_string(), meta); - (AnalyzeCreateTableResult { - schema: Arc::new(table_schema), - field_comments: vec![], - table_indexes: None, - table_constraints: None, - }, as_query_plan) + ( + AnalyzeCreateTableResult { + schema: Arc::new(table_schema), + field_comments: vec![], + table_indexes: None, + table_constraints: None, + }, + as_query_plan, + ) } _ => Err(ErrorCode::BadArguments( "Incorrect CREATE query: required list of column descriptions or AS section or SELECT or ICEBERG/DELTA table engine", @@ -1174,7 +1180,9 @@ impl Binder { "Invalid number of arguments for attaching policy '{}' to '{}': \ expected at least 2 arguments (masked column + condition columns), \ got {} argument(s)", - name, table, columns.len() + name, + table, + columns.len() ))); } @@ -1345,7 +1353,9 @@ impl Binder { .get_settings() .get_enable_experimental_row_access_policy()? { - return Err(ErrorCode::Unimplemented("Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1")); + return Err(ErrorCode::Unimplemented( + "Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1", + )); } let columns = columns .iter() @@ -1369,7 +1379,9 @@ impl Binder { .get_settings() .get_enable_experimental_row_access_policy()? { - return Err(ErrorCode::Unimplemented("Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1")); + return Err(ErrorCode::Unimplemented( + "Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1", + )); } let policy = self.normalize_identifier(policy).name; Ok(Plan::DropTableRowAccessPolicy(Box::new( @@ -1388,7 +1400,9 @@ impl Binder { .get_settings() .get_enable_experimental_row_access_policy()? { - return Err(ErrorCode::Unimplemented("Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1")); + return Err(ErrorCode::Unimplemented( + "Experimental Row Access Policy is unstable and may have compatibility issues. To use it, set enable_experimental_row_access_policy=1", + )); } Ok(Plan::DropAllTableRowAccessPolicies(Box::new( DropAllTableRowAccessPoliciesPlan { diff --git a/src/query/sql/src/planner/binder/location.rs b/src/query/sql/src/planner/binder/location.rs index db0bb4a3c6f90..d36d81a2158a9 100644 --- a/src/query/sql/src/planner/binder/location.rs +++ b/src/query/sql/src/planner/binder/location.rs @@ -28,6 +28,9 @@ use databend_common_catalog::table_context::TableContext; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_meta_app::storage::S3StorageClass; +use databend_common_meta_app::storage::STORAGE_GCS_DEFAULT_ENDPOINT; +use databend_common_meta_app::storage::STORAGE_IPFS_DEFAULT_ENDPOINT; +use databend_common_meta_app::storage::STORAGE_S3_DEFAULT_ENDPOINT; use databend_common_meta_app::storage::StorageAzblobConfig; use databend_common_meta_app::storage::StorageCosConfig; use databend_common_meta_app::storage::StorageFsConfig; @@ -40,15 +43,12 @@ use databend_common_meta_app::storage::StorageOssConfig; use databend_common_meta_app::storage::StorageParams; use databend_common_meta_app::storage::StorageS3Config; use databend_common_meta_app::storage::StorageWebhdfsConfig; -use databend_common_meta_app::storage::STORAGE_GCS_DEFAULT_ENDPOINT; -use databend_common_meta_app::storage::STORAGE_IPFS_DEFAULT_ENDPOINT; -use databend_common_meta_app::storage::STORAGE_S3_DEFAULT_ENDPOINT; use databend_common_storage::STDIN_FD; -use log::info; use log::LevelFilter; +use log::info; +use opendal::Scheme; use opendal::raw::normalize_path; use opendal::raw::normalize_root; -use opendal::Scheme; /// secure_omission will fix omitted endpoint url schemes into 'https://' #[inline] diff --git a/src/query/storages/basic/src/result_cache/read/reader.rs b/src/query/storages/basic/src/result_cache/read/reader.rs index 30e84fb79758b..a6a45fc0c811d 100644 --- a/src/query/storages/basic/src/result_cache/read/reader.rs +++ b/src/query/storages/basic/src/result_cache/read/reader.rs @@ -23,8 +23,8 @@ use databend_common_storage::DataOperator; use opendal::Operator; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; -use crate::result_cache::common::gen_result_cache_meta_key; use crate::result_cache::common::ResultCacheValue; +use crate::result_cache::common::gen_result_cache_meta_key; use crate::result_cache::meta_manager::ResultCacheMetaManager; pub struct ResultCacheReader { diff --git a/src/query/storages/common/io/src/merge_io_reader.rs b/src/query/storages/common/io/src/merge_io_reader.rs index 13f5a0353938e..9c9caf4e63dc6 100644 --- a/src/query/storages/common/io/src/merge_io_reader.rs +++ b/src/query/storages/common/io/src/merge_io_reader.rs @@ -25,9 +25,9 @@ use databend_common_metrics::storage::*; use futures::future::try_join_all; use opendal::Operator; -use crate::merge_io_result::OwnerMemory; use crate::MergeIOReadResult; use crate::ReadSettings; +use crate::merge_io_result::OwnerMemory; pub struct MergeIOReader {} diff --git a/src/query/storages/common/session/src/temp_table.rs b/src/query/storages/common/session/src/temp_table.rs index 4c4948994fd83..ec8989d0ce154 100644 --- a/src/query/storages/common/session/src/temp_table.rs +++ b/src/query/storages/common/session/src/temp_table.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::collections::HashMap; +use std::collections::hash_map::Entry; use std::sync::Arc; use databend_common_exception::ErrorCode; @@ -41,14 +41,14 @@ use databend_common_meta_app::schema::UpdateTempTableReq; use databend_common_meta_app::schema::UpsertTableOptionReply; use databend_common_meta_app::schema::UpsertTableOptionReq; use databend_common_meta_types::SeqV; -use databend_common_storage::init_operator; use databend_common_storage::DataOperator; -use databend_storages_common_blocks::memory::InMemoryDataKey; +use databend_common_storage::init_operator; use databend_storages_common_blocks::memory::IN_MEMORY_DATA; +use databend_storages_common_blocks::memory::InMemoryDataKey; use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::table::OPT_KEY_DATABASE_ID; -use databend_storages_common_table_meta::table_id_ranges::is_temp_table_id; use databend_storages_common_table_meta::table_id_ranges::TEMP_TBL_ID_BEGIN; +use databend_storages_common_table_meta::table_id_ranges::is_temp_table_id; use log::info; use opendal::Operator; use parking_lot::Mutex; @@ -467,8 +467,8 @@ pub async fn drop_all_temp_tables( let num_mem_table = mem_tbl_ids.len(); info!( - "[TEMP TABLE] session={user_name_session_id} starting cleanup, reason = {reason}, {} fuse table, {} mem table." - , num_fuse_table, num_mem_table + "[TEMP TABLE] session={user_name_session_id} starting cleanup, reason = {reason}, {} fuse table, {} mem table.", + num_fuse_table, num_mem_table ); // Clean up each fuse table directory individually with the correct operator diff --git a/src/query/storages/delta/src/table.rs b/src/query/storages/delta/src/table.rs index af8f0421422ae..1f1ca5306c05b 100644 --- a/src/query/storages/delta/src/table.rs +++ b/src/query/storages/delta/src/table.rs @@ -50,8 +50,8 @@ use databend_common_storages_parquet::ParquetSourceType; use databend_storages_common_pruner::partition_prunner::FetchPartitionScalars; use databend_storages_common_pruner::partition_prunner::PartitionPruner; use databend_storages_common_table_meta::table::OPT_KEY_ENGINE_META; -use deltalake::kernel::Add; use deltalake::DeltaTableBuilder; +use deltalake::kernel::Add; use object_store_opendal::OpendalStore; use serde::Deserialize; use serde::Serialize; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index dc8c9ea11d75b..36a0a0ede9bec 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -34,7 +34,6 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::plan::ReclusterParts; use databend_common_catalog::plan::StreamColumn; -use databend_common_catalog::table::is_temp_table_by_table_info; use databend_common_catalog::table::Bound; use databend_common_catalog::table::ColumnRange; use databend_common_catalog::table::ColumnStatisticsProvider; @@ -42,20 +41,21 @@ use databend_common_catalog::table::CompactionLimits; use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::NavigationDescriptor; use databend_common_catalog::table::TimeNavigation; +use databend_common_catalog::table::is_temp_table_by_table_info; use databend_common_catalog::table_context::TableContext; use databend_common_config::GlobalConfig; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::types::DataType; use databend_common_expression::BlockThresholds; use databend_common_expression::ColumnId; -use databend_common_expression::RemoteExpr; -use databend_common_expression::TableField; -use databend_common_expression::TableSchema; use databend_common_expression::ORIGIN_BLOCK_ID_COL_NAME; use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COL_NAME; use databend_common_expression::ORIGIN_VERSION_COL_NAME; +use databend_common_expression::RemoteExpr; +use databend_common_expression::TableField; +use databend_common_expression::TableSchema; use databend_common_expression::VECTOR_SCORE_COLUMN_ID; +use databend_common_expression::types::DataType; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_COMPRESSED_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_PER_SEGMENT; @@ -66,22 +66,20 @@ use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_meta_app::schema::UpdateStreamMetaReq; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; -use databend_common_meta_app::storage::set_s3_storage_class; use databend_common_meta_app::storage::S3StorageClass; use databend_common_meta_app::storage::StorageParams; +use databend_common_meta_app::storage::set_s3_storage_class; use databend_common_pipeline::core::Pipeline; +use databend_common_sql::ApproxDistinctColumns; +use databend_common_sql::BloomIndexColumns; use databend_common_sql::binder::STREAM_COLUMN_FACTORY; use databend_common_sql::parse_cluster_keys; use databend_common_sql::plans::TruncateMode; -use databend_common_sql::ApproxDistinctColumns; -use databend_common_sql::BloomIndexColumns; -use databend_common_storage::init_operator; use databend_common_storage::StorageMetrics; use databend_common_storage::StorageMetricsLayer; +use databend_common_storage::init_operator; use databend_storages_common_cache::LoadParams; use databend_storages_common_io::Files; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::meta::ClusterKey; use databend_storages_common_table_meta::meta::CompactSegmentInfo; use databend_storages_common_table_meta::meta::SnapshotId; @@ -89,9 +87,10 @@ use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; use databend_storages_common_table_meta::meta::Versioned; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::parse_storage_prefix; use databend_storages_common_table_meta::table::ChangeType; use databend_storages_common_table_meta::table::ClusterType; -use databend_storages_common_table_meta::table::TableCompression; use databend_storages_common_table_meta::table::OPT_KEY_APPROX_DISTINCT_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_CHANGE_TRACKING; @@ -103,6 +102,7 @@ use databend_storages_common_table_meta::table::OPT_KEY_SNAPSHOT_LOCATION_FIXED_ use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_FORMAT; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_ATTACHED_DATA_URI; use databend_storages_common_table_meta::table::OPT_KEY_TABLE_COMPRESSION; +use databend_storages_common_table_meta::table::TableCompression; use futures_util::TryStreamExt; use itertools::Itertools; use log::info; @@ -110,23 +110,6 @@ use log::warn; use opendal::Operator; use parking_lot::Mutex; -use crate::fuse_column::FuseTableColumnStatisticsProvider; -use crate::fuse_type::FuseTableType; -use crate::io::MetaReaders; -use crate::io::SegmentsIO; -use crate::io::TableMetaLocationGenerator; -use crate::io::TableSnapshotReader; -use crate::io::WriteSettings; -use crate::operations::load_last_snapshot_hint; -use crate::operations::ChangesDesc; -use crate::operations::SnapshotHint; -use crate::statistics::reduce_block_statistics; -use crate::statistics::Trim; -use crate::FuseSegmentFormat; -use crate::FuseStorageFormat; -use crate::NavigationPoint; -use crate::Table; -use crate::TableStatistics; use crate::DEFAULT_ROW_PER_PAGE; use crate::FUSE_OPT_KEY_ATTACH_COLUMN_IDS; use crate::FUSE_OPT_KEY_BLOCK_IN_MEM_SIZE_THRESHOLD; @@ -137,6 +120,23 @@ use crate::FUSE_OPT_KEY_ENABLE_PARQUET_DICTIONARY; use crate::FUSE_OPT_KEY_FILE_SIZE; use crate::FUSE_OPT_KEY_ROW_PER_BLOCK; use crate::FUSE_OPT_KEY_ROW_PER_PAGE; +use crate::FuseSegmentFormat; +use crate::FuseStorageFormat; +use crate::NavigationPoint; +use crate::Table; +use crate::TableStatistics; +use crate::fuse_column::FuseTableColumnStatisticsProvider; +use crate::fuse_type::FuseTableType; +use crate::io::MetaReaders; +use crate::io::SegmentsIO; +use crate::io::TableMetaLocationGenerator; +use crate::io::TableSnapshotReader; +use crate::io::WriteSettings; +use crate::operations::ChangesDesc; +use crate::operations::SnapshotHint; +use crate::operations::load_last_snapshot_hint; +use crate::statistics::Trim; +use crate::statistics::reduce_block_statistics; #[derive(Clone)] pub struct FuseTable { @@ -680,15 +680,16 @@ impl FuseTable { info!( "extracting snapshot location of table {} with id {:?} from the last snapshot hint file.", - table_info.desc, - table_info.ident + table_info.desc, table_info.ident ); let snapshot_hint = Self::refresh_schema_from_hint(operator, storage_prefix)?; info!( "extracted snapshot location [{:?}] of table {}, with id {:?} from the last snapshot hint file.", - snapshot_hint.as_ref().map(|(hint, _)| &hint.snapshot_full_path), + snapshot_hint + .as_ref() + .map(|(hint, _)| &hint.snapshot_full_path), table_info.desc, table_info.ident ); @@ -953,11 +954,7 @@ impl Table for FuseTable { } Err(e) if e.code() == ErrorCode::TABLE_HISTORICAL_DATA_NOT_FOUND => { warn!("navigate failed: {:?}", e); - if dry_run { - Ok(Some(vec![])) - } else { - Ok(None) - } + if dry_run { Ok(Some(vec![])) } else { Ok(None) } } Err(e) => Err(e), } diff --git a/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs b/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs index ead67dd81e7fe..fb9f24122a24d 100644 --- a/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs +++ b/src/query/storages/fuse/src/io/read/agg_index/agg_index_reader.rs @@ -18,14 +18,14 @@ use databend_common_catalog::plan::AggIndexInfo; use databend_common_catalog::plan::AggIndexMeta; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::types::BooleanType; -use databend_common_expression::types::DataType; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::Evaluator; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; use databend_common_expression::Scalar; +use databend_common_expression::types::BooleanType; +use databend_common_expression::types::DataType; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_native.rs b/src/query/storages/fuse/src/io/read/block/block_reader_native.rs index b528ebf0ee1b1..5d8b4c5c2a77c 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_native.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_native.rs @@ -27,9 +27,9 @@ use databend_common_expression::ColumnId; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_metrics::storage::*; -use databend_common_native::read::reader::read_meta_async; -use databend_common_native::read::reader::NativeReader; use databend_common_native::read::NativeReadBuf; +use databend_common_native::read::reader::NativeReader; +use databend_common_native::read::reader::read_meta_async; use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::meta::ColumnMeta; use opendal::Operator; diff --git a/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs b/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs index f583ae24802fd..1f8ca52c19590 100644 --- a/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs +++ b/src/query/storages/fuse/src/io/read/block/block_reader_native_deserialize.rs @@ -24,8 +24,8 @@ use databend_common_expression::DataBlock; use databend_common_expression::TableField; use databend_common_expression::Value; use databend_common_metrics::storage::*; -use databend_common_native::read::reader::NativeReader; use databend_common_native::read::ColumnIter; +use databend_common_native::read::reader::NativeReader; use databend_common_storage::ColumnNode; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; @@ -35,9 +35,9 @@ use opendal::Buffer; use super::block_reader_deserialize::DeserializedArray; use super::block_reader_deserialize::FieldDeserializationContext; -use crate::io::read::block::block_reader_merge_io::DataItem; use crate::io::BlockReader; use crate::io::NativeReaderExt; +use crate::io::read::block::block_reader_merge_io::DataItem; impl BlockReader { /// Deserialize column chunks data from native format to DataBlock. diff --git a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs index 2920fe1c9548d..afce67c444fc1 100644 --- a/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs +++ b/src/query/storages/fuse/src/io/read/bloom/column_filter_reader.rs @@ -27,9 +27,9 @@ use databend_storages_common_index::filters::Filter; use databend_storages_common_index::filters::FilterImpl; use databend_storages_common_table_meta::meta::SingleColumnMeta; use opendal::Operator; +use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ParquetRecordBatchReader; use parquet::arrow::parquet_to_arrow_field_levels; -use parquet::arrow::ProjectionMask; use parquet::basic::Compression as ParquetCompression; use parquet::schema::types::SchemaDescPtr; diff --git a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs index 4904da1fbe069..1cebb5bbb04fd 100644 --- a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs +++ b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_loader.rs @@ -40,14 +40,14 @@ use databend_storages_common_table_meta::meta::SingleColumnMeta; use databend_storages_common_table_meta::table::TableCompression; use log::info; use opendal::Operator; -use parquet::arrow::arrow_reader::ParquetRecordBatchReader; -use parquet::arrow::parquet_to_arrow_field_levels; use parquet::arrow::ArrowSchemaConverter; use parquet::arrow::ProjectionMask; +use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::parquet_to_arrow_field_levels; use crate::index::InvertedIndexFile; -use crate::io::read::block::parquet::RowGroupImplBuilder; use crate::io::MetaReaders; +use crate::io::read::block::parquet::RowGroupImplBuilder; const INDEX_COLUMN_NAMES: [&str; 8] = [ "fast", diff --git a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs index 38a17e30fa2cd..ef0514dd1bc9d 100644 --- a/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs +++ b/src/query/storages/fuse/src/io/read/inverted_index/inverted_index_reader.rs @@ -24,6 +24,7 @@ use databend_common_metrics::storage::metrics_inc_block_inverted_index_search_mi use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::meta::SingleColumnMeta; use opendal::Operator; +use tantivy::Index; use tantivy::collector::DocSetCollector; use tantivy::collector::TopDocs; use tantivy::directory::FileSlice; @@ -36,7 +37,6 @@ use tantivy::query::QueryClone; use tantivy::schema::IndexRecordOption; use tantivy::termdict::TermInfoStore; use tantivy::tokenizer::TokenizerManager; -use tantivy::Index; use tantivy_common::BinarySerializable; use tantivy_fst::raw::Fst; diff --git a/src/query/storages/fuse/src/io/read/segment_reader.rs b/src/query/storages/fuse/src/io/read/segment_reader.rs index d024b267e5903..a193de7ef2750 100644 --- a/src/query/storages/fuse/src/io/read/segment_reader.rs +++ b/src/query/storages/fuse/src/io/read/segment_reader.rs @@ -19,14 +19,14 @@ use databend_common_exception::Result; use databend_common_expression::TableSchemaRef; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; -use databend_storages_common_table_meta::meta::column_oriented_segment::deserialize_column_oriented_segment; +use databend_storages_common_table_meta::meta::CompactSegmentInfo; +use databend_storages_common_table_meta::meta::Location; +use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::column_oriented_segment::AbstractSegment; use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment; use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegmentBuilder; use databend_storages_common_table_meta::meta::column_oriented_segment::SegmentBuilder; -use databend_storages_common_table_meta::meta::CompactSegmentInfo; -use databend_storages_common_table_meta::meta::Location; -use databend_storages_common_table_meta::meta::SegmentInfo; +use databend_storages_common_table_meta::meta::column_oriented_segment::deserialize_column_oriented_segment; use opendal::Operator; use super::meta::bytes_reader; diff --git a/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs index ea8378f38a130..6545866053bc4 100644 --- a/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs +++ b/src/query/storages/fuse/src/io/read/vector_index/vector_index_loader.rs @@ -36,14 +36,14 @@ use databend_storages_common_io::MergeIOReader; use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; -use parquet::arrow::arrow_reader::ParquetRecordBatchReader; -use parquet::arrow::parquet_to_arrow_field_levels; use parquet::arrow::ArrowSchemaConverter; use parquet::arrow::ProjectionMask; +use parquet::arrow::arrow_reader::ParquetRecordBatchReader; +use parquet::arrow::parquet_to_arrow_field_levels; use crate::index::VectorIndexFile; -use crate::io::read::block::parquet::RowGroupImplBuilder; use crate::io::MetaReaders; +use crate::io::read::block::parquet::RowGroupImplBuilder; #[async_trait::async_trait] trait InRuntime diff --git a/src/query/storages/fuse/src/io/write/block_writer.rs b/src/query/storages/fuse/src/io/write/block_writer.rs index 5a03e64978c34..dfb7a73e8d897 100644 --- a/src/query/storages/fuse/src/io/write/block_writer.rs +++ b/src/query/storages/fuse/src/io/write/block_writer.rs @@ -12,16 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::collections::HashMap; +use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Instant; use chrono::Utc; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::local_block_meta_serde; use databend_common_expression::BlockMetaInfo; use databend_common_expression::Column; use databend_common_expression::ColumnId; @@ -29,6 +28,7 @@ use databend_common_expression::DataBlock; use databend_common_expression::FieldIndex; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; +use databend_common_expression::local_block_meta_serde; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_metrics::storage::metrics_inc_block_index_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_index_write_nums; @@ -46,7 +46,6 @@ use databend_common_metrics::storage::metrics_inc_block_write_nums; use databend_common_native::write::NativeWriter; use databend_storages_common_blocks::blocks_to_parquet_with_stats; use databend_storages_common_index::NgramArgs; -use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::BlockHLLState; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; @@ -54,23 +53,24 @@ use databend_storages_common_table_meta::meta::ColumnMeta; use databend_storages_common_table_meta::meta::ExtendedBlockMeta; use databend_storages_common_table_meta::meta::StatisticsOfColumns; use databend_storages_common_table_meta::meta::TableMetaTimestamps; +use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; +use crate::FuseStorageFormat; +use crate::io::BloomIndexState; +use crate::io::TableMetaLocationGenerator; use crate::io::build_column_hlls; -use crate::io::write::virtual_column_builder::VirtualColumnBuilder; -use crate::io::write::virtual_column_builder::VirtualColumnState; use crate::io::write::InvertedIndexBuilder; use crate::io::write::InvertedIndexState; use crate::io::write::VectorIndexBuilder; use crate::io::write::VectorIndexState; use crate::io::write::WriteSettings; -use crate::io::BloomIndexState; -use crate::io::TableMetaLocationGenerator; +use crate::io::write::virtual_column_builder::VirtualColumnBuilder; +use crate::io::write::virtual_column_builder::VirtualColumnState; use crate::operations::column_parquet_metas; -use crate::statistics::gen_columns_statistics; use crate::statistics::ClusterStatsGenerator; -use crate::FuseStorageFormat; +use crate::statistics::gen_columns_statistics; pub fn serialize_block( write_settings: &WriteSettings, diff --git a/src/query/storages/fuse/src/io/write/bloom_index_writer.rs b/src/query/storages/fuse/src/io/write/bloom_index_writer.rs index 34f9e192143f4..a7cc4642d7ce6 100644 --- a/src/query/storages/fuse/src/io/write/bloom_index_writer.rs +++ b/src/query/storages/fuse/src/io/write/bloom_index_writer.rs @@ -26,19 +26,19 @@ use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; use databend_storages_common_blocks::blocks_to_parquet; -use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; use databend_storages_common_index::NgramArgs; +use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_io::ReadSettings; -use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::Versioned; +use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use databend_storages_common_table_meta::table::TableCompression; use opendal::Operator; -use crate::io::BlockReader; use crate::FuseStorageFormat; +use crate::io::BlockReader; #[derive(Debug)] pub struct BloomIndexState { diff --git a/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs b/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs index 3e58be6651aa9..252f89133597e 100644 --- a/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs +++ b/src/query/storages/fuse/src/operations/analyze/collect_ndv_source.rs @@ -43,8 +43,6 @@ use databend_storages_common_cache::LoadParams; use databend_storages_common_cache::SegmentStatistics; use databend_storages_common_index::RangeIndex; use databend_storages_common_io::ReadSettings; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::encode_column_hll; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::Location; @@ -52,20 +50,22 @@ use databend_storages_common_table_meta::meta::RawBlockHLL; use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::Versioned; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::encode_column_hll; use opendal::Operator; -use crate::io::build_column_hlls; -use crate::io::read::meta::SegmentStatsReader; +use crate::FuseLazyPartInfo; +use crate::FuseStorageFormat; +use crate::FuseTable; use crate::io::BlockReader; use crate::io::CachedMetaWriter; use crate::io::CompactSegmentInfoReader; use crate::io::MetaReaders; use crate::io::TableMetaLocationGenerator; +use crate::io::build_column_hlls; +use crate::io::read::meta::SegmentStatsReader; use crate::operations::acquire_task_permit; use crate::operations::analyze::AnalyzeNDVMeta; -use crate::FuseLazyPartInfo; -use crate::FuseStorageFormat; -use crate::FuseTable; struct SegmentWithHLL { segment_location: Location, @@ -296,7 +296,7 @@ impl Processor for AnalyzeCollectNDVSource { return Err(ErrorCode::Internal(format!( "Invalid state reached in sync process: {:?}. This is a bug.", state - ))) + ))); } } Ok(()) @@ -416,7 +416,7 @@ impl Processor for AnalyzeCollectNDVSource { return Err(ErrorCode::Internal(format!( "Invalid state reached in async process: {:?}. This is a bug.", state - ))) + ))); } } Ok(()) diff --git a/src/query/storages/fuse/src/operations/commit.rs b/src/query/storages/fuse/src/operations/commit.rs index ac3b386254071..9a648611ce1f1 100644 --- a/src/query/storages/fuse/src/operations/commit.rs +++ b/src/query/storages/fuse/src/operations/commit.rs @@ -42,8 +42,6 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CachedObject; use databend_storages_common_cache::LoadParams; -use databend_storages_common_table_meta::meta::decode_column_hll; -use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::SegmentInfo; @@ -53,6 +51,8 @@ use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::TableSnapshot; use databend_storages_common_table_meta::meta::TableSnapshotStatistics; use databend_storages_common_table_meta::meta::Versioned; +use databend_storages_common_table_meta::meta::decode_column_hll; +use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::readers::snapshot_reader::TableSnapshotAccessor; use databend_storages_common_table_meta::table::OPT_KEY_LEGACY_SNAPSHOT_LOC; use databend_storages_common_table_meta::table::OPT_KEY_SNAPSHOT_LOCATION; @@ -61,21 +61,21 @@ use log::debug; use log::info; use opendal::Operator; +use super::TableMutationAggregator; use super::decorate_snapshot; use super::new_serialize_segment_processor; -use super::TableMutationAggregator; +use crate::FuseTable; use crate::io::MetaReaders; use crate::io::MetaWriter; use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; +use crate::operations::SnapshotHintWriter; use crate::operations::common::AppendGenerator; use crate::operations::common::CommitSink; use crate::operations::common::ConflictResolveContext; use crate::operations::set_backoff; -use crate::operations::SnapshotHintWriter; -use crate::statistics::merge_statistics; use crate::statistics::TableStatsGenerator; -use crate::FuseTable; +use crate::statistics::merge_statistics; impl FuseTable { #[async_backtrace::framed] diff --git a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs index afa4c66c34ba6..5b19b543bc328 100644 --- a/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs +++ b/src/query/storages/fuse/src/operations/common/processors/sink_commit.rs @@ -21,8 +21,8 @@ use std::sync::Arc; use std::time::Duration; use std::time::Instant; -use backoff::backoff::Backoff; use backoff::ExponentialBackoff; +use backoff::backoff::Backoff; use databend_common_base::base::GlobalInstance; use databend_common_catalog::table::Table; use databend_common_catalog::table::TableExt; @@ -56,20 +56,20 @@ use log::error; use log::info; use opendal::Operator; +use crate::FUSE_OPT_KEY_ENABLE_AUTO_ANALYZE; +use crate::FUSE_OPT_KEY_ENABLE_AUTO_VACUUM; +use crate::FuseTable; use crate::io::TableMetaLocationGenerator; -use crate::operations::set_backoff; -use crate::operations::set_compaction_num_block_hint; -use crate::operations::vacuum::vacuum_table; use crate::operations::AppendGenerator; use crate::operations::CommitMeta; use crate::operations::MutationGenerator; use crate::operations::SnapshotGenerator; use crate::operations::TransformMergeCommitMeta; use crate::operations::TruncateGenerator; +use crate::operations::set_backoff; +use crate::operations::set_compaction_num_block_hint; +use crate::operations::vacuum::vacuum_table; use crate::statistics::TableStatsGenerator; -use crate::FuseTable; -use crate::FUSE_OPT_KEY_ENABLE_AUTO_ANALYZE; -use crate::FUSE_OPT_KEY_ENABLE_AUTO_VACUUM; enum State { None, diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs b/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs index 28c0be2933a65..ce67c53bc9bf1 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs @@ -35,13 +35,13 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_storage::MutationStatus; use opendal::Operator; +use crate::FuseTable; use crate::io::BlockSerialization; use crate::io::BlockWriter; use crate::io::StreamBlockBuilder; use crate::io::StreamBlockProperties; use crate::operations::MutationLogEntry; use crate::operations::MutationLogs; -use crate::FuseTable; enum State { Consume, diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs b/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs index 26aee7148135e..767a4a429a412 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_mutation_aggregator.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::Entry; use std::collections::BTreeMap; use std::collections::HashMap; +use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Instant; @@ -31,7 +31,6 @@ use databend_common_expression::VirtualDataSchema; use databend_common_pipeline_transforms::processors::AsyncAccumulatingTransform; use databend_common_sql::executor::physical_plans::MutationKind; use databend_storages_common_cache::SegmentStatistics; -use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::BlockHLLState; @@ -45,6 +44,7 @@ use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::Versioned; use databend_storages_common_table_meta::meta::VirtualBlockMeta; +use databend_storages_common_table_meta::meta::merge_column_hll_mut; use databend_storages_common_table_meta::table::ClusterType; use itertools::Itertools; use log::debug; @@ -52,10 +52,11 @@ use log::info; use log::warn; use opendal::Operator; -use crate::io::read::read_segment_stats; +use crate::FuseTable; use crate::io::CachedMetaWriter; use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; +use crate::io::read::read_segment_stats; use crate::operations::common::CommitMeta; use crate::operations::common::ConflictResolveContext; use crate::operations::common::MutationLogEntry; @@ -64,11 +65,10 @@ use crate::operations::common::SnapshotChanges; use crate::operations::common::SnapshotMerged; use crate::operations::mutation::BlockIndex; use crate::operations::mutation::SegmentIndex; +use crate::statistics::VirtualColumnAccumulator; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::reducers::reduce_block_metas; use crate::statistics::sort_by_cluster_stats; -use crate::statistics::VirtualColumnAccumulator; -use crate::FuseTable; pub struct TableMutationAggregator { ctx: Arc, diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs index b4d59326a1803..926954639263a 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs @@ -38,19 +38,19 @@ use databend_storages_common_index::RangeIndex; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use opendal::Operator; -use crate::io::create_inverted_index_builders; +use crate::FuseTable; use crate::io::BlockBuilder; use crate::io::BlockSerialization; use crate::io::BlockWriter; use crate::io::VectorIndexBuilder; use crate::io::VirtualColumnBuilder; +use crate::io::create_inverted_index_builders; use crate::operations::common::BlockMetaIndex; use crate::operations::common::MutationLogEntry; use crate::operations::common::MutationLogs; use crate::operations::mutation::ClusterStatsGenType; use crate::operations::mutation::SerializeDataMeta; use crate::statistics::ClusterStatsGenerator; -use crate::FuseTable; #[allow(clippy::large_enum_variant)] enum State { diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs index fe10c06ea0c1d..5a62c459d28c6 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_segment.rs @@ -29,7 +29,6 @@ use databend_common_pipeline::core::InputPort; use databend_common_pipeline::core::OutputPort; use databend_common_pipeline::core::Processor; use databend_common_pipeline::core::ProcessorPtr; -use databend_storages_common_table_meta::meta::column_oriented_segment::*; use databend_storages_common_table_meta::meta::AdditionalStatsMeta; use databend_storages_common_table_meta::meta::BlockHLL; use databend_storages_common_table_meta::meta::ExtendedBlockMeta; @@ -38,17 +37,18 @@ use databend_storages_common_table_meta::meta::SegmentStatistics; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::meta::Versioned; use databend_storages_common_table_meta::meta::VirtualBlockMeta; +use databend_storages_common_table_meta::meta::column_oriented_segment::*; use log::info; use opendal::Operator; +use crate::FuseSegmentFormat; +use crate::FuseTable; use crate::io::TableMetaLocationGenerator; use crate::operations::common::MutationLogEntry; use crate::operations::common::MutationLogs; use crate::statistics::ColumnHLLAccumulator; use crate::statistics::RowOrientedSegmentBuilder; use crate::statistics::VirtualColumnAccumulator; -use crate::FuseSegmentFormat; -use crate::FuseTable; enum State { None, diff --git a/src/query/storages/fuse/src/operations/inverted_index.rs b/src/query/storages/fuse/src/operations/inverted_index.rs index 33a1924c01fd3..30bcacd5edae6 100644 --- a/src/query/storages/fuse/src/operations/inverted_index.rs +++ b/src/query/storages/fuse/src/operations/inverted_index.rs @@ -14,9 +14,9 @@ use std::collections::BTreeMap; use std::collections::VecDeque; +use std::sync::Arc; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; -use std::sync::Arc; use std::time::Instant; use async_trait::async_trait; @@ -47,13 +47,13 @@ use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::Location; use opendal::Operator; -use crate::io::write_data; +use crate::FuseStorageFormat; +use crate::FuseTable; use crate::io::BlockReader; use crate::io::InvertedIndexWriter; use crate::io::MetaReaders; use crate::io::TableMetaLocationGenerator; -use crate::FuseStorageFormat; -use crate::FuseTable; +use crate::io::write_data; impl FuseTable { // The big picture of refresh inverted index into pipeline: diff --git a/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs b/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs index 3bbc2e873264d..f38f9616de528 100644 --- a/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs +++ b/src/query/storages/fuse/src/operations/merge_into/mutator/matched_mutator.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; +use std::collections::hash_map::Entry; use std::sync::Arc; use std::time::Instant; @@ -22,20 +22,20 @@ use ahash::AHashMap; use databend_common_base::base::tokio::sync::Semaphore; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; +use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::build_origin_block_row_num; use databend_common_catalog::plan::gen_mutation_stream_meta; use databend_common_catalog::plan::split_prefix; use databend_common_catalog::plan::split_row_id; -use databend_common_catalog::plan::Projection; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::BlockMetaInfoDowncast; +use databend_common_expression::DataBlock; use databend_common_expression::types::DataType; use databend_common_expression::types::MutableBitmap; use databend_common_expression::types::NumberDataType; -use databend_common_expression::BlockMetaInfoDowncast; -use databend_common_expression::DataBlock; use databend_common_metrics::storage::*; use databend_common_sql::StreamContext; use databend_common_storage::MutationStatus; @@ -48,12 +48,14 @@ use itertools::Itertools; use log::info; use opendal::Operator; +use crate::FuseTable; use crate::io::BlockBuilder; use crate::io::BlockReader; use crate::io::BlockWriter; use crate::io::CompactSegmentInfoReader; use crate::io::MetaReaders; use crate::io::WriteSettings; +use crate::operations::BlockMetaIndex; use crate::operations::acquire_task_permit; use crate::operations::common::MutationLogEntry; use crate::operations::common::MutationLogs; @@ -61,8 +63,6 @@ use crate::operations::merge_into::processors::RowIdKind; use crate::operations::mutation::BlockIndex; use crate::operations::mutation::SegmentIndex; use crate::operations::read_block; -use crate::operations::BlockMetaIndex; -use crate::FuseTable; struct AggregationContext { data_accessor: Operator, @@ -322,8 +322,8 @@ impl MatchedAggregator { // the row_id is generated by block_id, not block_idx,reference to fill_internal_column_meta() let block_meta = segment_info.blocks[block_idx].clone(); - let update_modified_offsets = &item.1 .0; - let delete_modified_offsets = &item.1 .1; + let update_modified_offsets = &item.1.0; + let delete_modified_offsets = &item.1.1; let modified_offsets: HashSet = update_modified_offsets .union(delete_modified_offsets) .cloned() diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs index ab029f2ed6208..91c9fc4e1e9f3 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/block_compact_mutator.rs @@ -27,9 +27,9 @@ use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::is_stream_column_id; use databend_common_expression::BlockThresholds; use databend_common_expression::ColumnId; +use databend_common_expression::is_stream_column_id; use databend_common_metrics::storage::*; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::CompactSegmentInfo; @@ -38,8 +38,10 @@ use databend_storages_common_table_meta::meta::Statistics; use log::info; use opendal::Operator; -use crate::io::read::read_segment_stats; +use crate::TableContext; use crate::io::SegmentsIO; +use crate::io::read::read_segment_stats; +use crate::operations::CompactOptions; use crate::operations::acquire_task_permit; use crate::operations::common::BlockMetaIndex; use crate::operations::mutation::BlockIndex; @@ -48,10 +50,8 @@ use crate::operations::mutation::CompactExtraInfo; use crate::operations::mutation::CompactLazyPartInfo; use crate::operations::mutation::CompactTaskInfo; use crate::operations::mutation::SegmentIndex; -use crate::operations::CompactOptions; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::sort_by_cluster_stats; -use crate::TableContext; #[derive(Clone)] pub struct BlockCompactMutator { diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs index 4422a29bb2d6f..64ab11f415a51 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/recluster_mutator.rs @@ -19,8 +19,8 @@ use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; -use databend_common_base::runtime::execute_futures_in_parallel; use databend_common_base::runtime::GLOBAL_MEM_STAT; +use databend_common_base::runtime::execute_futures_in_parallel; use databend_common_catalog::plan::Partitions; use databend_common_catalog::plan::PartitionsShuffleKind; use databend_common_catalog::plan::ReclusterParts; @@ -28,11 +28,11 @@ use databend_common_catalog::plan::ReclusterTask; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::compare_scalars; -use databend_common_expression::types::DataType; use databend_common_expression::BlockThresholds; use databend_common_expression::Scalar; use databend_common_expression::TableSchemaRef; +use databend_common_expression::compare_scalars; +use databend_common_expression::types::DataType; use databend_common_storage::ColumnNodes; use databend_storages_common_cache::LoadParams; use databend_storages_common_pruner::BlockMetaIndex; @@ -42,26 +42,26 @@ use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::RawBlockHLL; use databend_storages_common_table_meta::meta::Statistics; use databend_storages_common_table_meta::meta::TableSnapshot; +use fastrace::Span; use fastrace::func_path; use fastrace::future::FutureExt; -use fastrace::Span; use indexmap::IndexSet; use log::debug; use log::warn; use opendal::Operator; +use crate::DEFAULT_AVG_DEPTH_THRESHOLD; +use crate::FUSE_OPT_KEY_ROW_AVG_DEPTH_THRESHOLD; +use crate::FuseTable; +use crate::SegmentLocation; use crate::io::MetaReaders; -use crate::operations::common::BlockMetaIndex as BlockIndex; -use crate::operations::mutation::mutator::block_compact_mutator::CompactLimitState; -use crate::operations::mutation::SegmentCompactChecker; use crate::operations::BlockCompactMutator; use crate::operations::CompactLazyPartInfo; +use crate::operations::common::BlockMetaIndex as BlockIndex; +use crate::operations::mutation::SegmentCompactChecker; +use crate::operations::mutation::mutator::block_compact_mutator::CompactLimitState; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::sort_by_cluster_stats; -use crate::FuseTable; -use crate::SegmentLocation; -use crate::DEFAULT_AVG_DEPTH_THRESHOLD; -use crate::FUSE_OPT_KEY_ROW_AVG_DEPTH_THRESHOLD; pub enum ReclusterMode { Recluster, diff --git a/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs b/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs index 6daa79bbe20d7..e3b22cd244860 100644 --- a/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs +++ b/src/query/storages/fuse/src/operations/mutation/mutator/segment_compact_mutator.rs @@ -27,15 +27,15 @@ use databend_storages_common_table_meta::meta::Versioned; use log::info; use opendal::Operator; -use crate::io::read::read_segment_stats_in_parallel; +use crate::FuseTable; +use crate::TableContext; use crate::io::CachedMetaWriter; use crate::io::SegmentsIO; use crate::io::TableMetaLocationGenerator; +use crate::io::read::read_segment_stats_in_parallel; use crate::operations::CompactOptions; use crate::statistics::reducers::merge_statistics_mut; use crate::statistics::sort_by_cluster_stats; -use crate::FuseTable; -use crate::TableContext; #[derive(Default)] pub struct SegmentCompactionState { diff --git a/src/query/storages/fuse/src/operations/navigate.rs b/src/query/storages/fuse/src/operations/navigate.rs index 0eba172650d63..e6ae0e3d1f3bb 100644 --- a/src/query/storages/fuse/src/operations/navigate.rs +++ b/src/query/storages/fuse/src/operations/navigate.rs @@ -32,13 +32,13 @@ use futures::TryStreamExt; use log::info; use opendal::EntryMode; +use crate::FUSE_TBL_SNAPSHOT_PREFIX; +use crate::FuseTable; use crate::fuse_table::RetentionPolicy; use crate::io::MetaReaders; use crate::io::SnapshotHistoryReader; use crate::io::SnapshotsIO; use crate::io::TableMetaLocationGenerator; -use crate::FuseTable; -use crate::FUSE_TBL_SNAPSHOT_PREFIX; impl FuseTable { #[fastrace::trace] diff --git a/src/query/storages/fuse/src/operations/read_partitions.rs b/src/query/storages/fuse/src/operations/read_partitions.rs index a1ee6558fc0bb..76648c5e3da66 100644 --- a/src/query/storages/fuse/src/operations/read_partitions.rs +++ b/src/query/storages/fuse/src/operations/read_partitions.rs @@ -55,8 +55,8 @@ use databend_storages_common_index::BloomIndex; use databend_storages_common_index::NgramArgs; use databend_storages_common_pruner::BlockMetaIndex; use databend_storages_common_pruner::TopNPruner; -use databend_storages_common_table_meta::meta::column_oriented_segment::meta_name; -use databend_storages_common_table_meta::meta::column_oriented_segment::stat_name; +use databend_storages_common_table_meta::meta::BlockMeta; +use databend_storages_common_table_meta::meta::ColumnStatistics; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOCK_SIZE; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOOM_FILTER_INDEX_LOCATION; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOOM_FILTER_INDEX_SIZE; @@ -68,8 +68,8 @@ use databend_storages_common_table_meta::meta::column_oriented_segment::INVERTED use databend_storages_common_table_meta::meta::column_oriented_segment::LOCATION; use databend_storages_common_table_meta::meta::column_oriented_segment::NGRAM_FILTER_INDEX_SIZE; use databend_storages_common_table_meta::meta::column_oriented_segment::ROW_COUNT; -use databend_storages_common_table_meta::meta::BlockMeta; -use databend_storages_common_table_meta::meta::ColumnStatistics; +use databend_storages_common_table_meta::meta::column_oriented_segment::meta_name; +use databend_storages_common_table_meta::meta::column_oriented_segment::stat_name; use databend_storages_common_table_meta::table::ChangeType; use databend_storages_common_table_meta::table::ClusterType; use itertools::Itertools; @@ -78,15 +78,18 @@ use opendal::Operator; use sha2::Digest; use sha2::Sha256; +use crate::FuseLazyPartInfo; +use crate::FuseSegmentFormat; +use crate::FuseTable; use crate::fuse_part::FuseBlockPartInfo; use crate::io::BloomIndexRebuilder; -use crate::pruning::create_segment_location_vector; -use crate::pruning::table_sample; use crate::pruning::BlockPruner; use crate::pruning::FusePruner; use crate::pruning::SegmentLocation; use crate::pruning::SegmentPruner; use crate::pruning::VectorIndexPruner; +use crate::pruning::create_segment_location_vector; +use crate::pruning::table_sample; use crate::pruning_pipeline::AsyncBlockPruneTransform; use crate::pruning_pipeline::ColumnOrientedBlockPruneSink; use crate::pruning_pipeline::ExtractSegmentTransform; @@ -101,9 +104,6 @@ use crate::pruning_pipeline::SyncBlockPruneTransform; use crate::pruning_pipeline::TopNPruneTransform; use crate::pruning_pipeline::VectorIndexPruneTransform; use crate::segment_format_from_location; -use crate::FuseLazyPartInfo; -use crate::FuseSegmentFormat; -use crate::FuseTable; const DEFAULT_GRAM_SIZE: usize = 3; const DEFAULT_BLOOM_SIZE: u64 = 1024 * 1024; diff --git a/src/query/storages/fuse/src/operations/recluster.rs b/src/query/storages/fuse/src/operations/recluster.rs index 4ce55ee9b9052..79f1c5c2fbfdf 100644 --- a/src/query/storages/fuse/src/operations/recluster.rs +++ b/src/query/storages/fuse/src/operations/recluster.rs @@ -16,8 +16,8 @@ use std::sync::Arc; use std::time::Instant; use databend_common_base::base::tokio::select; -use databend_common_base::base::tokio::sync::mpsc; use databend_common_base::base::tokio::sync::Semaphore; +use databend_common_base::base::tokio::sync::mpsc; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; use databend_common_catalog::plan::PushDownInfo; @@ -36,14 +36,14 @@ use databend_storages_common_table_meta::table::ClusterType; use log::warn; use opendal::Operator; +use crate::FuseTable; +use crate::SegmentLocation; +use crate::operations::ReclusterMutator; use crate::operations::acquire_task_permit; use crate::operations::mutation::ReclusterMode; -use crate::operations::ReclusterMutator; -use crate::pruning::create_segment_location_vector; use crate::pruning::PruningContext; use crate::pruning::SegmentPruner; -use crate::FuseTable; -use crate::SegmentLocation; +use crate::pruning::create_segment_location_vector; impl FuseTable { #[async_backtrace::framed] diff --git a/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs b/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs index 6b735e3b4c1d7..7829194f4a737 100644 --- a/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs +++ b/src/query/storages/fuse/src/operations/replace_into/mutator/replace_into_operation_agg.rs @@ -20,14 +20,12 @@ use ahash::AHashMap; use databend_common_base::base::tokio::sync::Semaphore; use databend_common_base::runtime::GlobalIORuntime; use databend_common_base::runtime::TrySpawn; -use databend_common_catalog::plan::gen_mutation_stream_meta; use databend_common_catalog::plan::Projection; +use databend_common_catalog::plan::gen_mutation_stream_meta; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::types::MutableBitmap; -use databend_common_expression::types::UInt64Type; use databend_common_expression::Column; use databend_common_expression::ColumnId; use databend_common_expression::ComputedExpr; @@ -35,17 +33,19 @@ use databend_common_expression::DataBlock; use databend_common_expression::FieldIndex; use databend_common_expression::FromData; use databend_common_expression::Scalar; +use databend_common_expression::types::MutableBitmap; +use databend_common_expression::types::UInt64Type; use databend_common_metrics::storage::*; +use databend_common_sql::StreamContext; use databend_common_sql::evaluator::BlockOperator; use databend_common_sql::executor::physical_plans::OnConflictField; -use databend_common_sql::StreamContext; use databend_storages_common_cache::BlockMetaCache; use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; use databend_storages_common_cache::LoadParams; +use databend_storages_common_index::BloomIndex; use databend_storages_common_index::filters::Filter; use databend_storages_common_index::filters::FilterImpl; -use databend_storages_common_index::BloomIndex; use databend_storages_common_io::ReadSettings; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::BlockSlotDescription; @@ -56,13 +56,14 @@ use log::info; use log::warn; use opendal::Operator; -use crate::io::read::bloom::block_filter_reader::BloomBlockFilterReader; +use crate::FuseTable; use crate::io::BlockBuilder; use crate::io::BlockReader; use crate::io::BlockWriter; use crate::io::CompactSegmentInfoReader; use crate::io::MetaReaders; use crate::io::WriteSettings; +use crate::io::read::bloom::block_filter_reader::BloomBlockFilterReader; use crate::operations::acquire_task_permit; use crate::operations::common::BlockMetaIndex; use crate::operations::common::MutationLogEntry; @@ -73,9 +74,8 @@ use crate::operations::read_block; use crate::operations::replace_into::meta::DeletionByColumn; use crate::operations::replace_into::meta::ReplaceIntoOperation; use crate::operations::replace_into::meta::UniqueKeyDigest; -use crate::operations::replace_into::mutator::row_hash_of_columns; use crate::operations::replace_into::mutator::DeletionAccumulator; -use crate::FuseTable; +use crate::operations::replace_into::mutator::row_hash_of_columns; struct AggregationContext { segment_locations: AHashMap, @@ -788,11 +788,11 @@ impl AggregationContext { #[cfg(test)] mod tests { - use databend_common_expression::types::NumberDataType; - use databend_common_expression::types::NumberScalar; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; + use databend_common_expression::types::NumberDataType; + use databend_common_expression::types::NumberScalar; use super::*; diff --git a/src/query/storages/fuse/src/operations/snapshot_hint.rs b/src/query/storages/fuse/src/operations/snapshot_hint.rs index a41d93d2f90a1..71eedf1d54aba 100644 --- a/src/query/storages/fuse/src/operations/snapshot_hint.rs +++ b/src/query/storages/fuse/src/operations/snapshot_hint.rs @@ -29,9 +29,9 @@ use opendal::Operator; use serde::Deserialize; use serde::Serialize; -use crate::io::TableMetaLocationGenerator; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT_V2; +use crate::io::TableMetaLocationGenerator; pub struct SnapshotHintWriter<'a> { ctx: &'a dyn TableContext, diff --git a/src/query/storages/fuse/src/operations/table_index.rs b/src/query/storages/fuse/src/operations/table_index.rs index 3009fb32017cd..32d0f5ae81e45 100644 --- a/src/query/storages/fuse/src/operations/table_index.rs +++ b/src/query/storages/fuse/src/operations/table_index.rs @@ -23,7 +23,6 @@ use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::local_block_meta_serde; use databend_common_expression::BlockMetaInfo; use databend_common_expression::BlockMetaInfoDowncast; use databend_common_expression::DataBlock; @@ -31,6 +30,7 @@ use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; +use databend_common_expression::local_block_meta_serde; use databend_common_meta_app::schema::TableIndex; use databend_common_meta_app::schema::TableIndexType; use databend_common_meta_app::schema::TableMeta; @@ -56,29 +56,29 @@ use databend_storages_common_table_meta::meta::Versioned; use log::info; use opendal::Operator; -use crate::index::filters::BlockFilter; -use crate::index::filters::Filter; +use crate::FuseStorageFormat; +use crate::FuseTable; use crate::index::BloomIndex; use crate::index::BloomIndexBuilder; use crate::index::NgramArgs; -use crate::io::read::bloom::block_filter_reader::load_bloom_filter_by_columns; -use crate::io::read::bloom::block_filter_reader::load_index_meta; -use crate::io::read::load_vector_index_meta; -use crate::io::read::read_segment_stats; +use crate::index::filters::BlockFilter; +use crate::index::filters::Filter; use crate::io::BlockReader; use crate::io::BlockWriter; use crate::io::BloomIndexState; use crate::io::MetaReaders; use crate::io::TableMetaLocationGenerator; use crate::io::VectorIndexBuilder; +use crate::io::read::bloom::block_filter_reader::load_bloom_filter_by_columns; +use crate::io::read::bloom::block_filter_reader::load_index_meta; +use crate::io::read::load_vector_index_meta; +use crate::io::read::read_segment_stats; use crate::operations::BlockMetaIndex; use crate::operations::CommitSink; use crate::operations::MutationGenerator; use crate::operations::MutationLogEntry; use crate::operations::MutationLogs; use crate::operations::TableMutationAggregator; -use crate::FuseStorageFormat; -use crate::FuseTable; pub async fn do_refresh_table_index( fuse_table: &FuseTable, diff --git a/src/query/storages/fuse/src/pruning/bloom_pruner.rs b/src/query/storages/fuse/src/pruning/bloom_pruner.rs index b2563748e2d3c..71807f9db971a 100644 --- a/src/query/storages/fuse/src/pruning/bloom_pruner.rs +++ b/src/query/storages/fuse/src/pruning/bloom_pruner.rs @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; +use std::collections::hash_map::Entry; use std::sync::Arc; use databend_common_exception::ErrorCode; @@ -28,13 +28,13 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::Value; use databend_common_sql::BloomIndexColumns; -use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::FilterEvalResult; use databend_storages_common_index::NgramArgs; -use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; +use databend_storages_common_index::filters::BlockFilter; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::StatisticsOfColumns; +use databend_storages_common_table_meta::meta::column_oriented_segment::BlockReadInfo; use log::info; use log::warn; use opendal::Operator; diff --git a/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs b/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs index 31c3d6fca4f67..5a242960f453d 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_time_travel_size.rs @@ -21,27 +21,27 @@ use databend_common_catalog::table_args::TableArgs; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::types::BooleanType; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::UInt64Type; use databend_common_expression::DataBlock; use databend_common_expression::FromData; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; use databend_common_expression::TableSchemaRefExt; +use databend_common_expression::types::BooleanType; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::UInt64Type; use futures_util::TryStreamExt; use log::info; use opendal::Operator; use super::parse_opt_opt_args; +use crate::FUSE_OPT_KEY_DATA_RETENTION_PERIOD_IN_HOURS; +use crate::FuseTable; use crate::io::SnapshotsIO; -use crate::table_functions::string_literal; use crate::table_functions::SimpleArgFunc; use crate::table_functions::SimpleArgFuncTemplate; -use crate::FuseTable; -use crate::FUSE_OPT_KEY_DATA_RETENTION_PERIOD_IN_HOURS; +use crate::table_functions::string_literal; pub struct FuseTimeTravelSizeArgs { pub database_name: Option, diff --git a/src/query/storages/hive/hive/src/hive_table.rs b/src/query/storages/hive/hive/src/hive_table.rs index 1603787f85fdc..21b2841307b52 100644 --- a/src/query/storages/hive/hive/src/hive_table.rs +++ b/src/query/storages/hive/hive/src/hive_table.rs @@ -49,8 +49,8 @@ use databend_common_pipeline::core::Pipeline; use databend_common_pipeline::core::ProcessorPtr; use databend_common_pipeline::sources::SyncSource; use databend_common_pipeline::sources::SyncSourcer; -use databend_common_storage::init_operator; use databend_common_storage::DataOperator; +use databend_common_storage::init_operator; use databend_common_storages_parquet::ParquetPruner; use databend_common_storages_parquet::ParquetReaderBuilder; use databend_common_storages_parquet::ParquetSourceType; @@ -66,10 +66,10 @@ use opendal::Operator; use super::hive_catalog::HiveCatalog; use super::hive_table_options::HiveTableOptions; -use crate::hive_table_source::HiveTableSource; -use crate::utils::HiveFetchPartitionScalars; use crate::HivePartInfo; use crate::HivePartitionFiller; +use crate::hive_table_source::HiveTableSource; +use crate::utils::HiveFetchPartitionScalars; pub const HIVE_TABLE_ENGINE: &str = "hive"; pub const HIVE_DEFAULT_PARTITION: &str = "__HIVE_DEFAULT_PARTITION__"; @@ -245,8 +245,7 @@ impl HiveTable { if partition_num < 100000 { trace!( "get {} partitions from hive metastore:{:?}", - partition_num, - partition_names + partition_num, partition_names ); } else { trace!("get {} partitions from hive metastore", partition_num); diff --git a/src/query/storages/orc/src/chunk_reader_impl.rs b/src/query/storages/orc/src/chunk_reader_impl.rs index 4e53b8b8938e1..a5307184a4bb5 100644 --- a/src/query/storages/orc/src/chunk_reader_impl.rs +++ b/src/query/storages/orc/src/chunk_reader_impl.rs @@ -13,10 +13,10 @@ // limitations under the License. use bytes::Bytes; -use futures_util::future::BoxFuture; use futures_util::AsyncRead; use futures_util::AsyncReadExt; use futures_util::FutureExt; +use futures_util::future::BoxFuture; use opendal::Operator; use orc_rust::reader::AsyncChunkReader; diff --git a/src/query/storages/orc/src/table.rs b/src/query/storages/orc/src/table.rs index d3172bc13111b..d75f889eaf34d 100644 --- a/src/query/storages/orc/src/table.rs +++ b/src/query/storages/orc/src/table.rs @@ -31,18 +31,18 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; +use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; +use databend_common_expression::FILENAME_COLUMN_ID; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchema; -use databend_common_expression::FILENAME_COLUMN_ID; -use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_pipeline::core::Pipeline; -use databend_common_storage::init_stage_operator; use databend_common_storage::StageFileInfo; +use databend_common_storage::init_stage_operator; use databend_storages_common_table_meta::table::ChangeType; use opendal::Operator; use orc_rust::ArrowReaderBuilder; diff --git a/src/query/storages/parquet/src/copy_into_table/reader.rs b/src/query/storages/parquet/src/copy_into_table/reader.rs index a006076e17d11..f350c5e4a31f9 100644 --- a/src/query/storages/parquet/src/copy_into_table/reader.rs +++ b/src/query/storages/parquet/src/copy_into_table/reader.rs @@ -19,9 +19,9 @@ use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::expr::*; use databend_common_expression::RemoteDefaultExpr; use databend_common_expression::TableSchemaRef; +use databend_common_expression::expr::*; use databend_common_meta_app::principal::NullAs; use databend_common_meta_app::principal::StageFileFormatType; use databend_common_storage::parquet::infer_schema_with_extension; @@ -29,10 +29,10 @@ use databend_storages_common_stage::project_columnar; use opendal::Operator; use parquet::file::metadata::FileMetaData; -use crate::parquet_reader::policy::ReadPolicyBuilder; -use crate::parquet_reader::policy::ReadPolicyImpl; use crate::parquet_reader::InMemoryRowGroup; use crate::parquet_reader::ParquetReaderBuilder; +use crate::parquet_reader::policy::ReadPolicyBuilder; +use crate::parquet_reader::policy::ReadPolicyImpl; use crate::partition::ParquetRowGroupPart; use crate::read_settings::ReadSettings; use crate::schema::arrow_to_table_schema; diff --git a/src/query/storages/parquet/src/copy_into_table/source.rs b/src/query/storages/parquet/src/copy_into_table/source.rs index 8776f9df12a1f..516ba916fb960 100644 --- a/src/query/storages/parquet/src/copy_into_table/source.rs +++ b/src/query/storages/parquet/src/copy_into_table/source.rs @@ -31,11 +31,11 @@ use databend_common_pipeline::core::Processor; use databend_common_pipeline::core::ProcessorPtr; use opendal::Operator; +use crate::ParquetPart; use crate::copy_into_table::projection::CopyProjectionEvaluator; use crate::copy_into_table::reader::RowGroupReaderForCopy; use crate::parquet_reader::policy::ReadPolicyImpl; use crate::read_settings::ReadSettings; -use crate::ParquetPart; enum State { Init, diff --git a/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs index 2b9ad8719e2e2..09cdc87cd9d39 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs @@ -19,39 +19,39 @@ use arrow_schema::ArrowError; use bytes::Bytes; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::types::DataType; use databend_common_expression::DataBlock; use databend_common_expression::DataSchema; use databend_common_expression::Scalar; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRef; +use databend_common_expression::types::DataType; use databend_common_metrics::storage::metrics_inc_omit_filter_rowgroups; use databend_common_metrics::storage::metrics_inc_omit_filter_rows; use databend_common_storage::OperatorRegistry; -use futures::future::BoxFuture; use futures::StreamExt; use futures::TryFutureExt; +use futures::future::BoxFuture; use opendal::Reader; +use parquet::arrow::ParquetRecordBatchStreamBuilder; +use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::ArrowPredicateFn; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::arrow_reader::RowFilter; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::async_reader::ParquetRecordBatchStream; -use parquet::arrow::ParquetRecordBatchStreamBuilder; -use parquet::arrow::ProjectionMask; use parquet::file::metadata::ParquetMetaData; use parquet::file::metadata::ParquetMetaDataReader; use parquet::schema::types::SchemaDescPtr; +use crate::ParquetPruner; use crate::meta::check_parquet_schema; +use crate::parquet_reader::DataBlockIterator; use crate::parquet_reader::predicate::ParquetPredicate; +use crate::parquet_reader::utils::FieldPaths; use crate::parquet_reader::utils::transform_record_batch; use crate::parquet_reader::utils::transform_record_batch_by_field_paths; -use crate::parquet_reader::utils::FieldPaths; -use crate::parquet_reader::DataBlockIterator; use crate::transformer::RecordBatchTransformer; -use crate::ParquetPruner; /// The reader to read a whole parquet file. pub struct ParquetWholeFileReader { diff --git a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs index fb6bf2fdbb228..2a518426d4d19 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/row_group_reader.rs @@ -23,10 +23,6 @@ use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; -use databend_common_expression::type_check::check_function; -use databend_common_expression::types::DataType; -use databend_common_expression::types::Int64Type; -use databend_common_expression::types::NumberDataType; use databend_common_expression::ColumnRef; use databend_common_expression::Constant; use databend_common_expression::Expr; @@ -38,39 +34,43 @@ use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; use databend_common_expression::TopKSorter; use databend_common_expression::Value; +use databend_common_expression::type_check::check_function; +use databend_common_expression::types::DataType; +use databend_common_expression::types::Int64Type; +use databend_common_expression::types::NumberDataType; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_metrics::storage::metrics_inc_omit_filter_rowgroups; use databend_common_metrics::storage::metrics_inc_omit_filter_rows; use databend_common_storage::OperatorRegistry; -use futures::future::try_join_all; use futures::StreamExt; +use futures::future::try_join_all; use opendal::Operator; use opendal::Reader; +use parquet::arrow::PARQUET_FIELD_ID_META_KEY; +use parquet::arrow::ParquetRecordBatchStreamBuilder; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::arrow_reader::RowSelection; use parquet::arrow::arrow_reader::RowSelector; -use parquet::arrow::ParquetRecordBatchStreamBuilder; -use parquet::arrow::PARQUET_FIELD_ID_META_KEY; use parquet::file::metadata::ParquetMetaData; use parquet::file::metadata::RowGroupMetaData; use parquet::format::PageLocation; use parquet::schema::types::SchemaDescPtr; +use crate::DeleteType; +use crate::ParquetFileReader; +use crate::ParquetReaderBuilder; +use crate::ParquetSourceType; use crate::parquet_part::DeleteTask; +use crate::parquet_reader::policy::POLICY_PREDICATE_ONLY; use crate::parquet_reader::policy::PolicyBuilders; use crate::parquet_reader::policy::PolicyType; use crate::parquet_reader::policy::ReadPolicyImpl; -use crate::parquet_reader::policy::POLICY_PREDICATE_ONLY; -use crate::parquet_reader::predicate::build_predicate; use crate::parquet_reader::predicate::ParquetPredicate; +use crate::parquet_reader::predicate::build_predicate; use crate::parquet_reader::row_group::InMemoryRowGroup; use crate::partition::ParquetRowGroupPart; use crate::read_settings::ReadSettings; use crate::transformer::RecordBatchTransformer; -use crate::DeleteType; -use crate::ParquetFileReader; -use crate::ParquetReaderBuilder; -use crate::ParquetSourceType; static DELETES_FILE_SCHEMA: LazyLock = LazyLock::new(|| { arrow_schema::Schema::new(vec![ diff --git a/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs index 3e25b6854533e..e8278b9f6ba8a 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/streaming_load_reader.rs @@ -20,24 +20,24 @@ use databend_common_catalog::plan::Projection; use databend_common_catalog::plan::PushDownInfo; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::expr::*; use databend_common_expression::DataSchemaRef; use databend_common_expression::FunctionContext; use databend_common_expression::RemoteDefaultExpr; use databend_common_expression::TableSchemaRef; +use databend_common_expression::expr::*; use databend_common_meta_app::principal::NullAs; use databend_common_meta_app::principal::StageFileFormatType; use databend_common_storage::parquet::infer_schema_with_extension; use databend_storages_common_stage::project_columnar; -use opendal::services::Memory; use opendal::Operator; +use opendal::services::Memory; use parquet::file::metadata::ParquetMetaDataReader; +use crate::ParquetSourceType; use crate::copy_into_table::CopyProjectionEvaluator; use crate::parquet_reader::DataBlockIterator; use crate::parquet_reader::ParquetReaderBuilder; use crate::schema::arrow_to_table_schema; -use crate::ParquetSourceType; pub struct InmMemoryFile { file_data: Bytes, diff --git a/src/query/storages/parquet/src/parquet_reader/row_group.rs b/src/query/storages/parquet/src/parquet_reader/row_group.rs index c3506e0e3bf30..8e9e52b9d9141 100644 --- a/src/query/storages/parquet/src/parquet_reader/row_group.rs +++ b/src/query/storages/parquet/src/parquet_reader/row_group.rs @@ -25,9 +25,9 @@ use databend_storages_common_cache::CacheAccessor; use databend_storages_common_cache::CacheManager; use databend_storages_common_cache::ColumnData; use opendal::Operator; +use parquet::arrow::ProjectionMask; use parquet::arrow::arrow_reader::RowGroups; use parquet::arrow::arrow_reader::RowSelection; -use parquet::arrow::ProjectionMask; use parquet::column::page::PageIterator; use parquet::column::page::PageReader; use parquet::errors::ParquetError; @@ -497,8 +497,8 @@ mod test { use arrow_schema::Schema; use bytes::Bytes; use databend_common_base::base::tokio; - use opendal::services::Memory; use opendal::Operator; + use opendal::services::Memory; use parquet::arrow::ArrowWriter; use parquet::basic::Repetition; use parquet::file::metadata::RowGroupMetaData; diff --git a/src/query/storages/parquet/src/parquet_table/table.rs b/src/query/storages/parquet/src/parquet_table/table.rs index f5971b264bf09..f834b4bd36eb5 100644 --- a/src/query/storages/parquet/src/parquet_table/table.rs +++ b/src/query/storages/parquet/src/parquet_table/table.rs @@ -35,10 +35,10 @@ use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::ColumnId; +use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; +use databend_common_expression::FILENAME_COLUMN_ID; use databend_common_expression::TableField; use databend_common_expression::TableSchema; -use databend_common_expression::FILENAME_COLUMN_ID; -use databend_common_expression::FILE_ROW_NUMBER_COLUMN_ID; use databend_common_meta_app::principal::ParquetFileFormatParams; use databend_common_meta_app::principal::StageInfo; use databend_common_meta_app::schema::TableIdent; @@ -46,11 +46,11 @@ use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_pipeline::core::Pipeline; use databend_common_settings::Settings; +use databend_common_storage::StageFileInfo; +use databend_common_storage::StageFilesInfo; use databend_common_storage::init_stage_operator; use databend_common_storage::parquet::infer_schema_with_extension; use databend_common_storage::read_metadata_async; -use databend_common_storage::StageFileInfo; -use databend_common_storage::StageFilesInfo; use databend_storages_common_table_meta::table::ChangeType; use log::info; use opendal::Operator; diff --git a/src/query/storages/stage/src/append/row_based_file/writer_processor.rs b/src/query/storages/stage/src/append/row_based_file/writer_processor.rs index e0c96f436def2..beef171ce4a0a 100644 --- a/src/query/storages/stage/src/append/row_based_file/writer_processor.rs +++ b/src/query/storages/stage/src/append/row_based_file/writer_processor.rs @@ -32,9 +32,9 @@ use databend_storages_common_stage::CopyIntoLocationInfo; use opendal::Operator; use super::buffers::FileOutputBuffers; +use crate::append::UnloadOutput; use crate::append::output::DataSummary; use crate::append::path::unload_path; -use crate::append::UnloadOutput; pub struct RowBasedFileWriter { input: Arc, diff --git a/src/query/storages/system/src/temp_files_table.rs b/src/query/storages/system/src/temp_files_table.rs index 70cbd5f2cd383..ce2e45be4be1d 100644 --- a/src/query/storages/system/src/temp_files_table.rs +++ b/src/query/storages/system/src/temp_files_table.rs @@ -25,10 +25,6 @@ use databend_common_catalog::table::DistributionLevel; use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::NumberType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::TimestampType; use databend_common_expression::BlockEntry; use databend_common_expression::DataBlock; use databend_common_expression::FromData; @@ -36,6 +32,10 @@ use databend_common_expression::SendableDataBlockStream; use databend_common_expression::TableDataType; use databend_common_expression::TableField; use databend_common_expression::TableSchemaRefExt; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::TimestampType; use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; @@ -45,14 +45,14 @@ use databend_common_pipeline::core::ProcessorPtr; use databend_common_pipeline::sources::EmptySource; use databend_common_pipeline::sources::StreamSource; use databend_common_storage::DataOperator; +use futures::StreamExt; use futures::stream; use futures::stream::Chunks; use futures::stream::Take; -use futures::StreamExt; -use opendal::operator_futures::FutureLister; use opendal::Lister; use opendal::Metadata; use opendal::Operator; +use opendal::operator_futures::FutureLister; use crate::table::SystemTablePart; @@ -252,10 +252,9 @@ where T: Future> + Send + 'static pub fn build( self, - block_builder: (impl FnMut(Vec<(String, Metadata)>) -> Result - + Sync - + Send - + 'static), + block_builder: ( + impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static + ), ) -> Result { stream_source_from_entry_lister_with_chunk_size( self.op.clone(), From 79a4803e30a6b62f195e15e33d701bf148065115 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 24 Dec 2025 19:42:06 +0800 Subject: [PATCH 3/3] revert --- src/query/ee/tests/it/storages/fuse/operations/vacuum.rs | 9 +++------ .../fuse/operations/mutation/block_compact_mutator.rs | 2 +- .../tests/it/storages/fuse/operations/navigate.rs | 2 +- .../storages/parquet/src/parquet_reader/row_group.rs | 2 +- src/query/storages/system/src/temp_files_table.rs | 6 ++---- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs index 47ae1a8e363d2..3f861ef3b3bab 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs @@ -901,10 +901,7 @@ async fn test_vacuum_drop_create_or_replace_impl(vacuum_stmts: &[&str]) -> Resul async fn new_local_meta() -> MetaStore { let version = &BUILD_INFO; let meta_config = MetaConfig::default(); - let meta = { - let config = meta_config.to_meta_grpc_client_conf(version); - let provider = Arc::new(MetaStoreProvider::new(config)); - provider.create_meta_store().await.unwrap() - }; - meta + let config = meta_config.to_meta_grpc_client_conf(version); + let provider = MetaStoreProvider::new(config); + provider.create_meta_store().await.unwrap() } diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs index 1f0b8ef510bca..9be90572d0107 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/block_compact_mutator.rs @@ -202,7 +202,7 @@ async fn test_safety() -> Result<()> { number_of_segments, number_of_blocks, ); - let cluster_key_id = if number_of_segments % 2 == 0 { + let cluster_key_id = if number_of_segments.is_multiple_of(2) { Some(0) } else { None diff --git a/src/query/service/tests/it/storages/fuse/operations/navigate.rs b/src/query/service/tests/it/storages/fuse/operations/navigate.rs index 5e7f7c7877e32..0f68ccf085549 100644 --- a/src/query/service/tests/it/storages/fuse/operations/navigate.rs +++ b/src/query/service/tests/it/storages/fuse/operations/navigate.rs @@ -219,7 +219,7 @@ async fn test_navigate_for_purge() -> Result<()> { let meta = fuse_table.get_operator().stat(&loc).await?; let modified = meta.last_modified(); assert!(modified.is_some()); - let millis = modified.unwrap().into_inner().as_millisecond(); + let millis = modified.unwrap().timestamp_millis(); let seconds = millis / 1000; let nanos = ((millis % 1000) * 1_000_000) as u32; let base_time = chrono::DateTime::::from_timestamp(seconds as i64, nanos) diff --git a/src/query/storages/parquet/src/parquet_reader/row_group.rs b/src/query/storages/parquet/src/parquet_reader/row_group.rs index 8e9e52b9d9141..5dbe38aa946cb 100644 --- a/src/query/storages/parquet/src/parquet_reader/row_group.rs +++ b/src/query/storages/parquet/src/parquet_reader/row_group.rs @@ -322,7 +322,7 @@ impl RowGroupCore { self.column_chunks .iter() .enumerate() - .filter(|&(idx, chunk)| (chunk.is_none() && projection.leaf_included(idx))) + .filter(|&(idx, chunk)| chunk.is_none() && projection.leaf_included(idx)) .map(|(idx, _chunk)| { let column = self.metadata.meta().column(idx); let (start, length) = column.byte_range(); diff --git a/src/query/storages/system/src/temp_files_table.rs b/src/query/storages/system/src/temp_files_table.rs index ce2e45be4be1d..c04be12740f0c 100644 --- a/src/query/storages/system/src/temp_files_table.rs +++ b/src/query/storages/system/src/temp_files_table.rs @@ -252,9 +252,7 @@ where T: Future> + Send + 'static pub fn build( self, - block_builder: ( - impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static - ), + block_builder: impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static, ) -> Result { stream_source_from_entry_lister_with_chunk_size( self.op.clone(), @@ -271,7 +269,7 @@ fn stream_source_from_entry_lister_with_chunk_size( lister_fut: FutureLister, limit: Option, chunk_size: usize, - block_builder: (impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static), + block_builder: impl FnMut(Vec<(String, Metadata)>) -> Result + Sync + Send + 'static, ) -> Result where T: Future> + Send + 'static,