From 4534637e50f0f1527a5605c279b4486ee9cecec5 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 16 Jan 2025 15:43:35 +0100 Subject: [PATCH 01/48] [C++][Python][Parquet] Implement Content-Defined Chunking for the Parquet writer --- cpp/src/parquet/column_chunker.h | 313 +++++++++++++++++++++++++++++++ cpp/src/parquet/column_writer.cc | 41 +++- cpp/src/parquet/column_writer.h | 1 + cpp/src/parquet/properties.h | 64 ++++++- python/pyarrow/_parquet.pxd | 6 + python/pyarrow/_parquet.pyx | 26 ++- 6 files changed, 438 insertions(+), 13 deletions(-) create mode 100644 cpp/src/parquet/column_chunker.h diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h new file mode 100644 index 0000000000000..0fb59be04c244 --- /dev/null +++ b/cpp/src/parquet/column_chunker.h @@ -0,0 +1,313 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include "arrow/array.h" +#include "parquet/level_conversion.h" + +using arrow::internal::checked_cast; + +namespace parquet { +namespace internal { + +// Constants +const uint64_t GEAR_HASH_TABLE[] = { + 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, + 0x368f573e8b7a31b7, 0x1dc636dce936d94b, 0x207a4c4e5554d5b6, 0xa474b34628239acb, + 0x3b06a83e1ca3b912, 0x90e78d6c2f02baf7, 0xe1c92df7150d9a8a, 0x8e95053a1086d3ad, + 0x5a2ef4f1b83a0722, 0xa50fac949f807fae, 0x0e7303eb80d8d681, 0x99b07edc1570ad0f, + 0x689d2fb555fd3076, 0x00005082119ea468, 0xc4b08306a88fcc28, 0x3eb0678af6374afd, + 0xf19f87ab86ad7436, 0xf2129fbfbe6bc736, 0x481149575c98a4ed, 0x0000010695477bc5, + 0x1fba37801a9ceacc, 0x3bf06fd663a49b6d, 0x99687e9782e3874b, 0x79a10673aa50d8e3, + 0xe4accf9e6211f420, 0x2520e71f87579071, 0x2bd5d3fd781a8a9b, 0x00de4dcddd11c873, + 0xeaa9311c5a87392f, 0xdb748eb617bc40ff, 0xaf579a8df620bf6f, 0x86a6e5da1b09c2b1, + 0xcc2fc30ac322a12e, 0x355e2afec1f74267, 0x2d99c8f4c021a47b, 0xbade4b4a9404cfc3, + 0xf7b518721d707d69, 0x3286b6587bf32c20, 0x0000b68886af270c, 0xa115d6e4db8a9079, + 0x484f7e9c97b2e199, 0xccca7bb75713e301, 0xbf2584a62bb0f160, 0xade7e813625dbcc8, + 0x000070940d87955a, 0x8ae69108139e626f, 0xbd776ad72fde38a2, 0xfb6b001fc2fcc0cf, + 0xc7a474b8e67bc427, 0xbaf6f11610eb5d58, 0x09cb1f5b6de770d1, 0xb0b219e6977d4c47, + 0x00ccbc386ea7ad4a, 0xcc849d0adf973f01, 0x73a3ef7d016af770, 0xc807d2d386bdbdfe, + 0x7f2ac9966c791730, 0xd037a86bc6c504da, 0xf3f17c661eaa609d, 0xaca626b04daae687, + 0x755a99374f4a5b07, 0x90837ee65b2caede, 0x6ee8ad93fd560785, 0x0000d9e11053edd8, + 0x9e063bb2d21cdbd7, 0x07ab77f12a01d2b2, 0xec550255e6641b44, 0x78fb94a8449c14c6, + 0xc7510e1bc6c0f5f5, 0x0000320b36e4cae3, 0x827c33262c8b1a2d, 0x14675f0b48ea4144, + 0x267bd3a6498deceb, 0xf1916ff982f5035e, 0x86221b7ff434fb88, 0x9dbecee7386f49d8, + 0xea58f8cac80f8f4a, 0x008d198692fc64d8, 0x6d38704fbabf9a36, 0xe032cb07d1e7be4c, + 0x228d21f6ad450890, 
0x635cb1bfc02589a5, 0x4620a1739ca2ce71, 0xa7e7dfe3aae5fb58, + 0x0c10ca932b3c0deb, 0x2727fee884afed7b, 0xa2df1c6df9e2ab1f, 0x4dcdd1ac0774f523, + 0x000070ffad33e24e, 0xa2ace87bc5977816, 0x9892275ab4286049, 0xc2861181ddf18959, + 0xbb9972a042483e19, 0xef70cd3766513078, 0x00000513abfc9864, 0xc058b61858c94083, + 0x09e850859725e0de, 0x9197fb3bf83e7d94, 0x7e1e626d12b64bce, 0x520c54507f7b57d1, + 0xbee1797174e22416, 0x6fd9ac3222e95587, 0x0023957c9adfbf3e, 0xa01c7d7e234bbe15, + 0xaba2c758b8a38cbb, 0x0d1fa0ceec3e2b30, 0x0bb6a58b7e60b991, 0x4333dd5b9fa26635, + 0xc2fd3b7d4001c1a3, 0xfb41802454731127, 0x65a56185a50d18cb, 0xf67a02bd8784b54f, + 0x696f11dd67e65063, 0x00002022fca814ab, 0x8cd6be912db9d852, 0x695189b6e9ae8a57, + 0xee9453b50ada0c28, 0xd8fc5ea91a78845e, 0xab86bf191a4aa767, 0x0000c6b5c86415e5, + 0x267310178e08a22e, 0xed2d101b078bca25, 0x3b41ed84b226a8fb, 0x13e622120f28dc06, + 0xa315f5ebfb706d26, 0x8816c34e3301bace, 0xe9395b9cbb71fdae, 0x002ce9202e721648, + 0x4283db1d2bb3c91c, 0xd77d461ad2b1a6a5, 0xe2ec17e46eeb866b, 0xb8e0be4039fbc47c, + 0xdea160c4d5299d04, 0x7eec86c8d28c3634, 0x2119ad129f98a399, 0xa6ccf46b61a283ef, + 0x2c52cedef658c617, 0x2db4871169acdd83, 0x0000f0d6f39ecbe9, 0x3dd5d8c98d2f9489, + 0x8a1872a22b01f584, 0xf282a4c40e7b3cf2, 0x8020ec2ccb1ba196, 0x6693b6e09e59e313, + 0x0000ce19cc7c83eb, 0x20cb5735f6479c3b, 0x762ebf3759d75a5b, 0x207bfe823d693975, + 0xd77dc112339cd9d5, 0x9ba7834284627d03, 0x217dc513e95f51e9, 0xb27b1a29fc5e7816, + 0x00d5cd9831bb662d, 0x71e39b806d75734c, 0x7e572af006fb1a23, 0xa2734f2f6ae91f85, + 0xbf82c6b5022cddf2, 0x5c3beac60761a0de, 0xcdc893bb47416998, 0x6d1085615c187e01, + 0x77f8ae30ac277c5d, 0x917c6b81122a2c91, 0x5b75b699add16967, 0x0000cf6ae79a069b, + 0xf3c40afa60de1104, 0x2063127aa59167c3, 0x621de62269d1894d, 0xd188ac1de62b4726, + 0x107036e2154b673c, 0x0000b85f28553a1d, 0xf2ef4e4c18236f3d, 0xd9d6de6611b9f602, + 0xa1fc7955fb47911c, 0xeb85fd032f298dbd, 0xbe27502fb3befae1, 0xe3034251c4cd661e, + 0x441364d354071836, 0x0082b36c75f2983e, 
0xb145910316fa66f0, 0x021c069c9847caf7, + 0x2910dfc75a4b5221, 0x735b353e1c57a8b5, 0xce44312ce98ed96c, 0xbc942e4506bdfa65, + 0xf05086a71257941b, 0xfec3b215d351cead, 0x00ae1055e0144202, 0xf54b40846f42e454, + 0x00007fd9c8bcbcc8, 0xbfbd9ef317de9bfe, 0xa804302ff2854e12, 0x39ce4957a5e5d8d4, + 0xffb9e2a45637ba84, 0x55b9ad1d9ea0818b, 0x00008acbf319178a, 0x48e2bfc8d0fbfb38, + 0x8be39841e848b5e8, 0x0e2712160696a08b, 0xd51096e84b44242a, 0x1101ba176792e13a, + 0xc22e770f4531689d, 0x1689eff272bbc56c, 0x00a92a197f5650ec, 0xbc765990bda1784e, + 0xc61441e392fcb8ae, 0x07e13a2ced31e4a0, 0x92cbe984234e9d4d, 0x8f4ff572bb7d8ac5, + 0x0b9670c00b963bd0, 0x62955a581a03eb01, 0x645f83e5ea000254, 0x41fce516cd88f299, + 0xbbda9748da7a98cf, 0x0000aab2fe4845fa, 0x19761b069bf56555, 0x8b8f5e8343b6ad56, + 0x3e5d1cfd144821d9, 0xec5c1e2ca2b0cd8f, 0xfaf7e0fea7fbb57f, 0x000000d3ba12961b, + 0xda3f90178401b18e, 0x70ff906de33a5feb, 0x0527d5a7c06970e7, 0x22d8e773607c13e9, + 0xc9ab70df643c3bac, 0xeda4c6dc8abe12e3, 0xecef1f410033e78a, 0x0024c2b274ac72cb, + 0x06740d954fa900b4, 0x1d7a299b323d6304, 0xb3c37cb298cbead5, 0xc986e3c76178739b, + 0x9fabea364b46f58a, 0x6da214c5af85cc56, 0x17a43ed8b7a38f84, 0x6eccec511d9adbeb, + 0xf9cab30913335afb, 0x4a5e60c5f415eed2, 0x00006967503672b4, 0x9da51d121454bb87, + 0x84321e13b9bbc816, 0xfb3d6fb6ab2fdd8d, 0x60305eed8e160a8d, 0xcbbf4b14e9946ce8, + 0x00004f63381b10c3, 0x07d5b7816fcc4e10, 0xe5a536726a6a8155, 0x57afb23447a07fdd, + 0x18f346f7abc9d394, 0x636dc655d61ad33d, 0xcc8bab4939f7f3f6, 0x63c7a906c1dd187b}; + +const uint64_t MASK = 0xffff00000000000; +// const int MIN_LEN = 65536 / 8; +// const int MAX_LEN = 65536 * 2; +const int64_t MIN_LEN = 256 * 1024; +const int64_t MAX_LEN = 2 * 1024 * 1024; + +// create a fake null array class with a GetView method returning 0 always +class FakeNullArray { + public: + uint8_t GetView(int64_t i) const { return 0; } + + std::shared_ptr<::arrow::DataType> type() const { return ::arrow::null(); } + + int64_t null_count() const { return 0; } +}; 
+ +class GearHash { + public: + GearHash(const LevelInfo& level_info, uint64_t mask, uint64_t min_len, uint64_t max_len) + : level_info_(level_info), + mask_(mask == 0 ? MASK : mask), + min_len_(min_len == 0 ? MIN_LEN : min_len), + max_len_(max_len == 0 ? MAX_LEN : max_len) {} + + template + bool Roll(const T value) { + constexpr size_t BYTE_WIDTH = sizeof(T); + chunk_size_ += BYTE_WIDTH; + // if (chunk_size_ < min_len_) { + // return false; + // } + auto bytes = reinterpret_cast(&value); + bool match = false; +#pragma unroll + for (size_t i = 0; i < BYTE_WIDTH; ++i) { + hash_ = (hash_ << 1) + GEAR_HASH_TABLE[bytes[i]]; + if ((hash_ & mask_) == 0) { + match = true; + } + } + return match; + } + + bool Roll(std::string_view value) { + chunk_size_ += value.size(); + // if (chunk_size_ < min_len_) { + // return false; + // } + bool match = false; + for (char c : value) { + hash_ = (hash_ << 1) + GEAR_HASH_TABLE[static_cast(c)]; + if ((hash_ & mask_) == 0) { + match = true; + } + } + return match; + } + + bool Check(bool match) { + if ((match && (chunk_size_ >= min_len_)) || (chunk_size_ >= max_len_)) { + chunk_size_ = 0; + return true; + } else { + return false; + } + } + + // bool Check(bool match) { + // if ((match && (chunk_size_ >= min_len_)) || (chunk_size_ >= max_len_)) { + // chunk_size_ = 0; + // return true; + // } else { + // return false; + // } + // } + + // template + // const std::vector> GetBoundaries( + // int64_t num_levels, const T& leaf_array) { + // std::vector> result; + + // int64_t offset = 0; + // int64_t prev_offset = 0; + + // while (offset < num_levels) { + // if (Check(Roll(leaf_array.GetView(offset)))) { + // result.push_back(std::make_tuple(prev_offset, prev_offset, offset - + // prev_offset)); prev_offset = offset; + // } + // ++offset; + // } + // if (prev_offset < num_levels) { + // result.push_back(std::make_tuple(prev_offset, prev_offset, num_levels - + // prev_offset)); + // } + // return result; + // } + + template + const 
std::vector> GetBoundaries( + const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, + const T& leaf_array) { + std::vector> result; + bool has_def_levels = level_info_.def_level > 0; + bool has_rep_levels = level_info_.rep_level > 0; + // bool no_nulls = leaf_array.null_count() == 0; + // if (!has_rep_levels && !maybe_parent_nulls && no_nulls) { + // return GetBoundaries(num_levels, leaf_array); + // } + + bool is_match; + int64_t level_offset = 0; + int64_t value_offset = 0; + int64_t record_level_offset = 0; + int64_t record_value_offset = 0; + int64_t prev_record_level_offset = 0; + int64_t prev_record_value_offset = 0; + + while (level_offset < num_levels) { + int16_t def_level = has_def_levels ? def_levels[level_offset] : 0; + int16_t rep_level = has_rep_levels ? rep_levels[level_offset] : 0; + + if (rep_level == 0) { + // record boundary + record_level_offset = level_offset; + record_value_offset = value_offset; + } + + is_match = Roll(def_level) || Roll(rep_level); + ++level_offset; + + if (has_rep_levels) { + if (def_level >= level_info_.repeated_ancestor_def_level) { + is_match |= Roll(leaf_array.GetView(value_offset)); + ++value_offset; + } + } else { + is_match |= Roll(leaf_array.GetView(value_offset)); + ++value_offset; + } + + if (Check(is_match)) { + auto levels_to_write = record_level_offset - prev_record_level_offset; + if (levels_to_write > 0) { + result.emplace_back(prev_record_level_offset, prev_record_value_offset, + levels_to_write); + prev_record_level_offset = record_level_offset; + prev_record_value_offset = record_value_offset; + } + } + } + + auto levels_to_write = num_levels - prev_record_level_offset; + if (levels_to_write > 0) { + result.emplace_back(prev_record_level_offset, prev_record_value_offset, + levels_to_write); + } + return result; + } + +#define PRIMITIVE_CASE(TYPE_ID, ArrowType) \ + case ::arrow::Type::TYPE_ID: \ + return GetBoundaries(def_levels, rep_levels, num_levels, \ + checked_cast(values)); + + 
const ::arrow::Result>> GetBoundaries( + const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, + const ::arrow::Array& values) { + auto type_id = values.type()->id(); + switch (type_id) { + PRIMITIVE_CASE(BOOL, Boolean) + PRIMITIVE_CASE(INT8, Int8) + PRIMITIVE_CASE(INT16, Int16) + PRIMITIVE_CASE(INT32, Int32) + PRIMITIVE_CASE(INT64, Int64) + PRIMITIVE_CASE(UINT8, UInt8) + PRIMITIVE_CASE(UINT16, UInt16) + PRIMITIVE_CASE(UINT32, UInt32) + PRIMITIVE_CASE(UINT64, UInt64) + PRIMITIVE_CASE(HALF_FLOAT, HalfFloat) + PRIMITIVE_CASE(FLOAT, Float) + PRIMITIVE_CASE(DOUBLE, Double) + PRIMITIVE_CASE(STRING, String) + PRIMITIVE_CASE(BINARY, Binary) + PRIMITIVE_CASE(FIXED_SIZE_BINARY, FixedSizeBinary) + PRIMITIVE_CASE(DATE32, Date32) + PRIMITIVE_CASE(DATE64, Date64) + PRIMITIVE_CASE(TIME32, Time32) + PRIMITIVE_CASE(TIME64, Time64) + PRIMITIVE_CASE(TIMESTAMP, Timestamp) + PRIMITIVE_CASE(DURATION, Duration) + PRIMITIVE_CASE(DECIMAL128, Decimal128) + PRIMITIVE_CASE(DECIMAL256, Decimal256) + case ::arrow::Type::DICTIONARY: + return GetBoundaries( + def_levels, rep_levels, num_levels, + *checked_cast(values).indices()); + case ::arrow::Type::NA: + FakeNullArray fake_null_array; + return GetBoundaries(def_levels, rep_levels, num_levels, fake_null_array); + default: + return ::arrow::Status::NotImplemented("Unsupported type " + + values.type()->ToString()); + } + } + + private: + const internal::LevelInfo& level_info_; + uint64_t mask_ = MASK; + uint64_t min_len_; + uint64_t max_len_; + uint64_t hash_ = 0; + uint64_t chunk_size_ = 0; +}; + +} // namespace internal +} // namespace parquet diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 393bf8a162367..8f1e13f274154 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -45,6 +45,7 @@ #include "arrow/util/rle_encoding_internal.h" #include "arrow/util/type_traits.h" #include "arrow/visit_array_inline.h" +#include "parquet/column_chunker.h" #include 
"parquet/column_page.h" #include "parquet/encoding.h" #include "parquet/encryption/encryption_internal.h" @@ -752,7 +753,9 @@ class ColumnWriterImpl { closed_(false), fallback_(false), definition_levels_sink_(allocator_), - repetition_levels_sink_(allocator_) { + repetition_levels_sink_(allocator_), + content_defined_chunker_(level_info_, properties->cdc_mask(), + properties->cdc_min_size(), properties->cdc_max_size()) { definition_levels_rle_ = std::static_pointer_cast(AllocateBuffer(allocator_, 0)); repetition_levels_rle_ = @@ -892,6 +895,8 @@ class ColumnWriterImpl { std::vector> data_pages_; + internal::GearHash content_defined_chunker_; + private: void InitSinks() { definition_levels_sink_.Rewind(0); @@ -1334,13 +1339,37 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< bits_buffer_->ZeroPadding(); } - if (leaf_array.type()->id() == ::arrow::Type::DICTIONARY) { - return WriteArrowDictionary(def_levels, rep_levels, num_levels, leaf_array, ctx, - maybe_parent_nulls); + if (this->properties_->cdc_enabled()) { + ARROW_ASSIGN_OR_RAISE(auto boundaries, + content_defined_chunker_.GetBoundaries( + def_levels, rep_levels, num_levels, leaf_array)); + for (auto boundary : boundaries) { + auto level_offset = std::get<0>(boundary); + auto array_offset = std::get<1>(boundary); + auto levels_to_write = std::get<2>(boundary); + auto sliced_array = leaf_array.Slice(array_offset); + if (leaf_array.type()->id() == ::arrow::Type::DICTIONARY) { + ARROW_CHECK_OK(WriteArrowDictionary(def_levels + level_offset, + rep_levels + level_offset, levels_to_write, + *sliced_array, ctx, maybe_parent_nulls)); + } else { + ARROW_CHECK_OK(WriteArrowDense(def_levels + level_offset, + rep_levels + level_offset, levels_to_write, + *sliced_array, ctx, maybe_parent_nulls)); + } + AddDataPage(); + } + return Status::OK(); } else { - return WriteArrowDense(def_levels, rep_levels, num_levels, leaf_array, ctx, - maybe_parent_nulls); + if (leaf_array.type()->id() == 
::arrow::Type::DICTIONARY) { + return WriteArrowDictionary(def_levels, rep_levels, num_levels, leaf_array, ctx, + maybe_parent_nulls); + } else { + return WriteArrowDense(def_levels, rep_levels, num_levels, leaf_array, ctx, + maybe_parent_nulls); + } } + END_PARQUET_CATCH_EXCEPTIONS } diff --git a/cpp/src/parquet/column_writer.h b/cpp/src/parquet/column_writer.h index bd329d61053f2..2ef549150b34c 100644 --- a/cpp/src/parquet/column_writer.h +++ b/cpp/src/parquet/column_writer.h @@ -23,6 +23,7 @@ #include "arrow/type_fwd.h" #include "arrow/util/compression.h" +#include "parquet/column_chunker.h" #include "parquet/exception.h" #include "parquet/platform.h" #include "parquet/types.h" diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 19436b84a379b..2eba39bd8cc50 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -27,6 +27,7 @@ #include "arrow/type.h" #include "arrow/util/compression.h" #include "arrow/util/type_fwd.h" +#include "parquet/column_chunker.h" #include "parquet/encryption/encryption.h" #include "parquet/exception.h" #include "parquet/parquet_version.h" @@ -260,7 +261,11 @@ class PARQUET_EXPORT WriterProperties { created_by_(DEFAULT_CREATED_BY), store_decimal_as_integer_(false), page_checksum_enabled_(false), - size_statistics_level_(DEFAULT_SIZE_STATISTICS_LEVEL) {} + size_statistics_level_(DEFAULT_SIZE_STATISTICS_LEVEL), + cdc_enabled_(false), + cdc_mask_(0), + cdc_min_size_(0), + cdc_max_size_(0) {} explicit Builder(const WriterProperties& properties) : pool_(properties.memory_pool()), @@ -275,10 +280,38 @@ class PARQUET_EXPORT WriterProperties { page_checksum_enabled_(properties.page_checksum_enabled()), size_statistics_level_(properties.size_statistics_level()), sorting_columns_(properties.sorting_columns()), - default_column_properties_(properties.default_column_properties()) {} + default_column_properties_(properties.default_column_properties()), + cdc_enabled_(properties.cdc_enabled()), + 
cdc_min_size_(properties.cdc_min_size()), + cdc_max_size_(properties.cdc_max_size()) {} virtual ~Builder() {} + Builder* enable_cdc() { + cdc_enabled_ = true; + return this; + } + + Builder* disable_cdc() { + cdc_enabled_ = false; + return this; + } + + Builder* cdc_mask(uint64_t mask) { + cdc_mask_ = mask; + return this; + } + + Builder* cdc_min_size(uint64_t min_size) { + cdc_min_size_ = min_size; + return this; + } + + Builder* cdc_max_size(uint64_t max_size) { + cdc_max_size_ = max_size; + return this; + } + /// Specify the memory pool for the writer. Default default_memory_pool. Builder* memory_pool(MemoryPool* pool) { pool_ = pool; @@ -701,7 +734,8 @@ class PARQUET_EXPORT WriterProperties { pagesize_, version_, created_by_, page_checksum_enabled_, size_statistics_level_, std::move(file_encryption_properties_), default_column_properties_, column_properties, data_page_version_, - store_decimal_as_integer_, std::move(sorting_columns_))); + store_decimal_as_integer_, std::move(sorting_columns_), cdc_enabled_, cdc_mask_, + cdc_min_size_, cdc_max_size_)); } private: @@ -730,6 +764,11 @@ class PARQUET_EXPORT WriterProperties { std::unordered_map dictionary_enabled_; std::unordered_map statistics_enabled_; std::unordered_map page_index_enabled_; + + bool cdc_enabled_; + uint64_t cdc_mask_; + uint64_t cdc_min_size_; + uint64_t cdc_max_size_; }; inline MemoryPool* memory_pool() const { return pool_; } @@ -754,6 +793,11 @@ class PARQUET_EXPORT WriterProperties { inline bool page_checksum_enabled() const { return page_checksum_enabled_; } + inline bool cdc_enabled() const { return cdc_enabled_; } + inline uint64_t cdc_mask() const { return cdc_mask_; } + inline uint64_t cdc_min_size() const { return cdc_min_size_; } + inline uint64_t cdc_max_size() const { return cdc_max_size_; } + inline SizeStatisticsLevel size_statistics_level() const { return size_statistics_level_; } @@ -856,7 +900,8 @@ class PARQUET_EXPORT WriterProperties { const ColumnProperties& 
default_column_properties, const std::unordered_map& column_properties, ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer, - std::vector sorting_columns) + std::vector sorting_columns, bool cdc_enabled, uint64_t cdc_mask, + uint64_t cdc_min_size, uint64_t cdc_max_size) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), @@ -871,7 +916,11 @@ class PARQUET_EXPORT WriterProperties { file_encryption_properties_(file_encryption_properties), sorting_columns_(std::move(sorting_columns)), default_column_properties_(default_column_properties), - column_properties_(column_properties) {} + column_properties_(column_properties), + cdc_enabled_(cdc_enabled), + cdc_mask_(cdc_mask), + cdc_min_size_(cdc_min_size), + cdc_max_size_(cdc_max_size) {} MemoryPool* pool_; int64_t dictionary_pagesize_limit_; @@ -891,6 +940,11 @@ class PARQUET_EXPORT WriterProperties { ColumnProperties default_column_properties_; std::unordered_map column_properties_; + + bool cdc_enabled_; + uint64_t cdc_mask_; + uint64_t cdc_min_size_; + uint64_t cdc_max_size_; }; PARQUET_EXPORT const std::shared_ptr& default_writer_properties(); diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 1e3c89e4e729b..4aae298afdef9 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -446,6 +446,11 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* disable_write_page_index() Builder* enable_page_checksum() Builder* disable_page_checksum() + Builder* enable_cdc() + Builder* disable_cdc() + Builder* cdc_mask(uint64_t mask) + Builder* cdc_min_size(uint64_t min_size) + Builder* cdc_max_size(uint64_t max_size) shared_ptr[WriterProperties] build() cdef cppclass ArrowWriterProperties: @@ -597,6 +602,7 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_page_checksum=*, sorting_columns=*, store_decimal_as_integer=*, + content_defined_chunking=* ) except * 
diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index ecc4e57091761..b11e525654a24 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1865,7 +1865,8 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_page_index=False, write_page_checksum=False, sorting_columns=None, - store_decimal_as_integer=False) except *: + store_decimal_as_integer=False, + content_defined_chunking=False) except *: """General writer properties""" cdef: @@ -2001,6 +2002,8 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( raise TypeError( "'column_encoding' should be a dictionary or a string") + # size limits + if data_page_size is not None: props.data_pagesize(data_page_size) @@ -2010,6 +2013,23 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( if dictionary_pagesize_limit is not None: props.dictionary_pagesize_limit(dictionary_pagesize_limit) + # content defined chunking + + if content_defined_chunking is False: + props.disable_cdc() + elif content_defined_chunking is True: + props.enable_cdc() + elif isinstance(content_defined_chunking, tuple): + mask, min_size, max_size = content_defined_chunking + props.enable_cdc() + props.cdc_mask(mask) + props.cdc_min_size(min_size) + props.cdc_max_size(max_size) + else: + raise ValueError( + "Unsupported value for content_defined_chunking: {0}" + .format(content_defined_chunking)) + # encryption if encryption_properties is not None: @@ -2183,7 +2203,8 @@ cdef class ParquetWriter(_Weakrefable): write_page_index=False, write_page_checksum=False, sorting_columns=None, - store_decimal_as_integer=False): + store_decimal_as_integer=False, + content_defined_chunking=False): cdef: shared_ptr[WriterProperties] properties shared_ptr[ArrowWriterProperties] arrow_properties @@ -2218,6 +2239,7 @@ cdef class ParquetWriter(_Weakrefable): write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, store_decimal_as_integer=store_decimal_as_integer, + 
content_defined_chunking=content_defined_chunking ) arrow_properties = _create_arrow_writer_properties( use_deprecated_int96_timestamps=use_deprecated_int96_timestamps, From 910b631bb660d17e4396ec568226fb43bfe956de Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 27 Jan 2025 20:27:34 +0100 Subject: [PATCH 02/48] always roll values --- cpp/src/parquet/column_chunker.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 0fb59be04c244..7bd0e51e19eb6 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -204,7 +204,7 @@ class GearHash { // return GetBoundaries(num_levels, leaf_array); // } - bool is_match; + bool def_match, rep_match, val_match; int64_t level_offset = 0; int64_t value_offset = 0; int64_t record_level_offset = 0; @@ -222,20 +222,23 @@ class GearHash { record_value_offset = value_offset; } - is_match = Roll(def_level) || Roll(rep_level); + def_match = Roll(def_level); + rep_match = Roll(rep_level); ++level_offset; if (has_rep_levels) { if (def_level >= level_info_.repeated_ancestor_def_level) { - is_match |= Roll(leaf_array.GetView(value_offset)); + val_match = Roll(leaf_array.GetView(value_offset)); ++value_offset; + } else { + val_match = false; } } else { - is_match |= Roll(leaf_array.GetView(value_offset)); + val_match = Roll(leaf_array.GetView(value_offset)); ++value_offset; } - if (Check(is_match)) { + if (Check(def_match || rep_match || val_match)) { auto levels_to_write = record_level_offset - prev_record_level_offset; if (levels_to_write > 0) { result.emplace_back(prev_record_level_offset, prev_record_value_offset, From 020059096168d8e39e63f9c15b75e1b899533fa3 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 27 Jan 2025 22:05:27 +0100 Subject: [PATCH 03/48] add faster paths for flat arrays --- cpp/src/parquet/column_chunker.h | 158 +++++++++++++++---------------- 1 file changed, 78 
insertions(+), 80 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 7bd0e51e19eb6..2e143a040e8a6 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -122,12 +122,11 @@ class GearHash { bool Roll(const T value) { constexpr size_t BYTE_WIDTH = sizeof(T); chunk_size_ += BYTE_WIDTH; - // if (chunk_size_ < min_len_) { - // return false; - // } + if (chunk_size_ < min_len_) { + return false; + } auto bytes = reinterpret_cast(&value); bool match = false; -#pragma unroll for (size_t i = 0; i < BYTE_WIDTH; ++i) { hash_ = (hash_ << 1) + GEAR_HASH_TABLE[bytes[i]]; if ((hash_ & mask_) == 0) { @@ -139,9 +138,9 @@ class GearHash { bool Roll(std::string_view value) { chunk_size_ += value.size(); - // if (chunk_size_ < min_len_) { - // return false; - // } + if (chunk_size_ < min_len_) { + return false; + } bool match = false; for (char c : value) { hash_ = (hash_ << 1) + GEAR_HASH_TABLE[static_cast(c)]; @@ -152,8 +151,8 @@ class GearHash { return match; } - bool Check(bool match) { - if ((match && (chunk_size_ >= min_len_)) || (chunk_size_ >= max_len_)) { + inline bool Check(bool match) { + if (match || (chunk_size_ >= max_len_)) { chunk_size_ = 0; return true; } else { @@ -161,37 +160,6 @@ class GearHash { } } - // bool Check(bool match) { - // if ((match && (chunk_size_ >= min_len_)) || (chunk_size_ >= max_len_)) { - // chunk_size_ = 0; - // return true; - // } else { - // return false; - // } - // } - - // template - // const std::vector> GetBoundaries( - // int64_t num_levels, const T& leaf_array) { - // std::vector> result; - - // int64_t offset = 0; - // int64_t prev_offset = 0; - - // while (offset < num_levels) { - // if (Check(Roll(leaf_array.GetView(offset)))) { - // result.push_back(std::make_tuple(prev_offset, prev_offset, offset - - // prev_offset)); prev_offset = offset; - // } - // ++offset; - // } - // if (prev_offset < num_levels) { - // 
result.push_back(std::make_tuple(prev_offset, prev_offset, num_levels - - // prev_offset)); - // } - // return result; - // } - template const std::vector> GetBoundaries( const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, @@ -199,61 +167,91 @@ class GearHash { std::vector> result; bool has_def_levels = level_info_.def_level > 0; bool has_rep_levels = level_info_.rep_level > 0; - // bool no_nulls = leaf_array.null_count() == 0; - // if (!has_rep_levels && !maybe_parent_nulls && no_nulls) { - // return GetBoundaries(num_levels, leaf_array); - // } - - bool def_match, rep_match, val_match; - int64_t level_offset = 0; - int64_t value_offset = 0; - int64_t record_level_offset = 0; - int64_t record_value_offset = 0; - int64_t prev_record_level_offset = 0; - int64_t prev_record_value_offset = 0; - while (level_offset < num_levels) { - int16_t def_level = has_def_levels ? def_levels[level_offset] : 0; - int16_t rep_level = has_rep_levels ? rep_levels[level_offset] : 0; - - if (rep_level == 0) { - // record boundary - record_level_offset = level_offset; - record_value_offset = value_offset; + if (!has_rep_levels && !has_def_levels) { + // fastest path for non-repeated non-null data + bool val_match; + int64_t offset = 0; + int64_t prev_offset = 0; + while (offset < num_levels) { + val_match = Roll(leaf_array.GetView(offset)); + ++offset; + if (Check(val_match)) { + result.emplace_back(prev_offset, prev_offset, offset - prev_offset); + prev_offset = offset; + } } + if (prev_offset < num_levels) { + result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); + } + } else if (!has_rep_levels) { + // non-repeated data possibly with nulls + bool def_match, val_match; + int64_t offset = 0; + int64_t prev_offset = 0; + while (offset < num_levels) { + def_match = Roll(def_levels[offset]); + val_match = Roll(leaf_array.GetView(offset)); + ++offset; + if (Check(def_match || val_match)) { + result.emplace_back(prev_offset, prev_offset, offset - 
prev_offset); + prev_offset = offset; + } + } + if (prev_offset < num_levels) { + result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); + } + } else { + // repeated data possibly with nulls + bool def_match, rep_match, val_match; + int16_t def_level; + int16_t rep_level; + int64_t level_offset = 0; + int64_t value_offset = 0; + int64_t record_level_offset = 0; + int64_t record_value_offset = 0; + int64_t prev_record_level_offset = 0; + int64_t prev_record_value_offset = 0; - def_match = Roll(def_level); - rep_match = Roll(rep_level); - ++level_offset; + while (level_offset < num_levels) { + def_level = def_levels[level_offset]; + rep_level = rep_levels[level_offset]; + ++level_offset; - if (has_rep_levels) { + if (rep_level == 0) { + // record boundary + record_level_offset = level_offset; + record_value_offset = value_offset; + } + + def_match = Roll(def_level); + rep_match = Roll(rep_level); if (def_level >= level_info_.repeated_ancestor_def_level) { val_match = Roll(leaf_array.GetView(value_offset)); ++value_offset; } else { val_match = false; } - } else { - val_match = Roll(leaf_array.GetView(value_offset)); - ++value_offset; - } - if (Check(def_match || rep_match || val_match)) { - auto levels_to_write = record_level_offset - prev_record_level_offset; - if (levels_to_write > 0) { - result.emplace_back(prev_record_level_offset, prev_record_value_offset, - levels_to_write); - prev_record_level_offset = record_level_offset; - prev_record_value_offset = record_value_offset; + if (Check(def_match || rep_match || val_match)) { + auto levels_to_write = record_level_offset - prev_record_level_offset; + if (levels_to_write > 0) { + result.emplace_back(prev_record_level_offset, prev_record_value_offset, + levels_to_write); + prev_record_level_offset = record_level_offset; + prev_record_value_offset = record_value_offset; + } } } - } - auto levels_to_write = num_levels - prev_record_level_offset; - if (levels_to_write > 0) { - 
result.emplace_back(prev_record_level_offset, prev_record_value_offset, - levels_to_write); + auto levels_to_write = num_levels - prev_record_level_offset; + if (levels_to_write > 0) { + result.emplace_back(prev_record_level_offset, prev_record_value_offset, + levels_to_write); + } + return result; } + return result; } From 44d4131da263b698b7cc666c311d11cfe120b390 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 30 Jan 2025 17:25:01 +0100 Subject: [PATCH 04/48] normalize chunk sizes according to fastcdc algorithm --- cpp/src/parquet/CMakeLists.txt | 1 + cpp/src/parquet/column_chunker.h | 178 ++++++++++++++----------- cpp/src/parquet/column_chunker_test.cc | 16 +++ cpp/src/parquet/column_writer.cc | 6 +- cpp/src/parquet/properties.h | 20 +-- python/pyarrow/_parquet.pxd | 2 +- python/pyarrow/_parquet.pyx | 4 +- 7 files changed, 131 insertions(+), 96 deletions(-) create mode 100644 cpp/src/parquet/column_chunker_test.cc diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 83eb522484ba0..245e3ae552d04 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -390,6 +390,7 @@ add_parquet_test(reader-test add_parquet_test(writer-test SOURCES + column_chunker_test.cc column_writer_test.cc file_serialize_test.cc stream_writer_test.cc) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 2e143a040e8a6..478ebb2eecf71 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -27,77 +27,74 @@ using arrow::internal::checked_cast; namespace parquet { namespace internal { -// Constants -const uint64_t GEAR_HASH_TABLE[] = { - 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, - 0x368f573e8b7a31b7, 0x1dc636dce936d94b, 0x207a4c4e5554d5b6, 0xa474b34628239acb, - 0x3b06a83e1ca3b912, 0x90e78d6c2f02baf7, 0xe1c92df7150d9a8a, 0x8e95053a1086d3ad, - 0x5a2ef4f1b83a0722, 0xa50fac949f807fae, 0x0e7303eb80d8d681, 0x99b07edc1570ad0f, - 
0x689d2fb555fd3076, 0x00005082119ea468, 0xc4b08306a88fcc28, 0x3eb0678af6374afd, - 0xf19f87ab86ad7436, 0xf2129fbfbe6bc736, 0x481149575c98a4ed, 0x0000010695477bc5, - 0x1fba37801a9ceacc, 0x3bf06fd663a49b6d, 0x99687e9782e3874b, 0x79a10673aa50d8e3, - 0xe4accf9e6211f420, 0x2520e71f87579071, 0x2bd5d3fd781a8a9b, 0x00de4dcddd11c873, - 0xeaa9311c5a87392f, 0xdb748eb617bc40ff, 0xaf579a8df620bf6f, 0x86a6e5da1b09c2b1, - 0xcc2fc30ac322a12e, 0x355e2afec1f74267, 0x2d99c8f4c021a47b, 0xbade4b4a9404cfc3, - 0xf7b518721d707d69, 0x3286b6587bf32c20, 0x0000b68886af270c, 0xa115d6e4db8a9079, - 0x484f7e9c97b2e199, 0xccca7bb75713e301, 0xbf2584a62bb0f160, 0xade7e813625dbcc8, - 0x000070940d87955a, 0x8ae69108139e626f, 0xbd776ad72fde38a2, 0xfb6b001fc2fcc0cf, - 0xc7a474b8e67bc427, 0xbaf6f11610eb5d58, 0x09cb1f5b6de770d1, 0xb0b219e6977d4c47, - 0x00ccbc386ea7ad4a, 0xcc849d0adf973f01, 0x73a3ef7d016af770, 0xc807d2d386bdbdfe, - 0x7f2ac9966c791730, 0xd037a86bc6c504da, 0xf3f17c661eaa609d, 0xaca626b04daae687, - 0x755a99374f4a5b07, 0x90837ee65b2caede, 0x6ee8ad93fd560785, 0x0000d9e11053edd8, - 0x9e063bb2d21cdbd7, 0x07ab77f12a01d2b2, 0xec550255e6641b44, 0x78fb94a8449c14c6, - 0xc7510e1bc6c0f5f5, 0x0000320b36e4cae3, 0x827c33262c8b1a2d, 0x14675f0b48ea4144, - 0x267bd3a6498deceb, 0xf1916ff982f5035e, 0x86221b7ff434fb88, 0x9dbecee7386f49d8, - 0xea58f8cac80f8f4a, 0x008d198692fc64d8, 0x6d38704fbabf9a36, 0xe032cb07d1e7be4c, - 0x228d21f6ad450890, 0x635cb1bfc02589a5, 0x4620a1739ca2ce71, 0xa7e7dfe3aae5fb58, - 0x0c10ca932b3c0deb, 0x2727fee884afed7b, 0xa2df1c6df9e2ab1f, 0x4dcdd1ac0774f523, - 0x000070ffad33e24e, 0xa2ace87bc5977816, 0x9892275ab4286049, 0xc2861181ddf18959, - 0xbb9972a042483e19, 0xef70cd3766513078, 0x00000513abfc9864, 0xc058b61858c94083, - 0x09e850859725e0de, 0x9197fb3bf83e7d94, 0x7e1e626d12b64bce, 0x520c54507f7b57d1, - 0xbee1797174e22416, 0x6fd9ac3222e95587, 0x0023957c9adfbf3e, 0xa01c7d7e234bbe15, - 0xaba2c758b8a38cbb, 0x0d1fa0ceec3e2b30, 0x0bb6a58b7e60b991, 0x4333dd5b9fa26635, - 0xc2fd3b7d4001c1a3, 
0xfb41802454731127, 0x65a56185a50d18cb, 0xf67a02bd8784b54f, - 0x696f11dd67e65063, 0x00002022fca814ab, 0x8cd6be912db9d852, 0x695189b6e9ae8a57, - 0xee9453b50ada0c28, 0xd8fc5ea91a78845e, 0xab86bf191a4aa767, 0x0000c6b5c86415e5, - 0x267310178e08a22e, 0xed2d101b078bca25, 0x3b41ed84b226a8fb, 0x13e622120f28dc06, - 0xa315f5ebfb706d26, 0x8816c34e3301bace, 0xe9395b9cbb71fdae, 0x002ce9202e721648, - 0x4283db1d2bb3c91c, 0xd77d461ad2b1a6a5, 0xe2ec17e46eeb866b, 0xb8e0be4039fbc47c, - 0xdea160c4d5299d04, 0x7eec86c8d28c3634, 0x2119ad129f98a399, 0xa6ccf46b61a283ef, - 0x2c52cedef658c617, 0x2db4871169acdd83, 0x0000f0d6f39ecbe9, 0x3dd5d8c98d2f9489, - 0x8a1872a22b01f584, 0xf282a4c40e7b3cf2, 0x8020ec2ccb1ba196, 0x6693b6e09e59e313, - 0x0000ce19cc7c83eb, 0x20cb5735f6479c3b, 0x762ebf3759d75a5b, 0x207bfe823d693975, - 0xd77dc112339cd9d5, 0x9ba7834284627d03, 0x217dc513e95f51e9, 0xb27b1a29fc5e7816, - 0x00d5cd9831bb662d, 0x71e39b806d75734c, 0x7e572af006fb1a23, 0xa2734f2f6ae91f85, - 0xbf82c6b5022cddf2, 0x5c3beac60761a0de, 0xcdc893bb47416998, 0x6d1085615c187e01, - 0x77f8ae30ac277c5d, 0x917c6b81122a2c91, 0x5b75b699add16967, 0x0000cf6ae79a069b, - 0xf3c40afa60de1104, 0x2063127aa59167c3, 0x621de62269d1894d, 0xd188ac1de62b4726, - 0x107036e2154b673c, 0x0000b85f28553a1d, 0xf2ef4e4c18236f3d, 0xd9d6de6611b9f602, - 0xa1fc7955fb47911c, 0xeb85fd032f298dbd, 0xbe27502fb3befae1, 0xe3034251c4cd661e, - 0x441364d354071836, 0x0082b36c75f2983e, 0xb145910316fa66f0, 0x021c069c9847caf7, - 0x2910dfc75a4b5221, 0x735b353e1c57a8b5, 0xce44312ce98ed96c, 0xbc942e4506bdfa65, - 0xf05086a71257941b, 0xfec3b215d351cead, 0x00ae1055e0144202, 0xf54b40846f42e454, - 0x00007fd9c8bcbcc8, 0xbfbd9ef317de9bfe, 0xa804302ff2854e12, 0x39ce4957a5e5d8d4, - 0xffb9e2a45637ba84, 0x55b9ad1d9ea0818b, 0x00008acbf319178a, 0x48e2bfc8d0fbfb38, - 0x8be39841e848b5e8, 0x0e2712160696a08b, 0xd51096e84b44242a, 0x1101ba176792e13a, - 0xc22e770f4531689d, 0x1689eff272bbc56c, 0x00a92a197f5650ec, 0xbc765990bda1784e, - 0xc61441e392fcb8ae, 0x07e13a2ced31e4a0, 
0x92cbe984234e9d4d, 0x8f4ff572bb7d8ac5, - 0x0b9670c00b963bd0, 0x62955a581a03eb01, 0x645f83e5ea000254, 0x41fce516cd88f299, - 0xbbda9748da7a98cf, 0x0000aab2fe4845fa, 0x19761b069bf56555, 0x8b8f5e8343b6ad56, - 0x3e5d1cfd144821d9, 0xec5c1e2ca2b0cd8f, 0xfaf7e0fea7fbb57f, 0x000000d3ba12961b, - 0xda3f90178401b18e, 0x70ff906de33a5feb, 0x0527d5a7c06970e7, 0x22d8e773607c13e9, - 0xc9ab70df643c3bac, 0xeda4c6dc8abe12e3, 0xecef1f410033e78a, 0x0024c2b274ac72cb, - 0x06740d954fa900b4, 0x1d7a299b323d6304, 0xb3c37cb298cbead5, 0xc986e3c76178739b, - 0x9fabea364b46f58a, 0x6da214c5af85cc56, 0x17a43ed8b7a38f84, 0x6eccec511d9adbeb, - 0xf9cab30913335afb, 0x4a5e60c5f415eed2, 0x00006967503672b4, 0x9da51d121454bb87, - 0x84321e13b9bbc816, 0xfb3d6fb6ab2fdd8d, 0x60305eed8e160a8d, 0xcbbf4b14e9946ce8, - 0x00004f63381b10c3, 0x07d5b7816fcc4e10, 0xe5a536726a6a8155, 0x57afb23447a07fdd, - 0x18f346f7abc9d394, 0x636dc655d61ad33d, 0xcc8bab4939f7f3f6, 0x63c7a906c1dd187b}; +const uint64_t GEAR_TABLE[256] = { + 0x3b5d3c7d207e37dc, 0x784d68ba91123086, 0xcd52880f882e7298, 0xeacf8e4e19fdcca7, + 0xc31f385dfbd1632b, 0x1d5f27001e25abe6, 0x83130bde3c9ad991, 0xc4b225676e9b7649, + 0xaa329b29e08eb499, 0xb67fcbd21e577d58, 0x0027baaada2acf6b, 0xe3ef2d5ac73c2226, + 0x0890f24d6ed312b7, 0xa809e036851d7c7e, 0xf0a6fe5e0013d81b, 0x1d026304452cec14, + 0x03864632648e248f, 0xcdaacf3dcd92b9b4, 0xf5e012e63c187856, 0x8862f9d3821c00b6, + 0xa82f7338750f6f8a, 0x1e583dc6c1cb0b6f, 0x7a3145b69743a7f1, 0xabb20fee404807eb, + 0xb14b3cfe07b83a5d, 0xb9dc27898adb9a0f, 0x3703f5e91baa62be, 0xcf0bb866815f7d98, + 0x3d9867c41ea9dcd3, 0x1be1fa65442bf22c, 0x14300da4c55631d9, 0xe698e9cbc6545c99, + 0x4763107ec64e92a5, 0xc65821fc65696a24, 0x76196c064822f0b7, 0x485be841f3525e01, + 0xf652bc9c85974ff5, 0xcad8352face9e3e9, 0x2a6ed1dceb35e98e, 0xc6f483badc11680f, + 0x3cfd8c17e9cf12f1, 0x89b83c5e2ea56471, 0xae665cfd24e392a9, 0xec33c4e504cb8915, + 0x3fb9b15fc9fe7451, 0xd7fd1fd1945f2195, 0x31ade0853443efd8, 0x255efc9863e1e2d2, + 0x10eab6008d5642cf, 
0x46f04863257ac804, 0xa52dc42a789a27d3, 0xdaaadf9ce77af565, + 0x6b479cd53d87febb, 0x6309e2d3f93db72f, 0xc5738ffbaa1ff9d6, 0x6bd57f3f25af7968, + 0x67605486d90d0a4a, 0xe14d0b9663bfbdae, 0xb7bbd8d816eb0414, 0xdef8a4f16b35a116, + 0xe7932d85aaaffed6, 0x08161cbae90cfd48, 0x855507beb294f08b, 0x91234ea6ffd399b2, + 0xad70cf4b2435f302, 0xd289a97565bc2d27, 0x8e558437ffca99de, 0x96d2704b7115c040, + 0x0889bbcdfc660e41, 0x5e0d4e67dc92128d, 0x72a9f8917063ed97, 0x438b69d409e016e3, + 0xdf4fed8a5d8a4397, 0x00f41dcf41d403f7, 0x4814eb038e52603f, 0x9dafbacc58e2d651, + 0xfe2f458e4be170af, 0x4457ec414df6a940, 0x06e62f1451123314, 0xbd1014d173ba92cc, + 0xdef318e25ed57760, 0x9fea0de9dfca8525, 0x459de1e76c20624b, 0xaeec189617e2d666, + 0x126a2c06ab5a83cb, 0xb1321532360f6132, 0x65421503dbb40123, 0x2d67c287ea089ab3, + 0x6c93bff5a56bd6b6, 0x4ffb2036cab6d98d, 0xce7b785b1be7ad4f, 0xedb42ef6189fd163, + 0xdc905288703988f6, 0x365f9c1d2c691884, 0xc640583680d99bfe, 0x3cd4624c07593ec6, + 0x7f1ea8d85d7c5805, 0x014842d480b57149, 0x0b649bcb5a828688, 0xbcd5708ed79b18f0, + 0xe987c862fbd2f2f0, 0x982731671f0cd82c, 0xbaf13e8b16d8c063, 0x8ea3109cbd951bba, + 0xd141045bfb385cad, 0x2acbc1a0af1f7d30, 0xe6444d89df03bfdf, 0xa18cc771b8188ff9, + 0x9834429db01c39bb, 0x214add07fe086a1f, 0x8f07c19b1f6b3ff9, 0x56a297b1bf4ffe55, + 0x94d558e493c54fc7, 0x40bfc24c764552cb, 0x931a706f8a8520cb, 0x32229d322935bd52, + 0x2560d0f5dc4fefaf, 0x9dbcc48355969bb6, 0x0fd81c3985c0b56a, 0xe03817e1560f2bda, + 0xc1bb4f81d892b2d5, 0xb0c4864f4e28d2d7, 0x3ecc49f9d9d6c263, 0x51307e99b52ba65e, + 0x8af2b688da84a752, 0xf5d72523b91b20b6, 0x6d95ff1ff4634806, 0x562f21555458339a, + 0xc0ce47f889336346, 0x487823e5089b40d8, 0xe4727c7ebc6d9592, 0x5a8f7277e94970ba, + 0xfca2f406b1c8bb50, 0x5b1f8a95f1791070, 0xd304af9fc9028605, 0x5440ab7fc930e748, + 0x312d25fbca2ab5a1, 0x10f4a4b234a4d575, 0x90301d55047e7473, 0x3b6372886c61591e, + 0x293402b77c444e06, 0x451f34a4d3e97dd7, 0x3158d814d81bc57b, 0x034942425b9bda69, + 0xe2032ff9e532d9bb, 0x62ae066b8b2179e5, 
0x9545e10c2f8d71d8, 0x7ff7483eb2d23fc0, + 0x00945fcebdc98d86, 0x8764bbbe99b26ca2, 0x1b1ec62284c0bfc3, 0x58e0fcc4f0aa362b, + 0x5f4abefa878d458d, 0xfd74ac2f9607c519, 0xa4e3fb37df8cbfa9, 0xbf697e43cac574e5, + 0x86f14a3f68f4cd53, 0x24a23d076f1ce522, 0xe725cd8048868cc8, 0xbf3c729eb2464362, + 0xd8f6cd57b3cc1ed8, 0x6329e52425541577, 0x62aa688ad5ae1ac0, 0x0a242566269bf845, + 0x168b1a4753aca74b, 0xf789afefff2e7e3c, 0x6c3362093b6fccdb, 0x4ce8f50bd28c09b2, + 0x006a2db95ae8aa93, 0x975b0d623c3d1a8c, 0x18605d3935338c5b, 0x5bb6f6136cad3c71, + 0x0f53a20701f8d8a6, 0xab8c5ad2e7e93c67, 0x40b5ac5127acaa29, 0x8c7bf63c2075895f, + 0x78bd9f7e014a805c, 0xb2c9e9f4f9c8c032, 0xefd6049827eb91f3, 0x2be459f482c16fbd, + 0xd92ce0c5745aaa8c, 0x0aaa8fb298d965b9, 0x2b37f92c6c803b15, 0x8c54a5e94e0f0e78, + 0x95f9b6e90c0a3032, 0xe7939faa436c7874, 0xd16bfe8f6a8a40c9, 0x44982b86263fd2fa, + 0xe285fb39f984e583, 0x779a8df72d7619d3, 0xf2d79a8de8d5dd1e, 0xd1037354d66684e2, + 0x004c82a4e668a8e5, 0x31d40a7668b044e6, 0xd70578538bd02c11, 0xdb45431078c5f482, + 0x977121bb7f6a51ad, 0x73d5ccbd34eff8dd, 0xe437a07d356e17cd, 0x47b2782043c95627, + 0x9fb251413e41d49a, 0xccd70b60652513d3, 0x1c95b31e8a1b49b2, 0xcae73dfd1bcb4c1b, + 0x34d98331b1f5b70f, 0x784e39f22338d92f, 0x18613d4a064df420, 0xf1d8dae25f0bcebe, + 0x33f77c15ae855efc, 0x3c88b3b912eb109c, 0x956a2ec96bafeea5, 0x1aa005b5e0ad0e87, + 0x5500d70527c4bb8e, 0xe36c57196421cc44, 0x13c4d286cc36ee39, 0x5654a23d818b2a81, + 0x77b1dc13d161abdc, 0x734f44de5f8d5eb5, 0x60717e174a6c89a2, 0xd47d9649266a211e, + 0x5b13a4322bb69e90, 0xf7669609f8b5fc3c, 0x21e6ac55bedcdac9, 0x9b56b62b61166dea, + 0xf48f66b939797e9c, 0x35f332f9c0e6ae9a, 0xcc733f6a9a878db0, 0x3da161e41cc108c2, + 0xb7d74ae535914d51, 0x4d493b0b11d36469, 0xce264d1dfba9741a, 0xa9d1f2dc7436dc06, + 0x70738016604c2a27, 0x231d36e96e93f3d5, 0x7666881197838d19, 0x4a2a83090aaad40c, + 0xf1e761591668b35d, 0x7363236497f730a7, 0x301080e37379dd4d, 0x502dea2971827042, + 0xc2c5eb858f32625f, 0x786afb9edfafbdff, 0xdaee0d868490b2a4, 
0x617366b3268609f6, + 0xae0e35a0fe46173e, 0xd1a07de93e824f11, 0x079b8b115ea4cca8, 0x93a99274558faebb, + 0xfb1e6e22e08a03b3, 0xea635fdba3698dd0, 0xcf53659328503a5c, 0xcde3b31e6fd5d780, + 0x8e3e4221d3614413, 0xef14d0d86bf1a22c, 0xe1d830d3f16c5ddb, 0xaabd2b2a451504e1}; -const uint64_t MASK = 0xffff00000000000; -// const int MIN_LEN = 65536 / 8; -// const int MAX_LEN = 65536 * 2; const int64_t MIN_LEN = 256 * 1024; +const int64_t AVG_LEN = 1 * 1024 * 1024; const int64_t MAX_LEN = 2 * 1024 * 1024; // create a fake null array class with a GetView method returning 0 always @@ -110,26 +107,40 @@ class FakeNullArray { int64_t null_count() const { return 0; } }; -class GearHash { +static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { + size_t mask_bits = std::log2(avg_len); + size_t effective_bits = mask_bits + bit_adjustment; + return ((1ULL << effective_bits) - 1) << (64 - effective_bits); +} + +class FastCDC { public: - GearHash(const LevelInfo& level_info, uint64_t mask, uint64_t min_len, uint64_t max_len) + FastCDC(const LevelInfo& level_info, uint64_t min_len, uint64_t avg_len, + uint64_t max_len, uint8_t normalization_level = 1) : level_info_(level_info), - mask_(mask == 0 ? MASK : mask), min_len_(min_len == 0 ? MIN_LEN : min_len), - max_len_(max_len == 0 ? MAX_LEN : max_len) {} + avg_len_(avg_len == 0 ? AVG_LEN : avg_len), + max_len_(max_len == 0 ? 
MAX_LEN : max_len), + mask_s_(GetMask(avg_len_, -normalization_level)), + mask_l_(GetMask(avg_len_, +normalization_level)) {} template bool Roll(const T value) { constexpr size_t BYTE_WIDTH = sizeof(T); chunk_size_ += BYTE_WIDTH; + uint64_t mask; if (chunk_size_ < min_len_) { return false; + } else if (chunk_size_ < avg_len_) { + mask = mask_l_; + } else { + mask = mask_s_; } auto bytes = reinterpret_cast(&value); bool match = false; for (size_t i = 0; i < BYTE_WIDTH; ++i) { - hash_ = (hash_ << 1) + GEAR_HASH_TABLE[bytes[i]]; - if ((hash_ & mask_) == 0) { + hash_ = (hash_ << 1) + GEAR_TABLE[bytes[i]]; + if ((hash_ & mask) == 0) { match = true; } } @@ -138,13 +149,18 @@ class GearHash { bool Roll(std::string_view value) { chunk_size_ += value.size(); + uint64_t mask; if (chunk_size_ < min_len_) { return false; + } else if (chunk_size_ < avg_len_) { + mask = mask_l_; + } else { + mask = mask_s_; } bool match = false; for (char c : value) { - hash_ = (hash_ << 1) + GEAR_HASH_TABLE[static_cast(c)]; - if ((hash_ & mask_) == 0) { + hash_ = (hash_ << 1) + GEAR_TABLE[static_cast(c)]; + if ((hash_ & mask) == 0) { match = true; } } @@ -303,9 +319,11 @@ class GearHash { private: const internal::LevelInfo& level_info_; - uint64_t mask_ = MASK; - uint64_t min_len_; - uint64_t max_len_; + const uint64_t min_len_; + const uint64_t avg_len_; + const uint64_t max_len_; + const uint64_t mask_s_; + const uint64_t mask_l_; uint64_t hash_ = 0; uint64_t chunk_size_ = 0; }; diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc new file mode 100644 index 0000000000000..b248758bc120c --- /dev/null +++ b/cpp/src/parquet/column_chunker_test.cc @@ -0,0 +1,16 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 8f1e13f274154..9491ba70a112a 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -754,8 +754,8 @@ class ColumnWriterImpl { fallback_(false), definition_levels_sink_(allocator_), repetition_levels_sink_(allocator_), - content_defined_chunker_(level_info_, properties->cdc_mask(), - properties->cdc_min_size(), properties->cdc_max_size()) { + content_defined_chunker_(level_info_, properties->cdc_min_size(), + properties->cdc_avg_size(), properties->cdc_max_size()) { definition_levels_rle_ = std::static_pointer_cast(AllocateBuffer(allocator_, 0)); repetition_levels_rle_ = @@ -895,7 +895,7 @@ class ColumnWriterImpl { std::vector> data_pages_; - internal::GearHash content_defined_chunker_; + internal::FastCDC content_defined_chunker_; private: void InitSinks() { diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 2eba39bd8cc50..43586658d99ca 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -263,7 +263,7 @@ class PARQUET_EXPORT WriterProperties { page_checksum_enabled_(false), size_statistics_level_(DEFAULT_SIZE_STATISTICS_LEVEL), cdc_enabled_(false), - cdc_mask_(0), + cdc_avg_size_(0), cdc_min_size_(0), cdc_max_size_(0) {} @@ -297,8 +297,8 @@ class PARQUET_EXPORT WriterProperties { return this; } - Builder* 
cdc_mask(uint64_t mask) { - cdc_mask_ = mask; + Builder* cdc_avg_size(uint64_t avg_size) { + cdc_avg_size_ = avg_size; return this; } @@ -734,8 +734,8 @@ class PARQUET_EXPORT WriterProperties { pagesize_, version_, created_by_, page_checksum_enabled_, size_statistics_level_, std::move(file_encryption_properties_), default_column_properties_, column_properties, data_page_version_, - store_decimal_as_integer_, std::move(sorting_columns_), cdc_enabled_, cdc_mask_, - cdc_min_size_, cdc_max_size_)); + store_decimal_as_integer_, std::move(sorting_columns_), cdc_enabled_, + cdc_avg_size_, cdc_min_size_, cdc_max_size_)); } private: @@ -766,7 +766,7 @@ class PARQUET_EXPORT WriterProperties { std::unordered_map page_index_enabled_; bool cdc_enabled_; - uint64_t cdc_mask_; + uint64_t cdc_avg_size_; uint64_t cdc_min_size_; uint64_t cdc_max_size_; }; @@ -794,7 +794,7 @@ class PARQUET_EXPORT WriterProperties { inline bool page_checksum_enabled() const { return page_checksum_enabled_; } inline bool cdc_enabled() const { return cdc_enabled_; } - inline uint64_t cdc_mask() const { return cdc_mask_; } + inline uint64_t cdc_avg_size() const { return cdc_avg_size_; } inline uint64_t cdc_min_size() const { return cdc_min_size_; } inline uint64_t cdc_max_size() const { return cdc_max_size_; } @@ -900,7 +900,7 @@ class PARQUET_EXPORT WriterProperties { const ColumnProperties& default_column_properties, const std::unordered_map& column_properties, ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer, - std::vector sorting_columns, bool cdc_enabled, uint64_t cdc_mask, + std::vector sorting_columns, bool cdc_enabled, uint64_t cdc_avg_size, uint64_t cdc_min_size, uint64_t cdc_max_size) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), @@ -918,7 +918,7 @@ class PARQUET_EXPORT WriterProperties { default_column_properties_(default_column_properties), column_properties_(column_properties), cdc_enabled_(cdc_enabled), - cdc_mask_(cdc_mask), + 
cdc_avg_size_(cdc_avg_size), cdc_min_size_(cdc_min_size), cdc_max_size_(cdc_max_size) {} @@ -942,7 +942,7 @@ class PARQUET_EXPORT WriterProperties { std::unordered_map column_properties_; bool cdc_enabled_; - uint64_t cdc_mask_; + uint64_t cdc_avg_size_; uint64_t cdc_min_size_; uint64_t cdc_max_size_; }; diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 4aae298afdef9..5b9d43c62a45d 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -448,7 +448,7 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* disable_page_checksum() Builder* enable_cdc() Builder* disable_cdc() - Builder* cdc_mask(uint64_t mask) + Builder* cdc_avg_size(uint64_t avg_size) Builder* cdc_min_size(uint64_t min_size) Builder* cdc_max_size(uint64_t max_size) shared_ptr[WriterProperties] build() diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index b11e525654a24..a6a0bbbaad6c4 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -2020,9 +2020,9 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( elif content_defined_chunking is True: props.enable_cdc() elif isinstance(content_defined_chunking, tuple): - mask, min_size, max_size = content_defined_chunking + min_size, avg_size, max_size = content_defined_chunking props.enable_cdc() - props.cdc_mask(mask) + props.cdc_avg_size(avg_size) props.cdc_min_size(min_size) props.cdc_max_size(max_size) else: From d7bab510cb748505bab2b358ffe7c1092c4445d3 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 30 Jan 2025 20:10:56 +0100 Subject: [PATCH 05/48] missing header and fix level_offset incrementation --- cpp/src/parquet/column_chunker.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 478ebb2eecf71..b7b17e2653647 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -17,6 +17,7 @@ #pragma 
once +#include #include #include #include "arrow/array.h" @@ -108,7 +109,7 @@ class FakeNullArray { }; static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { - size_t mask_bits = std::log2(avg_len); + size_t mask_bits = static_cast(std::floor(std::log2(avg_len))); size_t effective_bits = mask_bits + bit_adjustment; return ((1ULL << effective_bits) - 1) << (64 - effective_bits); } @@ -232,13 +233,11 @@ class FastCDC { while (level_offset < num_levels) { def_level = def_levels[level_offset]; rep_level = rep_levels[level_offset]; - ++level_offset; - if (rep_level == 0) { - // record boundary record_level_offset = level_offset; record_value_offset = value_offset; } + ++level_offset; def_match = Roll(def_level); rep_match = Roll(rep_level); From 4a1fcdbd7ee28770bc23bfdf5bece871f641c186 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 31 Jan 2025 01:10:17 +0100 Subject: [PATCH 06/48] don't use normalization by default --- cpp/src/parquet/column_chunker.h | 36 ++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index b7b17e2653647..d6ee639409924 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -21,6 +21,7 @@ #include #include #include "arrow/array.h" +#include "arrow/util/logging.h" #include "parquet/level_conversion.h" using arrow::internal::checked_cast; @@ -28,6 +29,21 @@ using arrow::internal::checked_cast; namespace parquet { namespace internal { +// const uint64_t MASK_TABLE[48] = { +// 0x8000000000000000, 0x8000008000000000, 0x8000800080000000, 0x8008008008000000, +// 0x8040100802000000, 0x8080808080800000, 0x8204081020400000, 0x8208208208200000, +// 0x8420842084200000, 0x8842108842100000, 0x8884442221100000, 0x8888888888880000, +// 0x9112224448880000, 0x9224489224480000, 0x9248924892480000, 0x9249249249240000, +// 0xa492924949240000, 0xa4a4a4a4a4a40000, 0xa5294a5294a40000, 0xa94a94a94a940000, 
+// 0xaa54aa54aa540000, 0xaaa554aaa5540000, 0xaaaaaa5555540000, 0xaaaaaaaaaaaa0000, +// 0xd55555aaaaaa0000, 0xd55aaad55aaa0000, 0xd5aad5aad5aa0000, 0xd6ad6ad6ad6a0000, +// 0xdad6b5ad6b5a0000, 0xdadadadadada0000, 0xdb6d6db6b6da0000, 0xdb6db6db6db60000, +// 0xedb6edb6edb60000, 0xeddbb6eddbb60000, 0xeeedddbbb7760000, 0xeeeeeeeeeeee0000, +// 0xf77bbbdddeee0000, 0xf7bdeef7bdee0000, 0xfbdefbdefbde0000, 0xfbefbefbefbe0000, +// 0xfdfbf7efdfbe0000, 0xfefefefefefe0000, 0xffbfeff7fdfe0000, 0xffeffeffeffe0000, +// 0xfffefffefffe0000, 0xfffffefffffe0000, 0xfffffffffffe0000, 0xffffffffffff0000 +// }; + const uint64_t GEAR_TABLE[256] = { 0x3b5d3c7d207e37dc, 0x784d68ba91123086, 0xcd52880f882e7298, 0xeacf8e4e19fdcca7, 0xc31f385dfbd1632b, 0x1d5f27001e25abe6, 0x83130bde3c9ad991, 0xc4b225676e9b7649, @@ -98,6 +114,10 @@ const int64_t MIN_LEN = 256 * 1024; const int64_t AVG_LEN = 1 * 1024 * 1024; const int64_t MAX_LEN = 2 * 1024 * 1024; +// const int64_t MIN_LEN = 512 * 1024; +// const int64_t AVG_LEN = 2 * MIN_LEN; +// const int64_t MAX_LEN = 2 * AVG_LEN; + // create a fake null array class with a GetView method returning 0 always class FakeNullArray { public: @@ -108,8 +128,20 @@ class FakeNullArray { int64_t null_count() const { return 0; } }; +// static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { +// size_t mask_bits = static_cast(std::floor(std::log2(avg_len))); +// size_t effective_bits = mask_bits + bit_adjustment; +// return MASK_TABLE[effective_bits]; +// } + +// static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { +// size_t mask_bits = static_cast(std::floor(std::log2(avg_len))); +// size_t effective_bits = mask_bits + bit_adjustment; +// return ((1ULL << effective_bits) - 1) << (64 - effective_bits); +// } + static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { - size_t mask_bits = static_cast(std::floor(std::log2(avg_len))); + size_t mask_bits = 16; size_t effective_bits = mask_bits + bit_adjustment; return ((1ULL << 
effective_bits) - 1) << (64 - effective_bits); } @@ -117,7 +149,7 @@ static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { class FastCDC { public: FastCDC(const LevelInfo& level_info, uint64_t min_len, uint64_t avg_len, - uint64_t max_len, uint8_t normalization_level = 1) + uint64_t max_len, uint8_t normalization_level = 0) : level_info_(level_info), min_len_(min_len == 0 ? MIN_LEN : min_len), avg_len_(avg_len == 0 ? AVG_LEN : avg_len), From b62c57a96f4cc9073c25e19ebd8089bd2d59170e Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 5 Feb 2025 13:05:04 +0100 Subject: [PATCH 07/48] use contexpr for gear hash tables --- cpp/src/parquet/column_chunker.h | 678 +++++++++++++++++++++++++------ cpp/src/parquet/column_writer.cc | 8 +- cpp/src/parquet/properties.h | 34 +- python/pyarrow/_parquet.pxd | 2 - python/pyarrow/_parquet.pyx | 4 +- 5 files changed, 563 insertions(+), 163 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index d6ee639409924..30601707e1c3d 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -29,94 +29,530 @@ using arrow::internal::checked_cast; namespace parquet { namespace internal { -// const uint64_t MASK_TABLE[48] = { -// 0x8000000000000000, 0x8000008000000000, 0x8000800080000000, 0x8008008008000000, -// 0x8040100802000000, 0x8080808080800000, 0x8204081020400000, 0x8208208208200000, -// 0x8420842084200000, 0x8842108842100000, 0x8884442221100000, 0x8888888888880000, -// 0x9112224448880000, 0x9224489224480000, 0x9248924892480000, 0x9249249249240000, -// 0xa492924949240000, 0xa4a4a4a4a4a40000, 0xa5294a5294a40000, 0xa94a94a94a940000, -// 0xaa54aa54aa540000, 0xaaa554aaa5540000, 0xaaaaaa5555540000, 0xaaaaaaaaaaaa0000, -// 0xd55555aaaaaa0000, 0xd55aaad55aaa0000, 0xd5aad5aad5aa0000, 0xd6ad6ad6ad6a0000, -// 0xdad6b5ad6b5a0000, 0xdadadadadada0000, 0xdb6d6db6b6da0000, 0xdb6db6db6db60000, -// 0xedb6edb6edb60000, 0xeddbb6eddbb60000, 0xeeedddbbb7760000, 
0xeeeeeeeeeeee0000, -// 0xf77bbbdddeee0000, 0xf7bdeef7bdee0000, 0xfbdefbdefbde0000, 0xfbefbefbefbe0000, -// 0xfdfbf7efdfbe0000, 0xfefefefefefe0000, 0xffbfeff7fdfe0000, 0xffeffeffeffe0000, -// 0xfffefffefffe0000, 0xfffffefffffe0000, 0xfffffffffffe0000, 0xffffffffffff0000 -// }; - -const uint64_t GEAR_TABLE[256] = { - 0x3b5d3c7d207e37dc, 0x784d68ba91123086, 0xcd52880f882e7298, 0xeacf8e4e19fdcca7, - 0xc31f385dfbd1632b, 0x1d5f27001e25abe6, 0x83130bde3c9ad991, 0xc4b225676e9b7649, - 0xaa329b29e08eb499, 0xb67fcbd21e577d58, 0x0027baaada2acf6b, 0xe3ef2d5ac73c2226, - 0x0890f24d6ed312b7, 0xa809e036851d7c7e, 0xf0a6fe5e0013d81b, 0x1d026304452cec14, - 0x03864632648e248f, 0xcdaacf3dcd92b9b4, 0xf5e012e63c187856, 0x8862f9d3821c00b6, - 0xa82f7338750f6f8a, 0x1e583dc6c1cb0b6f, 0x7a3145b69743a7f1, 0xabb20fee404807eb, - 0xb14b3cfe07b83a5d, 0xb9dc27898adb9a0f, 0x3703f5e91baa62be, 0xcf0bb866815f7d98, - 0x3d9867c41ea9dcd3, 0x1be1fa65442bf22c, 0x14300da4c55631d9, 0xe698e9cbc6545c99, - 0x4763107ec64e92a5, 0xc65821fc65696a24, 0x76196c064822f0b7, 0x485be841f3525e01, - 0xf652bc9c85974ff5, 0xcad8352face9e3e9, 0x2a6ed1dceb35e98e, 0xc6f483badc11680f, - 0x3cfd8c17e9cf12f1, 0x89b83c5e2ea56471, 0xae665cfd24e392a9, 0xec33c4e504cb8915, - 0x3fb9b15fc9fe7451, 0xd7fd1fd1945f2195, 0x31ade0853443efd8, 0x255efc9863e1e2d2, - 0x10eab6008d5642cf, 0x46f04863257ac804, 0xa52dc42a789a27d3, 0xdaaadf9ce77af565, - 0x6b479cd53d87febb, 0x6309e2d3f93db72f, 0xc5738ffbaa1ff9d6, 0x6bd57f3f25af7968, - 0x67605486d90d0a4a, 0xe14d0b9663bfbdae, 0xb7bbd8d816eb0414, 0xdef8a4f16b35a116, - 0xe7932d85aaaffed6, 0x08161cbae90cfd48, 0x855507beb294f08b, 0x91234ea6ffd399b2, - 0xad70cf4b2435f302, 0xd289a97565bc2d27, 0x8e558437ffca99de, 0x96d2704b7115c040, - 0x0889bbcdfc660e41, 0x5e0d4e67dc92128d, 0x72a9f8917063ed97, 0x438b69d409e016e3, - 0xdf4fed8a5d8a4397, 0x00f41dcf41d403f7, 0x4814eb038e52603f, 0x9dafbacc58e2d651, - 0xfe2f458e4be170af, 0x4457ec414df6a940, 0x06e62f1451123314, 0xbd1014d173ba92cc, - 0xdef318e25ed57760, 0x9fea0de9dfca8525, 
0x459de1e76c20624b, 0xaeec189617e2d666, - 0x126a2c06ab5a83cb, 0xb1321532360f6132, 0x65421503dbb40123, 0x2d67c287ea089ab3, - 0x6c93bff5a56bd6b6, 0x4ffb2036cab6d98d, 0xce7b785b1be7ad4f, 0xedb42ef6189fd163, - 0xdc905288703988f6, 0x365f9c1d2c691884, 0xc640583680d99bfe, 0x3cd4624c07593ec6, - 0x7f1ea8d85d7c5805, 0x014842d480b57149, 0x0b649bcb5a828688, 0xbcd5708ed79b18f0, - 0xe987c862fbd2f2f0, 0x982731671f0cd82c, 0xbaf13e8b16d8c063, 0x8ea3109cbd951bba, - 0xd141045bfb385cad, 0x2acbc1a0af1f7d30, 0xe6444d89df03bfdf, 0xa18cc771b8188ff9, - 0x9834429db01c39bb, 0x214add07fe086a1f, 0x8f07c19b1f6b3ff9, 0x56a297b1bf4ffe55, - 0x94d558e493c54fc7, 0x40bfc24c764552cb, 0x931a706f8a8520cb, 0x32229d322935bd52, - 0x2560d0f5dc4fefaf, 0x9dbcc48355969bb6, 0x0fd81c3985c0b56a, 0xe03817e1560f2bda, - 0xc1bb4f81d892b2d5, 0xb0c4864f4e28d2d7, 0x3ecc49f9d9d6c263, 0x51307e99b52ba65e, - 0x8af2b688da84a752, 0xf5d72523b91b20b6, 0x6d95ff1ff4634806, 0x562f21555458339a, - 0xc0ce47f889336346, 0x487823e5089b40d8, 0xe4727c7ebc6d9592, 0x5a8f7277e94970ba, - 0xfca2f406b1c8bb50, 0x5b1f8a95f1791070, 0xd304af9fc9028605, 0x5440ab7fc930e748, - 0x312d25fbca2ab5a1, 0x10f4a4b234a4d575, 0x90301d55047e7473, 0x3b6372886c61591e, - 0x293402b77c444e06, 0x451f34a4d3e97dd7, 0x3158d814d81bc57b, 0x034942425b9bda69, - 0xe2032ff9e532d9bb, 0x62ae066b8b2179e5, 0x9545e10c2f8d71d8, 0x7ff7483eb2d23fc0, - 0x00945fcebdc98d86, 0x8764bbbe99b26ca2, 0x1b1ec62284c0bfc3, 0x58e0fcc4f0aa362b, - 0x5f4abefa878d458d, 0xfd74ac2f9607c519, 0xa4e3fb37df8cbfa9, 0xbf697e43cac574e5, - 0x86f14a3f68f4cd53, 0x24a23d076f1ce522, 0xe725cd8048868cc8, 0xbf3c729eb2464362, - 0xd8f6cd57b3cc1ed8, 0x6329e52425541577, 0x62aa688ad5ae1ac0, 0x0a242566269bf845, - 0x168b1a4753aca74b, 0xf789afefff2e7e3c, 0x6c3362093b6fccdb, 0x4ce8f50bd28c09b2, - 0x006a2db95ae8aa93, 0x975b0d623c3d1a8c, 0x18605d3935338c5b, 0x5bb6f6136cad3c71, - 0x0f53a20701f8d8a6, 0xab8c5ad2e7e93c67, 0x40b5ac5127acaa29, 0x8c7bf63c2075895f, - 0x78bd9f7e014a805c, 0xb2c9e9f4f9c8c032, 0xefd6049827eb91f3, 
0x2be459f482c16fbd, - 0xd92ce0c5745aaa8c, 0x0aaa8fb298d965b9, 0x2b37f92c6c803b15, 0x8c54a5e94e0f0e78, - 0x95f9b6e90c0a3032, 0xe7939faa436c7874, 0xd16bfe8f6a8a40c9, 0x44982b86263fd2fa, - 0xe285fb39f984e583, 0x779a8df72d7619d3, 0xf2d79a8de8d5dd1e, 0xd1037354d66684e2, - 0x004c82a4e668a8e5, 0x31d40a7668b044e6, 0xd70578538bd02c11, 0xdb45431078c5f482, - 0x977121bb7f6a51ad, 0x73d5ccbd34eff8dd, 0xe437a07d356e17cd, 0x47b2782043c95627, - 0x9fb251413e41d49a, 0xccd70b60652513d3, 0x1c95b31e8a1b49b2, 0xcae73dfd1bcb4c1b, - 0x34d98331b1f5b70f, 0x784e39f22338d92f, 0x18613d4a064df420, 0xf1d8dae25f0bcebe, - 0x33f77c15ae855efc, 0x3c88b3b912eb109c, 0x956a2ec96bafeea5, 0x1aa005b5e0ad0e87, - 0x5500d70527c4bb8e, 0xe36c57196421cc44, 0x13c4d286cc36ee39, 0x5654a23d818b2a81, - 0x77b1dc13d161abdc, 0x734f44de5f8d5eb5, 0x60717e174a6c89a2, 0xd47d9649266a211e, - 0x5b13a4322bb69e90, 0xf7669609f8b5fc3c, 0x21e6ac55bedcdac9, 0x9b56b62b61166dea, - 0xf48f66b939797e9c, 0x35f332f9c0e6ae9a, 0xcc733f6a9a878db0, 0x3da161e41cc108c2, - 0xb7d74ae535914d51, 0x4d493b0b11d36469, 0xce264d1dfba9741a, 0xa9d1f2dc7436dc06, - 0x70738016604c2a27, 0x231d36e96e93f3d5, 0x7666881197838d19, 0x4a2a83090aaad40c, - 0xf1e761591668b35d, 0x7363236497f730a7, 0x301080e37379dd4d, 0x502dea2971827042, - 0xc2c5eb858f32625f, 0x786afb9edfafbdff, 0xdaee0d868490b2a4, 0x617366b3268609f6, - 0xae0e35a0fe46173e, 0xd1a07de93e824f11, 0x079b8b115ea4cca8, 0x93a99274558faebb, - 0xfb1e6e22e08a03b3, 0xea635fdba3698dd0, 0xcf53659328503a5c, 0xcde3b31e6fd5d780, - 0x8e3e4221d3614413, 0xef14d0d86bf1a22c, 0xe1d830d3f16c5ddb, 0xaabd2b2a451504e1}; - -const int64_t MIN_LEN = 256 * 1024; -const int64_t AVG_LEN = 1 * 1024 * 1024; -const int64_t MAX_LEN = 2 * 1024 * 1024; +constexpr uint64_t GEAR_HASH_TABLE[8][256] = { + {// seed = 0 + 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, + 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, + 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 
0x3654b76a61999706, + 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, + 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, + 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, + 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, + 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, + 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, + 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, + 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, + 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, + 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, + 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, + 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, + 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, + 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, + 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, + 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, + 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, + 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, + 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, + 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, + 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, + 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, + 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, + 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, + 
0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, + 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, + 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, + 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, + 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, + 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, + 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, + 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, + 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, + 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, + 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, + 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, + 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, + 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, + 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, + 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, + 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, + 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, + 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, + 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, + 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, + 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, + 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, + 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, + 0xbb544485bc64d0d4, 
0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, + 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, + 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, + 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, + 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, + 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, + 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, + 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, + 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, + 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, + 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, + 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, + 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, + {// seed = 1 + 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, + 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, + 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, + 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, + 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, + 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, + 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, + 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, + 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, + 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, + 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, + 0xaa5865932fd79064, 
0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, + 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, + 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, + 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, + 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, + 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, + 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, + 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, + 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, + 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, + 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, + 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, + 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, + 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, + 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, + 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, + 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, + 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, + 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, + 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, + 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, + 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, + 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, + 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, + 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 
0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, + 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, + 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, + 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, + 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, + 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, + 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, + 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, + 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, + 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, + 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, + 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, + 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, + 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, + 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, + 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, + 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, + 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, + 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, + 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, + 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, + 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, + 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, + 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, + 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 
0x4221f3a5269942a6, + 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, + 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, + 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, + 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, + {// seed = 2 + 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, + 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, + 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, + 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, + 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, + 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, + 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, + 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, + 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, + 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, + 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, + 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, + 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, + 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, + 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, + 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, + 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, + 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, + 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, + 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 
0xe16606636af89525, + 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, + 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, + 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, + 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, + 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, + 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, + 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, + 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, + 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, + 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, + 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, + 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, + 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, + 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, + 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, + 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, + 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, + 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, + 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, + 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, + 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, + 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, + 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, + 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, + 
0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, + 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, + 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, + 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, + 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, + 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, + 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, + 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, + 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, + 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, + 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, + 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, + 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, + 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, + 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, + 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, + 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, + 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, + 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, + 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, + {// seed = 3 + 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, + 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, + 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, + 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, + 
0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, + 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, + 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, + 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, + 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, + 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, + 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, + 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, + 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, + 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, + 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, + 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, + 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, + 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, + 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, + 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, + 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, + 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, + 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, + 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, + 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, + 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, + 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, + 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, + 0xc49fe6db967c6981, 
0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, + 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, + 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, + 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, + 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, + 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, + 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, + 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, + 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, + 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, + 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, + 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, + 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, + 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, + 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, + 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, + 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, + 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, + 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, + 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, + 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, + 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, + 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, + 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, + 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 
0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, + 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, + 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, + 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, + 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, + 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, + 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, + 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, + 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, + 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, + 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, + 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, + {// seed = 4 + 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, + 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, + 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, + 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, + 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, + 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, + 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, + 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, + 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, + 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, + 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, + 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, + 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 
0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, + 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, + 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, + 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, + 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, + 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, + 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, + 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, + 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, + 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, + 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, + 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, + 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, + 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, + 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, + 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, + 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, + 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, + 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, + 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, + 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, + 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, + 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, + 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, + 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 
0x95112eec1f6310a2, + 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, + 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, + 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, + 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, + 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, + 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, + 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, + 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, + 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, + 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, + 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, + 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, + 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, + 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, + 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, + 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, + 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, + 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, + 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, + 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, + 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, + 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, + 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, + 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, + 
0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, + 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, + 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, + {// seed = 5 + 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, + 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, + 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, + 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, + 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, + 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, + 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, + 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, + 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, + 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, + 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, + 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, + 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, + 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, + 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, + 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, + 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, + 0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, + 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, + 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, + 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, + 
0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, + 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, + 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, + 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, + 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, + 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, + 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, + 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, + 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, + 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, + 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, + 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, + 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, + 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, + 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, + 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, + 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, + 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, + 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, + 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, + 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, + 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, + 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, + 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, + 0xab2af402cf5b7421, 
0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, + 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, + 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, + 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, + 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, + 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, + 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, + 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, + 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, + 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, + 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, + 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, + 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, + 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, + 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, + 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, + 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, + 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, + 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, + {// seed = 6 + 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, + 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, + 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, + 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, + 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, + 0xd66ea581f16fce06, 
0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, + 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, + 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, + 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, + 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, + 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, + 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, + 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, + 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, + 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, + 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, + 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, + 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, + 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, + 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, + 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, + 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, + 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, + 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, + 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, + 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, + 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, + 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, + 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, + 0xe83243a20dea304a, 0xaa85a63a84c00a07, 
0xde0e79917fc4153a, 0x21bb445e83537896, + 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, + 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, + 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, + 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, + 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, + 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, + 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, + 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, + 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, + 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, + 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, + 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, + 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, + 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, + 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, + 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, + 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, + 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, + 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, + 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, + 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, + 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, + 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, + 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 
0x031a7e8b86c1cefc, + 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, + 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, + 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, + 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, + 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, + 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, + 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, + 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, + 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, + 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, + {// seed = 7 + 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, + 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, + 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, + 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, + 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, + 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, + 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, + 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, + 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, + 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, + 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, + 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, + 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, + 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 
0x9d689c26de9d6702, + 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, + 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, + 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, + 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, + 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, + 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, + 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, + 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, + 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, + 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, + 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, + 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, + 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, + 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, + 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, + 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, + 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, + 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, + 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, + 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, + 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, + 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, + 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, + 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, + 
0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, + 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, + 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, + 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, + 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, + 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, + 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, + 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, + 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, + 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, + 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, + 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, + 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, + 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, + 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, + 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, + 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, + 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, + 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, + 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, + 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, + 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, + 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, + 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, + 0x50a2a95d0412e289, 
0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, + 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}, +}; -// const int64_t MIN_LEN = 512 * 1024; -// const int64_t AVG_LEN = 2 * MIN_LEN; -// const int64_t MAX_LEN = 2 * AVG_LEN; +const int64_t AVG_LEN = 1024 * 1024; // create a fake null array class with a GetView method returning 0 always class FakeNullArray { @@ -128,52 +564,33 @@ class FakeNullArray { int64_t null_count() const { return 0; } }; -// static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { -// size_t mask_bits = static_cast(std::floor(std::log2(avg_len))); -// size_t effective_bits = mask_bits + bit_adjustment; -// return MASK_TABLE[effective_bits]; -// } - -// static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { -// size_t mask_bits = static_cast(std::floor(std::log2(avg_len))); -// size_t effective_bits = mask_bits + bit_adjustment; -// return ((1ULL << effective_bits) - 1) << (64 - effective_bits); -// } - -static uint64_t GetMask(uint64_t avg_len, uint8_t bit_adjustment) { - size_t mask_bits = 16; - size_t effective_bits = mask_bits + bit_adjustment; - return ((1ULL << effective_bits) - 1) << (64 - effective_bits); +static uint64_t GetMask(uint64_t avg_len, size_t adjustement_level) { + size_t mask_bits = static_cast(std::ceil(std::log2(avg_len))); + return (1ULL << (mask_bits - adjustement_level)) - 1; } +// rename it since it is not FastCDC anymore class FastCDC { public: - FastCDC(const LevelInfo& level_info, uint64_t min_len, uint64_t avg_len, - uint64_t max_len, uint8_t normalization_level = 0) + FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level = 5) : level_info_(level_info), - min_len_(min_len == 0 ? MIN_LEN : min_len), avg_len_(avg_len == 0 ? AVG_LEN : avg_len), - max_len_(max_len == 0 ? 
MAX_LEN : max_len), - mask_s_(GetMask(avg_len_, -normalization_level)), - mask_l_(GetMask(avg_len_, +normalization_level)) {} + min_len_(avg_len_ * 0.6), + max_len_(avg_len_ * 1.4), + hash_mask_(GetMask(avg_len_, granurality_level + 3)) {} template bool Roll(const T value) { constexpr size_t BYTE_WIDTH = sizeof(T); chunk_size_ += BYTE_WIDTH; - uint64_t mask; if (chunk_size_ < min_len_) { return false; - } else if (chunk_size_ < avg_len_) { - mask = mask_l_; - } else { - mask = mask_s_; } auto bytes = reinterpret_cast(&value); bool match = false; for (size_t i = 0; i < BYTE_WIDTH; ++i) { - hash_ = (hash_ << 1) + GEAR_TABLE[bytes[i]]; - if ((hash_ & mask) == 0) { + rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; + if ((rolling_hash_ & hash_mask_) == 0) { match = true; } } @@ -182,18 +599,14 @@ class FastCDC { bool Roll(std::string_view value) { chunk_size_ += value.size(); - uint64_t mask; if (chunk_size_ < min_len_) { return false; - } else if (chunk_size_ < avg_len_) { - mask = mask_l_; - } else { - mask = mask_s_; } bool match = false; for (char c : value) { - hash_ = (hash_ << 1) + GEAR_TABLE[static_cast(c)]; - if ((hash_ & mask) == 0) { + rolling_hash_ = + (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; + if ((rolling_hash_ & hash_mask_) == 0) { match = true; } } @@ -201,7 +614,15 @@ class FastCDC { } inline bool Check(bool match) { - if (match || (chunk_size_ >= max_len_)) { + if (match) { + if (++nth_run_ >= 7) { + nth_run_ = 0; + chunk_size_ = 0; + return true; + } else { + return false; + } + } else if (chunk_size_ >= max_len_) { chunk_size_ = 0; return true; } else { @@ -350,13 +771,14 @@ class FastCDC { private: const internal::LevelInfo& level_info_; - const uint64_t min_len_; const uint64_t avg_len_; + const uint64_t min_len_; const uint64_t max_len_; - const uint64_t mask_s_; - const uint64_t mask_l_; - uint64_t hash_ = 0; + const uint64_t hash_mask_; + + uint8_t nth_run_ = 0; uint64_t chunk_size_ = 0; + 
uint64_t rolling_hash_ = 0; }; } // namespace internal diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 9491ba70a112a..fca04ca8ee096 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -754,8 +754,7 @@ class ColumnWriterImpl { fallback_(false), definition_levels_sink_(allocator_), repetition_levels_sink_(allocator_), - content_defined_chunker_(level_info_, properties->cdc_min_size(), - properties->cdc_avg_size(), properties->cdc_max_size()) { + content_defined_chunker_(level_info_, properties->cdc_avg_size()) { definition_levels_rle_ = std::static_pointer_cast(AllocateBuffer(allocator_, 0)); repetition_levels_rle_ = @@ -1357,7 +1356,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< rep_levels + level_offset, levels_to_write, *sliced_array, ctx, maybe_parent_nulls)); } - AddDataPage(); + if (num_buffered_values_ > 0) { + AddDataPage(); + } + // AddDataPage(); } return Status::OK(); } else { diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 43586658d99ca..ed12e13f8cc72 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -263,9 +263,7 @@ class PARQUET_EXPORT WriterProperties { page_checksum_enabled_(false), size_statistics_level_(DEFAULT_SIZE_STATISTICS_LEVEL), cdc_enabled_(false), - cdc_avg_size_(0), - cdc_min_size_(0), - cdc_max_size_(0) {} + cdc_avg_size_(0) {} explicit Builder(const WriterProperties& properties) : pool_(properties.memory_pool()), @@ -282,8 +280,7 @@ class PARQUET_EXPORT WriterProperties { sorting_columns_(properties.sorting_columns()), default_column_properties_(properties.default_column_properties()), cdc_enabled_(properties.cdc_enabled()), - cdc_min_size_(properties.cdc_min_size()), - cdc_max_size_(properties.cdc_max_size()) {} + cdc_avg_size_(properties.cdc_avg_size()) {} virtual ~Builder() {} @@ -302,16 +299,6 @@ class PARQUET_EXPORT WriterProperties { return this; } - Builder* 
cdc_min_size(uint64_t min_size) { - cdc_min_size_ = min_size; - return this; - } - - Builder* cdc_max_size(uint64_t max_size) { - cdc_max_size_ = max_size; - return this; - } - /// Specify the memory pool for the writer. Default default_memory_pool. Builder* memory_pool(MemoryPool* pool) { pool_ = pool; @@ -735,7 +722,7 @@ class PARQUET_EXPORT WriterProperties { size_statistics_level_, std::move(file_encryption_properties_), default_column_properties_, column_properties, data_page_version_, store_decimal_as_integer_, std::move(sorting_columns_), cdc_enabled_, - cdc_avg_size_, cdc_min_size_, cdc_max_size_)); + cdc_avg_size_)); } private: @@ -767,8 +754,6 @@ class PARQUET_EXPORT WriterProperties { bool cdc_enabled_; uint64_t cdc_avg_size_; - uint64_t cdc_min_size_; - uint64_t cdc_max_size_; }; inline MemoryPool* memory_pool() const { return pool_; } @@ -795,8 +780,6 @@ class PARQUET_EXPORT WriterProperties { inline bool cdc_enabled() const { return cdc_enabled_; } inline uint64_t cdc_avg_size() const { return cdc_avg_size_; } - inline uint64_t cdc_min_size() const { return cdc_min_size_; } - inline uint64_t cdc_max_size() const { return cdc_max_size_; } inline SizeStatisticsLevel size_statistics_level() const { return size_statistics_level_; @@ -900,8 +883,7 @@ class PARQUET_EXPORT WriterProperties { const ColumnProperties& default_column_properties, const std::unordered_map& column_properties, ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer, - std::vector sorting_columns, bool cdc_enabled, uint64_t cdc_avg_size, - uint64_t cdc_min_size, uint64_t cdc_max_size) + std::vector sorting_columns, bool cdc_enabled, uint64_t cdc_avg_size) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), @@ -918,9 +900,9 @@ class PARQUET_EXPORT WriterProperties { default_column_properties_(default_column_properties), column_properties_(column_properties), cdc_enabled_(cdc_enabled), - 
cdc_avg_size_(cdc_avg_size), - cdc_min_size_(cdc_min_size), - cdc_max_size_(cdc_max_size) {} + cdc_avg_size_(cdc_avg_size) + + {} MemoryPool* pool_; int64_t dictionary_pagesize_limit_; @@ -943,8 +925,6 @@ class PARQUET_EXPORT WriterProperties { bool cdc_enabled_; uint64_t cdc_avg_size_; - uint64_t cdc_min_size_; - uint64_t cdc_max_size_; }; PARQUET_EXPORT const std::shared_ptr& default_writer_properties(); diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 5b9d43c62a45d..8ad573de89ddd 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -449,8 +449,6 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* enable_cdc() Builder* disable_cdc() Builder* cdc_avg_size(uint64_t avg_size) - Builder* cdc_min_size(uint64_t min_size) - Builder* cdc_max_size(uint64_t max_size) shared_ptr[WriterProperties] build() cdef cppclass ArrowWriterProperties: diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index a6a0bbbaad6c4..76ea6cb25277c 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -2020,11 +2020,9 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( elif content_defined_chunking is True: props.enable_cdc() elif isinstance(content_defined_chunking, tuple): - min_size, avg_size, max_size = content_defined_chunking + avg_size, = content_defined_chunking props.enable_cdc() props.cdc_avg_size(avg_size) - props.cdc_min_size(min_size) - props.cdc_max_size(max_size) else: raise ValueError( "Unsupported value for content_defined_chunking: {0}" From db116c8d00719e8e579c33fcf99955a086678846 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 6 Feb 2025 17:23:19 +0100 Subject: [PATCH 08/48] don't include loging --- cpp/src/parquet/column_chunker.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 30601707e1c3d..9dd591943b8f1 100644 --- a/cpp/src/parquet/column_chunker.h +++ 
b/cpp/src/parquet/column_chunker.h @@ -21,7 +21,6 @@ #include #include #include "arrow/array.h" -#include "arrow/util/logging.h" #include "parquet/level_conversion.h" using arrow::internal::checked_cast; From eedc951e7da2c66c2ac372f5ca4b7004b1fcfa6c Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 6 Feb 2025 17:31:32 +0100 Subject: [PATCH 09/48] please msvc --- cpp/src/parquet/column_chunker.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 9dd591943b8f1..d2840d72d79c7 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -574,8 +574,8 @@ class FastCDC { FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level = 5) : level_info_(level_info), avg_len_(avg_len == 0 ? AVG_LEN : avg_len), - min_len_(avg_len_ * 0.6), - max_len_(avg_len_ * 1.4), + min_len_(static_cast(avg_len_ * 0.6)), + max_len_(static_cast(avg_len_ * 1.4)), hash_mask_(GetMask(avg_len_, granurality_level + 3)) {} template From b197f5317d37c81e5a338fd3f5c3946404866312 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Feb 2025 10:41:26 +0100 Subject: [PATCH 10/48] increase the min/max bands around the avg chunk size --- cpp/src/parquet/column_chunker.h | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index d2840d72d79c7..f7d791168cd62 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -574,8 +574,8 @@ class FastCDC { FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level = 5) : level_info_(level_info), avg_len_(avg_len == 0 ? 
AVG_LEN : avg_len), - min_len_(static_cast(avg_len_ * 0.6)), - max_len_(static_cast(avg_len_ * 1.4)), + min_len_(static_cast(avg_len_ * 0.5)), + max_len_(static_cast(avg_len_ * 2.0)), hash_mask_(GetMask(avg_len_, granurality_level + 3)) {} template @@ -589,9 +589,7 @@ class FastCDC { bool match = false; for (size_t i = 0; i < BYTE_WIDTH; ++i) { rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; - if ((rolling_hash_ & hash_mask_) == 0) { - match = true; - } + match |= (rolling_hash_ & hash_mask_) == 0; } return match; } @@ -605,23 +603,17 @@ class FastCDC { for (char c : value) { rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; - if ((rolling_hash_ & hash_mask_) == 0) { - match = true; - } + match |= (rolling_hash_ & hash_mask_) == 0; } return match; } inline bool Check(bool match) { - if (match) { - if (++nth_run_ >= 7) { - nth_run_ = 0; - chunk_size_ = 0; - return true; - } else { - return false; - } - } else if (chunk_size_ >= max_len_) { + if (ARROW_PREDICT_FALSE(match && (++nth_run_ >= 7))) { + nth_run_ = 0; + chunk_size_ = 0; + return true; + } else if (ARROW_PREDICT_FALSE(chunk_size_ >= max_len_)) { chunk_size_ = 0; return true; } else { @@ -693,7 +685,7 @@ class FastCDC { def_match = Roll(def_level); rep_match = Roll(rep_level); - if (def_level >= level_info_.repeated_ancestor_def_level) { + if (ARROW_PREDICT_TRUE(def_level >= level_info_.repeated_ancestor_def_level)) { val_match = Roll(leaf_array.GetView(value_offset)); ++value_offset; } else { From c37eb328418c6b23807d476412fc6325a2ebd222 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 14 Feb 2025 11:55:52 +0100 Subject: [PATCH 11/48] use a chunk struct instead of a tuple to carry boundary information --- cpp/src/parquet/column_chunker.h | 36 ++++++++++++++++++++++---------- cpp/src/parquet/column_writer.cc | 20 +++++++----------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/cpp/src/parquet/column_chunker.h 
b/cpp/src/parquet/column_chunker.h index f7d791168cd62..6a9285c5b5876 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -551,7 +551,7 @@ constexpr uint64_t GEAR_HASH_TABLE[8][256] = { 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}, }; -const int64_t AVG_LEN = 1024 * 1024; +const uint64_t AVG_LEN = 1024 * 1024; // create a fake null array class with a GetView method returning 0 always class FakeNullArray { @@ -563,9 +563,22 @@ class FakeNullArray { int64_t null_count() const { return 0; } }; -static uint64_t GetMask(uint64_t avg_len, size_t adjustement_level) { - size_t mask_bits = static_cast(std::ceil(std::log2(avg_len))); - return (1ULL << (mask_bits - adjustement_level)) - 1; +struct Chunk { + int64_t level_offset; + int64_t value_offset; + int64_t levels_to_write; + + Chunk(int64_t level_offset, int64_t value_offset, int64_t levels_to_write) + : level_offset(level_offset), + value_offset(value_offset), + levels_to_write(levels_to_write) {} +}; + +static uint64_t GetMask(uint64_t min_size, uint64_t max_size) { + uint64_t avg_size = (min_size + max_size) / 2; + size_t mask_bits = static_cast(std::ceil(std::log2(avg_size))); + size_t effective_bits = mask_bits - 3 - 5; + return (1ULL << effective_bits) - 1; } // rename it since it is not FastCDC anymore @@ -622,10 +635,10 @@ class FastCDC { } template - const std::vector> GetBoundaries( - const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, - const T& leaf_array) { - std::vector> result; + const std::vector GetBoundaries(const int16_t* def_levels, + const int16_t* rep_levels, int64_t num_levels, + const T& leaf_array) { + std::vector result; bool has_def_levels = level_info_.def_level > 0; bool has_rep_levels = level_info_.rep_level > 0; @@ -719,9 +732,10 @@ class FastCDC { return GetBoundaries(def_levels, rep_levels, num_levels, \ checked_cast(values)); - const ::arrow::Result>> GetBoundaries( - const int16_t* def_levels, 
const int16_t* rep_levels, int64_t num_levels, - const ::arrow::Array& values) { + const ::arrow::Result> GetBoundaries(const int16_t* def_levels, + const int16_t* rep_levels, + int64_t num_levels, + const ::arrow::Array& values) { auto type_id = values.type()->id(); switch (type_id) { PRIMITIVE_CASE(BOOL, Boolean) diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index fca04ca8ee096..ad4dfa36e2dd2 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1342,24 +1342,20 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< ARROW_ASSIGN_OR_RAISE(auto boundaries, content_defined_chunker_.GetBoundaries( def_levels, rep_levels, num_levels, leaf_array)); - for (auto boundary : boundaries) { - auto level_offset = std::get<0>(boundary); - auto array_offset = std::get<1>(boundary); - auto levels_to_write = std::get<2>(boundary); - auto sliced_array = leaf_array.Slice(array_offset); + for (auto chunk : boundaries) { + auto sliced_array = leaf_array.Slice(chunk.value_offset); if (leaf_array.type()->id() == ::arrow::Type::DICTIONARY) { - ARROW_CHECK_OK(WriteArrowDictionary(def_levels + level_offset, - rep_levels + level_offset, levels_to_write, - *sliced_array, ctx, maybe_parent_nulls)); + ARROW_CHECK_OK(WriteArrowDictionary( + def_levels + chunk.level_offset, rep_levels + chunk.level_offset, + chunk.levels_to_write, *sliced_array, ctx, maybe_parent_nulls)); } else { - ARROW_CHECK_OK(WriteArrowDense(def_levels + level_offset, - rep_levels + level_offset, levels_to_write, - *sliced_array, ctx, maybe_parent_nulls)); + ARROW_CHECK_OK(WriteArrowDense( + def_levels + chunk.level_offset, rep_levels + chunk.level_offset, + chunk.levels_to_write, *sliced_array, ctx, maybe_parent_nulls)); } if (num_buffered_values_ > 0) { AddDataPage(); } - // AddDataPage(); } return Status::OK(); } else { From 57458e3c94950bba2fbd415ced07448bf527f35b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 
14 Feb 2025 12:43:57 +0100 Subject: [PATCH 12/48] split implementation and header files --- cpp/src/parquet/CMakeLists.txt | 1 + cpp/src/parquet/column_chunker.cc | 764 ++++++++++++++++++++++++++++++ cpp/src/parquet/column_chunker.h | 738 +---------------------------- 3 files changed, 777 insertions(+), 726 deletions(-) create mode 100644 cpp/src/parquet/column_chunker.cc diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 245e3ae552d04..fd01f566cd413 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -160,6 +160,7 @@ set(PARQUET_SRCS arrow/writer.cc bloom_filter.cc bloom_filter_reader.cc + column_chunker.cc column_reader.cc column_scanner.cc column_writer.cc diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc new file mode 100644 index 0000000000000..a9564a79e7ceb --- /dev/null +++ b/cpp/src/parquet/column_chunker.cc @@ -0,0 +1,764 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "parquet/column_chunker.h" +#include +#include +#include +#include "arrow/array.h" +#include "parquet/level_conversion.h" + +using arrow::internal::checked_cast; + +namespace parquet { +namespace internal { + +constexpr uint64_t GEAR_HASH_TABLE[8][256] = { + {// seed = 0 + 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, + 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, + 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, + 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, + 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, + 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, + 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, + 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, + 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, + 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, + 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, + 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, + 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, + 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, + 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, + 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, + 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, + 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, + 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, + 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, + 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 
0x683b25c43a0716cf, + 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, + 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, + 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, + 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, + 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, + 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, + 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, + 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, + 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, + 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, + 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, + 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, + 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, + 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, + 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, + 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, + 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, + 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, + 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, + 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, + 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, + 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, + 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, + 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, + 
0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, + 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, + 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, + 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, + 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, + 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, + 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, + 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, + 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, + 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, + 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, + 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, + 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, + 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, + 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, + 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, + 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, + 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, + 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, + {// seed = 1 + 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, + 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, + 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, + 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, + 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, + 
0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, + 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, + 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, + 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, + 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, + 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, + 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, + 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, + 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, + 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, + 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, + 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, + 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, + 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, + 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, + 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, + 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, + 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, + 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, + 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, + 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, + 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, + 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, + 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, + 0x343732993cfed89f, 
0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, + 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, + 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, + 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, + 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, + 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, + 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, + 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, + 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, + 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, + 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, + 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, + 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, + 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, + 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, + 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, + 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, + 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, + 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, + 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, + 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, + 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, + 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, + 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, + 0xee90572b86a30d91, 0x549e72d8262830aa, 
0x3361564b469f32c6, 0x1e6eba9e0d2648e2, + 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, + 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, + 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, + 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, + 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, + 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, + 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, + 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, + 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, + 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, + {// seed = 2 + 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, + 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, + 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, + 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, + 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, + 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, + 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, + 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, + 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, + 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, + 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, + 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, + 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, + 0x357c63357caa9292, 0x819eb7d1aee2d27e, 
0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, + 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, + 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, + 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, + 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, + 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, + 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, + 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, + 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, + 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, + 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, + 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, + 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, + 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, + 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, + 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, + 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, + 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, + 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, + 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, + 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, + 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, + 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, + 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, + 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 
0xbdc656b8613d8805, + 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, + 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, + 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, + 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, + 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, + 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, + 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, + 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, + 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, + 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, + 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, + 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, + 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, + 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, + 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, + 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, + 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, + 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, + 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, + 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, + 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, + 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, + 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, + 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, + 
0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, + 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, + {// seed = 3 + 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, + 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, + 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, + 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, + 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, + 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, + 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, + 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, + 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, + 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, + 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, + 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, + 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, + 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, + 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, + 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, + 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, + 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, + 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, + 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, + 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, + 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, + 
0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, + 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, + 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, + 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, + 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, + 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, + 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, + 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, + 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, + 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, + 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, + 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, + 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, + 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, + 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, + 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, + 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, + 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, + 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, + 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, + 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, + 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, + 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, + 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, + 0xd3b8013502acd838, 
0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, + 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, + 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, + 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, + 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, + 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, + 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, + 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, + 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, + 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, + 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, + 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, + 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, + 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, + 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, + 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, + 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, + 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, + {// seed = 4 + 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, + 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, + 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, + 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, + 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, + 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, + 0x10f8308c0d293719, 
0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, + 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, + 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, + 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, + 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, + 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, + 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, + 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, + 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, + 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, + 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, + 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, + 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, + 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, + 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, + 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, + 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, + 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, + 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, + 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, + 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, + 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, + 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, + 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, + 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 
0x5679f1c51ffb225d, 0xdab79048317d77ee, + 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, + 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, + 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, + 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, + 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, + 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, + 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, + 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, + 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, + 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, + 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, + 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, + 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, + 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, + 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, + 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, + 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, + 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, + 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, + 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, + 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, + 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, + 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, + 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 
0x1b6de96cdd9943ae, + 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, + 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, + 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, + 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, + 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, + 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, + 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, + 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, + 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, + {// seed = 5 + 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, + 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, + 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, + 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, + 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, + 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, + 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, + 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, + 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, + 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, + 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, + 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, + 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, + 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, + 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 
0xce5ad4d7f3f67d3b, + 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, + 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, + 0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, + 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, + 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, + 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, + 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, + 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, + 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, + 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, + 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, + 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, + 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, + 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, + 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, + 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, + 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, + 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, + 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, + 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, + 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, + 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, + 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, + 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, + 
0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, + 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, + 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, + 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, + 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, + 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, + 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, + 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, + 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, + 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, + 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, + 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, + 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, + 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, + 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, + 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, + 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, + 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, + 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, + 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, + 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, + 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, + 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, + 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, + 0x559231d927c84410, 
0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, + {// seed = 6 + 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, + 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, + 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, + 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, + 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, + 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, + 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, + 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, + 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, + 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, + 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, + 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, + 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, + 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, + 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, + 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, + 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, + 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, + 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, + 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, + 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, + 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, + 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, + 0x9eb3694d1e9cb7e1, 
0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, + 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, + 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, + 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, + 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, + 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, + 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, + 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, + 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, + 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, + 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, + 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, + 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, + 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, + 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, + 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, + 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, + 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, + 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, + 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, + 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, + 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, + 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, + 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, + 0xaf02b8b925656fbd, 0x7a722a767179a630, 
0xb5c8afe937a75ace, 0xfdb8e8d02d279372, + 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, + 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, + 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, + 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, + 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, + 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, + 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, + 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, + 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, + 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, + 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, + 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, + 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, + 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, + 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, + 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, + {// seed = 7 + 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, + 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, + 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, + 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, + 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, + 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, + 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, + 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 
0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, + 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, + 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, + 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, + 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, + 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, + 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, + 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, + 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, + 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, + 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, + 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, + 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, + 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, + 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, + 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, + 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, + 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, + 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, + 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, + 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, + 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, + 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, + 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, + 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 
0x7d92253aa99ac39b, + 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, + 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, + 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, + 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, + 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, + 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, + 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, + 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, + 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, + 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, + 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, + 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, + 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, + 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, + 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, + 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, + 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, + 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, + 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, + 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, + 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, + 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, + 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, + 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, + 
0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, + 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, + 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, + 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, + 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, + 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, + 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, + 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}, +}; + +const uint64_t AVG_LEN = 1024 * 1024; + +// create a fake null array class with a GetView method returning 0 always +class FakeNullArray { + public: + uint8_t GetView(int64_t i) const { return 0; } + + std::shared_ptr<::arrow::DataType> type() const { return ::arrow::null(); } + + int64_t null_count() const { return 0; } +}; + +static uint64_t GetMask(uint64_t min_size, uint64_t max_size) { + uint64_t avg_size = (min_size + max_size) / 2; + size_t mask_bits = static_cast(std::ceil(std::log2(avg_size))); + size_t effective_bits = mask_bits - 3 - 5; + return (1ULL << effective_bits) - 1; +} + +// rename it since it is not FastCDC anymore + +FastCDC::FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level) + : level_info_(level_info), + avg_len_(avg_len == 0 ? 
AVG_LEN : avg_len), + min_len_(static_cast(avg_len_ * 0.5)), + max_len_(static_cast(avg_len_ * 2.0)), + hash_mask_(GetMask(avg_len_, granurality_level + 3)) {} + +template +bool FastCDC::Roll(const T value) { + constexpr size_t BYTE_WIDTH = sizeof(T); + chunk_size_ += BYTE_WIDTH; + if (chunk_size_ < min_len_) { + return false; + } + auto bytes = reinterpret_cast(&value); + bool match = false; + for (size_t i = 0; i < BYTE_WIDTH; ++i) { + rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; + match |= (rolling_hash_ & hash_mask_) == 0; + } + return match; +} + +bool FastCDC::Roll(std::string_view value) { + chunk_size_ += value.size(); + if (chunk_size_ < min_len_) { + return false; + } + bool match = false; + for (char c : value) { + rolling_hash_ = + (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; + match |= (rolling_hash_ & hash_mask_) == 0; + } + return match; +} + +bool FastCDC::Check(bool match) { + if (ARROW_PREDICT_FALSE(match && (++nth_run_ >= 7))) { + nth_run_ = 0; + chunk_size_ = 0; + return true; + } else if (ARROW_PREDICT_FALSE(chunk_size_ >= max_len_)) { + chunk_size_ = 0; + return true; + } else { + return false; + } +} + +template +const std::vector FastCDC::Calculate(const int16_t* def_levels, + const int16_t* rep_levels, int64_t num_levels, + const T& leaf_array) { + std::vector result; + bool has_def_levels = level_info_.def_level > 0; + bool has_rep_levels = level_info_.rep_level > 0; + + if (!has_rep_levels && !has_def_levels) { + // fastest path for non-repeated non-null data + bool val_match; + int64_t offset = 0; + int64_t prev_offset = 0; + while (offset < num_levels) { + val_match = Roll(leaf_array.GetView(offset)); + ++offset; + if (Check(val_match)) { + result.emplace_back(prev_offset, prev_offset, offset - prev_offset); + prev_offset = offset; + } + } + if (prev_offset < num_levels) { + result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); + } + } else if (!has_rep_levels) { + 
// non-repeated data possibly with nulls + bool def_match, val_match; + int64_t offset = 0; + int64_t prev_offset = 0; + while (offset < num_levels) { + def_match = Roll(def_levels[offset]); + val_match = Roll(leaf_array.GetView(offset)); + ++offset; + if (Check(def_match || val_match)) { + result.emplace_back(prev_offset, prev_offset, offset - prev_offset); + prev_offset = offset; + } + } + if (prev_offset < num_levels) { + result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); + } + } else { + // repeated data possibly with nulls + bool def_match, rep_match, val_match; + int16_t def_level; + int16_t rep_level; + int64_t level_offset = 0; + int64_t value_offset = 0; + int64_t record_level_offset = 0; + int64_t record_value_offset = 0; + int64_t prev_record_level_offset = 0; + int64_t prev_record_value_offset = 0; + + while (level_offset < num_levels) { + def_level = def_levels[level_offset]; + rep_level = rep_levels[level_offset]; + if (rep_level == 0) { + record_level_offset = level_offset; + record_value_offset = value_offset; + } + ++level_offset; + + def_match = Roll(def_level); + rep_match = Roll(rep_level); + if (ARROW_PREDICT_TRUE(def_level >= level_info_.repeated_ancestor_def_level)) { + val_match = Roll(leaf_array.GetView(value_offset)); + ++value_offset; + } else { + val_match = false; + } + + if (Check(def_match || rep_match || val_match)) { + auto levels_to_write = record_level_offset - prev_record_level_offset; + if (levels_to_write > 0) { + result.emplace_back(prev_record_level_offset, prev_record_value_offset, + levels_to_write); + prev_record_level_offset = record_level_offset; + prev_record_value_offset = record_value_offset; + } + } + } + + auto levels_to_write = num_levels - prev_record_level_offset; + if (levels_to_write > 0) { + result.emplace_back(prev_record_level_offset, prev_record_value_offset, + levels_to_write); + } + return result; + } + + return result; +} + +#define PRIMITIVE_CASE(TYPE_ID, ArrowType) \ + case 
::arrow::Type::TYPE_ID: \ + return Calculate(def_levels, rep_levels, num_levels, \ + checked_cast(values)); + +const ::arrow::Result> FastCDC::GetBoundaries( + const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, + const ::arrow::Array& values) { + auto type_id = values.type()->id(); + switch (type_id) { + PRIMITIVE_CASE(BOOL, Boolean) + PRIMITIVE_CASE(INT8, Int8) + PRIMITIVE_CASE(INT16, Int16) + PRIMITIVE_CASE(INT32, Int32) + PRIMITIVE_CASE(INT64, Int64) + PRIMITIVE_CASE(UINT8, UInt8) + PRIMITIVE_CASE(UINT16, UInt16) + PRIMITIVE_CASE(UINT32, UInt32) + PRIMITIVE_CASE(UINT64, UInt64) + PRIMITIVE_CASE(HALF_FLOAT, HalfFloat) + PRIMITIVE_CASE(FLOAT, Float) + PRIMITIVE_CASE(DOUBLE, Double) + PRIMITIVE_CASE(STRING, String) + PRIMITIVE_CASE(BINARY, Binary) + PRIMITIVE_CASE(FIXED_SIZE_BINARY, FixedSizeBinary) + PRIMITIVE_CASE(DATE32, Date32) + PRIMITIVE_CASE(DATE64, Date64) + PRIMITIVE_CASE(TIME32, Time32) + PRIMITIVE_CASE(TIME64, Time64) + PRIMITIVE_CASE(TIMESTAMP, Timestamp) + PRIMITIVE_CASE(DURATION, Duration) + PRIMITIVE_CASE(DECIMAL128, Decimal128) + PRIMITIVE_CASE(DECIMAL256, Decimal256) + case ::arrow::Type::DICTIONARY: + return GetBoundaries( + def_levels, rep_levels, num_levels, + *checked_cast(values).indices()); + case ::arrow::Type::NA: + FakeNullArray fake_null_array; + return Calculate(def_levels, rep_levels, num_levels, fake_null_array); + default: + return ::arrow::Status::NotImplemented("Unsupported type " + + values.type()->ToString()); + } +} + +} // namespace internal +} // namespace parquet diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 6a9285c5b5876..ba96abc0ad659 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -28,541 +28,6 @@ using arrow::internal::checked_cast; namespace parquet { namespace internal { -constexpr uint64_t GEAR_HASH_TABLE[8][256] = { - {// seed = 0 - 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, - 
0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, - 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, - 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, - 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, - 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, - 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, - 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, - 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, - 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, - 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, - 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, - 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, - 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, - 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, - 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, - 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, - 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, - 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, - 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, - 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, - 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, - 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, - 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, - 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, - 0xc604f6e97087124d, 
0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, - 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, - 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, - 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, - 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, - 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, - 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, - 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, - 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, - 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, - 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, - 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, - 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, - 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, - 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, - 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, - 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, - 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, - 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, - 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, - 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, - 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, - 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, - 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, - 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 
0x4a819b72f610863a, 0xddecfad79d3f08be, - 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, - 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, - 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, - 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, - 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, - 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, - 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, - 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, - 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, - 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, - 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, - 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, - 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, - 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, - {// seed = 1 - 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, - 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, - 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, - 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, - 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, - 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, - 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, - 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, - 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, - 0xbcb38c2d8736a04f, 0x4025317e864bef15, 
0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, - 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, - 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, - 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, - 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, - 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, - 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, - 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, - 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, - 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, - 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, - 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, - 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, - 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, - 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, - 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, - 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, - 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, - 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, - 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, - 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, - 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, - 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, - 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, - 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 
0x0646a130459c3999, - 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, - 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, - 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, - 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, - 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, - 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, - 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, - 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, - 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, - 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, - 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, - 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, - 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, - 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, - 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, - 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, - 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, - 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, - 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, - 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, - 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, - 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, - 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, - 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, - 
0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, - 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, - 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, - 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, - 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, - 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, - {// seed = 2 - 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, - 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, - 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, - 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, - 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, - 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, - 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, - 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, - 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, - 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, - 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, - 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, - 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, - 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, - 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, - 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, - 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, - 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, - 
0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, - 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, - 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, - 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, - 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, - 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, - 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, - 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, - 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, - 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, - 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, - 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, - 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, - 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, - 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, - 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, - 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, - 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, - 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, - 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, - 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, - 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, - 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, - 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, - 0x1b2ea42a24b6b05e, 
0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, - 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, - 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, - 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, - 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, - 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, - 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, - 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, - 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, - 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, - 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, - 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, - 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, - 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, - 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, - 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, - 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, - 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, - 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, - 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, - 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, - 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, - {// seed = 3 - 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, - 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, - 0xd8e0a6e4c0b850d3, 
0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, - 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, - 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, - 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, - 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, - 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, - 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, - 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, - 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, - 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, - 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, - 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, - 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, - 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, - 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, - 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, - 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, - 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, - 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, - 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, - 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, - 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, - 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, - 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, - 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 
0xed52f0311fa0f7ad, 0xa12b16233f302eea, - 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, - 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, - 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, - 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, - 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, - 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, - 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, - 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, - 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, - 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, - 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, - 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, - 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, - 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, - 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, - 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, - 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, - 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, - 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, - 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, - 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, - 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, - 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, - 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 
0x540ac7ee61f2259f, - 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, - 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, - 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, - 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, - 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, - 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, - 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, - 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, - 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, - 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, - 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, - 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, - 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, - {// seed = 4 - 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, - 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, - 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, - 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, - 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, - 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, - 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, - 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, - 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, - 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, - 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 
0xeea8594a4a054856, - 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, - 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, - 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, - 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, - 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, - 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, - 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, - 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, - 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, - 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, - 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, - 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, - 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, - 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, - 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, - 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, - 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, - 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, - 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, - 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, - 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, - 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, - 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, - 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, - 
0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, - 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, - 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, - 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, - 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, - 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, - 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, - 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, - 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, - 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, - 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, - 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, - 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, - 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, - 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, - 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, - 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, - 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, - 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, - 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, - 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, - 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, - 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, - 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, - 0x4f4567c6a74cf0b9, 
0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, - 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, - 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, - 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, - 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, - {// seed = 5 - 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, - 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, - 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, - 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, - 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, - 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, - 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, - 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, - 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, - 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, - 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, - 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, - 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, - 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, - 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, - 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, - 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, - 0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, - 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, - 0xcf586fc4e8e6c7db, 
0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, - 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, - 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, - 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, - 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, - 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, - 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, - 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, - 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, - 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, - 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, - 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, - 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, - 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, - 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, - 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, - 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, - 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, - 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, - 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, - 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, - 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, - 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, - 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, - 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 
0x9b3530bee67017d8, 0xd2faf08b291fdcb9, - 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, - 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, - 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, - 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, - 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, - 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, - 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, - 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, - 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, - 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, - 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, - 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, - 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, - 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, - 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, - 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, - 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, - 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, - 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, - 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, - {// seed = 6 - 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, - 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, - 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, - 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 
0x7844fc82b6fa9091, 0x72d095449863b8ec, - 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, - 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, - 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, - 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, - 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, - 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, - 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, - 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, - 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, - 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, - 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, - 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, - 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, - 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, - 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, - 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, - 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, - 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, - 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, - 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, - 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, - 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, - 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, - 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 
0x71fc85fa1e277d8f, - 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, - 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, - 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, - 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, - 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, - 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, - 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, - 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, - 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, - 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, - 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, - 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, - 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, - 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, - 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, - 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, - 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, - 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, - 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, - 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, - 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, - 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, - 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, - 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, - 
0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, - 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, - 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, - 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, - 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, - 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, - 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, - 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, - 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, - 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, - 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, - 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, - {// seed = 7 - 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, - 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, - 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, - 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, - 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, - 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, - 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, - 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, - 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, - 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, - 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, - 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, - 
0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, - 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, - 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, - 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, - 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, - 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, - 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, - 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, - 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, - 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, - 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, - 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, - 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, - 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, - 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, - 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, - 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, - 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, - 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, - 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, - 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, - 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, - 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, - 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, - 0x7f3b920fd20f4cff, 
0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, - 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, - 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, - 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, - 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, - 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, - 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, - 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, - 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, - 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, - 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, - 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, - 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, - 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, - 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, - 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, - 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, - 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, - 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, - 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, - 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, - 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, - 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, - 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, - 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 
0x1c6f925c05f6efd7, 0x8ae5097553856ea0, - 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, - 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, - 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}, -}; - -const uint64_t AVG_LEN = 1024 * 1024; - -// create a fake null array class with a GetView method returning 0 always -class FakeNullArray { - public: - uint8_t GetView(int64_t i) const { return 0; } - - std::shared_ptr<::arrow::DataType> type() const { return ::arrow::null(); } - - int64_t null_count() const { return 0; } -}; - struct Chunk { int64_t level_offset; int64_t value_offset; @@ -574,207 +39,28 @@ struct Chunk { levels_to_write(levels_to_write) {} }; -static uint64_t GetMask(uint64_t min_size, uint64_t max_size) { - uint64_t avg_size = (min_size + max_size) / 2; - size_t mask_bits = static_cast(std::ceil(std::log2(avg_size))); - size_t effective_bits = mask_bits - 3 - 5; - return (1ULL << effective_bits) - 1; -} +// have a chunker here // rename it since it is not FastCDC anymore class FastCDC { public: - FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level = 5) - : level_info_(level_info), - avg_len_(avg_len == 0 ? 
AVG_LEN : avg_len), - min_len_(static_cast(avg_len_ * 0.5)), - max_len_(static_cast(avg_len_ * 2.0)), - hash_mask_(GetMask(avg_len_, granurality_level + 3)) {} - - template - bool Roll(const T value) { - constexpr size_t BYTE_WIDTH = sizeof(T); - chunk_size_ += BYTE_WIDTH; - if (chunk_size_ < min_len_) { - return false; - } - auto bytes = reinterpret_cast(&value); - bool match = false; - for (size_t i = 0; i < BYTE_WIDTH; ++i) { - rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; - match |= (rolling_hash_ & hash_mask_) == 0; - } - return match; - } - - bool Roll(std::string_view value) { - chunk_size_ += value.size(); - if (chunk_size_ < min_len_) { - return false; - } - bool match = false; - for (char c : value) { - rolling_hash_ = - (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; - match |= (rolling_hash_ & hash_mask_) == 0; - } - return match; - } - - inline bool Check(bool match) { - if (ARROW_PREDICT_FALSE(match && (++nth_run_ >= 7))) { - nth_run_ = 0; - chunk_size_ = 0; - return true; - } else if (ARROW_PREDICT_FALSE(chunk_size_ >= max_len_)) { - chunk_size_ = 0; - return true; - } else { - return false; - } - } - - template - const std::vector GetBoundaries(const int16_t* def_levels, - const int16_t* rep_levels, int64_t num_levels, - const T& leaf_array) { - std::vector result; - bool has_def_levels = level_info_.def_level > 0; - bool has_rep_levels = level_info_.rep_level > 0; - - if (!has_rep_levels && !has_def_levels) { - // fastest path for non-repeated non-null data - bool val_match; - int64_t offset = 0; - int64_t prev_offset = 0; - while (offset < num_levels) { - val_match = Roll(leaf_array.GetView(offset)); - ++offset; - if (Check(val_match)) { - result.emplace_back(prev_offset, prev_offset, offset - prev_offset); - prev_offset = offset; - } - } - if (prev_offset < num_levels) { - result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); - } - } else if (!has_rep_levels) { - // non-repeated 
data possibly with nulls - bool def_match, val_match; - int64_t offset = 0; - int64_t prev_offset = 0; - while (offset < num_levels) { - def_match = Roll(def_levels[offset]); - val_match = Roll(leaf_array.GetView(offset)); - ++offset; - if (Check(def_match || val_match)) { - result.emplace_back(prev_offset, prev_offset, offset - prev_offset); - prev_offset = offset; - } - } - if (prev_offset < num_levels) { - result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); - } - } else { - // repeated data possibly with nulls - bool def_match, rep_match, val_match; - int16_t def_level; - int16_t rep_level; - int64_t level_offset = 0; - int64_t value_offset = 0; - int64_t record_level_offset = 0; - int64_t record_value_offset = 0; - int64_t prev_record_level_offset = 0; - int64_t prev_record_value_offset = 0; - - while (level_offset < num_levels) { - def_level = def_levels[level_offset]; - rep_level = rep_levels[level_offset]; - if (rep_level == 0) { - record_level_offset = level_offset; - record_value_offset = value_offset; - } - ++level_offset; - - def_match = Roll(def_level); - rep_match = Roll(rep_level); - if (ARROW_PREDICT_TRUE(def_level >= level_info_.repeated_ancestor_def_level)) { - val_match = Roll(leaf_array.GetView(value_offset)); - ++value_offset; - } else { - val_match = false; - } - - if (Check(def_match || rep_match || val_match)) { - auto levels_to_write = record_level_offset - prev_record_level_offset; - if (levels_to_write > 0) { - result.emplace_back(prev_record_level_offset, prev_record_value_offset, - levels_to_write); - prev_record_level_offset = record_level_offset; - prev_record_value_offset = record_value_offset; - } - } - } - - auto levels_to_write = num_levels - prev_record_level_offset; - if (levels_to_write > 0) { - result.emplace_back(prev_record_level_offset, prev_record_value_offset, - levels_to_write); - } - return result; - } - - return result; - } - -#define PRIMITIVE_CASE(TYPE_ID, ArrowType) \ - case 
::arrow::Type::TYPE_ID: \ - return GetBoundaries(def_levels, rep_levels, num_levels, \ - checked_cast(values)); + FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level = 5); const ::arrow::Result> GetBoundaries(const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, - const ::arrow::Array& values) { - auto type_id = values.type()->id(); - switch (type_id) { - PRIMITIVE_CASE(BOOL, Boolean) - PRIMITIVE_CASE(INT8, Int8) - PRIMITIVE_CASE(INT16, Int16) - PRIMITIVE_CASE(INT32, Int32) - PRIMITIVE_CASE(INT64, Int64) - PRIMITIVE_CASE(UINT8, UInt8) - PRIMITIVE_CASE(UINT16, UInt16) - PRIMITIVE_CASE(UINT32, UInt32) - PRIMITIVE_CASE(UINT64, UInt64) - PRIMITIVE_CASE(HALF_FLOAT, HalfFloat) - PRIMITIVE_CASE(FLOAT, Float) - PRIMITIVE_CASE(DOUBLE, Double) - PRIMITIVE_CASE(STRING, String) - PRIMITIVE_CASE(BINARY, Binary) - PRIMITIVE_CASE(FIXED_SIZE_BINARY, FixedSizeBinary) - PRIMITIVE_CASE(DATE32, Date32) - PRIMITIVE_CASE(DATE64, Date64) - PRIMITIVE_CASE(TIME32, Time32) - PRIMITIVE_CASE(TIME64, Time64) - PRIMITIVE_CASE(TIMESTAMP, Timestamp) - PRIMITIVE_CASE(DURATION, Duration) - PRIMITIVE_CASE(DECIMAL128, Decimal128) - PRIMITIVE_CASE(DECIMAL256, Decimal256) - case ::arrow::Type::DICTIONARY: - return GetBoundaries( - def_levels, rep_levels, num_levels, - *checked_cast(values).indices()); - case ::arrow::Type::NA: - FakeNullArray fake_null_array; - return GetBoundaries(def_levels, rep_levels, num_levels, fake_null_array); - default: - return ::arrow::Status::NotImplemented("Unsupported type " + - values.type()->ToString()); - } - } + const ::arrow::Array& values); private: + template + bool Roll(const T value); + bool Roll(std::string_view value); + inline bool Check(bool match); + + template + const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, + int64_t num_levels, const T& leaf_array); + const internal::LevelInfo& level_info_; const uint64_t avg_len_; const uint64_t min_len_; From 
bc073f5cc6c851370c25e039f3bbe4a5ed991bd7 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 17 Feb 2025 18:56:23 +0100 Subject: [PATCH 13/48] change the api to define min_chunk_size and max_chunk_size and automatically center the mask --- cpp/src/parquet/column_chunker.cc | 53 ++-- cpp/src/parquet/column_chunker.h | 15 +- cpp/src/parquet/column_chunker_test.cc | 415 ++++++++++++++++++++++++- cpp/src/parquet/column_writer.cc | 21 +- cpp/src/parquet/properties.h | 23 +- python/pyarrow/_parquet.pxd | 2 +- python/pyarrow/_parquet.pyx | 5 +- 7 files changed, 468 insertions(+), 66 deletions(-) diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index a9564a79e7ceb..d99d36b8483c4 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -22,8 +22,6 @@ #include "arrow/array.h" #include "parquet/level_conversion.h" -using arrow::internal::checked_cast; - namespace parquet { namespace internal { @@ -550,8 +548,6 @@ constexpr uint64_t GEAR_HASH_TABLE[8][256] = { 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}, }; -const uint64_t AVG_LEN = 1024 * 1024; - // create a fake null array class with a GetView method returning 0 always class FakeNullArray { public: @@ -564,25 +560,27 @@ class FakeNullArray { static uint64_t GetMask(uint64_t min_size, uint64_t max_size) { uint64_t avg_size = (min_size + max_size) / 2; - size_t mask_bits = static_cast(std::ceil(std::log2(avg_size))); - size_t effective_bits = mask_bits - 3 - 5; - return (1ULL << effective_bits) - 1; + uint64_t target_size = avg_size - min_size; + size_t mask_bits = static_cast(std::floor(std::log2(target_size))); + // -3 because we are using 8 hash tables to have more gaussian-like distribution + // -1 narrows the chunk size distribution in order to avoid having too many hard + // cuts at the minimum and maximum chunk sizes + size_t effective_bits = mask_bits - 3 - 1; + return std::numeric_limits::max() << (64 - 
effective_bits); } -// rename it since it is not FastCDC anymore - -FastCDC::FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level) +ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& level_info, + uint64_t min_size, uint64_t max_size) : level_info_(level_info), - avg_len_(avg_len == 0 ? AVG_LEN : avg_len), - min_len_(static_cast(avg_len_ * 0.5)), - max_len_(static_cast(avg_len_ * 2.0)), - hash_mask_(GetMask(avg_len_, granurality_level + 3)) {} + min_size_(min_size), + max_size_(max_size), + hash_mask_(GetMask(min_size, max_size)) {} template -bool FastCDC::Roll(const T value) { +bool ContentDefinedChunker::Roll(const T value) { constexpr size_t BYTE_WIDTH = sizeof(T); chunk_size_ += BYTE_WIDTH; - if (chunk_size_ < min_len_) { + if (chunk_size_ < min_size_) { return false; } auto bytes = reinterpret_cast(&value); @@ -594,9 +592,9 @@ bool FastCDC::Roll(const T value) { return match; } -bool FastCDC::Roll(std::string_view value) { +bool ContentDefinedChunker::Roll(std::string_view value) { chunk_size_ += value.size(); - if (chunk_size_ < min_len_) { + if (chunk_size_ < min_size_) { return false; } bool match = false; @@ -608,12 +606,12 @@ bool FastCDC::Roll(std::string_view value) { return match; } -bool FastCDC::Check(bool match) { - if (ARROW_PREDICT_FALSE(match && (++nth_run_ >= 7))) { +bool ContentDefinedChunker::Check(bool match) { + if (ARROW_PREDICT_FALSE(match && ++nth_run_ >= 7)) { nth_run_ = 0; chunk_size_ = 0; return true; - } else if (ARROW_PREDICT_FALSE(chunk_size_ >= max_len_)) { + } else if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { chunk_size_ = 0; return true; } else { @@ -622,9 +620,10 @@ bool FastCDC::Check(bool match) { } template -const std::vector FastCDC::Calculate(const int16_t* def_levels, - const int16_t* rep_levels, int64_t num_levels, - const T& leaf_array) { +const std::vector ContentDefinedChunker::Calculate(const int16_t* def_levels, + const int16_t* rep_levels, + int64_t num_levels, + const 
T& leaf_array) { std::vector result; bool has_def_levels = level_info_.def_level > 0; bool has_rep_levels = level_info_.rep_level > 0; @@ -717,9 +716,9 @@ const std::vector FastCDC::Calculate(const int16_t* def_levels, #define PRIMITIVE_CASE(TYPE_ID, ArrowType) \ case ::arrow::Type::TYPE_ID: \ return Calculate(def_levels, rep_levels, num_levels, \ - checked_cast(values)); + static_cast(values)); -const ::arrow::Result> FastCDC::GetBoundaries( +const ::arrow::Result> ContentDefinedChunker::GetBoundaries( const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, const ::arrow::Array& values) { auto type_id = values.type()->id(); @@ -750,7 +749,7 @@ const ::arrow::Result> FastCDC::GetBoundaries( case ::arrow::Type::DICTIONARY: return GetBoundaries( def_levels, rep_levels, num_levels, - *checked_cast(values).indices()); + *static_cast(values).indices()); case ::arrow::Type::NA: FakeNullArray fake_null_array; return Calculate(def_levels, rep_levels, num_levels, fake_null_array); diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index ba96abc0ad659..25ed78cb288ae 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -39,12 +39,10 @@ struct Chunk { levels_to_write(levels_to_write) {} }; -// have a chunker here - -// rename it since it is not FastCDC anymore -class FastCDC { +class ContentDefinedChunker { public: - FastCDC(const LevelInfo& level_info, uint64_t avg_len, uint8_t granurality_level = 5); + ContentDefinedChunker(const LevelInfo& level_info, uint64_t min_size, + uint64_t max_size); const ::arrow::Result> GetBoundaries(const int16_t* def_levels, const int16_t* rep_levels, @@ -62,12 +60,11 @@ class FastCDC { int64_t num_levels, const T& leaf_array); const internal::LevelInfo& level_info_; - const uint64_t avg_len_; - const uint64_t min_len_; - const uint64_t max_len_; + const uint64_t min_size_; + const uint64_t max_size_; const uint64_t hash_mask_; - uint8_t nth_run_ = 0; + uint64_t 
nth_run_ = 0; uint64_t chunk_size_ = 0; uint64_t rolling_hash_ = 0; }; diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index b248758bc120c..c4332c882d402 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -7,10 +7,411 @@ // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. + +#include + +#include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/array/builder_decimal.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/table.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/type_fwd.h" +#include "arrow/type_traits.h" +#include "arrow/util/decimal.h" +#include "arrow/util/float16.h" +#include "parquet/arrow/reader.h" +#include "parquet/arrow/reader_internal.h" +#include "parquet/arrow/schema.h" +#include "parquet/arrow/test_util.h" +#include "parquet/arrow/writer.h" +#include "parquet/column_writer.h" +#include "parquet/file_writer.h" +#include "parquet/page_index.h" +#include "parquet/test_util.h" + +namespace parquet { + +using ::arrow::Array; +using ::arrow::ChunkedArray; +using ::arrow::ConcatenateTables; +using ::arrow::default_memory_pool; +using ::arrow::Field; +using ::arrow::Result; +using ::arrow::Table; +using ::arrow::io::BufferReader; +using ::arrow::random::GenerateArray; +using ::arrow::random::GenerateBatch; +using ::parquet::arrow::FileReader; +using ::parquet::arrow::FileReaderBuilder; +using ::parquet::arrow::MakeSimpleTable; +using ::parquet::arrow::NonNullArray; +using 
::parquet::arrow::WriteTable; + +using ::testing::Bool; +using ::testing::Combine; +using ::testing::Values; + +std::shared_ptr GenerateTable(const std::vector>& fields, + int64_t size, int32_t seed = 42) { + auto batch = GenerateBatch(fields, size, seed); + return Table::FromRecordBatches({batch}).ValueOrDie(); +} + +std::shared_ptr
ConcatAndCombine( + const std::vector>& parts) { + auto table = ConcatenateTables(parts).ValueOrDie(); + return table->CombineChunks().ValueOrDie(); +} + +Result> WriteTableToBuffer(const std::shared_ptr
& table, + uint64_t min_chunk_size, + uint64_t max_chunk_size, + int64_t row_group_size = 1024 * 1024) { + auto sink = CreateOutputStream(); + + auto write_props = WriterProperties::Builder() + .disable_dictionary() + ->enable_cdc() + ->cdc_size_range(min_chunk_size, max_chunk_size) + ->build(); + auto arrow_props = default_arrow_writer_properties(); + RETURN_NOT_OK(WriteTable(*table, default_memory_pool(), sink, row_group_size, + write_props, arrow_props)); + return sink->Finish(); +} + +Result> ReadTableFromBuffer(const std::shared_ptr& data) { + std::shared_ptr
result; + FileReaderBuilder builder; + std::unique_ptr reader; + RETURN_NOT_OK(builder.Open(std::make_shared(data))); + RETURN_NOT_OK(builder.memory_pool(::arrow::default_memory_pool()) + ->properties(default_arrow_reader_properties()) + ->Build(&reader)); + RETURN_NOT_OK(reader->ReadTable(&result)); + return result; +} + +std::vector GetColumnPageLengths(const std::shared_ptr& data, + int column_index = 0) { + std::vector page_lengths; + + auto buffer_reader = std::make_shared(data); + auto parquet_reader = ParquetFileReader::Open(std::move(buffer_reader)); + + auto metadata = parquet_reader->metadata(); + for (int rg = 0; rg < metadata->num_row_groups(); rg++) { + auto page_reader = parquet_reader->RowGroup(rg)->GetColumnPageReader(column_index); + while (auto page = page_reader->NextPage()) { + if (page->type() == PageType::DATA_PAGE || page->type() == PageType::DATA_PAGE_V2) { + auto data_page = static_cast(page.get()); + page_lengths.push_back(data_page->num_values()); + } + } + } + + return page_lengths; +} + +Result> WriteAndGetPageLengths(const std::shared_ptr
& table, + uint64_t min_chunk_size, + uint64_t max_chunk_size, + int column_index = 0) { + ARROW_ASSIGN_OR_RAISE(auto buffer, + WriteTableToBuffer(table, min_chunk_size, max_chunk_size)); + ARROW_ASSIGN_OR_RAISE(auto readback, ReadTableFromBuffer(buffer)); + + RETURN_NOT_OK(readback->ValidateFull()); + ARROW_RETURN_IF(!readback->Equals(*table), + Status::Invalid("Readback table not equal to original")); + return GetColumnPageLengths(buffer, column_index); +} + +void AssertAllBetween(const std::vector& values, uint64_t min, uint64_t max) { + // expect the last chunk since it is not guaranteed to be within the range + for (size_t i = 0; i < values.size() - 1; i++) { + ASSERT_GE(values[i], min); + ASSERT_LE(values[i], max); + } + ASSERT_LE(values.back(), max); +} + +void AssertUpdateCase(const std::vector& original, + const std::vector& modified) { + ASSERT_EQ(original.size(), modified.size()); + for (size_t i = 0; i < original.size(); i++) { + ASSERT_EQ(original[i], modified[i]); + } +} + +void AssertDeleteCase(const std::vector& original, + const std::vector& modified, + uint8_t n_modifications = 1) { + ASSERT_EQ(original.size(), modified.size()); + size_t smaller_count = 0; + for (size_t i = 0; i < original.size(); i++) { + if (modified[i] < original[i]) { + smaller_count++; + ASSERT_LT(modified[i], original[i]); + } else { + ASSERT_EQ(modified[i], original[i]); + } + } + ASSERT_EQ(smaller_count, n_modifications); +} + +void AssertInsertCase(const std::vector& original, + const std::vector& modified, + uint8_t n_modifications = 1) { + ASSERT_EQ(original.size(), modified.size()); + size_t larger_count = 0; + for (size_t i = 0; i < original.size(); i++) { + if (modified[i] > original[i]) { + larger_count++; + ASSERT_GT(modified[i], original[i]); + } else { + ASSERT_EQ(modified[i], original[i]); + } + } + ASSERT_EQ(larger_count, n_modifications); +} + +void AssertAppendCase(const std::vector& original, + const std::vector& modified) { + ASSERT_GE(modified.size(), 
original.size()); + for (size_t i = 0; i < original.size() - 1; i++) { + ASSERT_EQ(original[i], modified[i]); + } + ASSERT_GT(modified[original.size() - 1], original.back()); +} + +uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { + if (nullable) { + byte_width += 2; + } + return size / byte_width; +} + +constexpr uint64_t kMinChunkSize = 128 * 1024; +constexpr uint64_t kMaxChunkSize = 256 * 1024; + +// TODO: +// - test nullable types +// - test nested types +// - test dictionary encoding +// - test multiple row groups + +class TestColumnChunker : public ::testing::TestWithParam< + std::tuple, bool>> {}; + +TEST_P(TestColumnChunker, DeleteOnce) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part3}); + + auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertDeleteCase(base_lengths, modified_lengths, 1); +} + +TEST_P(TestColumnChunker, DeleteTwice) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + auto 
part4 = GenerateTable({field}, 32, /*seed=*/2); + auto part5 = GenerateTable({field}, 128 * 1024); + + auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); + auto modified = ConcatAndCombine({part1, part3, part5}); + + auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertDeleteCase(base_lengths, modified_lengths, 2); +} + +TEST_P(TestColumnChunker, UpdateOnce) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + auto part4 = GenerateTable({field}, 32, /*seed=*/2); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part4, part3}); + + auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertUpdateCase(base_lengths, modified_lengths); +} + +TEST_P(TestColumnChunker, UpdateTwice) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto 
field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + auto part4 = GenerateTable({field}, 32, /*seed=*/2); + auto part5 = GenerateTable({field}, 128 * 1024); + auto part6 = GenerateTable({field}, 32, /*seed=*/3); + auto part7 = GenerateTable({field}, 32, /*seed=*/4); + + auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); + auto modified = ConcatAndCombine({part1, part6, part3, part7, part5}); + + auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertUpdateCase(base_lengths, modified_lengths); +} + +TEST_P(TestColumnChunker, InsertOnce) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + auto part4 = GenerateTable({field}, 64); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part2, part4, part3}); + + auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, 
kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertInsertCase(base_lengths, modified_lengths, 1); +} + +TEST_P(TestColumnChunker, InsertTwice) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + auto part4 = GenerateTable({field}, 32, /*seed=*/2); + auto part5 = GenerateTable({field}, 128 * 1024); + auto part6 = GenerateTable({field}, 64); + auto part7 = GenerateTable({field}, 64); + + auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); + auto modified = ConcatAndCombine({part1, part2, part6, part3, part4, part7, part5}); + + auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertInsertCase(base_lengths, modified_lengths, 2); +} + +TEST_P(TestColumnChunker, Append) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, 128 * 1024); + auto part2 = GenerateTable({field}, 32, /*seed=*/1); + auto part3 = GenerateTable({field}, 128 * 1024); + auto part4 = GenerateTable({field}, 32 * 1024); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part2, part3, part4}); + + auto min_length = 
ElementCount(kMinChunkSize, dtype->byte_width(), nullable); + auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); + + ASSERT_OK_AND_ASSIGN(auto base_lengths, + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_lengths, + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + + AssertAllBetween(base_lengths, min_length, max_length); + AssertAllBetween(modified_lengths, min_length, max_length); + AssertAppendCase(base_lengths, modified_lengths); +} + +INSTANTIATE_TEST_SUITE_P( + TypeRoundtrip, TestColumnChunker, + Combine(Values(::arrow::uint8(), ::arrow::uint16(), ::arrow::uint32(), + ::arrow::uint64(), ::arrow::int8(), ::arrow::int16(), ::arrow::int32(), + ::arrow::int64(), ::arrow::float16(), ::arrow::float32(), + ::arrow::float64()), + Bool())); + +} // namespace parquet + +// - check that the state is maintained across rowgroups, so the edits should be +// consistent +// - check that the edits are consistent between writes +// - some smoke testing like approach would be nice to test several arrow types diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index ad4dfa36e2dd2..bf2aeeaf2d1c1 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -754,7 +754,8 @@ class ColumnWriterImpl { fallback_(false), definition_levels_sink_(allocator_), repetition_levels_sink_(allocator_), - content_defined_chunker_(level_info_, properties->cdc_avg_size()) { + content_defined_chunker_(level_info_, properties->cdc_size_range().first, + properties->cdc_size_range().second) { definition_levels_rle_ = std::static_pointer_cast(AllocateBuffer(allocator_, 0)); repetition_levels_rle_ = @@ -894,7 +895,7 @@ class ColumnWriterImpl { std::vector> data_pages_; - internal::FastCDC content_defined_chunker_; + internal::ContentDefinedChunker content_defined_chunker_; private: void InitSinks() { @@ -1343,15 +1344,17 @@ class TypedColumnWriterImpl : 
public ColumnWriterImpl, public TypedColumnWriter< content_defined_chunker_.GetBoundaries( def_levels, rep_levels, num_levels, leaf_array)); for (auto chunk : boundaries) { - auto sliced_array = leaf_array.Slice(chunk.value_offset); + auto chunk_array = leaf_array.Slice(chunk.value_offset); + auto chunk_def_levels = AddIfNotNull(def_levels, chunk.level_offset); + auto chunk_rep_levels = AddIfNotNull(rep_levels, chunk.level_offset); if (leaf_array.type()->id() == ::arrow::Type::DICTIONARY) { - ARROW_CHECK_OK(WriteArrowDictionary( - def_levels + chunk.level_offset, rep_levels + chunk.level_offset, - chunk.levels_to_write, *sliced_array, ctx, maybe_parent_nulls)); + ARROW_CHECK_OK(WriteArrowDictionary(chunk_def_levels, chunk_rep_levels, + chunk.levels_to_write, *chunk_array, ctx, + maybe_parent_nulls)); } else { - ARROW_CHECK_OK(WriteArrowDense( - def_levels + chunk.level_offset, rep_levels + chunk.level_offset, - chunk.levels_to_write, *sliced_array, ctx, maybe_parent_nulls)); + ARROW_CHECK_OK(WriteArrowDense(chunk_def_levels, chunk_rep_levels, + chunk.levels_to_write, *chunk_array, ctx, + maybe_parent_nulls)); } if (num_buffered_values_ > 0) { AddDataPage(); diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index ed12e13f8cc72..185edccd777a3 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -168,6 +168,8 @@ static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOM static constexpr bool DEFAULT_IS_PAGE_INDEX_ENABLED = true; static constexpr SizeStatisticsLevel DEFAULT_SIZE_STATISTICS_LEVEL = SizeStatisticsLevel::PageAndColumnChunk; +static constexpr std::pair DEFAULT_CDC_SIZE_RANGE = + std::make_pair(256 * 1024, 1024 * 1024); class PARQUET_EXPORT ColumnProperties { public: @@ -263,7 +265,7 @@ class PARQUET_EXPORT WriterProperties { page_checksum_enabled_(false), size_statistics_level_(DEFAULT_SIZE_STATISTICS_LEVEL), cdc_enabled_(false), - cdc_avg_size_(0) {} + 
cdc_size_range_(DEFAULT_CDC_SIZE_RANGE) {} explicit Builder(const WriterProperties& properties) : pool_(properties.memory_pool()), @@ -280,7 +282,7 @@ class PARQUET_EXPORT WriterProperties { sorting_columns_(properties.sorting_columns()), default_column_properties_(properties.default_column_properties()), cdc_enabled_(properties.cdc_enabled()), - cdc_avg_size_(properties.cdc_avg_size()) {} + cdc_size_range_(properties.cdc_size_range()) {} virtual ~Builder() {} @@ -294,8 +296,8 @@ class PARQUET_EXPORT WriterProperties { return this; } - Builder* cdc_avg_size(uint64_t avg_size) { - cdc_avg_size_ = avg_size; + Builder* cdc_size_range(uint64_t min_size, uint64_t max_size) { + cdc_size_range_ = std::make_pair(min_size, max_size); return this; } @@ -722,7 +724,7 @@ class PARQUET_EXPORT WriterProperties { size_statistics_level_, std::move(file_encryption_properties_), default_column_properties_, column_properties, data_page_version_, store_decimal_as_integer_, std::move(sorting_columns_), cdc_enabled_, - cdc_avg_size_)); + cdc_size_range_)); } private: @@ -753,7 +755,7 @@ class PARQUET_EXPORT WriterProperties { std::unordered_map page_index_enabled_; bool cdc_enabled_; - uint64_t cdc_avg_size_; + std::pair cdc_size_range_; }; inline MemoryPool* memory_pool() const { return pool_; } @@ -779,7 +781,7 @@ class PARQUET_EXPORT WriterProperties { inline bool page_checksum_enabled() const { return page_checksum_enabled_; } inline bool cdc_enabled() const { return cdc_enabled_; } - inline uint64_t cdc_avg_size() const { return cdc_avg_size_; } + inline std::pair cdc_size_range() const { return cdc_size_range_; } inline SizeStatisticsLevel size_statistics_level() const { return size_statistics_level_; @@ -883,7 +885,8 @@ class PARQUET_EXPORT WriterProperties { const ColumnProperties& default_column_properties, const std::unordered_map& column_properties, ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer, - std::vector sorting_columns, bool cdc_enabled, 
uint64_t cdc_avg_size) + std::vector sorting_columns, bool cdc_enabled, + std::pair cdc_size_range) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), @@ -900,7 +903,7 @@ class PARQUET_EXPORT WriterProperties { default_column_properties_(default_column_properties), column_properties_(column_properties), cdc_enabled_(cdc_enabled), - cdc_avg_size_(cdc_avg_size) + cdc_size_range_(cdc_size_range) {} @@ -924,7 +927,7 @@ class PARQUET_EXPORT WriterProperties { std::unordered_map column_properties_; bool cdc_enabled_; - uint64_t cdc_avg_size_; + std::pair cdc_size_range_; }; PARQUET_EXPORT const std::shared_ptr& default_writer_properties(); diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 8ad573de89ddd..e764756436165 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -448,7 +448,7 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* disable_page_checksum() Builder* enable_cdc() Builder* disable_cdc() - Builder* cdc_avg_size(uint64_t avg_size) + Builder* cdc_size_range(uint64_t min_size, uint64_t max_size) shared_ptr[WriterProperties] build() cdef cppclass ArrowWriterProperties: diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 76ea6cb25277c..32dad29e6b3ec 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -2014,15 +2014,14 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( props.dictionary_pagesize_limit(dictionary_pagesize_limit) # content defined chunking - if content_defined_chunking is False: props.disable_cdc() elif content_defined_chunking is True: props.enable_cdc() elif isinstance(content_defined_chunking, tuple): - avg_size, = content_defined_chunking + min_size, max_size = content_defined_chunking props.enable_cdc() - props.cdc_avg_size(avg_size) + props.cdc_size_range(min_size, max_size) else: raise ValueError( "Unsupported value for content_defined_chunking: 
{0}" From 469253fa91c03a7a1a114eab36db7483167d1d6a Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 21 Feb 2025 17:08:35 +0100 Subject: [PATCH 14/48] additional testing (more types, dictionary encoding, nullable types) --- cpp/src/parquet/column_chunker_test.cc | 491 ++++++++++++++++++------- 1 file changed, 355 insertions(+), 136 deletions(-) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index c4332c882d402..a1682ec1027c7 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -7,8 +7,18 @@ // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. #include +#include +#include +#include #include "arrow/array.h" #include "arrow/array/builder_binary.h" @@ -68,14 +78,20 @@ std::shared_ptr
ConcatAndCombine( Result> WriteTableToBuffer(const std::shared_ptr
& table, uint64_t min_chunk_size, uint64_t max_chunk_size, + bool enable_dictionary = false, + int64_t row_group_size = 1024 * 1024) { auto sink = CreateOutputStream(); - auto write_props = WriterProperties::Builder() - .disable_dictionary() - ->enable_cdc() - ->cdc_size_range(min_chunk_size, max_chunk_size) - ->build(); + auto builder = WriterProperties::Builder(); + // enable content defined chunking + builder.enable_cdc()->cdc_size_range(min_chunk_size, max_chunk_size); + if (enable_dictionary) { + builder.enable_dictionary(); + } else { + builder.disable_dictionary(); + } + auto write_props = builder.build(); auto arrow_props = default_arrow_writer_properties(); RETURN_NOT_OK(WriteTable(*table, default_memory_pool(), sink, row_group_size, write_props, arrow_props)); @@ -118,9 +134,12 @@ std::vector GetColumnPageLengths(const std::shared_ptr& data, Result> WriteAndGetPageLengths(const std::shared_ptr
& table, uint64_t min_chunk_size, uint64_t max_chunk_size, + + bool enable_dictionary = false, int column_index = 0) { - ARROW_ASSIGN_OR_RAISE(auto buffer, - WriteTableToBuffer(table, min_chunk_size, max_chunk_size)); + ARROW_ASSIGN_OR_RAISE( + auto buffer, + WriteTableToBuffer(table, min_chunk_size, max_chunk_size, enable_dictionary)); ARROW_ASSIGN_OR_RAISE(auto readback, ReadTableFromBuffer(buffer)); RETURN_NOT_OK(readback->ValidateFull()); @@ -129,53 +148,213 @@ Result> WriteAndGetPageLengths(const std::shared_ptr
& values, uint64_t min, uint64_t max) { +void AssertAllBetween(const std::vector& values, uint64_t min, uint64_t max, + bool expect_dictionary_fallback = false) { // expect the last chunk since it is not guaranteed to be within the range - for (size_t i = 0; i < values.size() - 1; i++) { - ASSERT_GE(values[i], min); - ASSERT_LE(values[i], max); + if (expect_dictionary_fallback) { + // if dictionary encoding is enabled, the writer can fallback to plain + // encoding splitting within a content defined chunk, so we can't + // guarantee that all chunks are within the range in this case, but we + // know that there can be at most 2 pages smaller than the min_chunk_size + size_t smaller_count = 0; + for (size_t i = 0; i < values.size() - 1; i++) { + if (values[i] < min) { + smaller_count++; + } else { + ASSERT_LE(values[i], max); + } + } + ASSERT_LE(smaller_count, 2); + } else { + for (size_t i = 0; i < values.size() - 1; i++) { + ASSERT_GE(values[i], min); + ASSERT_LE(values[i], max); + } } ASSERT_LE(values.back(), max); } +std::vector, std::vector>> FindDifferences( + const std::vector& first, const std::vector& second) { + auto n = first.size(), m = second.size(); + + // Build DP table for LCS. + std::vector> dp(n + 1, std::vector(m + 1, 0)); + for (size_t i = 0; i < n; ++i) { + for (size_t j = 0; j < m; ++j) { + dp[i + 1][j + 1] = + (first[i] == second[j]) ? dp[i][j] + 1 : std::max(dp[i + 1][j], dp[i][j + 1]); + } + } + + // Backtrack to recover LCS indices. + std::vector> common; + for (auto i = n, j = m; i > 0 && j > 0;) { + if (first[i - 1] == second[j - 1]) { + common.emplace_back(i - 1, j - 1); + --i, --j; + } else if (dp[i - 1][j] >= dp[i][j - 1]) { + --i; + } else { + --j; + } + } + std::reverse(common.begin(), common.end()); + + // Extract differences using the common indices as anchors. 
+ std::vector, std::vector>> result; + size_t last_i = 0, last_j = 0; + for (auto [ci, cj] : common) { + std::vector diff1(first.begin() + last_i, first.begin() + ci); + std::vector diff2(second.begin() + last_j, second.begin() + cj); + if (!diff1.empty() || !diff2.empty()) { + result.emplace_back(std::move(diff1), std::move(diff2)); + } + last_i = ci + 1; + last_j = cj + 1; + } + // Add any remaining elements after the last common index. + std::vector diff1(first.begin() + last_i, first.end()); + std::vector diff2(second.begin() + last_j, second.end()); + if (!diff1.empty() || !diff2.empty()) { + result.emplace_back(std::move(diff1), std::move(diff2)); + } + + return result; +} + +TEST(TestFindDifferences, Basic) { + std::vector first = {1, 2, 3, 4, 5}; + std::vector second = {1, 7, 8, 4, 5}; + + auto diffs = FindDifferences(first, second); + + ASSERT_EQ(diffs.size(), 1); + ASSERT_EQ(diffs[0].first, std::vector({2, 3})); + ASSERT_EQ(diffs[0].second, std::vector({7, 8})); +} + +TEST(TestFindDifferences, MultipleDifferences) { + std::vector first = {1, 2, 3, 4, 5, 6, 7}; + std::vector second = {1, 8, 9, 4, 10, 6, 11}; + auto diffs = FindDifferences(first, second); + + ASSERT_EQ(diffs.size(), 3); + + ASSERT_EQ(diffs[0].first, std::vector({2, 3})); + ASSERT_EQ(diffs[0].second, std::vector({8, 9})); + + ASSERT_EQ(diffs[1].first, std::vector({5})); + ASSERT_EQ(diffs[1].second, std::vector({10})); + + ASSERT_EQ(diffs[2].first, std::vector({7})); + ASSERT_EQ(diffs[2].second, std::vector({11})); +} + +TEST(TestFindDifferences, DifferentLengths) { + std::vector first = {1, 2, 3}; + std::vector second = {1, 2, 3, 4, 5}; + auto diffs = FindDifferences(first, second); + + ASSERT_EQ(diffs.size(), 1); + ASSERT_TRUE(diffs[0].first.empty()); + ASSERT_EQ(diffs[0].second, std::vector({4, 5})); +} + +TEST(TestFindDifferences, EmptyArrays) { + std::vector first = {}; + std::vector second = {}; + auto diffs = FindDifferences(first, second); + ASSERT_TRUE(diffs.empty()); +} + 
+TEST(TestFindDifferences, LongSequenceWithSingleDifference) { + std::vector first = { + 1994, 2193, 2700, 1913, 2052, + }; + std::vector second = {2048, 43, 2080, 2700, 1913, 2052}; + auto diffs = FindDifferences(first, second); + + ASSERT_EQ(diffs.size(), 1); + ASSERT_EQ(diffs[0].first, std::vector({1994, 2193})); + ASSERT_EQ(diffs[0].second, std::vector({2048, 43, 2080})); + + // Verify that elements after the difference are identical + for (size_t i = 3; i < second.size(); i++) { + ASSERT_EQ(first[i - 1], second[i]); + } +} + +TEST(TestFindDifferences, LongSequenceWithMiddleChanges) { + std::vector first = {2169, 1976, 2180, 2147, 1934, 1772, + 1914, 2075, 2154, 1940, 1934, 1970}; + std::vector second = {2169, 1976, 2180, 2147, 2265, 1804, + 1717, 1925, 2122, 1940, 1934, 1970}; + auto diffs = FindDifferences(first, second); + + ASSERT_EQ(diffs.size(), 1); + ASSERT_EQ(diffs[0].first, std::vector({1934, 1772, 1914, 2075, 2154})); + ASSERT_EQ(diffs[0].second, std::vector({2265, 1804, 1717, 1925, 2122})); + + // Verify elements before and after the difference are identical + for (size_t i = 0; i < 4; i++) { + ASSERT_EQ(first[i], second[i]); + } + for (size_t i = 9; i < first.size(); i++) { + ASSERT_EQ(first[i], second[i]); + } +} + void AssertUpdateCase(const std::vector& original, - const std::vector& modified) { - ASSERT_EQ(original.size(), modified.size()); - for (size_t i = 0; i < original.size(); i++) { - ASSERT_EQ(original[i], modified[i]); + const std::vector& modified, uint8_t n_modifications) { + auto diffs = FindDifferences(original, modified); + ASSERT_LE(diffs.size(), n_modifications); + + for (const auto& diff : diffs) { + uint64_t left_sum = 0, right_sum = 0; + for (const auto& val : diff.first) left_sum += val; + for (const auto& val : diff.second) right_sum += val; + ASSERT_EQ(left_sum, right_sum); + ASSERT_LE(diff.first.size(), 2); + ASSERT_LE(diff.second.size(), 2); + } + + if (diffs.size() == 0) { + // no differences found, the arrays are equal + 
ASSERT_TRUE(original == modified); } } void AssertDeleteCase(const std::vector& original, - const std::vector& modified, - uint8_t n_modifications = 1) { - ASSERT_EQ(original.size(), modified.size()); - size_t smaller_count = 0; - for (size_t i = 0; i < original.size(); i++) { - if (modified[i] < original[i]) { - smaller_count++; - ASSERT_LT(modified[i], original[i]); - } else { - ASSERT_EQ(modified[i], original[i]); - } + const std::vector& modified, uint8_t n_modifications, + uint64_t edit_length) { + auto diffs = FindDifferences(original, modified); + ASSERT_EQ(diffs.size(), n_modifications); + + for (const auto& diff : diffs) { + uint64_t left_sum = 0, right_sum = 0; + for (const auto& val : diff.first) left_sum += val; + for (const auto& val : diff.second) right_sum += val; + ASSERT_EQ(left_sum, right_sum + edit_length); + ASSERT_LE(diff.first.size(), 2); + ASSERT_LE(diff.second.size(), 2); } - ASSERT_EQ(smaller_count, n_modifications); } void AssertInsertCase(const std::vector& original, - const std::vector& modified, - uint8_t n_modifications = 1) { - ASSERT_EQ(original.size(), modified.size()); - size_t larger_count = 0; - for (size_t i = 0; i < original.size(); i++) { - if (modified[i] > original[i]) { - larger_count++; - ASSERT_GT(modified[i], original[i]); - } else { - ASSERT_EQ(modified[i], original[i]); - } + const std::vector& modified, uint8_t n_modifications, + uint64_t edit_length) { + auto diffs = FindDifferences(original, modified); + ASSERT_EQ(diffs.size(), n_modifications); + + for (const auto& diff : diffs) { + uint64_t left_sum = 0, right_sum = 0; + for (const auto& val : diff.first) left_sum += val; + for (const auto& val : diff.second) right_sum += val; + ASSERT_EQ(left_sum + edit_length, right_sum); + ASSERT_LE(diff.first.size(), 2); + ASSERT_LE(diff.second.size(), 2); } - ASSERT_EQ(larger_count, n_modifications); } void AssertAppendCase(const std::vector& original, @@ -189,229 +368,269 @@ void AssertAppendCase(const std::vector& original, 
uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { if (nullable) { + // in case of nullable types the def_levels are also fed through the chunker + // to identify changes in the null bitmap, this will increase the byte width + // and decrease the number of elements per chunk byte_width += 2; } return size / byte_width; } -constexpr uint64_t kMinChunkSize = 128 * 1024; -constexpr uint64_t kMaxChunkSize = 256 * 1024; +constexpr uint64_t kMinChunkSize = 32 * 1024; +constexpr uint64_t kMaxChunkSize = 128 * 1024; +constexpr uint64_t kPartLength = 128 * 1024; +constexpr uint64_t kEditLength = 32; // TODO: -// - test nullable types // - test nested types -// - test dictionary encoding // - test multiple row groups -class TestColumnChunker : public ::testing::TestWithParam< - std::tuple, bool>> {}; +class TestContentDefinedChunker + : public ::testing::TestWithParam< + std::tuple, bool, bool>> {}; -TEST_P(TestColumnChunker, DeleteOnce) { +TEST_P(TestContentDefinedChunker, DeleteOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); auto base = ConcatAndCombine({part1, part2, part3}); auto modified = ConcatAndCombine({part1, part3}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, 
kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); - AssertDeleteCase(base_lengths, modified_lengths, 1); + AssertDeleteCase(base_lengths, modified_lengths, 1, kEditLength); } -TEST_P(TestColumnChunker, DeleteTwice) { +TEST_P(TestContentDefinedChunker, DeleteTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); - auto part4 = GenerateTable({field}, 32, /*seed=*/2); - auto part5 = GenerateTable({field}, 128 * 1024); + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); auto modified = ConcatAndCombine({part1, part3, part5}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - 
WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); - - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); - AssertDeleteCase(base_lengths, modified_lengths, 2); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertDeleteCase(base_lengths, modified_lengths, 2, kEditLength); } -TEST_P(TestColumnChunker, UpdateOnce) { +TEST_P(TestContentDefinedChunker, UpdateOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); - auto part4 = GenerateTable({field}, 32, /*seed=*/2); + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); auto base = ConcatAndCombine({part1, part2, part3}); auto modified = ConcatAndCombine({part1, part4, part3}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, 
kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); - - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); - AssertUpdateCase(base_lengths, modified_lengths); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertUpdateCase(base_lengths, modified_lengths, 1); } -TEST_P(TestColumnChunker, UpdateTwice) { +TEST_P(TestContentDefinedChunker, UpdateTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); - auto part4 = GenerateTable({field}, 32, /*seed=*/2); - auto part5 = GenerateTable({field}, 128 * 1024); - auto part6 = GenerateTable({field}, 32, /*seed=*/3); - auto part7 = GenerateTable({field}, 32, /*seed=*/4); + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); + auto part6 = GenerateTable({field}, kEditLength, /*seed=*/6); + auto part7 = GenerateTable({field}, kEditLength, /*seed=*/7); auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); auto modified = 
ConcatAndCombine({part1, part6, part3, part7, part5}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); - - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); - AssertUpdateCase(base_lengths, modified_lengths); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertUpdateCase(base_lengths, modified_lengths, 2); } -TEST_P(TestColumnChunker, InsertOnce) { +TEST_P(TestContentDefinedChunker, InsertOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); - auto part4 = GenerateTable({field}, 64); + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part2, part4, part3}); + auto base = ConcatAndCombine({part1, part3}); + auto modified = 
ConcatAndCombine({part1, part2, part3}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); - - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); - AssertInsertCase(base_lengths, modified_lengths, 1); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertInsertCase(base_lengths, modified_lengths, 1, kEditLength); } -TEST_P(TestColumnChunker, InsertTwice) { +TEST_P(TestContentDefinedChunker, InsertTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); + enable_dictionary = false; auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); - auto part4 = GenerateTable({field}, 32, /*seed=*/2); - auto part5 = GenerateTable({field}, 128 * 1024); - auto part6 = GenerateTable({field}, 64); - auto part7 = GenerateTable({field}, 64); + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 
= GenerateTable({field}, kEditLength, /*seed=*/4); + auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); - auto modified = ConcatAndCombine({part1, part2, part6, part3, part4, part7, part5}); + auto base = ConcatAndCombine({part1, part3, part5}); + auto modified = ConcatAndCombine({part1, part2, part3, part4, part5}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); - - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); - AssertInsertCase(base_lengths, modified_lengths, 2); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertInsertCase(base_lengths, modified_lengths, 2, kEditLength); } -TEST_P(TestColumnChunker, Append) { +TEST_P(TestContentDefinedChunker, Append) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); + auto enable_dictionary = std::get<2>(GetParam()); auto field = ::arrow::field("f0", dtype, nullable); - auto part1 = GenerateTable({field}, 128 * 1024); - auto part2 = GenerateTable({field}, 32, /*seed=*/1); - auto part3 = GenerateTable({field}, 128 * 1024); - auto part4 = GenerateTable({field}, 32 * 1024); + auto part1 = 
GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); auto base = ConcatAndCombine({part1, part2, part3}); auto modified = ConcatAndCombine({part1, part2, part3, part4}); + ASSERT_FALSE(base->Equals(*modified)); auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize)); + WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length); - AssertAllBetween(modified_lengths, min_length, max_length); + AssertAllBetween(base_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_lengths, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); AssertAppendCase(base_lengths, modified_lengths); } INSTANTIATE_TEST_SUITE_P( - TypeRoundtrip, TestColumnChunker, + FixedSizedTypes, TestContentDefinedChunker, Combine(Values(::arrow::uint8(), ::arrow::uint16(), ::arrow::uint32(), ::arrow::uint64(), ::arrow::int8(), ::arrow::int16(), ::arrow::int32(), ::arrow::int64(), ::arrow::float16(), ::arrow::float32(), ::arrow::float64()), - Bool())); + Bool(), Bool())); } // namespace parquet - -// - check that the state is maintained across rowgroups, so the edits should be -// consistent -// - check that the edits are consistent between writes -// - some smoke testing like approach would be nice to test several arrow 
types From 8651386d4b5cfe37309d8502805dba4f9c62f23f Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 22 Feb 2025 00:19:06 +0100 Subject: [PATCH 15/48] test cases for binary-like types --- cpp/src/parquet/column_chunker.cc | 2 + cpp/src/parquet/column_chunker_test.cc | 366 +++++++++++++++++++------ 2 files changed, 282 insertions(+), 86 deletions(-) diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index d99d36b8483c4..bc5640cbd7d38 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -736,7 +736,9 @@ const ::arrow::Result> ContentDefinedChunker::GetBoundaries( PRIMITIVE_CASE(FLOAT, Float) PRIMITIVE_CASE(DOUBLE, Double) PRIMITIVE_CASE(STRING, String) + PRIMITIVE_CASE(LARGE_STRING, LargeString) PRIMITIVE_CASE(BINARY, Binary) + PRIMITIVE_CASE(LARGE_BINARY, LargeBinary) PRIMITIVE_CASE(FIXED_SIZE_BINARY, FixedSizeBinary) PRIMITIVE_CASE(DATE32, Date32) PRIMITIVE_CASE(DATE64, Date64) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index a1682ec1027c7..3133888cc0feb 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -92,7 +92,8 @@ Result> WriteTableToBuffer(const std::shared_ptr
& builder.disable_dictionary(); } auto write_props = builder.build(); - auto arrow_props = default_arrow_writer_properties(); + + auto arrow_props = ArrowWriterProperties::Builder().store_schema()->build(); RETURN_NOT_OK(WriteTable(*table, default_memory_pool(), sink, row_group_size, write_props, arrow_props)); return sink->Finish(); @@ -110,8 +111,9 @@ Result> ReadTableFromBuffer(const std::shared_ptr return result; } -std::vector GetColumnPageLengths(const std::shared_ptr& data, - int column_index = 0) { +std::pair, std::vector> GetColumnPageSizes( + const std::shared_ptr& data, int column_index = 0) { + std::vector page_sizes; std::vector page_lengths; auto buffer_reader = std::make_shared(data); @@ -123,20 +125,18 @@ std::vector GetColumnPageLengths(const std::shared_ptr& data, while (auto page = page_reader->NextPage()) { if (page->type() == PageType::DATA_PAGE || page->type() == PageType::DATA_PAGE_V2) { auto data_page = static_cast(page.get()); + page_sizes.push_back(data_page->size()); page_lengths.push_back(data_page->num_values()); } } } - return page_lengths; + return {page_lengths, page_sizes}; } -Result> WriteAndGetPageLengths(const std::shared_ptr
& table, - uint64_t min_chunk_size, - uint64_t max_chunk_size, - - bool enable_dictionary = false, - int column_index = 0) { +Result, std::vector>> WriteAndGetPageSizes( + const std::shared_ptr
& table, uint64_t min_chunk_size, uint64_t max_chunk_size, + bool enable_dictionary = false, int column_index = 0) { ARROW_ASSIGN_OR_RAISE( auto buffer, WriteTableToBuffer(table, min_chunk_size, max_chunk_size, enable_dictionary)); @@ -145,7 +145,7 @@ Result> WriteAndGetPageLengths(const std::shared_ptr
ValidateFull()); ARROW_RETURN_IF(!readback->Equals(*table), Status::Invalid("Readback table not equal to original")); - return GetColumnPageLengths(buffer, column_index); + return GetColumnPageSizes(buffer, column_index); } void AssertAllBetween(const std::vector& values, uint64_t min, uint64_t max, @@ -385,11 +385,11 @@ constexpr uint64_t kEditLength = 32; // - test nested types // - test multiple row groups -class TestContentDefinedChunker +class TestFixedSizedTypeCDC : public ::testing::TestWithParam< std::tuple, bool, bool>> {}; -TEST_P(TestContentDefinedChunker, DeleteOnce) { +TEST_P(TestFixedSizedTypeCDC, DeleteOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -406,23 +406,22 @@ TEST_P(TestContentDefinedChunker, DeleteOnce) { auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertAllBetween(base_lengths, min_length, max_length, + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertDeleteCase(base_lengths, modified_lengths, 1, 
kEditLength); + AssertDeleteCase(base_result.first, modified_result.first, 1, kEditLength); } -TEST_P(TestContentDefinedChunker, DeleteTwice) { +TEST_P(TestFixedSizedTypeCDC, DeleteTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -442,21 +441,21 @@ TEST_P(TestContentDefinedChunker, DeleteTwice) { auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length, + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertDeleteCase(base_lengths, modified_lengths, 2, kEditLength); + AssertDeleteCase(base_result.first, modified_result.first, 2, kEditLength); } -TEST_P(TestContentDefinedChunker, UpdateOnce) { +TEST_P(TestFixedSizedTypeCDC, UpdateOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -475,21 +474,21 @@ TEST_P(TestContentDefinedChunker, UpdateOnce) { auto min_length = ElementCount(kMinChunkSize, 
dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length, + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertUpdateCase(base_lengths, modified_lengths, 1); + AssertUpdateCase(base_result.first, modified_result.first, 1); } -TEST_P(TestContentDefinedChunker, UpdateTwice) { +TEST_P(TestFixedSizedTypeCDC, UpdateTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -511,21 +510,21 @@ TEST_P(TestContentDefinedChunker, UpdateTwice) { auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto base_result, + 
WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length, + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertUpdateCase(base_lengths, modified_lengths, 2); + AssertUpdateCase(base_result.first, modified_result.first, 2); } -TEST_P(TestContentDefinedChunker, InsertOnce) { +TEST_P(TestFixedSizedTypeCDC, InsertOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -543,25 +542,24 @@ TEST_P(TestContentDefinedChunker, InsertOnce) { auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length, + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, 
max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertInsertCase(base_lengths, modified_lengths, 1, kEditLength); + AssertInsertCase(base_result.first, modified_result.first, 1, kEditLength); } -TEST_P(TestContentDefinedChunker, InsertTwice) { +TEST_P(TestFixedSizedTypeCDC, InsertTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); - enable_dictionary = false; auto field = ::arrow::field("f0", dtype, nullable); @@ -578,21 +576,21 @@ TEST_P(TestContentDefinedChunker, InsertTwice) { auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length, + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertInsertCase(base_lengths, modified_lengths, 2, kEditLength); + AssertInsertCase(base_result.first, modified_result.first, 2, kEditLength); } -TEST_P(TestContentDefinedChunker, Append) { +TEST_P(TestFixedSizedTypeCDC, Append) { auto 
dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -611,26 +609,222 @@ TEST_P(TestContentDefinedChunker, Append) { auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_lengths, - WriteAndGetPageLengths(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_lengths, - WriteAndGetPageLengths(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_lengths, min_length, max_length, + AssertAllBetween(base_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_lengths, min_length, max_length, + AssertAllBetween(modified_result.first, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAppendCase(base_lengths, modified_lengths); + AssertAppendCase(base_result.first, modified_result.first); } INSTANTIATE_TEST_SUITE_P( - FixedSizedTypes, TestContentDefinedChunker, + FixedSizedTypes, TestFixedSizedTypeCDC, Combine(Values(::arrow::uint8(), ::arrow::uint16(), ::arrow::uint32(), ::arrow::uint64(), ::arrow::int8(), ::arrow::int16(), ::arrow::int32(), ::arrow::int64(), ::arrow::float16(), ::arrow::float32(), ::arrow::float64()), Bool(), Bool())); +class TestVariableLengthTypeCDC + : public ::testing::TestWithParam< + std::tuple, bool>> {}; + +TEST_P(TestVariableLengthTypeCDC, Append) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + 
auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part2, part3, part4}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); + + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertAppendCase(base_result.first, modified_result.first); +} + +TEST_P(TestVariableLengthTypeCDC, UpdateOnce) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part4, part3}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertUpdateCase(base_result.first, modified_result.first, 1); +} + 
+TEST_P(TestVariableLengthTypeCDC, UpdateTwice) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); + auto part6 = GenerateTable({field}, kEditLength, /*seed=*/6); + auto part7 = GenerateTable({field}, kEditLength, /*seed=*/7); + + auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); + auto modified = ConcatAndCombine({part1, part6, part3, part7, part5}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertUpdateCase(base_result.first, modified_result.first, 2); +} + +TEST_P(TestVariableLengthTypeCDC, InsertOnce) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + + auto base = ConcatAndCombine({part1, part3}); + auto modified = ConcatAndCombine({part1, part2, part3}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, 
kMinChunkSize, kMaxChunkSize)); + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertInsertCase(base_result.first, modified_result.first, 1, kEditLength); +} + +TEST_P(TestVariableLengthTypeCDC, InsertTwice) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); + + auto base = ConcatAndCombine({part1, part3, part5}); + auto modified = ConcatAndCombine({part1, part2, part3, part4, part5}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertInsertCase(base_result.first, modified_result.first, 2, kEditLength); +} + +TEST_P(TestVariableLengthTypeCDC, DeleteOnce) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + + auto base = ConcatAndCombine({part1, part2, part3}); + auto modified = ConcatAndCombine({part1, part3}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto 
base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertDeleteCase(base_result.first, modified_result.first, 1, kEditLength); +} + +TEST_P(TestVariableLengthTypeCDC, DeleteTwice) { + auto dtype = std::get<0>(GetParam()); + auto nullable = std::get<1>(GetParam()); + + auto field = ::arrow::field("f0", dtype, nullable); + + auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); + auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); + auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); + auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); + + auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); + auto modified = ConcatAndCombine({part1, part3, part5}); + ASSERT_FALSE(base->Equals(*modified)); + + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); + if (!nullable) { + AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); + AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); + } + AssertDeleteCase(base_result.first, modified_result.first, 2, kEditLength); +} + +INSTANTIATE_TEST_SUITE_P(VarLenTypes, TestVariableLengthTypeCDC, + Combine(Values(::arrow::utf8(), ::arrow::large_utf8(), + ::arrow::binary(), ::arrow::large_binary(), + ::arrow::fixed_size_binary(16)), + Bool())); + } // namespace parquet From 4750660815aafe226917ebe8293eaaa8b505b548 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 22 Feb 2025 00:37:30 +0100 Subject: [PATCH 16/48] reduce duplication in 
testing --- cpp/src/parquet/column_chunker_test.cc | 313 ++++--------------------- 1 file changed, 52 insertions(+), 261 deletions(-) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index 3133888cc0feb..574df08c47f52 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -111,8 +111,9 @@ Result> ReadTableFromBuffer(const std::shared_ptr return result; } -std::pair, std::vector> GetColumnPageSizes( - const std::shared_ptr& data, int column_index = 0) { +using PageSizes = std::pair, std::vector>; + +PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_index = 0) { std::vector page_sizes; std::vector page_lengths; @@ -134,9 +135,10 @@ std::pair, std::vector> GetColumnPageSizes( return {page_lengths, page_sizes}; } -Result, std::vector>> WriteAndGetPageSizes( - const std::shared_ptr
& table, uint64_t min_chunk_size, uint64_t max_chunk_size, - bool enable_dictionary = false, int column_index = 0) { +Result WriteAndGetPageSizes(const std::shared_ptr
& table, + uint64_t min_chunk_size, uint64_t max_chunk_size, + bool enable_dictionary = false, + int column_index = 0) { ARROW_ASSIGN_OR_RAISE( auto buffer, WriteTableToBuffer(table, min_chunk_size, max_chunk_size, enable_dictionary)); @@ -376,6 +378,24 @@ uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { return size / byte_width; } +void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, + PageSizes base_result, PageSizes modified_result, bool nullable, + bool enable_dictionary, uint64_t min_chunk_size, + uint64_t max_chunk_size) { + if (::arrow::is_fixed_width(dtype->id())) { + auto min_length = ElementCount(min_chunk_size, dtype->byte_width(), nullable); + auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); + AssertAllBetween(base_result.first, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + AssertAllBetween(modified_result.first, min_length, max_length, + /*expect_dictionary_fallback=*/enable_dictionary); + } else if (::arrow::is_base_binary_like(dtype->id()) && !nullable && + !enable_dictionary) { + AssertAllBetween(base_result.second, min_chunk_size, max_chunk_size); + AssertAllBetween(modified_result.second, min_chunk_size, max_chunk_size); + } +} + constexpr uint64_t kMinChunkSize = 32 * 1024; constexpr uint64_t kMaxChunkSize = 128 * 1024; constexpr uint64_t kPartLength = 128 * 1024; @@ -385,11 +405,10 @@ constexpr uint64_t kEditLength = 32; // - test nested types // - test multiple row groups -class TestFixedSizedTypeCDC - : public ::testing::TestWithParam< - std::tuple, bool, bool>> {}; +class TestColumnCDC : public ::testing::TestWithParam< + std::tuple, bool, bool>> {}; -TEST_P(TestFixedSizedTypeCDC, DeleteOnce) { +TEST_P(TestColumnCDC, DeleteOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -404,8 +423,6 @@ TEST_P(TestFixedSizedTypeCDC, DeleteOnce) { auto modified = 
ConcatAndCombine({part1, part3}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); @@ -413,15 +430,13 @@ TEST_P(TestFixedSizedTypeCDC, DeleteOnce) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); AssertDeleteCase(base_result.first, modified_result.first, 1, kEditLength); } -TEST_P(TestFixedSizedTypeCDC, DeleteTwice) { +TEST_P(TestColumnCDC, DeleteTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -438,9 +453,6 @@ TEST_P(TestFixedSizedTypeCDC, DeleteTwice) { auto modified = ConcatAndCombine({part1, part3, part5}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); @@ -448,14 +460,12 @@ TEST_P(TestFixedSizedTypeCDC, DeleteTwice) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, 
- /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); AssertDeleteCase(base_result.first, modified_result.first, 2, kEditLength); } -TEST_P(TestFixedSizedTypeCDC, UpdateOnce) { +TEST_P(TestColumnCDC, UpdateOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -471,9 +481,6 @@ TEST_P(TestFixedSizedTypeCDC, UpdateOnce) { auto modified = ConcatAndCombine({part1, part4, part3}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); @@ -481,14 +488,12 @@ TEST_P(TestFixedSizedTypeCDC, UpdateOnce) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); AssertUpdateCase(base_result.first, modified_result.first, 1); } -TEST_P(TestFixedSizedTypeCDC, UpdateTwice) { +TEST_P(TestColumnCDC, UpdateTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -507,9 +512,6 @@ TEST_P(TestFixedSizedTypeCDC, UpdateTwice) { auto modified = ConcatAndCombine({part1, part6, part3, part7, part5}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = 
ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); @@ -517,14 +519,12 @@ TEST_P(TestFixedSizedTypeCDC, UpdateTwice) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); AssertUpdateCase(base_result.first, modified_result.first, 2); } -TEST_P(TestFixedSizedTypeCDC, InsertOnce) { +TEST_P(TestColumnCDC, InsertOnce) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -539,9 +539,6 @@ TEST_P(TestFixedSizedTypeCDC, InsertOnce) { auto modified = ConcatAndCombine({part1, part2, part3}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); @@ -549,14 +546,12 @@ TEST_P(TestFixedSizedTypeCDC, InsertOnce) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); 
AssertInsertCase(base_result.first, modified_result.first, 1, kEditLength); } -TEST_P(TestFixedSizedTypeCDC, InsertTwice) { +TEST_P(TestColumnCDC, InsertTwice) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -573,9 +568,6 @@ TEST_P(TestFixedSizedTypeCDC, InsertTwice) { auto modified = ConcatAndCombine({part1, part2, part3, part4, part5}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); @@ -583,14 +575,12 @@ TEST_P(TestFixedSizedTypeCDC, InsertTwice) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); AssertInsertCase(base_result.first, modified_result.first, 2, kEditLength); } -TEST_P(TestFixedSizedTypeCDC, Append) { +TEST_P(TestColumnCDC, Append) { auto dtype = std::get<0>(GetParam()); auto nullable = std::get<1>(GetParam()); auto enable_dictionary = std::get<2>(GetParam()); @@ -606,9 +596,6 @@ TEST_P(TestFixedSizedTypeCDC, Append) { auto modified = ConcatAndCombine({part1, part2, part3, part4}); ASSERT_FALSE(base->Equals(*modified)); - auto min_length = ElementCount(kMinChunkSize, dtype->byte_width(), nullable); - auto max_length = ElementCount(kMaxChunkSize, dtype->byte_width(), nullable); - ASSERT_OK_AND_ASSIGN(auto base_result, WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, 
/*enable_dictionary=*/enable_dictionary)); @@ -616,215 +603,19 @@ TEST_P(TestFixedSizedTypeCDC, Append) { WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, /*enable_dictionary=*/enable_dictionary)); - AssertAllBetween(base_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, - /*expect_dictionary_fallback=*/enable_dictionary); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); AssertAppendCase(base_result.first, modified_result.first); } INSTANTIATE_TEST_SUITE_P( - FixedSizedTypes, TestFixedSizedTypeCDC, + FixedSizedTypes, TestColumnCDC, Combine(Values(::arrow::uint8(), ::arrow::uint16(), ::arrow::uint32(), ::arrow::uint64(), ::arrow::int8(), ::arrow::int16(), ::arrow::int32(), ::arrow::int64(), ::arrow::float16(), ::arrow::float32(), - ::arrow::float64()), + ::arrow::float64(), ::arrow::binary(), ::arrow::large_binary(), + ::arrow::fixed_size_binary(16), ::arrow::utf8(), + ::arrow::large_utf8()), Bool(), Bool())); -class TestVariableLengthTypeCDC - : public ::testing::TestWithParam< - std::tuple, bool>> {}; - -TEST_P(TestVariableLengthTypeCDC, Append) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part2, part3, part4}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, 
kMinChunkSize, kMaxChunkSize)); - - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertAppendCase(base_result.first, modified_result.first); -} - -TEST_P(TestVariableLengthTypeCDC, UpdateOnce) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part4, part3}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertUpdateCase(base_result.first, modified_result.first, 1); -} - -TEST_P(TestVariableLengthTypeCDC, UpdateTwice) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - auto part6 = GenerateTable({field}, kEditLength, /*seed=*/6); - auto part7 = GenerateTable({field}, kEditLength, /*seed=*/7); - - auto base = 
ConcatAndCombine({part1, part2, part3, part4, part5}); - auto modified = ConcatAndCombine({part1, part6, part3, part7, part5}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertUpdateCase(base_result.first, modified_result.first, 2); -} - -TEST_P(TestVariableLengthTypeCDC, InsertOnce) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - - auto base = ConcatAndCombine({part1, part3}); - auto modified = ConcatAndCombine({part1, part2, part3}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertInsertCase(base_result.first, modified_result.first, 1, kEditLength); -} - -TEST_P(TestVariableLengthTypeCDC, InsertTwice) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); 
- auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - - auto base = ConcatAndCombine({part1, part3, part5}); - auto modified = ConcatAndCombine({part1, part2, part3, part4, part5}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertInsertCase(base_result.first, modified_result.first, 2, kEditLength); -} - -TEST_P(TestVariableLengthTypeCDC, DeleteOnce) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part3}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertDeleteCase(base_result.first, modified_result.first, 1, kEditLength); -} - -TEST_P(TestVariableLengthTypeCDC, DeleteTwice) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, 
kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - - auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); - auto modified = ConcatAndCombine({part1, part3, part5}); - ASSERT_FALSE(base->Equals(*modified)); - - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize)); - if (!nullable) { - AssertAllBetween(base_result.second, kMinChunkSize, kMaxChunkSize); - AssertAllBetween(modified_result.second, kMinChunkSize, kMaxChunkSize); - } - AssertDeleteCase(base_result.first, modified_result.first, 2, kEditLength); -} - -INSTANTIATE_TEST_SUITE_P(VarLenTypes, TestVariableLengthTypeCDC, - Combine(Values(::arrow::utf8(), ::arrow::large_utf8(), - ::arrow::binary(), ::arrow::large_binary(), - ::arrow::fixed_size_binary(16)), - Bool())); - } // namespace parquet From 58c272087ac739846a71c2e8b5b6aadfdd7330ca Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 22 Feb 2025 00:55:00 +0100 Subject: [PATCH 17/48] reduce duplication in testing --- cpp/src/parquet/column_chunker_test.cc | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index 574df08c47f52..bf4f6ed77e50a 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -145,8 +145,10 @@ Result WriteAndGetPageSizes(const std::shared_ptr
& table, ARROW_ASSIGN_OR_RAISE(auto readback, ReadTableFromBuffer(buffer)); RETURN_NOT_OK(readback->ValidateFull()); - ARROW_RETURN_IF(!readback->Equals(*table), - Status::Invalid("Readback table not equal to original")); + if (readback->schema()->Equals(*table->schema())) { + ARROW_RETURN_IF(!readback->Equals(*table), + Status::Invalid("Readback table not equal to original")); + } return GetColumnPageSizes(buffer, column_index); } @@ -338,8 +340,8 @@ void AssertDeleteCase(const std::vector& original, for (const auto& val : diff.first) left_sum += val; for (const auto& val : diff.second) right_sum += val; ASSERT_EQ(left_sum, right_sum + edit_length); - ASSERT_LE(diff.first.size(), 2); - ASSERT_LE(diff.second.size(), 2); + ASSERT_LE(diff.first.size(), 3); + ASSERT_LE(diff.second.size(), 3); } } @@ -354,8 +356,8 @@ void AssertInsertCase(const std::vector& original, for (const auto& val : diff.first) left_sum += val; for (const auto& val : diff.second) right_sum += val; ASSERT_EQ(left_sum + edit_length, right_sum); - ASSERT_LE(diff.first.size(), 2); - ASSERT_LE(diff.second.size(), 2); + ASSERT_LE(diff.first.size(), 3); + ASSERT_LE(diff.second.size(), 3); } } @@ -608,14 +610,19 @@ TEST_P(TestColumnCDC, Append) { AssertAppendCase(base_result.first, modified_result.first); } +// TODO(kszucs): add extension type and dictionary type INSTANTIATE_TEST_SUITE_P( FixedSizedTypes, TestColumnCDC, Combine(Values(::arrow::uint8(), ::arrow::uint16(), ::arrow::uint32(), ::arrow::uint64(), ::arrow::int8(), ::arrow::int16(), ::arrow::int32(), ::arrow::int64(), ::arrow::float16(), ::arrow::float32(), ::arrow::float64(), ::arrow::binary(), ::arrow::large_binary(), - ::arrow::fixed_size_binary(16), ::arrow::utf8(), - ::arrow::large_utf8()), + ::arrow::fixed_size_binary(16), ::arrow::utf8(), ::arrow::large_utf8(), + ::arrow::date32(), ::arrow::date64(), ::arrow::decimal128(18, 6), + ::arrow::decimal256(40, 6), ::arrow::time32(::arrow::TimeUnit::SECOND), + 
::arrow::time64(::arrow::TimeUnit::NANO), + ::arrow::timestamp(::arrow::TimeUnit::NANO), + ::arrow::duration(::arrow::TimeUnit::NANO)), Bool(), Bool())); } // namespace parquet From a3f5e2e58041099fa845641c2332c3385cd1efc4 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 24 Feb 2025 19:19:26 +0100 Subject: [PATCH 18/48] refactoring + testing + introduce norm_factor parameter --- cpp/src/parquet/column_chunker.cc | 135 ++++--- cpp/src/parquet/column_chunker.h | 12 +- cpp/src/parquet/column_chunker_test.cc | 526 ++++++++++++++----------- cpp/src/parquet/column_writer.cc | 4 +- cpp/src/parquet/properties.h | 24 +- python/pyarrow/_parquet.pxd | 5 +- python/pyarrow/_parquet.pyx | 26 +- 7 files changed, 412 insertions(+), 320 deletions(-) diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index bc5640cbd7d38..14de6d81575b9 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -20,6 +20,7 @@ #include #include #include "arrow/array.h" +#include "arrow/util/logging.h" #include "parquet/level_conversion.h" namespace parquet { @@ -558,65 +559,83 @@ class FakeNullArray { int64_t null_count() const { return 0; } }; -static uint64_t GetMask(uint64_t min_size, uint64_t max_size) { +static uint64_t GetMask(uint64_t min_size, uint64_t max_size, uint8_t norm_factor) { + // we aim for gaussian-like distribution of chunk sizes between min_size and max_size uint64_t avg_size = (min_size + max_size) / 2; + // we skip calculating gearhash for the first `min_size` bytes, so we are looking for + // a smaller chunk than the average size uint64_t target_size = avg_size - min_size; size_t mask_bits = static_cast(std::floor(std::log2(target_size))); // -3 because we are using 8 hash tables to have more gaussian-like distribution - // -1 narrows the chunk size distribution in order to avoid having too many hard - // cuts at the minimum and maximum chunk sizes - size_t effective_bits = mask_bits - 3 - 1; + // 
`norm_factor` narrows the chunk size distribution around avg_size + size_t effective_bits = mask_bits - 3 - norm_factor; return std::numeric_limits::max() << (64 - effective_bits); } ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& level_info, - uint64_t min_size, uint64_t max_size) + std::pair size_range, + uint8_t norm_factor) : level_info_(level_info), - min_size_(min_size), - max_size_(max_size), - hash_mask_(GetMask(min_size, max_size)) {} + min_size_(size_range.first), + max_size_(size_range.second), + hash_mask_(GetMask(size_range.first, size_range.second, norm_factor)) {} template -bool ContentDefinedChunker::Roll(const T value) { +void ContentDefinedChunker::Roll(const T value) { constexpr size_t BYTE_WIDTH = sizeof(T); chunk_size_ += BYTE_WIDTH; if (chunk_size_ < min_size_) { - return false; + // short-circuit if we haven't reached the minimum chunk size, this speeds up the + // chunking process since the gearhash doesn't need to be updated + return; } auto bytes = reinterpret_cast(&value); - bool match = false; for (size_t i = 0; i < BYTE_WIDTH; ++i) { rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; - match |= (rolling_hash_ & hash_mask_) == 0; + if ((rolling_hash_ & hash_mask_) == 0) { + has_matched_ = true; + } } - return match; } -bool ContentDefinedChunker::Roll(std::string_view value) { +void ContentDefinedChunker::Roll(std::string_view value) { chunk_size_ += value.size(); if (chunk_size_ < min_size_) { - return false; + // short-circuit if we haven't reached the minimum chunk size, this speeds up the + // chunking process since the gearhash doesn't need to be updated + return; } - bool match = false; for (char c : value) { rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; - match |= (rolling_hash_ & hash_mask_) == 0; + if ((rolling_hash_ & hash_mask_) == 0) { + has_matched_ = true; + } } - return match; } -bool ContentDefinedChunker::Check(bool match) { - if
(ARROW_PREDICT_FALSE(match && ++nth_run_ >= 7)) { - nth_run_ = 0; - chunk_size_ = 0; - return true; - } else if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { +bool ContentDefinedChunker::Check() { + // decide whether to create a new chunk based on the rolling hash; has_matched_ is + // set to true if we encountered a match since the last Check() call + if (ARROW_PREDICT_FALSE(has_matched_)) { + has_matched_ = false; + // in order to have a normal distribution of chunk sizes, we only create a new chunk + // if the adjusted mask matches the rolling hash 8 times in a row, each run uses a + // different gearhash table (gearhash's chunk size has exponential distribution, and + // we use central limit theorem to approximate normal distribution) + if (ARROW_PREDICT_FALSE(++nth_run_ >= 7)) { + nth_run_ = 0; + chunk_size_ = 0; + return true; + } + } + if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { + // we have a hard limit on the maximum chunk size, note that we don't reset the rolling + // hash state here, so the next Check() call will continue from the current state chunk_size_ = 0; return true; - } else { - return false; } + return false; } template @@ -629,14 +648,13 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev bool has_rep_levels = level_info_.rep_level > 0; if (!has_rep_levels && !has_def_levels) { - // fastest path for non-repeated non-null data - bool val_match; + // fastest path for non-nested non-null data int64_t offset = 0; int64_t prev_offset = 0; while (offset < num_levels) { - val_match = Roll(leaf_array.GetView(offset)); + Roll(leaf_array.GetView(offset)); ++offset; - if (Check(val_match)) { + if (Check()) { result.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; } @@ -645,15 +663,14 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); } } else if (!has_rep_levels) { - // non-repeated data
possibly with nulls - bool def_match, val_match; + // non-nested data with nulls int64_t offset = 0; int64_t prev_offset = 0; while (offset < num_levels) { - def_match = Roll(def_levels[offset]); - val_match = Roll(leaf_array.GetView(offset)); + Roll(def_levels[offset]); + Roll(leaf_array.GetView(offset)); ++offset; - if (Check(def_match || val_match)) { + if (Check()) { result.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; } @@ -662,52 +679,48 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); } } else { - // repeated data possibly with nulls - bool def_match, rep_match, val_match; + // nested data with nulls + bool has_leaf_value; + bool is_record_boundary; int16_t def_level; int16_t rep_level; int64_t level_offset = 0; int64_t value_offset = 0; int64_t record_level_offset = 0; int64_t record_value_offset = 0; - int64_t prev_record_level_offset = 0; - int64_t prev_record_value_offset = 0; while (level_offset < num_levels) { def_level = def_levels[level_offset]; rep_level = rep_levels[level_offset]; - if (rep_level == 0) { - record_level_offset = level_offset; - record_value_offset = value_offset; - } - ++level_offset; - def_match = Roll(def_level); - rep_match = Roll(rep_level); - if (ARROW_PREDICT_TRUE(def_level >= level_info_.repeated_ancestor_def_level)) { - val_match = Roll(leaf_array.GetView(value_offset)); - ++value_offset; - } else { - val_match = false; + has_leaf_value = def_level >= level_info_.repeated_ancestor_def_level; + is_record_boundary = rep_level == 0; + + Roll(def_level); + Roll(rep_level); + if (has_leaf_value) { + Roll(leaf_array.GetView(value_offset)); } - if (Check(def_match || rep_match || val_match)) { - auto levels_to_write = record_level_offset - prev_record_level_offset; + if (is_record_boundary && Check()) { + auto levels_to_write = level_offset - record_level_offset; if (levels_to_write > 0) { - 
result.emplace_back(prev_record_level_offset, prev_record_value_offset, - levels_to_write); - prev_record_level_offset = record_level_offset; - prev_record_value_offset = record_value_offset; + result.emplace_back(record_level_offset, record_value_offset, levels_to_write); + record_level_offset = level_offset; + record_value_offset = value_offset; } } + + ++level_offset; + if (has_leaf_value) { + ++value_offset; + } } - auto levels_to_write = num_levels - prev_record_level_offset; + auto levels_to_write = num_levels - record_level_offset; if (levels_to_write > 0) { - result.emplace_back(prev_record_level_offset, prev_record_value_offset, - levels_to_write); + result.emplace_back(record_level_offset, record_value_offset, levels_to_write); } - return result; } return result; diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 25ed78cb288ae..5011620bd31d9 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -41,8 +41,9 @@ struct Chunk { class ContentDefinedChunker { public: - ContentDefinedChunker(const LevelInfo& level_info, uint64_t min_size, - uint64_t max_size); + ContentDefinedChunker(const LevelInfo& level_info, + std::pair size_range, + uint8_t norm_factor = 1); const ::arrow::Result> GetBoundaries(const int16_t* def_levels, const int16_t* rep_levels, @@ -51,9 +52,9 @@ class ContentDefinedChunker { private: template - bool Roll(const T value); - bool Roll(std::string_view value); - inline bool Check(bool match); + void Roll(const T value); + void Roll(std::string_view value); + inline bool Check(); template const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, @@ -64,6 +65,7 @@ class ContentDefinedChunker { const uint64_t max_size_; const uint64_t hash_mask_; + bool has_matched_ = false; uint64_t nth_run_ = 0; uint64_t chunk_size_ = 0; uint64_t rolling_hash_ = 0; diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index 
bf4f6ed77e50a..7bfdb4ed13868 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -31,6 +31,7 @@ #include "arrow/type_traits.h" #include "arrow/util/decimal.h" #include "arrow/util/float16.h" +#include "arrow/util/logging.h" #include "parquet/arrow/reader.h" #include "parquet/arrow/reader_internal.h" #include "parquet/arrow/schema.h" @@ -71,6 +72,8 @@ std::shared_ptr
GenerateTable(const std::vector>& std::shared_ptr
ConcatAndCombine( const std::vector>& parts) { + // Concatenate and combine chunks so the table doesn't carry information about + // the modification points auto table = ConcatenateTables(parts).ValueOrDie(); return table->CombineChunks().ValueOrDie(); } @@ -84,7 +87,6 @@ Result> WriteTableToBuffer(const std::shared_ptr
& auto sink = CreateOutputStream(); auto builder = WriterProperties::Builder(); - // enable content defined chunking builder.enable_cdc()->cdc_size_range(min_chunk_size, max_chunk_size); if (enable_dictionary) { builder.enable_dictionary(); @@ -92,7 +94,6 @@ Result> WriteTableToBuffer(const std::shared_ptr
& builder.disable_dictionary(); } auto write_props = builder.build(); - auto arrow_props = ArrowWriterProperties::Builder().store_schema()->build(); RETURN_NOT_OK(WriteTable(*table, default_memory_pool(), sink, row_group_size, write_props, arrow_props)); @@ -111,11 +112,16 @@ Result> ReadTableFromBuffer(const std::shared_ptr return result; } -using PageSizes = std::pair, std::vector>; +struct PageSizes { + std::vector lengths; + std::vector sizes; +}; PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_index = 0) { - std::vector page_sizes; - std::vector page_lengths; + // Read the parquet data out of the buffer and get the sizes and lengths of the + // data pages in given column. We assert on the sizes and lengths of the pages + // to ensure that the chunking is done correctly. + PageSizes result; auto buffer_reader = std::make_shared(data); auto parquet_reader = ParquetFileReader::Open(std::move(buffer_reader)); @@ -126,19 +132,20 @@ PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_ind while (auto page = page_reader->NextPage()) { if (page->type() == PageType::DATA_PAGE || page->type() == PageType::DATA_PAGE_V2) { auto data_page = static_cast(page.get()); - page_sizes.push_back(data_page->size()); - page_lengths.push_back(data_page->num_values()); + result.sizes.push_back(data_page->size()); + result.lengths.push_back(data_page->num_values()); } } } - return {page_lengths, page_sizes}; + return result; } Result WriteAndGetPageSizes(const std::shared_ptr
& table, uint64_t min_chunk_size, uint64_t max_chunk_size, bool enable_dictionary = false, int column_index = 0) { + // Write the table to a buffer and read it back to get the page sizes ARROW_ASSIGN_OR_RAISE( auto buffer, WriteTableToBuffer(table, min_chunk_size, max_chunk_size, enable_dictionary)); @@ -180,51 +187,74 @@ void AssertAllBetween(const std::vector& values, uint64_t min, uint64_ std::vector, std::vector>> FindDifferences( const std::vector& first, const std::vector& second) { - auto n = first.size(), m = second.size(); - - // Build DP table for LCS. + // Compute LCS table. + size_t n = first.size(), m = second.size(); std::vector> dp(n + 1, std::vector(m + 1, 0)); - for (size_t i = 0; i < n; ++i) { - for (size_t j = 0; j < m; ++j) { - dp[i + 1][j + 1] = - (first[i] == second[j]) ? dp[i][j] + 1 : std::max(dp[i + 1][j], dp[i][j + 1]); + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < m; j++) { + if (first[i] == second[j]) { + dp[i + 1][j + 1] = dp[i][j] + 1; + } else { + dp[i + 1][j + 1] = std::max(dp[i + 1][j], dp[i][j + 1]); + } } } - // Backtrack to recover LCS indices. + // Backtrack to get common indices. std::vector> common; - for (auto i = n, j = m; i > 0 && j > 0;) { + for (size_t i = n, j = m; i > 0 && j > 0;) { if (first[i - 1] == second[j - 1]) { common.emplace_back(i - 1, j - 1); - --i, --j; + i--, j--; } else if (dp[i - 1][j] >= dp[i][j - 1]) { - --i; + i--; } else { - --j; + j--; } } std::reverse(common.begin(), common.end()); - // Extract differences using the common indices as anchors. + // Build raw differences. 
std::vector, std::vector>> result; size_t last_i = 0, last_j = 0; - for (auto [ci, cj] : common) { - std::vector diff1(first.begin() + last_i, first.begin() + ci); - std::vector diff2(second.begin() + last_j, second.begin() + cj); - if (!diff1.empty() || !diff2.empty()) { - result.emplace_back(std::move(diff1), std::move(diff2)); + for (auto& c : common) { + auto ci = c.first; + auto cj = c.second; + if (ci > last_i || cj > last_j) { + result.push_back({{first.begin() + last_i, first.begin() + ci}, + {second.begin() + last_j, second.begin() + cj}}); } last_i = ci + 1; last_j = cj + 1; } - // Add any remaining elements after the last common index. - std::vector diff1(first.begin() + last_i, first.end()); - std::vector diff2(second.begin() + last_j, second.end()); - if (!diff1.empty() || !diff2.empty()) { - result.emplace_back(std::move(diff1), std::move(diff2)); + if (last_i < n || last_j < m) { + result.push_back( + {{first.begin() + last_i, first.end()}, {second.begin() + last_j, second.end()}}); } - return result; + // Merge adjacent diffs if one side is empty in the first diff and the other side + // is empty in the next diff, to avoid splitting single changes into two parts. 
+ std::vector, std::vector>> merged; + for (auto& diff : result) { + if (!merged.empty()) { + auto& prev = merged.back(); + bool can_merge_a = prev.first.empty() && !prev.second.empty() && + !diff.first.empty() && diff.second.empty(); + bool can_merge_b = prev.second.empty() && !prev.first.empty() && + !diff.second.empty() && diff.first.empty(); + if (can_merge_a) { + // Combine into one change + prev.first = std::move(diff.first); + continue; + } else if (can_merge_b) { + prev.second = std::move(diff.second); + continue; + } + } + merged.push_back(std::move(diff)); + } + + return merged; } TEST(TestFindDifferences, Basic) { @@ -309,16 +339,40 @@ TEST(TestFindDifferences, LongSequenceWithMiddleChanges) { } } -void AssertUpdateCase(const std::vector& original, +TEST(TestFindDifferences, AdditionalCase) { + std::vector original = {445, 312, 393, 401, 410, 138, 558, 457}; + std::vector modified = {445, 312, 393, 393, 410, 138, 558, 457}; + + auto diffs = FindDifferences(original, modified); + ASSERT_EQ(diffs.size(), 1); + + ASSERT_EQ(diffs[0].first, std::vector({401})); + ASSERT_EQ(diffs[0].second, std::vector({393})); + + // Verify elements before and after the difference are identical + for (size_t i = 0; i < 3; i++) { + ASSERT_EQ(original[i], modified[i]); + } + for (size_t i = 4; i < original.size(); i++) { + ASSERT_EQ(original[i], modified[i]); + } +} + +void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, + const std::vector& original, const std::vector& modified, uint8_t n_modifications) { auto diffs = FindDifferences(original, modified); + // Print diffs, original, and modified sequences for debugging purposes + ASSERT_LE(diffs.size(), n_modifications); for (const auto& diff : diffs) { - uint64_t left_sum = 0, right_sum = 0; - for (const auto& val : diff.first) left_sum += val; - for (const auto& val : diff.second) right_sum += val; - ASSERT_EQ(left_sum, right_sum); + if (!::arrow::is_list_like(dtype->id())) { + uint64_t left_sum = 0, 
right_sum = 0; + for (const auto& val : diff.first) left_sum += val; + for (const auto& val : diff.second) right_sum += val; + ASSERT_EQ(left_sum, right_sum); + } ASSERT_LE(diff.first.size(), 2); ASSERT_LE(diff.second.size(), 2); } @@ -329,35 +383,41 @@ void AssertUpdateCase(const std::vector& original, } } -void AssertDeleteCase(const std::vector& original, +void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, + const std::vector& original, const std::vector& modified, uint8_t n_modifications, uint64_t edit_length) { auto diffs = FindDifferences(original, modified); ASSERT_EQ(diffs.size(), n_modifications); for (const auto& diff : diffs) { - uint64_t left_sum = 0, right_sum = 0; - for (const auto& val : diff.first) left_sum += val; - for (const auto& val : diff.second) right_sum += val; - ASSERT_EQ(left_sum, right_sum + edit_length); - ASSERT_LE(diff.first.size(), 3); - ASSERT_LE(diff.second.size(), 3); + if (!::arrow::is_list_like(dtype->id())) { + uint64_t left_sum = 0, right_sum = 0; + for (const auto& val : diff.first) left_sum += val; + for (const auto& val : diff.second) right_sum += val; + ASSERT_EQ(left_sum, right_sum + edit_length); + } + ASSERT_LE(diff.first.size(), 2); + ASSERT_LE(diff.second.size(), 2); } } -void AssertInsertCase(const std::vector& original, +void AssertInsertCase(const std::shared_ptr<::arrow::DataType>& dtype, + const std::vector& original, const std::vector& modified, uint8_t n_modifications, uint64_t edit_length) { auto diffs = FindDifferences(original, modified); ASSERT_EQ(diffs.size(), n_modifications); for (const auto& diff : diffs) { - uint64_t left_sum = 0, right_sum = 0; - for (const auto& val : diff.first) left_sum += val; - for (const auto& val : diff.second) right_sum += val; - ASSERT_EQ(left_sum + edit_length, right_sum); - ASSERT_LE(diff.first.size(), 3); - ASSERT_LE(diff.second.size(), 3); + if (!::arrow::is_list_like(dtype->id())) { + uint64_t left_sum = 0, right_sum = 0; + for (const auto& val : 
diff.first) left_sum += val; + for (const auto& val : diff.second) right_sum += val; + ASSERT_EQ(left_sum + edit_length, right_sum); + } + ASSERT_LE(diff.first.size(), 2); + ASSERT_LE(diff.second.size(), 2); } } @@ -387,242 +447,244 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, if (::arrow::is_fixed_width(dtype->id())) { auto min_length = ElementCount(min_chunk_size, dtype->byte_width(), nullable); auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); - AssertAllBetween(base_result.first, min_length, max_length, + AssertAllBetween(base_result.lengths, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); - AssertAllBetween(modified_result.first, min_length, max_length, + AssertAllBetween(modified_result.lengths, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); } else if (::arrow::is_base_binary_like(dtype->id()) && !nullable && !enable_dictionary) { - AssertAllBetween(base_result.second, min_chunk_size, max_chunk_size); - AssertAllBetween(modified_result.second, min_chunk_size, max_chunk_size); + AssertAllBetween(base_result.sizes, min_chunk_size, max_chunk_size); + AssertAllBetween(modified_result.sizes, min_chunk_size, max_chunk_size); } } -constexpr uint64_t kMinChunkSize = 32 * 1024; -constexpr uint64_t kMaxChunkSize = 128 * 1024; -constexpr uint64_t kPartLength = 128 * 1024; -constexpr uint64_t kEditLength = 32; - -// TODO: -// - test nested types -// - test multiple row groups +constexpr uint64_t kMinChunkSize = 16 * 1024; +constexpr uint64_t kMaxChunkSize = 64 * 1024; +constexpr uint64_t kPartSize = 256 * 1024; +constexpr uint64_t kEditSize = 256; class TestColumnCDC : public ::testing::TestWithParam< - std::tuple, bool, bool>> {}; + std::tuple, bool, size_t>> { + protected: + // Column random table parts for testing + std::shared_ptr field_; + std::shared_ptr
part1_, part2_, part3_, part4_, part5_, part6_, part7_; + + void SetUp() override { + auto [dtype, nullable, byte_per_record] = GetParam(); + auto field_ = ::arrow::field("f0", dtype, nullable); + + auto part_length = kPartSize / byte_per_record; + auto edit_length = kEditSize / byte_per_record; + // Generate random table parts, these are later concatenated to simulate + // different scenarios like insert, update, delete, and append. + part1_ = GenerateTable({field_}, part_length, /*seed=*/1); + part2_ = GenerateTable({field_}, edit_length, /*seed=*/2); + part3_ = GenerateTable({field_}, part_length, /*seed=*/3); + part4_ = GenerateTable({field_}, edit_length, /*seed=*/4); + part5_ = GenerateTable({field_}, part_length, /*seed=*/5); + part6_ = GenerateTable({field_}, edit_length, /*seed=*/6); + part7_ = GenerateTable({field_}, edit_length, /*seed=*/7); + } +}; TEST_P(TestColumnCDC, DeleteOnce) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); + auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part3}); + auto base = ConcatAndCombine({part1_, part2_, part3_}); + auto modified = ConcatAndCombine({part1_, part3_}); ASSERT_FALSE(base->Equals(*modified)); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto base_result, + 
WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); - AssertDeleteCase(base_result.first, modified_result.first, 1, kEditLength); + AssertDeleteCase(dtype, base_result.lengths, modified_result.lengths, 1, + part2_->num_rows()); + } } TEST_P(TestColumnCDC, DeleteTwice) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); + auto [dtype, nullable, _] = GetParam(); - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - - auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); - auto modified = ConcatAndCombine({part1, part3, part5}); + auto base = ConcatAndCombine({part1_, part2_, part3_, part4_, part5_}); + auto modified = ConcatAndCombine({part1_, part3_, part5_}); ASSERT_FALSE(base->Equals(*modified)); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - 
AssertDeleteCase(base_result.first, modified_result.first, 2, kEditLength); + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); + AssertDeleteCase(dtype, base_result.lengths, modified_result.lengths, 2, + part2_->num_rows()); + } } TEST_P(TestColumnCDC, UpdateOnce) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); + auto [dtype, nullable, _] = GetParam(); - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part4, part3}); + auto base = ConcatAndCombine({part1_, part2_, part3_}); + auto modified = ConcatAndCombine({part1_, part4_, part3_}); ASSERT_FALSE(base->Equals(*modified)); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertUpdateCase(base_result.first, modified_result.first, 1); + for (bool enable_dictionary : {false, true}) { + 
ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); + AssertUpdateCase(dtype, base_result.lengths, modified_result.lengths, 1); + } } TEST_P(TestColumnCDC, UpdateTwice) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); - - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - auto part6 = GenerateTable({field}, kEditLength, /*seed=*/6); - auto part7 = GenerateTable({field}, kEditLength, /*seed=*/7); - - auto base = ConcatAndCombine({part1, part2, part3, part4, part5}); - auto modified = ConcatAndCombine({part1, part6, part3, part7, part5}); - ASSERT_FALSE(base->Equals(*modified)); + auto [dtype, nullable, _] = GetParam(); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + auto base = ConcatAndCombine({part1_, part2_, part3_, part4_, part5_}); + auto modified = ConcatAndCombine({part1_, part6_, part3_, part7_, part5_}); + ASSERT_FALSE(base->Equals(*modified)); - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, 
kMaxChunkSize); - AssertUpdateCase(base_result.first, modified_result.first, 2); + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); + AssertUpdateCase(dtype, base_result.lengths, modified_result.lengths, 2); + } } TEST_P(TestColumnCDC, InsertOnce) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); + auto [dtype, nullable, _] = GetParam(); - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - - auto base = ConcatAndCombine({part1, part3}); - auto modified = ConcatAndCombine({part1, part2, part3}); + auto base = ConcatAndCombine({part1_, part3_}); + auto modified = ConcatAndCombine({part1_, part2_, part3_}); ASSERT_FALSE(base->Equals(*modified)); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertInsertCase(base_result.first, modified_result.first, 1, kEditLength); + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + 
/*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); + AssertInsertCase(dtype, base_result.lengths, modified_result.lengths, 1, + part2_->num_rows()); + } } TEST_P(TestColumnCDC, InsertTwice) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); + auto [dtype, nullable, _] = GetParam(); - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - auto part5 = GenerateTable({field}, kPartLength, /*seed=*/5); - - auto base = ConcatAndCombine({part1, part3, part5}); - auto modified = ConcatAndCombine({part1, part2, part3, part4, part5}); + auto base = ConcatAndCombine({part1_, part3_, part5_}); + auto modified = ConcatAndCombine({part1_, part2_, part3_, part4_, part5_}); ASSERT_FALSE(base->Equals(*modified)); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertInsertCase(base_result.first, modified_result.first, 2, kEditLength); + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + 
/*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); + AssertInsertCase(dtype, base_result.lengths, modified_result.lengths, 2, + part2_->num_rows()); + } } TEST_P(TestColumnCDC, Append) { - auto dtype = std::get<0>(GetParam()); - auto nullable = std::get<1>(GetParam()); - auto enable_dictionary = std::get<2>(GetParam()); + auto [dtype, nullable, _] = GetParam(); - auto field = ::arrow::field("f0", dtype, nullable); - - auto part1 = GenerateTable({field}, kPartLength, /*seed=*/1); - auto part2 = GenerateTable({field}, kEditLength, /*seed=*/2); - auto part3 = GenerateTable({field}, kPartLength, /*seed=*/3); - auto part4 = GenerateTable({field}, kEditLength, /*seed=*/4); - - auto base = ConcatAndCombine({part1, part2, part3}); - auto modified = ConcatAndCombine({part1, part2, part3, part4}); + auto base = ConcatAndCombine({part1_, part2_, part3_}); + auto modified = ConcatAndCombine({part1_, part2_, part3_, part4_}); ASSERT_FALSE(base->Equals(*modified)); - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertAppendCase(base_result.first, modified_result.first); + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto base_result, + WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN(auto modified_result, + WriteAndGetPageSizes(modified, kMinChunkSize, 
kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, + kMinChunkSize, kMaxChunkSize); + AssertAppendCase(base_result.lengths, modified_result.lengths); + } } // TODO(kszucs): add extension type and dictionary type INSTANTIATE_TEST_SUITE_P( FixedSizedTypes, TestColumnCDC, - Combine(Values(::arrow::uint8(), ::arrow::uint16(), ::arrow::uint32(), - ::arrow::uint64(), ::arrow::int8(), ::arrow::int16(), ::arrow::int32(), - ::arrow::int64(), ::arrow::float16(), ::arrow::float32(), - ::arrow::float64(), ::arrow::binary(), ::arrow::large_binary(), - ::arrow::fixed_size_binary(16), ::arrow::utf8(), ::arrow::large_utf8(), - ::arrow::date32(), ::arrow::date64(), ::arrow::decimal128(18, 6), - ::arrow::decimal256(40, 6), ::arrow::time32(::arrow::TimeUnit::SECOND), - ::arrow::time64(::arrow::TimeUnit::NANO), - ::arrow::timestamp(::arrow::TimeUnit::NANO), - ::arrow::duration(::arrow::TimeUnit::NANO)), - Bool(), Bool())); + testing::Values( + // Numeric + std::make_tuple(::arrow::uint8(), false, 1), + std::make_tuple(::arrow::uint16(), true, 2), + std::make_tuple(::arrow::uint32(), false, 4), + std::make_tuple(::arrow::uint64(), true, 8), + std::make_tuple(::arrow::int8(), false, 1), + std::make_tuple(::arrow::int16(), false, 2), + std::make_tuple(::arrow::int32(), false, 4), + std::make_tuple(::arrow::int64(), true, 8), + std::make_tuple(::arrow::float16(), false, 2), + std::make_tuple(::arrow::float32(), false, 4), + std::make_tuple(::arrow::float64(), true, 8), + std::make_tuple(::arrow::decimal128(18, 6), false, 16), + std::make_tuple(::arrow::decimal256(40, 6), false, 32), + // Binary-like + std::make_tuple(::arrow::binary(), true, 16), + + std::make_tuple(::arrow::large_binary(), false, 16), + std::make_tuple(::arrow::fixed_size_binary(16), true, 16), + std::make_tuple(::arrow::utf8(), false, 16), + std::make_tuple(::arrow::utf8(), true, 16), + std::make_tuple(::arrow::large_utf8(), 
false, 16), + // Temporal + std::make_tuple(::arrow::date32(), false, 4), + std::make_tuple(::arrow::date64(), false, 8), + std::make_tuple(::arrow::time32(::arrow::TimeUnit::SECOND), true, 4), + std::make_tuple(::arrow::time64(::arrow::TimeUnit::NANO), false, 8), + std::make_tuple(::arrow::timestamp(::arrow::TimeUnit::NANO), true, 8), + std::make_tuple(::arrow::duration(::arrow::TimeUnit::NANO), false, 8), + // Nested types + std::make_tuple(::arrow::list(::arrow::int32()), false, 64), + std::make_tuple(::arrow::list(::arrow::int32()), true, 64), + std::make_tuple(::arrow::list(::arrow::utf8()), true, 64), + std::make_tuple(::arrow::large_list(::arrow::int32()), true, 64), + std::make_tuple(::arrow::struct_({::arrow::field("f0", ::arrow::int32())}), false, + 8), + std::make_tuple(::arrow::struct_({::arrow::field("f0", ::arrow::float64())}), + true, 10))); } // namespace parquet + +// TODO: +// - test multiple row groups +// - test empty diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index bf2aeeaf2d1c1..77ca2adc3daf1 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -754,8 +754,8 @@ class ColumnWriterImpl { fallback_(false), definition_levels_sink_(allocator_), repetition_levels_sink_(allocator_), - content_defined_chunker_(level_info_, properties->cdc_size_range().first, - properties->cdc_size_range().second) { + content_defined_chunker_(level_info_, properties->cdc_size_range(), + properties->cdc_norm_factor()) { definition_levels_rle_ = std::static_pointer_cast(AllocateBuffer(allocator_, 0)); repetition_levels_rle_ = diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 185edccd777a3..ab49a0facff20 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -170,6 +170,7 @@ static constexpr SizeStatisticsLevel DEFAULT_SIZE_STATISTICS_LEVEL = SizeStatisticsLevel::PageAndColumnChunk; static constexpr std::pair DEFAULT_CDC_SIZE_RANGE = 
std::make_pair(256 * 1024, 1024 * 1024); +static constexpr uint8_t DEFAULT_CDC_NORM_FACTOR = 0; class PARQUET_EXPORT ColumnProperties { public: @@ -265,7 +266,8 @@ class PARQUET_EXPORT WriterProperties { page_checksum_enabled_(false), size_statistics_level_(DEFAULT_SIZE_STATISTICS_LEVEL), cdc_enabled_(false), - cdc_size_range_(DEFAULT_CDC_SIZE_RANGE) {} + cdc_size_range_(DEFAULT_CDC_SIZE_RANGE), + cdc_norm_factor_(DEFAULT_CDC_NORM_FACTOR) {} explicit Builder(const WriterProperties& properties) : pool_(properties.memory_pool()), @@ -282,7 +284,8 @@ class PARQUET_EXPORT WriterProperties { sorting_columns_(properties.sorting_columns()), default_column_properties_(properties.default_column_properties()), cdc_enabled_(properties.cdc_enabled()), - cdc_size_range_(properties.cdc_size_range()) {} + cdc_size_range_(properties.cdc_size_range()), + cdc_norm_factor_(properties.cdc_norm_factor()) {} virtual ~Builder() {} @@ -301,6 +304,11 @@ class PARQUET_EXPORT WriterProperties { return this; } + Builder* cdc_norm_factor(uint8_t norm_factor) { + cdc_norm_factor_ = norm_factor; + return this; + } + /// Specify the memory pool for the writer. Default default_memory_pool. 
Builder* memory_pool(MemoryPool* pool) { pool_ = pool; @@ -724,7 +732,7 @@ class PARQUET_EXPORT WriterProperties { size_statistics_level_, std::move(file_encryption_properties_), default_column_properties_, column_properties, data_page_version_, store_decimal_as_integer_, std::move(sorting_columns_), cdc_enabled_, - cdc_size_range_)); + cdc_size_range_, cdc_norm_factor_)); } private: @@ -756,6 +764,7 @@ class PARQUET_EXPORT WriterProperties { bool cdc_enabled_; std::pair cdc_size_range_; + uint8_t cdc_norm_factor_; }; inline MemoryPool* memory_pool() const { return pool_; } @@ -782,6 +791,7 @@ class PARQUET_EXPORT WriterProperties { inline bool cdc_enabled() const { return cdc_enabled_; } inline std::pair cdc_size_range() const { return cdc_size_range_; } + inline uint8_t cdc_norm_factor() const { return cdc_norm_factor_; } inline SizeStatisticsLevel size_statistics_level() const { return size_statistics_level_; @@ -886,7 +896,7 @@ class PARQUET_EXPORT WriterProperties { const std::unordered_map& column_properties, ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer, std::vector sorting_columns, bool cdc_enabled, - std::pair cdc_size_range) + std::pair cdc_size_range, uint8_t cdc_norm_factor) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), @@ -903,9 +913,8 @@ class PARQUET_EXPORT WriterProperties { default_column_properties_(default_column_properties), column_properties_(column_properties), cdc_enabled_(cdc_enabled), - cdc_size_range_(cdc_size_range) - - {} + cdc_size_range_(cdc_size_range), + cdc_norm_factor_(cdc_norm_factor) {} MemoryPool* pool_; int64_t dictionary_pagesize_limit_; @@ -928,6 +937,7 @@ class PARQUET_EXPORT WriterProperties { bool cdc_enabled_; std::pair cdc_size_range_; + uint8_t cdc_norm_factor_; }; PARQUET_EXPORT const std::shared_ptr& default_writer_properties(); diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index 
e764756436165..07976c2efbac4 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -449,6 +449,7 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* enable_cdc() Builder* disable_cdc() Builder* cdc_size_range(uint64_t min_size, uint64_t max_size) + Builder* cdc_norm_factor(uint8_t norm_factor) shared_ptr[WriterProperties] build() cdef cppclass ArrowWriterProperties: @@ -600,7 +601,9 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_page_checksum=*, sorting_columns=*, store_decimal_as_integer=*, - content_defined_chunking=* + cdc=*, + cdc_size_range=*, + cdc_norm_factor=*, ) except * diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 32dad29e6b3ec..fedba54ba2b16 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -1866,7 +1866,9 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( write_page_checksum=False, sorting_columns=None, store_decimal_as_integer=False, - content_defined_chunking=False) except *: + cdc=False, + cdc_size_range=None, + cdc_norm_factor=None) except *: """General writer properties""" cdef: @@ -2014,18 +2016,16 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( props.dictionary_pagesize_limit(dictionary_pagesize_limit) # content defined chunking - if content_defined_chunking is False: + if cdc is False: props.disable_cdc() - elif content_defined_chunking is True: - props.enable_cdc() - elif isinstance(content_defined_chunking, tuple): - min_size, max_size = content_defined_chunking + elif cdc is True: props.enable_cdc() + + if cdc_size_range is not None: + min_size, max_size = cdc_size_range props.cdc_size_range(min_size, max_size) - else: - raise ValueError( - "Unsupported value for content_defined_chunking: {0}" - .format(content_defined_chunking)) + if cdc_norm_factor is not None: + props.cdc_norm_factor(cdc_norm_factor) # encryption @@ -2201,7 +2201,9 @@ cdef class ParquetWriter(_Weakrefable): 
write_page_checksum=False, sorting_columns=None, store_decimal_as_integer=False, - content_defined_chunking=False): + cdc=False, + cdc_size_range=None, + cdc_norm_factor=None): cdef: shared_ptr[WriterProperties] properties shared_ptr[ArrowWriterProperties] arrow_properties @@ -2236,7 +2238,7 @@ cdef class ParquetWriter(_Weakrefable): write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, store_decimal_as_integer=store_decimal_as_integer, - content_defined_chunking=content_defined_chunking + cdc=cdc, cdc_size_range=cdc_size_range, cdc_norm_factor=cdc_norm_factor ) arrow_properties = _create_arrow_writer_properties( use_deprecated_int96_timestamps=use_deprecated_int96_timestamps, From c48c51a34b91dcd2a2831e240373dc827154dad1 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 24 Feb 2025 20:18:22 +0100 Subject: [PATCH 19/48] reduce the testing data size to make the test cases quicker --- cpp/src/parquet/column_chunker_test.cc | 40 ++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index 7bfdb4ed13868..d4b323f1e062e 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -87,7 +87,9 @@ Result> WriteTableToBuffer(const std::shared_ptr
& auto sink = CreateOutputStream(); auto builder = WriterProperties::Builder(); - builder.enable_cdc()->cdc_size_range(min_chunk_size, max_chunk_size); + builder.enable_cdc() + ->cdc_size_range(min_chunk_size, max_chunk_size) + ->cdc_norm_factor(0); if (enable_dictionary) { builder.enable_dictionary(); } else { @@ -257,6 +259,36 @@ std::vector, std::vector>> FindDiffere return merged; } +void PrintDifferences( + const std::vector& original, const std::vector& modified, + std::vector, std::vector>>& diffs) { + std::cout << "Original: "; + for (const auto& val : original) { + std::cout << val << " "; + } + std::cout << std::endl; + + std::cout << "Modified: "; + for (const auto& val : modified) { + std::cout << val << " "; + } + std::cout << std::endl; + + for (const auto& diff : diffs) { + std::cout << "First: "; + for (const auto& val : diff.first) { + std::cout << val << " "; + } + std::cout << std::endl; + + std::cout << "Second: "; + for (const auto& val : diff.second) { + std::cout << val << " "; + } + std::cout << std::endl; + } +} + TEST(TestFindDifferences, Basic) { std::vector first = {1, 2, 3, 4, 5}; std::vector second = {1, 7, 8, 4, 5}; @@ -362,8 +394,6 @@ void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, const std::vector& original, const std::vector& modified, uint8_t n_modifications) { auto diffs = FindDifferences(original, modified); - // Print diffs, original, and modified sequences for debugging purposes - ASSERT_LE(diffs.size(), n_modifications); for (const auto& diff : diffs) { @@ -458,9 +488,9 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, } } -constexpr uint64_t kMinChunkSize = 16 * 1024; +constexpr uint64_t kMinChunkSize = 8 * 1024; constexpr uint64_t kMaxChunkSize = 64 * 1024; -constexpr uint64_t kPartSize = 256 * 1024; +constexpr uint64_t kPartSize = 64 * 1024; constexpr uint64_t kEditSize = 256; class TestColumnCDC : public ::testing::TestWithParam< From becbfa4ef20bd2f39e93a9ee44fdbffc9335cfbe 
Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 24 Feb 2025 21:28:45 +0100 Subject: [PATCH 20/48] increase testing data size --- cpp/src/parquet/column_chunker_test.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index d4b323f1e062e..79ae8f1cca9c1 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -394,6 +394,9 @@ void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, const std::vector& original, const std::vector& modified, uint8_t n_modifications) { auto diffs = FindDifferences(original, modified); + if (diffs.size() > n_modifications) { + PrintDifferences(original, modified, diffs); + } ASSERT_LE(diffs.size(), n_modifications); for (const auto& diff : diffs) { @@ -418,6 +421,9 @@ void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, const std::vector& modified, uint8_t n_modifications, uint64_t edit_length) { auto diffs = FindDifferences(original, modified); + if (diffs.size() != n_modifications) { + PrintDifferences(original, modified, diffs); + } ASSERT_EQ(diffs.size(), n_modifications); for (const auto& diff : diffs) { @@ -437,6 +443,9 @@ void AssertInsertCase(const std::shared_ptr<::arrow::DataType>& dtype, const std::vector& modified, uint8_t n_modifications, uint64_t edit_length) { auto diffs = FindDifferences(original, modified); + if (diffs.size() != n_modifications) { + PrintDifferences(original, modified, diffs); + } ASSERT_EQ(diffs.size(), n_modifications); for (const auto& diff : diffs) { @@ -474,6 +483,7 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, PageSizes base_result, PageSizes modified_result, bool nullable, bool enable_dictionary, uint64_t min_chunk_size, uint64_t max_chunk_size) { + max_chunk_size *= 1.2; if (::arrow::is_fixed_width(dtype->id())) { auto min_length = ElementCount(min_chunk_size, 
dtype->byte_width(), nullable); auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); @@ -488,10 +498,10 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, } } -constexpr uint64_t kMinChunkSize = 8 * 1024; -constexpr uint64_t kMaxChunkSize = 64 * 1024; -constexpr uint64_t kPartSize = 64 * 1024; -constexpr uint64_t kEditSize = 256; +constexpr uint64_t kMinChunkSize = 64 * 1024; +constexpr uint64_t kMaxChunkSize = 128 * 1024; +constexpr uint64_t kPartSize = 256 * 1024; +constexpr uint64_t kEditSize = 128; class TestColumnCDC : public ::testing::TestWithParam< std::tuple, bool, size_t>> { From 44dca77464e162c04afc69bbe516ece79100cd69 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 24 Feb 2025 21:46:18 +0100 Subject: [PATCH 21/48] add a custom array generator to alwayw produce the same array --- cpp/src/parquet/column_chunker_test.cc | 281 +++++++++++++++++++------ 1 file changed, 221 insertions(+), 60 deletions(-) diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index 79ae8f1cca9c1..f7b1e8fe1c7b3 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -17,43 +17,33 @@ #include #include +#include +#include +#include #include #include -#include "arrow/array.h" -#include "arrow/array/builder_binary.h" -#include "arrow/array/builder_decimal.h" -#include "arrow/array/builder_primitive.h" #include "arrow/table.h" -#include "arrow/testing/gtest_util.h" -#include "arrow/testing/random.h" #include "arrow/type_fwd.h" -#include "arrow/type_traits.h" -#include "arrow/util/decimal.h" #include "arrow/util/float16.h" -#include "arrow/util/logging.h" #include "parquet/arrow/reader.h" #include "parquet/arrow/reader_internal.h" -#include "parquet/arrow/schema.h" #include "parquet/arrow/test_util.h" #include "parquet/arrow/writer.h" #include "parquet/column_writer.h" #include "parquet/file_writer.h" -#include "parquet/page_index.h" -#include 
"parquet/test_util.h" namespace parquet { using ::arrow::Array; using ::arrow::ChunkedArray; using ::arrow::ConcatenateTables; +using ::arrow::DataType; using ::arrow::default_memory_pool; using ::arrow::Field; using ::arrow::Result; using ::arrow::Table; using ::arrow::io::BufferReader; -using ::arrow::random::GenerateArray; -using ::arrow::random::GenerateBatch; using ::parquet::arrow::FileReader; using ::parquet::arrow::FileReaderBuilder; using ::parquet::arrow::MakeSimpleTable; @@ -64,18 +54,191 @@ using ::testing::Bool; using ::testing::Combine; using ::testing::Values; -std::shared_ptr
GenerateTable(const std::vector>& fields, - int64_t size, int32_t seed = 42) { - auto batch = GenerateBatch(fields, size, seed); - return Table::FromRecordBatches({batch}).ValueOrDie(); +// generate determinisic and platform-independent data +inline uint64_t hash(uint64_t seed, uint64_t index) { + uint64_t h = (index + seed) * 0xc4ceb9fe1a85ec53ull; + h ^= h >> 33; + h *= 0xff51afd7ed558ccdull; + h ^= h >> 33; + h *= 0xc4ceb9fe1a85ec53ull; + h ^= h >> 33; + return h; } -std::shared_ptr
ConcatAndCombine( +#define GENERATE_CASE_BODY(BUILDER_TYPE, VALUE_EXPR) \ + { \ + BUILDER_TYPE builder(type, default_memory_pool()); \ + if (nullable) { \ + for (int64_t i = 0; i < length; ++i) { \ + uint64_t val = hash(seed, i); \ + if (val % 10 == 0) { \ + RETURN_NOT_OK(builder.AppendNull()); \ + } else { \ + RETURN_NOT_OK(builder.Append(VALUE_EXPR)); \ + } \ + } \ + } else { \ + for (int64_t i = 0; i < length; ++i) { \ + uint64_t val = hash(seed, i); \ + RETURN_NOT_OK(builder.Append(VALUE_EXPR)); \ + } \ + } \ + std::shared_ptr array; \ + RETURN_NOT_OK(builder.Finish(&array)); \ + RETURN_NOT_OK(array->ValidateFull()); \ + return array; \ + } + +// Macro to generate a case for a given scalar type. +#define GENERATE_CASE(TYPE_ID, BUILDER_TYPE, VALUE_EXPR) \ + case ::arrow::Type::TYPE_ID: { \ + GENERATE_CASE_BODY(BUILDER_TYPE, VALUE_EXPR) \ + } + +Result> GenerateArray(const std::shared_ptr& field, + int64_t length, uint64_t seed) { + const std::shared_ptr& type = field->type(); + bool nullable = field->nullable(); + + switch (type->id()) { + GENERATE_CASE(BOOL, ::arrow::BooleanBuilder, (val % 2 == 0)) + + // Numeric types. 
+ GENERATE_CASE(INT8, ::arrow::Int8Builder, static_cast(val)) + GENERATE_CASE(INT16, ::arrow::Int16Builder, static_cast(val)) + GENERATE_CASE(INT32, ::arrow::Int32Builder, static_cast(val)) + GENERATE_CASE(INT64, ::arrow::Int64Builder, static_cast(val)) + GENERATE_CASE(UINT8, ::arrow::UInt8Builder, static_cast(val)) + GENERATE_CASE(UINT16, ::arrow::UInt16Builder, static_cast(val)) + GENERATE_CASE(UINT32, ::arrow::UInt32Builder, static_cast(val)) + GENERATE_CASE(UINT64, ::arrow::UInt64Builder, static_cast(val)) + GENERATE_CASE(HALF_FLOAT, ::arrow::HalfFloatBuilder, + static_cast(val % 1000)) + GENERATE_CASE(FLOAT, ::arrow::FloatBuilder, static_cast(val % 1000) / 1000.0f) + GENERATE_CASE(DOUBLE, ::arrow::DoubleBuilder, + static_cast(val % 100000) / 1000.0) + case ::arrow::Type::DECIMAL128: { + const auto& decimal_type = static_cast(*type); + // Limit the value to fit within the specified precision + int32_t max_exponent = decimal_type.precision() - decimal_type.scale(); + int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); + GENERATE_CASE_BODY(::arrow::Decimal128Builder, ::arrow::Decimal128(val % max_value)) + } + case ::arrow::Type::DECIMAL256: { + const auto& decimal_type = static_cast(*type); + // Limit the value to fit within the specified precision, capped at 9 to avoid + // int64_t overflow + int32_t max_exponent = std::min(9, decimal_type.precision() - decimal_type.scale()); + int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); + GENERATE_CASE_BODY(::arrow::Decimal256Builder, ::arrow::Decimal256(val % max_value)) + } + + // Temporal types + GENERATE_CASE(DATE32, ::arrow::Date32Builder, static_cast(val)) + GENERATE_CASE(TIME32, ::arrow::Time32Builder, + std::abs(static_cast(val) % 86400000)) + GENERATE_CASE(TIME64, ::arrow::Time64Builder, + std::abs(static_cast(val) % 86400000000)) + GENERATE_CASE(TIMESTAMP, ::arrow::TimestampBuilder, static_cast(val)) + GENERATE_CASE(DURATION, ::arrow::DurationBuilder, static_cast(val)) + + // Binary 
and string types. + GENERATE_CASE(STRING, ::arrow::StringBuilder, + std::string("str_") + std::to_string(val)) + GENERATE_CASE(LARGE_STRING, ::arrow::LargeStringBuilder, + std::string("str_") + std::to_string(val)) + GENERATE_CASE(BINARY, ::arrow::BinaryBuilder, + std::string("bin_") + std::to_string(val)) + case ::arrow::Type::FIXED_SIZE_BINARY: { + auto size = static_cast<::arrow::FixedSizeBinaryType*>(type.get())->byte_width(); + GENERATE_CASE_BODY(::arrow::FixedSizeBinaryBuilder, + std::string("bin_") + std::to_string(val).substr(0, size - 4)) + } + + case ::arrow::Type::STRUCT: { + auto struct_type = static_cast<::arrow::StructType*>(type.get()); + std::vector> child_arrays; + for (auto i = 0; i < struct_type->num_fields(); i++) { + ARROW_ASSIGN_OR_RAISE(auto child_array, + GenerateArray(struct_type->field(i), length, + seed + static_cast(i + 300))); + child_arrays.push_back(child_array); + } + auto struct_array = + std::make_shared<::arrow::StructArray>(type, length, child_arrays); + return struct_array; + } + + case ::arrow::Type::LIST: { + auto list_type = static_cast<::arrow::ListType*>(type.get()); + auto value_field = ::arrow::field("item", list_type->value_type()); + ARROW_ASSIGN_OR_RAISE(auto values_array, GenerateArray(value_field, length, seed)); + auto offset_builder = ::arrow::Int32Builder(); + auto bitmap_builder = ::arrow::TypedBufferBuilder(); + + int32_t num_nulls = 0; + int32_t num_elements = 0; + uint8_t element_size = 0; + int32_t current_offset = 0; + RETURN_NOT_OK(offset_builder.Append(current_offset)); + while (current_offset < length) { + num_elements++; + auto is_valid = !(nullable && (num_elements % 10 == 0)); + if (is_valid) { + RETURN_NOT_OK(bitmap_builder.Append(true)); + current_offset += element_size; + if (current_offset > length) { + RETURN_NOT_OK(offset_builder.Append(static_cast(length))); + break; + } else { + RETURN_NOT_OK(offset_builder.Append(current_offset)); + } + } else { + 
RETURN_NOT_OK(offset_builder.Append(static_cast(current_offset))); + RETURN_NOT_OK(bitmap_builder.Append(false)); + num_nulls++; + } + + if (element_size > 4) { + element_size = 0; + } else { + element_size++; + } + } + + std::shared_ptr offsets_array; + RETURN_NOT_OK(offset_builder.Finish(&offsets_array)); + std::shared_ptr bitmap_buffer; + RETURN_NOT_OK(bitmap_builder.Finish(&bitmap_buffer)); + ARROW_ASSIGN_OR_RAISE( + auto list_array, ::arrow::ListArray::FromArrays( + type, *offsets_array, *values_array, default_memory_pool(), + bitmap_buffer, num_nulls)); + RETURN_NOT_OK(list_array->ValidateFull()); + return list_array; + } + + default: + return ::arrow::Status::NotImplemented("Unsupported data type " + type->ToString()); + } +} + +Result> GenerateTable( + const std::shared_ptr<::arrow::Schema>& schema, int64_t size, uint64_t seed = 0) { + std::vector> arrays; + for (const auto& field : schema->fields()) { + ARROW_ASSIGN_OR_RAISE(auto array, GenerateArray(field, size, seed)); + arrays.push_back(array); + } + return Table::Make(schema, arrays, size); +} + +Result> ConcatAndCombine( const std::vector>& parts) { // Concatenate and combine chunks so the table doesn't carry information about // the modification points - auto table = ConcatenateTables(parts).ValueOrDie(); - return table->CombineChunks().ValueOrDie(); + ARROW_ASSIGN_OR_RAISE(auto table, ConcatenateTables(parts)); + return table->CombineChunks(); } Result> WriteTableToBuffer(const std::shared_ptr
& table, @@ -483,7 +646,7 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, PageSizes base_result, PageSizes modified_result, bool nullable, bool enable_dictionary, uint64_t min_chunk_size, uint64_t max_chunk_size) { - max_chunk_size *= 1.2; + max_chunk_size = static_cast(max_chunk_size * 1.2); if (::arrow::is_fixed_width(dtype->id())) { auto min_length = ElementCount(min_chunk_size, dtype->byte_width(), nullable); auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); @@ -498,9 +661,9 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, } } -constexpr uint64_t kMinChunkSize = 64 * 1024; -constexpr uint64_t kMaxChunkSize = 128 * 1024; -constexpr uint64_t kPartSize = 256 * 1024; +constexpr uint64_t kMinChunkSize = 8 * 1024; +constexpr uint64_t kMaxChunkSize = 32 * 1024; +constexpr uint64_t kPartSize = 128 * 1024; constexpr uint64_t kEditSize = 128; class TestColumnCDC : public ::testing::TestWithParam< @@ -513,26 +676,25 @@ class TestColumnCDC : public ::testing::TestWithParam< void SetUp() override { auto [dtype, nullable, byte_per_record] = GetParam(); auto field_ = ::arrow::field("f0", dtype, nullable); + auto schema = ::arrow::schema({field_}); auto part_length = kPartSize / byte_per_record; auto edit_length = kEditSize / byte_per_record; - // Generate random table parts, these are later concatenated to simulate - // different scenarios like insert, update, delete, and append. 
- part1_ = GenerateTable({field_}, part_length, /*seed=*/1); - part2_ = GenerateTable({field_}, edit_length, /*seed=*/2); - part3_ = GenerateTable({field_}, part_length, /*seed=*/3); - part4_ = GenerateTable({field_}, edit_length, /*seed=*/4); - part5_ = GenerateTable({field_}, part_length, /*seed=*/5); - part6_ = GenerateTable({field_}, edit_length, /*seed=*/6); - part7_ = GenerateTable({field_}, edit_length, /*seed=*/7); + ASSERT_OK_AND_ASSIGN(part1_, GenerateTable(schema, part_length, 0)); + ASSERT_OK_AND_ASSIGN(part2_, GenerateTable(schema, edit_length, 1)); + ASSERT_OK_AND_ASSIGN(part3_, GenerateTable(schema, part_length, part_length)); + ASSERT_OK_AND_ASSIGN(part4_, GenerateTable(schema, edit_length, 2)); + ASSERT_OK_AND_ASSIGN(part5_, GenerateTable(schema, part_length, 2 * part_length)); + ASSERT_OK_AND_ASSIGN(part6_, GenerateTable(schema, edit_length, 3)); + ASSERT_OK_AND_ASSIGN(part7_, GenerateTable(schema, edit_length, 4)); } }; TEST_P(TestColumnCDC, DeleteOnce) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part2_, part3_}); - auto modified = ConcatAndCombine({part1_, part3_}); + ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_})); + ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part3_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { @@ -554,8 +716,9 @@ TEST_P(TestColumnCDC, DeleteOnce) { TEST_P(TestColumnCDC, DeleteTwice) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part2_, part3_, part4_, part5_}); - auto modified = ConcatAndCombine({part1_, part3_, part5_}); + ASSERT_OK_AND_ASSIGN(auto base, + ConcatAndCombine({part1_, part2_, part3_, part4_, part5_})); + ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part3_, part5_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { @@ -576,8 +739,8 @@ TEST_P(TestColumnCDC, DeleteTwice) { TEST_P(TestColumnCDC, 
UpdateOnce) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part2_, part3_}); - auto modified = ConcatAndCombine({part1_, part4_, part3_}); + ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_})); + ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part4_, part3_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { @@ -597,8 +760,10 @@ TEST_P(TestColumnCDC, UpdateOnce) { TEST_P(TestColumnCDC, UpdateTwice) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part2_, part3_, part4_, part5_}); - auto modified = ConcatAndCombine({part1_, part6_, part3_, part7_, part5_}); + ASSERT_OK_AND_ASSIGN(auto base, + ConcatAndCombine({part1_, part2_, part3_, part4_, part5_})); + ASSERT_OK_AND_ASSIGN(auto modified, + ConcatAndCombine({part1_, part6_, part3_, part7_, part5_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { @@ -618,8 +783,8 @@ TEST_P(TestColumnCDC, UpdateTwice) { TEST_P(TestColumnCDC, InsertOnce) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part3_}); - auto modified = ConcatAndCombine({part1_, part2_, part3_}); + ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part3_})); + ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part2_, part3_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { @@ -640,8 +805,9 @@ TEST_P(TestColumnCDC, InsertOnce) { TEST_P(TestColumnCDC, InsertTwice) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part3_, part5_}); - auto modified = ConcatAndCombine({part1_, part2_, part3_, part4_, part5_}); + ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part3_, part5_})); + ASSERT_OK_AND_ASSIGN(auto modified, + ConcatAndCombine({part1_, part2_, part3_, part4_, part5_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : 
{false, true}) { @@ -662,8 +828,8 @@ TEST_P(TestColumnCDC, InsertTwice) { TEST_P(TestColumnCDC, Append) { auto [dtype, nullable, _] = GetParam(); - auto base = ConcatAndCombine({part1_, part2_, part3_}); - auto modified = ConcatAndCombine({part1_, part2_, part3_, part4_}); + ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_})); + ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part2_, part3_, part4_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { @@ -686,7 +852,7 @@ INSTANTIATE_TEST_SUITE_P( testing::Values( // Numeric std::make_tuple(::arrow::uint8(), false, 1), - std::make_tuple(::arrow::uint16(), true, 2), + std::make_tuple(::arrow::uint16(), false, 2), std::make_tuple(::arrow::uint32(), false, 4), std::make_tuple(::arrow::uint64(), true, 8), std::make_tuple(::arrow::int8(), false, 1), @@ -699,25 +865,20 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(::arrow::decimal128(18, 6), false, 16), std::make_tuple(::arrow::decimal256(40, 6), false, 32), // Binary-like + std::make_tuple(::arrow::utf8(), false, 16), std::make_tuple(::arrow::binary(), true, 16), - - std::make_tuple(::arrow::large_binary(), false, 16), std::make_tuple(::arrow::fixed_size_binary(16), true, 16), - std::make_tuple(::arrow::utf8(), false, 16), - std::make_tuple(::arrow::utf8(), true, 16), - std::make_tuple(::arrow::large_utf8(), false, 16), + // Temporal std::make_tuple(::arrow::date32(), false, 4), - std::make_tuple(::arrow::date64(), false, 8), - std::make_tuple(::arrow::time32(::arrow::TimeUnit::SECOND), true, 4), + std::make_tuple(::arrow::time32(::arrow::TimeUnit::MILLI), true, 4), std::make_tuple(::arrow::time64(::arrow::TimeUnit::NANO), false, 8), std::make_tuple(::arrow::timestamp(::arrow::TimeUnit::NANO), true, 8), std::make_tuple(::arrow::duration(::arrow::TimeUnit::NANO), false, 8), // Nested types - std::make_tuple(::arrow::list(::arrow::int32()), false, 64), - 
std::make_tuple(::arrow::list(::arrow::int32()), true, 64), - std::make_tuple(::arrow::list(::arrow::utf8()), true, 64), - std::make_tuple(::arrow::large_list(::arrow::int32()), true, 64), + std::make_tuple(::arrow::list(::arrow::int32()), false, 16), + std::make_tuple(::arrow::list(::arrow::int32()), true, 18), + std::make_tuple(::arrow::list(::arrow::utf8()), true, 18), std::make_tuple(::arrow::struct_({::arrow::field("f0", ::arrow::int32())}), false, 8), std::make_tuple(::arrow::struct_({::arrow::field("f0", ::arrow::float64())}), From 6f0beceb36e0342cd0fcdf462c9f82a5eee25e91 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 27 Feb 2025 18:05:23 +0100 Subject: [PATCH 22/48] address review comments --- cpp/src/parquet/column_chunker.cc | 41 ++++++++---------- cpp/src/parquet/column_chunker.h | 2 +- cpp/src/parquet/column_writer.cc | 2 +- python/pyarrow/_parquet.pyx | 6 +-- python/run_test.sh | 72 +++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 28 deletions(-) create mode 100755 python/run_test.sh diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index 14de6d81575b9..68c522a17c5cc 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -592,9 +592,7 @@ void ContentDefinedChunker::Roll(const T value) { auto bytes = reinterpret_cast(&value); for (size_t i = 0; i < BYTE_WIDTH; ++i) { rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; - if ((rolling_hash_ & hash_mask_) == 0) { - has_matched_ = true; - } + has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } @@ -608,15 +606,13 @@ void ContentDefinedChunker::Roll(std::string_view value) { for (char c : value) { rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; - if ((rolling_hash_ & hash_mask_) == 0) { - has_matched_ = true; - } + has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } -bool ContentDefinedChunker::Check() { +bool 
ContentDefinedChunker::NeedNewChunk() { // decide whether to create a new chunk based on the rolling hash; has_matched_ is - // set to true if we encountered a match since the last Check() call + // set to true if we encountered a match since the last NeedNewChunk() call if (ARROW_PREDICT_FALSE(has_matched_)) { has_matched_ = false; // in order to have a normal distribution of chunk sizes, we only create a new chunk @@ -631,7 +627,8 @@ bool ContentDefinedChunker::Check() { } if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { // we have a hard limit on the maximum chunk size, not that we don't reset the rolling - // hash state here, so the next Check() call will continue from the current state + // hash state here, so the next NeedNewChunk() call will continue from the current + // state chunk_size_ = 0; return true; } @@ -643,7 +640,7 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev const int16_t* rep_levels, int64_t num_levels, const T& leaf_array) { - std::vector result; + std::vector chunks; bool has_def_levels = level_info_.def_level > 0; bool has_rep_levels = level_info_.rep_level > 0; @@ -654,13 +651,13 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev while (offset < num_levels) { Roll(leaf_array.GetView(offset)); ++offset; - if (Check()) { - result.emplace_back(prev_offset, prev_offset, offset - prev_offset); + if (NeedNewChunk()) { + chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; } } if (prev_offset < num_levels) { - result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); + chunks.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); } } else if (!has_rep_levels) { // non-nested data with nulls @@ -670,13 +667,13 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev Roll(def_levels[offset]); Roll(leaf_array.GetView(offset)); ++offset; - if (Check()) { - result.emplace_back(prev_offset, prev_offset, offset 
- prev_offset); + if (NeedNewChunk()) { + chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; } } if (prev_offset < num_levels) { - result.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); + chunks.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); } } else { // nested data with nulls @@ -684,12 +681,11 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev bool is_record_boundary; int16_t def_level; int16_t rep_level; - int64_t level_offset = 0; int64_t value_offset = 0; int64_t record_level_offset = 0; int64_t record_value_offset = 0; - while (level_offset < num_levels) { + for (int64_t level_offset = 0; level_offset < num_levels; ++level_offset) { def_level = def_levels[level_offset]; rep_level = rep_levels[level_offset]; @@ -702,16 +698,15 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev Roll(leaf_array.GetView(value_offset)); } - if (is_record_boundary && Check()) { + if (is_record_boundary && NeedNewChunk()) { auto levels_to_write = level_offset - record_level_offset; if (levels_to_write > 0) { - result.emplace_back(record_level_offset, record_value_offset, levels_to_write); + chunks.emplace_back(record_level_offset, record_value_offset, levels_to_write); record_level_offset = level_offset; record_value_offset = value_offset; } } - ++level_offset; if (has_leaf_value) { ++value_offset; } @@ -719,11 +714,11 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev auto levels_to_write = num_levels - record_level_offset; if (levels_to_write > 0) { - result.emplace_back(record_level_offset, record_value_offset, levels_to_write); + chunks.emplace_back(record_level_offset, record_value_offset, levels_to_write); } } - return result; + return chunks; } #define PRIMITIVE_CASE(TYPE_ID, ArrowType) \ diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 5011620bd31d9..f77fdc435fb04 100644 --- 
a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -54,7 +54,7 @@ class ContentDefinedChunker { template void Roll(const T value); void Roll(std::string_view value); - inline bool Check(); + inline bool NeedNewChunk(); template const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 77ca2adc3daf1..00f475c7fb037 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1339,7 +1339,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< bits_buffer_->ZeroPadding(); } - if (this->properties_->cdc_enabled()) { + if (properties_->cdc_enabled()) { ARROW_ASSIGN_OR_RAISE(auto boundaries, content_defined_chunker_.GetBoundaries( def_levels, rep_levels, num_levels, leaf_array)); diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index fedba54ba2b16..7af7f4ac8c254 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -2016,10 +2016,10 @@ cdef shared_ptr[WriterProperties] _create_writer_properties( props.dictionary_pagesize_limit(dictionary_pagesize_limit) # content defined chunking - if cdc is False: - props.disable_cdc() - elif cdc is True: + if cdc: props.enable_cdc() + else: + props.disable_cdc() if cdc_size_range is not None: min_size, max_size = cdc_size_range diff --git a/python/run_test.sh b/python/run_test.sh new file mode 100755 index 0000000000000..6476c12dcd44e --- /dev/null +++ b/python/run_test.sh @@ -0,0 +1,72 @@ +set -e + +# -DARROW_USE_ASAN=OFF \ +# -DARROW_USE_UBSAN=OFF \ +# -DARROW_USE_TSAN=OFF \ + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) +ARROW_DIR=${SCRIPT_DIR}/.. 
+export ARROW_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} +export ARROW_HOME=$CONDA_PREFIX +export PARQUET_TEST_DATA=${ARROW_DIR}/cpp/submodules/parquet-testing/data +export ARROW_TEST_DATA=${ARROW_DIR}/testing/data + +export ARROW_HDFS_TEST_HOST=impala +export ARROW_HDFS_TEST_PORT=8020 +export ARROW_HDFS_TEST_USER=hdfs + +mkdir -p ${ARROW_DIR}/cpp/build +pushd ${ARROW_DIR}/cpp/build + +cmake -GNinja \ + -DARROW_BUILD_BENCHMARKS=OFF \ + -DARROW_BUILD_STATIC=OFF \ + -DARROW_BUILD_TESTS=ON \ + -DARROW_USE_ASAN=OFF \ + -DARROW_DATASET=ON \ + -DARROW_EXTRA_ERROR_CONTEXT=ON \ + -DARROW_BUILD_INTEGRATION=ON \ + -DARROW_DEPENDENCY_SOURCE=CONDA \ + -DARROW_FLIGHT=OFF \ + -DARROW_GANDIVA=OFF \ + -DARROW_JEMALLOC=ON \ + -DARROW_MIMALLOC=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_ZSTD=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_PARQUET=ON \ + -DARROW_CSV=ON \ + -DARROW_ORC=OFF \ + -DARROW_USE_CCACHE=ON \ + -DARROW_S3=ON \ + -DARROW_TEST_MEMCHECK=OFF \ + -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=YES \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + .. 
+ +ninja +ninja install + +popd + +export PYARROW_CMAKE_GENERATOR=Ninja +export PYARROW_BUILD_TYPE=debug +export PYARROW_WITH_PARQUET=1 +# export PYARROW_WITH_HDFS=1 +# export PYARROW_WITH_GANDIVA=0 +export PYARROW_WITH_DATASET=1 +# export PYARROW_WITH_FLIGHT=1 +export PYARROW_WITH_S3=1 +export PYARROW_PARALLEL=8 +# export PYARROW_WITH_ORC=1 + +# # export DYLD_INSERT_LIBRARIES=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/12.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib +# # export DYLD_INSERT_LIBRARIES=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/12.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib + +pushd ${ARROW_DIR}/python +#python setup.py build_ext --inplace +python setup.py develop +popd +# pytest -sv "$@" From edfcd31177b11a4f360cd8e061c879ee844fd722 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 1 Mar 2025 22:19:16 +0100 Subject: [PATCH 23/48] rename GEAR_HASH_TABLE to GEARHASH_TABLE --- cpp/src/parquet/column_chunker.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index 68c522a17c5cc..b35ea431b578b 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -26,7 +26,7 @@ namespace parquet { namespace internal { -constexpr uint64_t GEAR_HASH_TABLE[8][256] = { +constexpr uint64_t GEARHASH_TABLE[8][256] = { {// seed = 0 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, @@ -591,7 +591,7 @@ void ContentDefinedChunker::Roll(const T value) { } auto bytes = reinterpret_cast(&value); for (size_t i = 0; i < BYTE_WIDTH; ++i) { - rolling_hash_ = (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][bytes[i]]; + rolling_hash_ = (rolling_hash_ << 1) + GEARHASH_TABLE[nth_run_][bytes[i]]; has_matched_ = has_matched_ || 
((rolling_hash_ & hash_mask_) == 0); } } @@ -605,7 +605,7 @@ void ContentDefinedChunker::Roll(std::string_view value) { } for (char c : value) { rolling_hash_ = - (rolling_hash_ << 1) + GEAR_HASH_TABLE[nth_run_][static_cast(c)]; + (rolling_hash_ << 1) + GEARHASH_TABLE[nth_run_][static_cast(c)]; has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } From d635e02d33654222dc3c1fe30222412334f37d7b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 3 Mar 2025 12:48:25 +0100 Subject: [PATCH 24/48] some docstrings about CDC --- cpp/src/parquet/column_chunker.h | 83 +++++++++++++++++++++++++- cpp/src/parquet/column_chunker_hash.py | 27 +++++++++ python/run_test.sh | 72 ---------------------- 3 files changed, 107 insertions(+), 75 deletions(-) create mode 100644 cpp/src/parquet/column_chunker_hash.py delete mode 100755 python/run_test.sh diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index f77fdc435fb04..1921efc049248 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -23,11 +23,12 @@ #include "arrow/array.h" #include "parquet/level_conversion.h" -using arrow::internal::checked_cast; - namespace parquet { + namespace internal { +// Represents a chunk of data with level offsets and value offsets due to the +// record shredding for nested data. struct Chunk { int64_t level_offset; int64_t value_offset; @@ -39,12 +40,81 @@ struct Chunk { levels_to_write(levels_to_write) {} }; +/// CDC (Content-Defined Chunking) is a technique that divides data into variable-sized +/// chunks based on the content of the data itself, rather than using fixed-size +/// boundaries. 
+/// +/// For example, given this sequence of values in a column: +/// +/// File1: [1,2,3, 4,5,6, 7,8,9] +/// chunk1 chunk2 chunk3 +/// +/// Assume there is an inserted value between 3 and 4: +/// +/// File2: [1,2,3,0, 4,5,6, 7,8,9] +/// new-chunk chunk2 chunk3 +/// +/// The chunking process will adjust to maintain stable boundaries across data +/// modifications. Each chunk defines a new parquet data page; the pages are contiguously +/// written out to the file. Since each page is compressed independently, the files' contents +/// would look like the following with unique page identifiers: +/// +/// File1: [Page1][Page2][Page3]... +/// File2: [Page4][Page2][Page3]... +/// +/// Then the parquet file is uploaded to a content addressable storage (CAS) system +/// which splits the byte stream into content defined blobs. The CAS system will calculate +/// a unique identifier for each blob, then store the blob in a key-value store. If the +/// same blob is encountered again, the system can refer to the hash instead of physically +/// storing the blob again. In the example above, the CAS system would physically store +/// Page1, Page2, Page3, and Page4 only once, plus the metadata required to reassemble the +/// files. +/// While the deduplication is performed by the CAS system, the parquet chunker makes it +/// possible to efficiently deduplicate the data by consistently dividing the data into +/// chunks. +/// +/// Implementation details: +/// +/// Only the parquet writer must be aware of the content defined chunking; the reader +/// doesn't need to know about it. Each parquet column writer holds a +/// ContentDefinedChunker instance depending on the writer's properties. The chunker's +/// state is maintained across the entire column without being reset between pages and row +/// groups. 
+/// +/// The chunker receives the record shredded column data (def_levels, rep_levels, values) +/// and goes over the (def_level, rep_level, value) triplets one by one while adjusting +/// the column-global rolling hash based on the triplet. Whenever the rolling hash matches +/// a predefined mask, the chunker creates a new chunk. The chunker returns a vector of +/// Chunk objects that represent the boundaries of the chunks. +/// Note that the boundaries are deterministically calculated exclusively based on the +/// data itself, so the same data will always produce the same chunks - given the same +/// chunker configuration. +/// +/// References: +/// - FastCDC paper: "FastCDC: a Fast and Efficient Content-Defined Chunking Approach for +/// Data Deduplication" +/// https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf class ContentDefinedChunker { public: + /// Create a new ContentDefinedChunker instance + /// + /// @param level_info Information about definition and repetition levels + /// @param size_range Min/max chunk size as a pair, the chunker will + /// attempt to uniformly distribute the chunks between these extremes. + /// @param norm_factor Normalization factor to center the chunk size around the average + /// size more aggressively. By increasing the normalization factor, + /// the probability of finding a chunk boundary increases. 
ContentDefinedChunker(const LevelInfo& level_info, std::pair size_range, - uint8_t norm_factor = 1); + uint8_t norm_factor = 0); + /// Get the chunk boundaries for the given column data + /// + /// @param def_levels Definition levels + /// @param rep_levels Repetition levels + /// @param num_levels Number of levels + /// @param values Column values as an Arrow array + /// @return Vector of Chunk objects representing the chunk boundaries const ::arrow::Result> GetBoundaries(const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, @@ -60,9 +130,16 @@ class ContentDefinedChunker { const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, const T& leaf_array); + // Reference to the column's level information const internal::LevelInfo& level_info_; + // Minimum chunk size in bytes, the rolling hash will not be updated until this size is + // reached for each chunk. Note that all data sent through the hash function is counted + // towards the chunk size, including definition and repetition levels. const uint64_t min_size_; const uint64_t max_size_; + // The mask to match the rolling hash against to determine if a new chunk should be + // created. The mask is calculated based on min/max chunk size and the normalization + // factor. 
const uint64_t hash_mask_; bool has_matched_ = false; diff --git a/cpp/src/parquet/column_chunker_hash.py b/cpp/src/parquet/column_chunker_hash.py new file mode 100644 index 0000000000000..179918dd4fefe --- /dev/null +++ b/cpp/src/parquet/column_chunker_hash.py @@ -0,0 +1,27 @@ +import hashlib +import sys + + +def gearhash(n: int, seed: int): + value = bytes([seed] * 64 + [n] * 64) + hasher = hashlib.md5(value) + return hasher.hexdigest()[:16] + + +def print_table(seed: int, length=256, comma=True): + table = [gearhash(n, seed=seed) for n in range(length)] + print(f"{{ // seed = {seed}") + for i in range(0, length, 4): + print(" ", end="") + values = [f"0x{value}" for value in table[i:i + 4]] + values = ", ".join(values) + print(f" {values}", end=",\n" if i < length - 4 else "\n") + print(" }", end=", " if comma else "") + + +if __name__ == "__main__": + print("{") + n = int(sys.argv[1]) + for seed in range(n): + print_table(seed, comma=seed < n) + print("}") \ No newline at end of file diff --git a/python/run_test.sh b/python/run_test.sh deleted file mode 100755 index 6476c12dcd44e..0000000000000 --- a/python/run_test.sh +++ /dev/null @@ -1,72 +0,0 @@ -set -e - -# -DARROW_USE_ASAN=OFF \ -# -DARROW_USE_UBSAN=OFF \ -# -DARROW_USE_TSAN=OFF \ - -SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) -ARROW_DIR=${SCRIPT_DIR}/.. 
-export ARROW_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} -export ARROW_HOME=$CONDA_PREFIX -export PARQUET_TEST_DATA=${ARROW_DIR}/cpp/submodules/parquet-testing/data -export ARROW_TEST_DATA=${ARROW_DIR}/testing/data - -export ARROW_HDFS_TEST_HOST=impala -export ARROW_HDFS_TEST_PORT=8020 -export ARROW_HDFS_TEST_USER=hdfs - -mkdir -p ${ARROW_DIR}/cpp/build -pushd ${ARROW_DIR}/cpp/build - -cmake -GNinja \ - -DARROW_BUILD_BENCHMARKS=OFF \ - -DARROW_BUILD_STATIC=OFF \ - -DARROW_BUILD_TESTS=ON \ - -DARROW_USE_ASAN=OFF \ - -DARROW_DATASET=ON \ - -DARROW_EXTRA_ERROR_CONTEXT=ON \ - -DARROW_BUILD_INTEGRATION=ON \ - -DARROW_DEPENDENCY_SOURCE=CONDA \ - -DARROW_FLIGHT=OFF \ - -DARROW_GANDIVA=OFF \ - -DARROW_JEMALLOC=ON \ - -DARROW_MIMALLOC=ON \ - -DARROW_WITH_SNAPPY=ON \ - -DARROW_WITH_LZ4=ON \ - -DARROW_WITH_ZSTD=ON \ - -DARROW_COMPUTE=ON \ - -DARROW_PARQUET=ON \ - -DARROW_CSV=ON \ - -DARROW_ORC=OFF \ - -DARROW_USE_CCACHE=ON \ - -DARROW_S3=ON \ - -DARROW_TEST_MEMCHECK=OFF \ - -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=YES \ - -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ - .. 
- -ninja -ninja install - -popd - -export PYARROW_CMAKE_GENERATOR=Ninja -export PYARROW_BUILD_TYPE=debug -export PYARROW_WITH_PARQUET=1 -# export PYARROW_WITH_HDFS=1 -# export PYARROW_WITH_GANDIVA=0 -export PYARROW_WITH_DATASET=1 -# export PYARROW_WITH_FLIGHT=1 -export PYARROW_WITH_S3=1 -export PYARROW_PARALLEL=8 -# export PYARROW_WITH_ORC=1 - -# # export DYLD_INSERT_LIBRARIES=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/12.0.0/lib/darwin/libclang_rt.asan_osx_dynamic.dylib -# # export DYLD_INSERT_LIBRARIES=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/12.0.0/lib/darwin/libclang_rt.tsan_osx_dynamic.dylib - -pushd ${ARROW_DIR}/python -#python setup.py build_ext --inplace -python setup.py develop -popd -# pytest -sv "$@" From 17021ebdba00eb1e7669f1dbbc7011d2206da548 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 3 Mar 2025 13:23:12 +0100 Subject: [PATCH 25/48] place the gearhash table to a separate header --- cpp/src/parquet/column_chunker.cc | 524 +------------------ cpp/src/parquet/column_chunker_hash.py | 27 - cpp/src/parquet/column_chunker_hashtable.h | 547 ++++++++++++++++++++ cpp/src/parquet/column_chunker_hashtable.py | 90 ++++ cpp/src/parquet/column_chunker_test.cc | 19 +- 5 files changed, 656 insertions(+), 551 deletions(-) delete mode 100644 cpp/src/parquet/column_chunker_hash.py create mode 100644 cpp/src/parquet/column_chunker_hashtable.h create mode 100644 cpp/src/parquet/column_chunker_hashtable.py diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index b35ea431b578b..90979bbd25a4e 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -21,534 +21,12 @@ #include #include "arrow/array.h" #include "arrow/util/logging.h" +#include "parquet/column_chunker_hashtable.h" #include "parquet/level_conversion.h" namespace parquet { namespace internal { -constexpr uint64_t GEARHASH_TABLE[8][256] 
= { - {// seed = 0 - 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, - 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, - 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, - 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, - 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, - 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, - 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, - 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, - 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, - 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, - 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, - 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, - 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, - 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, - 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, - 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, - 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, - 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, - 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, - 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, - 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, - 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, - 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, - 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, - 
0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, - 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, - 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, - 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, - 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, - 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, - 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, - 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, - 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, - 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, - 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, - 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, - 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, - 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, - 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, - 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, - 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, - 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, - 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, - 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, - 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, - 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, - 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, - 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, - 0x0956e6ee9d8eb60b, 
0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, - 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, - 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, - 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, - 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, - 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, - 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, - 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, - 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, - 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, - 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, - 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, - 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, - 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, - 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, - 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, - {// seed = 1 - 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, - 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, - 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, - 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, - 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, - 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, - 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, - 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, - 0x0122d28226102bba, 
0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, - 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, - 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, - 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, - 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, - 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, - 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, - 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, - 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, - 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, - 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, - 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, - 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, - 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, - 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, - 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, - 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, - 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, - 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, - 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, - 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, - 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, - 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, - 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, - 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 
0xd086acbdf15d9455, 0xa4d19e43f41ea87b, - 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, - 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, - 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, - 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, - 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, - 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, - 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, - 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, - 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, - 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, - 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, - 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, - 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, - 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, - 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, - 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, - 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, - 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, - 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, - 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, - 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, - 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, - 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, - 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 
0xd1dfb7a5cc3974be, - 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, - 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, - 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, - 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, - 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, - 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, - 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, - {// seed = 2 - 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, - 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, - 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, - 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, - 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, - 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, - 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, - 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, - 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, - 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, - 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, - 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, - 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, - 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, - 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, - 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, - 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 
0x84e6d7f678a0e2d0, - 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, - 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, - 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, - 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, - 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, - 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, - 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, - 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, - 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, - 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, - 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, - 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, - 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, - 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, - 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, - 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, - 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, - 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, - 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, - 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, - 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, - 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, - 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, - 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, - 
0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, - 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, - 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, - 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, - 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, - 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, - 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, - 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, - 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, - 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, - 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, - 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, - 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, - 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, - 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, - 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, - 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, - 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, - 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, - 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, - 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, - 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, - 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, - {// seed = 3 - 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, - 
0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, - 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, - 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, - 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, - 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, - 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, - 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, - 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, - 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, - 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, - 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, - 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, - 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, - 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, - 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, - 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, - 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, - 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, - 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, - 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, - 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, - 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, - 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, - 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, - 0x0b912d7eb712d9ee, 
0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, - 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, - 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, - 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, - 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, - 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, - 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, - 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, - 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, - 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, - 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, - 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, - 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, - 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, - 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, - 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, - 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, - 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, - 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, - 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, - 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, - 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, - 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, - 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, - 0x794876dbca3e471f, 0x951937d8682670ce, 
0x0f99cb1f52ed466a, 0x8c7cd205543b804c, - 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, - 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, - 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, - 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, - 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, - 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, - 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, - 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, - 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, - 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, - 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, - 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, - 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, - 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, - {// seed = 4 - 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, - 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, - 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, - 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, - 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, - 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, - 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, - 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, - 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, - 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 
0x856de173586a7b34, 0xcedb291b594cb1b5, - 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, - 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, - 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, - 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, - 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, - 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, - 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, - 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, - 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, - 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, - 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, - 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, - 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, - 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, - 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, - 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, - 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, - 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, - 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, - 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, - 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, - 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, - 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, - 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 
0xcee01d52cdf88833, - 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, - 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, - 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, - 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, - 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, - 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, - 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, - 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, - 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, - 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, - 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, - 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, - 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, - 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, - 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, - 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, - 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, - 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, - 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, - 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, - 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, - 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, - 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, - 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, - 
0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, - 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, - 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, - 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, - 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, - 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, - {// seed = 5 - 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, - 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, - 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, - 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, - 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, - 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, - 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, - 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, - 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, - 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, - 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, - 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, - 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, - 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, - 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, - 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, - 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, - 0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, - 
0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, - 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, - 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, - 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, - 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, - 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, - 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, - 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, - 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, - 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, - 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, - 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, - 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, - 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, - 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, - 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, - 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, - 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, - 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, - 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, - 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, - 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, - 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, - 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, - 0x00914a916b07b389, 
0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, - 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, - 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, - 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, - 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, - 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, - 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, - 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, - 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, - 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, - 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, - 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, - 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, - 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, - 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, - 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, - 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, - 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, - 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, - 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, - 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, - 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, - {// seed = 6 - 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, - 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, - 0x7f64017f2b7b3738, 
0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, - 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, - 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, - 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, - 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, - 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, - 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, - 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, - 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, - 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, - 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, - 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, - 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, - 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, - 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, - 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, - 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, - 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, - 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, - 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, - 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, - 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, - 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, - 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, - 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 
0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, - 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, - 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, - 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, - 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, - 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, - 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, - 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, - 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, - 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, - 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, - 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, - 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, - 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, - 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, - 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, - 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, - 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, - 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, - 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, - 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, - 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, - 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, - 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, - 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 
0x0be1945f02a78bd5, - 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, - 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, - 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, - 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, - 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, - 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, - 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, - 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, - 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, - 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, - 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, - 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, - 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, - {// seed = 7 - 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, - 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, - 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, - 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, - 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, - 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, - 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, - 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, - 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, - 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, - 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 
0x4192b0446c06d1e6, - 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, - 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, - 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, - 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, - 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, - 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, - 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, - 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, - 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, - 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, - 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, - 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, - 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, - 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, - 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, - 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, - 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, - 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, - 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, - 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, - 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, - 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, - 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, - 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, - 
0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, - 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, - 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, - 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, - 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, - 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, - 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, - 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, - 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, - 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, - 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, - 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, - 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, - 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, - 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, - 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, - 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, - 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, - 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, - 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, - 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, - 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, - 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, - 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, - 0x0e354f0b3bda49df, 
0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, - 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, - 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, - 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, - 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}, -}; - // create a fake null array class with a GetView method returning 0 always class FakeNullArray { public: diff --git a/cpp/src/parquet/column_chunker_hash.py b/cpp/src/parquet/column_chunker_hash.py deleted file mode 100644 index 179918dd4fefe..0000000000000 --- a/cpp/src/parquet/column_chunker_hash.py +++ /dev/null @@ -1,27 +0,0 @@ -import hashlib -import sys - - -def gearhash(n: int, seed: int): - value = bytes([seed] * 64 + [n] * 64) - hasher = hashlib.md5(value) - return hasher.hexdigest()[:16] - - -def print_table(seed: int, length=256, comma=True): - table = [gearhash(n, seed=seed) for n in range(length)] - print(f"{{ // seed = {seed}") - for i in range(0, length, 4): - print(" ", end="") - values = [f"0x{value}" for value in table[i:i + 4]] - values = ", ".join(values) - print(f" {values}", end=",\n" if i < length - 4 else "\n") - print(" }", end=", " if comma else "") - - -if __name__ == "__main__": - print("{") - n = int(sys.argv[1]) - for seed in range(n): - print_table(seed, comma=seed < n) - print("}") \ No newline at end of file diff --git a/cpp/src/parquet/column_chunker_hashtable.h b/cpp/src/parquet/column_chunker_hashtable.h new file mode 100644 index 0000000000000..b608e6583854a --- /dev/null +++ b/cpp/src/parquet/column_chunker_hashtable.h @@ -0,0 +1,547 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once +#include + +namespace parquet { +namespace internal { + +constexpr uint64_t GEARHASH_TABLE[8][256] = { + {// seed = 0 + 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, + 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, + 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, + 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, + 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, + 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, + 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, + 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, + 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, + 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, + 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, + 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, + 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, + 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, + 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, + 0xbedb142568209556, 0x5360d81c25429dce, 
0x63a69a960a952f37, 0xc900d63899e1b503, + 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, + 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, + 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, + 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, + 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, + 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, + 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, + 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, + 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, + 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, + 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, + 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, + 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, + 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, + 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, + 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, + 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, + 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, + 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, + 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, + 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, + 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, + 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, + 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 
0x7323b98965eeb46b, + 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, + 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, + 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, + 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, + 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, + 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, + 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, + 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, + 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, + 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, + 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, + 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, + 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, + 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, + 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, + 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, + 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, + 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, + 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, + 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, + 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, + 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, + 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, + 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, + {// seed 
= 1 + 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, + 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, + 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, + 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, + 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, + 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, + 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, + 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, + 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, + 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, + 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, + 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, + 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, + 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, + 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, + 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, + 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, + 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, + 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, + 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, + 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, + 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, + 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, + 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, + 0xd6f7ab944b86c3fa, 
0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, + 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, + 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, + 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, + 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, + 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, + 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, + 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, + 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, + 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, + 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, + 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, + 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, + 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, + 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, + 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, + 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, + 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, + 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, + 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, + 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, + 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, + 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, + 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, + 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 
0x7baba71b8920d55f, 0x10edc0216105bc96, + 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, + 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, + 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, + 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, + 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, + 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, + 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, + 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, + 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, + 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, + 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, + 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, + 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, + 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, + 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, + {// seed = 2 + 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, + 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, + 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, + 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, + 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, + 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, + 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, + 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, + 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 
0xf7a74af0ff1aea88, 0x6f8873274a987162, + 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, + 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, + 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, + 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, + 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, + 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, + 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, + 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, + 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, + 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, + 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, + 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, + 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, + 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, + 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, + 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, + 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, + 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, + 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, + 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, + 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, + 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, + 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, + 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 
0xb7e53b090ddb5d25, + 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, + 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, + 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, + 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, + 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, + 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, + 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, + 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, + 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, + 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, + 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, + 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, + 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, + 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, + 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, + 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, + 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, + 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, + 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, + 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, + 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, + 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, + 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, + 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, + 
0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, + 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, + 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, + 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, + 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, + 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, + 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, + {// seed = 3 + 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, + 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, + 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, + 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, + 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, + 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, + 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, + 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, + 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, + 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, + 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, + 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, + 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, + 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, + 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, + 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, + 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, + 
0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, + 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, + 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, + 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, + 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, + 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, + 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, + 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, + 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, + 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, + 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, + 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, + 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, + 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, + 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, + 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, + 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, + 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, + 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, + 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, + 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, + 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, + 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, + 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, + 0xf96c279e7927b116, 
0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, + 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, + 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, + 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, + 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, + 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, + 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, + 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, + 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, + 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, + 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, + 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, + 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, + 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, + 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, + 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, + 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, + 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, + 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, + 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, + 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, + 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, + 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, + {// seed = 4 + 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, + 0x0d6b1ea7780f1352, 
0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, + 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, + 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, + 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, + 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, + 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, + 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, + 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, + 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, + 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, + 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, + 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, + 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, + 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, + 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, + 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, + 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, + 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, + 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, + 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, + 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, + 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, + 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, + 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, + 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 
0x7bf4338b7c387203, 0x34ff25f00cd06185, + 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, + 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, + 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, + 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, + 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, + 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, + 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, + 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, + 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, + 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, + 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, + 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, + 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, + 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, + 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, + 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, + 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, + 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, + 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, + 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, + 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, + 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, + 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, + 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 
0xe0ad82b92d5ecb2c, + 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, + 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, + 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, + 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, + 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, + 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, + 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, + 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, + 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, + 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, + 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, + 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, + 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, + 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, + {// seed = 5 + 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, + 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, + 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, + 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, + 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, + 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, + 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, + 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, + 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, + 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 
0xa6dbdb8aa25f0fd4, + 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, + 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, + 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, + 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, + 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, + 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, + 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, + 0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, + 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, + 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, + 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, + 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, + 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, + 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, + 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, + 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, + 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, + 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, + 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, + 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, + 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, + 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, + 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, + 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, + 
0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, + 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, + 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, + 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, + 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, + 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, + 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, + 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, + 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, + 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, + 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, + 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, + 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, + 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, + 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, + 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, + 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, + 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, + 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, + 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, + 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, + 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, + 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, + 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, + 0xd68130437294dbcc, 
0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, + 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, + 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, + 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, + 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, + 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, + {// seed = 6 + 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, + 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, + 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, + 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, + 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, + 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, + 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, + 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, + 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, + 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, + 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, + 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, + 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, + 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, + 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, + 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, + 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, + 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, + 0xda6e8df4214a8b04, 
0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, + 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, + 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, + 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, + 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, + 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, + 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, + 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, + 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, + 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, + 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, + 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, + 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, + 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, + 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, + 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, + 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, + 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, + 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, + 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, + 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, + 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, + 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, + 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, + 0xa7078b96955631bd, 0xdd6b3543aa187f33, 
0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, + 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, + 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, + 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, + 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, + 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, + 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, + 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, + 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, + 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, + 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, + 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, + 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, + 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, + 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, + 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, + 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, + 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, + 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, + 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, + 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, + 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, + {// seed = 7 + 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, + 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, + 0x481b6bf58037bd83, 0x4933ba8647728d22, 
0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, + 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, + 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, + 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, + 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, + 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, + 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, + 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, + 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, + 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, + 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, + 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, + 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, + 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, + 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, + 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, + 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, + 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, + 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, + 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, + 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, + 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, + 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, + 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, + 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 
0x1f8d142208094223, + 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, + 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, + 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, + 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, + 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, + 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, + 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, + 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, + 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, + 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, + 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, + 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, + 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, + 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, + 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, + 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, + 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, + 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, + 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, + 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, + 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, + 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, + 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, + 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, + 
0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, + 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, + 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, + 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, + 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, + 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, + 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, + 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, + 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, + 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, + 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, + 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, + 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}}; + +} // namespace internal +} // namespace parquet diff --git a/cpp/src/parquet/column_chunker_hashtable.py b/cpp/src/parquet/column_chunker_hashtable.py new file mode 100644 index 0000000000000..8addcc3af26b3 --- /dev/null +++ b/cpp/src/parquet/column_chunker_hashtable.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import hashlib +import pathlib +import sys +from io import StringIO + + +template = """\ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once +#include + +namespace parquet {{ +namespace internal {{ + +constexpr uint64_t GEARHASH_TABLE[8][256] = {{ +{content}}}; + +}} // namespace internal +}} // namespace parquet +""" + + +def generate_hash(n: int, seed: int): + value = bytes([seed] * 64 + [n] * 64) + hasher = hashlib.md5(value) + return hasher.hexdigest()[:16] + + +def generate_hashtable(seed: int, length=256, comma=True): + table = [generate_hash(n, seed=seed) for n in range(length)] + + out = StringIO() + out.write(f" {{// seed = {seed}\n") + for i in range(0, length, 4): + values = [f"0x{value}" for value in table[i:i + 4]] + values = ", ".join(values) + out.write(f" {values}") + if i < length - 4: + out.write(",\n") + out.write("}") + + return out.getvalue() + + +def generate_header(ntables=8, relative_path="column_chunker_hashtable.h"): + path = pathlib.Path(__file__).parent / relative_path + + tables = [generate_hashtable(seed) for seed in range(ntables)] + text = template.format(content=",\n".join(tables)) + path.write_text(text) + + +if __name__ == "__main__": + ntables = int(sys.argv[1]) if len(sys.argv) > 1 else 8 + generate_header(ntables) \ No newline at end of file diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/column_chunker_test.cc index f7b1e8fe1c7b3..99301179ebd49 100644 --- a/cpp/src/parquet/column_chunker_test.cc +++ b/cpp/src/parquet/column_chunker_test.cc @@ -846,6 +846,24 @@ TEST_P(TestColumnCDC, Append) { } } +TEST_P(TestColumnCDC, EmptyTable) { + auto [dtype, nullable, _] = GetParam(); + + auto schema = ::arrow::schema({::arrow::field("f0", dtype, nullable)}); + ASSERT_OK_AND_ASSIGN(auto empty_table, GenerateTable(schema, 0, 0)); + ASSERT_EQ(empty_table->num_rows(), 0); + + for (bool enable_dictionary : {false, true}) { + ASSERT_OK_AND_ASSIGN(auto result, + WriteAndGetPageSizes(empty_table, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); + + // An empty table should result in no data pages + 
ASSERT_TRUE(result.lengths.empty()); + ASSERT_TRUE(result.sizes.empty()); + } +} + // TODO(kszucs): add extension type and dictionary type INSTANTIATE_TEST_SUITE_P( FixedSizedTypes, TestColumnCDC, @@ -888,4 +906,3 @@ INSTANTIATE_TEST_SUITE_P( // TODO: // - test multiple row groups -// - test empty From 138447d92bcfdfeb1bd3d9f53543e0311c8c465a Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 3 Mar 2025 14:26:45 +0100 Subject: [PATCH 26/48] more CDC docstrings --- cpp/src/parquet/column_chunker.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index 1921efc049248..c9a5c4e62e8a1 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -121,11 +121,20 @@ class ContentDefinedChunker { const ::arrow::Array& values); private: + // Update the rolling hash with a compile-time known sized value, set has_matched_ to + // true if the hash matches the mask. template void Roll(const T value); + + // Update the rolling hash with a binary-like value, set has_matched_ to true if the + // hash matches the mask. void Roll(std::string_view value); + + // Evaluate whether a new chunk should be created based on the has_matched_, nth_run_ + // and chunk_size_ state. inline bool NeedNewChunk(); + // Calculate the chunk boundaries for typed Arrow arrays. template const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, const T& leaf_array); @@ -142,9 +151,16 @@ class ContentDefinedChunker { // factor. const uint64_t hash_mask_; + // Whether the rolling hash has matched the mask since the last chunk creation. This + // flag is set true by the Roll() function when the mask is matched and reset to false + // by NeedNewChunk() method. bool has_matched_ = false; + // The current run of the rolling hash, used to normalize the chunk size distribution + // by requiring multiple consecutive matches to create a new chunk. 
uint64_t nth_run_ = 0; + // Current chunk size in bytes, reset to 0 when a new chunk is created. uint64_t chunk_size_ = 0; + // Rolling hash state, never reset only initialized once for the entire column. uint64_t rolling_hash_ = 0; }; From 79ce13b8b390d0e4f0ff99053bd5ee275e441f55 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 5 Mar 2025 11:48:34 +0100 Subject: [PATCH 27/48] address review comments --- cpp/src/parquet/column_chunker.cc | 6 ++---- cpp/src/parquet/column_chunker.h | 9 +++------ cpp/src/parquet/column_chunker_hashtable.h | 6 ++---- cpp/src/parquet/column_writer.h | 1 - 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/column_chunker.cc index 90979bbd25a4e..11db84dbe5408 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/column_chunker.cc @@ -24,8 +24,7 @@ #include "parquet/column_chunker_hashtable.h" #include "parquet/level_conversion.h" -namespace parquet { -namespace internal { +namespace parquet::internal { // create a fake null array class with a GetView method returning 0 always class FakeNullArray { @@ -247,5 +246,4 @@ const ::arrow::Result> ContentDefinedChunker::GetBoundaries( } } -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/column_chunker.h index c9a5c4e62e8a1..30b04df6d7c1c 100644 --- a/cpp/src/parquet/column_chunker.h +++ b/cpp/src/parquet/column_chunker.h @@ -23,9 +23,7 @@ #include "arrow/array.h" #include "parquet/level_conversion.h" -namespace parquet { - -namespace internal { +namespace parquet::internal { // Represents a chunk of data with level offsets and value offsets due to the // record shredding for nested data. @@ -85,7 +83,7 @@ struct Chunk { /// and goes over the (def_level, rep_level, value) triplets one by one while adjusting /// the column-global rolling hash based on the triplet. 
Whenever the rolling hash matches /// a predefined mask, the chunker creates a new chunk. The chunker returns a vector of -/// Chunk objects that represent the boundaries of the chunks/// +/// Chunk objects that represent the boundaries of the chunks. /// Note that the boundaries are deterministically calculated exclusively based on the /// data itself, so the same data will always produce the same chunks - given the same /// chunker configuration. @@ -164,5 +162,4 @@ class ContentDefinedChunker { uint64_t rolling_hash_ = 0; }; -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/column_chunker_hashtable.h b/cpp/src/parquet/column_chunker_hashtable.h index b608e6583854a..63812cfec8423 100644 --- a/cpp/src/parquet/column_chunker_hashtable.h +++ b/cpp/src/parquet/column_chunker_hashtable.h @@ -18,8 +18,7 @@ #pragma once #include -namespace parquet { -namespace internal { +namespace parquet::internal { constexpr uint64_t GEARHASH_TABLE[8][256] = { {// seed = 0 @@ -543,5 +542,4 @@ constexpr uint64_t GEARHASH_TABLE[8][256] = { 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}}; -} // namespace internal -} // namespace parquet +} // namespace parquet::internal diff --git a/cpp/src/parquet/column_writer.h b/cpp/src/parquet/column_writer.h index 2ef549150b34c..bd329d61053f2 100644 --- a/cpp/src/parquet/column_writer.h +++ b/cpp/src/parquet/column_writer.h @@ -23,7 +23,6 @@ #include "arrow/type_fwd.h" #include "arrow/util/compression.h" -#include "parquet/column_chunker.h" #include "parquet/exception.h" #include "parquet/platform.h" #include "parquet/types.h" From 74ddc2728e253ef24e30bf6760eb952cd0b56fca Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 5 Mar 2025 13:05:44 +0100 Subject: [PATCH 28/48] rename files to chunker_internal_* to avoid installing the headers --- 
cpp/src/parquet/CMakeLists.txt | 5 +++-- cpp/src/parquet/{column_chunker.cc => chunker_internal.cc} | 5 +++-- cpp/src/parquet/{column_chunker.h => chunker_internal.h} | 0 ...lumn_chunker_hashtable.h => chunker_internal_hashtable.h} | 0 ...mn_chunker_hashtable.py => chunker_internal_hashtable.py} | 0 .../{column_chunker_test.cc => chunker_internal_test.cc} | 0 cpp/src/parquet/column_writer.cc | 2 +- cpp/src/parquet/properties.h | 1 - 8 files changed, 7 insertions(+), 6 deletions(-) rename cpp/src/parquet/{column_chunker.cc => chunker_internal.cc} (99%) rename cpp/src/parquet/{column_chunker.h => chunker_internal.h} (100%) rename cpp/src/parquet/{column_chunker_hashtable.h => chunker_internal_hashtable.h} (100%) rename cpp/src/parquet/{column_chunker_hashtable.py => chunker_internal_hashtable.py} (100%) rename cpp/src/parquet/{column_chunker_test.cc => chunker_internal_test.cc} (100%) diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index fd01f566cd413..f76e016860e0c 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -160,7 +160,7 @@ set(PARQUET_SRCS arrow/writer.cc bloom_filter.cc bloom_filter_reader.cc - column_chunker.cc + chunker_internal.cc column_reader.cc column_scanner.cc column_writer.cc @@ -391,11 +391,12 @@ add_parquet_test(reader-test add_parquet_test(writer-test SOURCES - column_chunker_test.cc column_writer_test.cc file_serialize_test.cc stream_writer_test.cc) +add_parquet_test(chunker-test SOURCES chunker_internal_test.cc) + add_parquet_test(arrow-test SOURCES arrow/arrow_metadata_test.cc diff --git a/cpp/src/parquet/column_chunker.cc b/cpp/src/parquet/chunker_internal.cc similarity index 99% rename from cpp/src/parquet/column_chunker.cc rename to cpp/src/parquet/chunker_internal.cc index 11db84dbe5408..1c463bde5f6a2 100644 --- a/cpp/src/parquet/column_chunker.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -15,13 +15,14 @@ // specific language governing permissions and limitations // under 
the License. -#include "parquet/column_chunker.h" +#include "parquet/chunker_internal.h" + #include #include #include #include "arrow/array.h" #include "arrow/util/logging.h" -#include "parquet/column_chunker_hashtable.h" +#include "parquet/chunker_internal_hashtable.h" #include "parquet/level_conversion.h" namespace parquet::internal { diff --git a/cpp/src/parquet/column_chunker.h b/cpp/src/parquet/chunker_internal.h similarity index 100% rename from cpp/src/parquet/column_chunker.h rename to cpp/src/parquet/chunker_internal.h diff --git a/cpp/src/parquet/column_chunker_hashtable.h b/cpp/src/parquet/chunker_internal_hashtable.h similarity index 100% rename from cpp/src/parquet/column_chunker_hashtable.h rename to cpp/src/parquet/chunker_internal_hashtable.h diff --git a/cpp/src/parquet/column_chunker_hashtable.py b/cpp/src/parquet/chunker_internal_hashtable.py similarity index 100% rename from cpp/src/parquet/column_chunker_hashtable.py rename to cpp/src/parquet/chunker_internal_hashtable.py diff --git a/cpp/src/parquet/column_chunker_test.cc b/cpp/src/parquet/chunker_internal_test.cc similarity index 100% rename from cpp/src/parquet/column_chunker_test.cc rename to cpp/src/parquet/chunker_internal_test.cc diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 00f475c7fb037..ce29b1f808228 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -45,7 +45,7 @@ #include "arrow/util/rle_encoding_internal.h" #include "arrow/util/type_traits.h" #include "arrow/visit_array_inline.h" -#include "parquet/column_chunker.h" +#include "parquet/chunker_internal.h" #include "parquet/column_page.h" #include "parquet/encoding.h" #include "parquet/encryption/encryption_internal.h" diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index ab49a0facff20..2674fadb05044 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -27,7 +27,6 @@ #include "arrow/type.h" #include 
"arrow/util/compression.h" #include "arrow/util/type_fwd.h" -#include "parquet/column_chunker.h" #include "parquet/encryption/encryption.h" #include "parquet/exception.h" #include "parquet/parquet_version.h" From 65852764cbc3da76364b00fff50729195441c745 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 5 Mar 2025 13:13:07 +0100 Subject: [PATCH 29/48] prefer to throw parquet exception rather than returning arrow status --- cpp/src/parquet/chunker_internal.cc | 8 +++++--- cpp/src/parquet/chunker_internal.h | 17 ++++++++--------- cpp/src/parquet/column_writer.cc | 5 ++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index 1c463bde5f6a2..e04d77c4234e6 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -23,6 +23,7 @@ #include "arrow/array.h" #include "arrow/util/logging.h" #include "parquet/chunker_internal_hashtable.h" +#include "parquet/exception.h" #include "parquet/level_conversion.h" namespace parquet::internal { @@ -204,7 +205,7 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev return Calculate(def_levels, rep_levels, num_levels, \ static_cast(values)); -const ::arrow::Result> ContentDefinedChunker::GetBoundaries( +const std::vector ContentDefinedChunker::GetBoundaries( const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, const ::arrow::Array& values) { auto type_id = values.type()->id(); @@ -242,8 +243,9 @@ const ::arrow::Result> ContentDefinedChunker::GetBoundaries( FakeNullArray fake_null_array; return Calculate(def_levels, rep_levels, num_levels, fake_null_array); default: - return ::arrow::Status::NotImplemented("Unsupported type " + - values.type()->ToString()); + throw ParquetException("Unsupported Arrow array type " + values.type()->ToString()); + // return ::arrow::Status::NotImplemented("Unsupported type " + + // values.type()->ToString()); } } diff --git 
a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 30b04df6d7c1c..02414cf651dba 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -60,11 +60,11 @@ struct Chunk { /// File1: [Page1][Page2][Page3]... /// File2: [Page4][Page2][Page3]... /// -/// Then the parquet file is being uploaded to a content addressable storage systems (CAS) -/// which split the bytes stream into content defined blobs. The CAS system will calculate -/// a unique identifier for each blob, then store the blob in a key-value store. If the -/// same blob is encountered again, the system can refer to the hash instead of physically -/// storing the blob again. In the example above, the CAS system would phiysically store +/// Then the parquet file is being uploaded to a content addressable storage system (CAS) +/// which splits the bytes stream into content defined blobs. The CAS system will +/// calculate a unique identifier for each blob, then store the blob in a key-value store. +/// If the same blob is encountered again, the system can refer to the hash instead of +/// physically storing the blob again. In the example above, the CAS system would store /// Page1, Page2, Page3, and Page4 only once and the required metadata to reassemble the /// files. 
/// While the deduplication is performed by the CAS system, the parquet chunker makes it @@ -113,10 +113,9 @@ class ContentDefinedChunker { /// @param num_levels Number of levels /// @param values Column values as an Arrow array /// @return Vector of Chunk objects representing the chunk boundaries - const ::arrow::Result> GetBoundaries(const int16_t* def_levels, - const int16_t* rep_levels, - int64_t num_levels, - const ::arrow::Array& values); + const std::vector GetBoundaries(const int16_t* def_levels, + const int16_t* rep_levels, int64_t num_levels, + const ::arrow::Array& values); private: // Update the rolling hash with a compile-time known sized value, set has_matched_ to diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index ce29b1f808228..a4183eac1bdd3 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1340,9 +1340,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< } if (properties_->cdc_enabled()) { - ARROW_ASSIGN_OR_RAISE(auto boundaries, - content_defined_chunker_.GetBoundaries( - def_levels, rep_levels, num_levels, leaf_array)); + auto boundaries = content_defined_chunker_.GetBoundaries(def_levels, rep_levels, + num_levels, leaf_array); for (auto chunk : boundaries) { auto chunk_array = leaf_array.Slice(chunk.value_offset); auto chunk_def_levels = AddIfNotNull(def_levels, chunk.level_offset); From fc49e5160864b7b4d89b366b166b339cfa6b779f Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 5 Mar 2025 13:32:11 +0100 Subject: [PATCH 30/48] add reference to chunk size normalization --- cpp/src/parquet/chunker_internal.cc | 2 -- cpp/src/parquet/chunker_internal.h | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index e04d77c4234e6..60b797c8146b9 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -244,8 +244,6 
@@ const std::vector ContentDefinedChunker::GetBoundaries( return Calculate(def_levels, rep_levels, num_levels, fake_null_array); default: throw ParquetException("Unsupported Arrow array type " + values.type()->ToString()); - // return ::arrow::Status::NotImplemented("Unsupported type " + - // values.type()->ToString()); } } diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 02414cf651dba..53d8df0a799bb 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -89,9 +89,11 @@ struct Chunk { /// chunker configuration. /// /// References: -/// - FastCDC paper: "FastCDC: a Fast and Efficient Content-Defined Chunking Approach for -/// Data Deduplication" +/// - FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data +/// Deduplication /// https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf +/// - Git is for Data (chunk size normalization used here is described in section 6.2.1): +/// https://www.cidrdb.org/cidr2023/papers/p43-low.pdf class ContentDefinedChunker { public: /// Create a new ContentDefinedChunker instance From 3a520025efd775317feab0e068532a5139d8ff11 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 5 Mar 2025 13:44:13 +0100 Subject: [PATCH 31/48] add a comment about AddDataPage() at the end of each chunk --- cpp/src/parquet/column_writer.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index a4183eac1bdd3..824b1db9d4ef3 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1356,6 +1356,10 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< maybe_parent_nulls)); } if (num_buffered_values_ > 0) { + // Explicitly add a new data page according to the content-defined chunk + // boundaries. 
This way the same chunks will have the same byte-sequence + // in the resulting file, which can be identified by content addressible + // storage. AddDataPage(); } } From 3cb7fcf09072cef8fe364d836e8e807062f102c8 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 6 Mar 2025 19:51:02 +0100 Subject: [PATCH 32/48] address review comments --- .gitattributes | 1 + cpp/src/parquet/chunker_internal.cc | 20 +- cpp/src/parquet/chunker_internal.h | 3 +- ...shtable.py => chunker_internal_codegen.py} | 12 +- ...shtable.h => chunker_internal_generated.h} | 2 +- cpp/src/parquet/column_chunker_generated.h | 545 ++++++++++++++++++ cpp/src/parquet/column_writer.cc | 3 +- 7 files changed, 565 insertions(+), 21 deletions(-) rename cpp/src/parquet/{chunker_internal_hashtable.py => chunker_internal_codegen.py} (90%) rename cpp/src/parquet/{chunker_internal_hashtable.h => chunker_internal_generated.h} (99%) create mode 100644 cpp/src/parquet/column_chunker_generated.h diff --git a/.gitattributes b/.gitattributes index 70007c26c8b9b..18396af493351 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ cpp/src/arrow/util/bpacking_*_generated.h linguist-generated=true +cpp/src/parquet/chunker_*_generated.h linguist-generated=true cpp/src/generated/*.cpp linguist-generated=true cpp/src/generated/*.h linguist-generated=true go/**/*.s linguist-generated=true diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index 60b797c8146b9..2ded58c7bdd41 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -22,7 +22,7 @@ #include #include "arrow/array.h" #include "arrow/util/logging.h" -#include "parquet/chunker_internal_hashtable.h" +#include "parquet/chunker_internal_generated.h" #include "parquet/exception.h" #include "parquet/level_conversion.h" @@ -52,12 +52,12 @@ static uint64_t GetMask(uint64_t min_size, uint64_t max_size, uint8_t norm_facto } ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& 
level_info, - std::pair size_range, + uint64_t min_size, uint64_t max_size, uint8_t norm_factor) : level_info_(level_info), - min_size_(size_range.first), - max_size_(size_range.second), - hash_mask_(GetMask(size_range.first, size_range.second, norm_factor)) {} + min_size_(min_size), + max_size_(max_size), + hash_mask_(GetMask(min_size, max_size, norm_factor)) {} template void ContentDefinedChunker::Roll(const T value) { @@ -70,7 +70,7 @@ void ContentDefinedChunker::Roll(const T value) { } auto bytes = reinterpret_cast(&value); for (size_t i = 0; i < BYTE_WIDTH; ++i) { - rolling_hash_ = (rolling_hash_ << 1) + GEARHASH_TABLE[nth_run_][bytes[i]]; + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][bytes[i]]; has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } @@ -84,7 +84,7 @@ void ContentDefinedChunker::Roll(std::string_view value) { } for (char c : value) { rolling_hash_ = - (rolling_hash_ << 1) + GEARHASH_TABLE[nth_run_][static_cast(c)]; + (rolling_hash_ << 1) + kGearhashTable[nth_run_][static_cast(c)]; has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } @@ -105,9 +105,9 @@ bool ContentDefinedChunker::NeedNewChunk() { } } if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { - // we have a hard limit on the maximum chunk size, not that we don't reset the rolling - // hash state here, so the next NeedNewChunk() call will continue from the current - // state + // we have a hard limit on the maximum chunk size, note that we don't reset the + // rolling hash state here, so the next NeedNewChunk() call will continue from the + // current state chunk_size_ = 0; return true; } diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 53d8df0a799bb..b7334d3f6a535 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -104,8 +104,7 @@ class ContentDefinedChunker { /// @param norm_factor Normalization factor to center the chunk size around the average 
/// size more aggressively. By increasing the normalization factor, /// probability of finding a chunk boundary increases. - ContentDefinedChunker(const LevelInfo& level_info, - std::pair size_range, + ContentDefinedChunker(const LevelInfo& level_info, uint64_t min_size, uint64_t max_size, uint8_t norm_factor = 0); /// Get the chunk boundaries for the given column data diff --git a/cpp/src/parquet/chunker_internal_hashtable.py b/cpp/src/parquet/chunker_internal_codegen.py similarity index 90% rename from cpp/src/parquet/chunker_internal_hashtable.py rename to cpp/src/parquet/chunker_internal_codegen.py index 8addcc3af26b3..063eda4b92f84 100644 --- a/cpp/src/parquet/chunker_internal_hashtable.py +++ b/cpp/src/parquet/chunker_internal_codegen.py @@ -44,14 +44,12 @@ #pragma once #include -namespace parquet {{ -namespace internal {{ +namespace parquet::internal {{ -constexpr uint64_t GEARHASH_TABLE[8][256] = {{ +constexpr uint64_t kGearhashTable[8][256] = {{ {content}}}; -}} // namespace internal -}} // namespace parquet +}} // namespace parquet::internal """ @@ -61,7 +59,7 @@ def generate_hash(n: int, seed: int): return hasher.hexdigest()[:16] -def generate_hashtable(seed: int, length=256, comma=True): +def generate_hashtable(seed: int, length=256): table = [generate_hash(n, seed=seed) for n in range(length)] out = StringIO() @@ -77,7 +75,7 @@ def generate_hashtable(seed: int, length=256, comma=True): return out.getvalue() -def generate_header(ntables=8, relative_path="column_chunker_hashtable.h"): +def generate_header(ntables=8, relative_path="column_chunker_generated.h"): path = pathlib.Path(__file__).parent / relative_path tables = [generate_hashtable(seed) for seed in range(ntables)] diff --git a/cpp/src/parquet/chunker_internal_hashtable.h b/cpp/src/parquet/chunker_internal_generated.h similarity index 99% rename from cpp/src/parquet/chunker_internal_hashtable.h rename to cpp/src/parquet/chunker_internal_generated.h index 63812cfec8423..13a47984b7415 100644 --- 
a/cpp/src/parquet/chunker_internal_hashtable.h +++ b/cpp/src/parquet/chunker_internal_generated.h @@ -20,7 +20,7 @@ namespace parquet::internal { -constexpr uint64_t GEARHASH_TABLE[8][256] = { +constexpr uint64_t kGearhashTable[8][256] = { {// seed = 0 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, diff --git a/cpp/src/parquet/column_chunker_generated.h b/cpp/src/parquet/column_chunker_generated.h new file mode 100644 index 0000000000000..13a47984b7415 --- /dev/null +++ b/cpp/src/parquet/column_chunker_generated.h @@ -0,0 +1,545 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once +#include + +namespace parquet::internal { + +constexpr uint64_t kGearhashTable[8][256] = { + {// seed = 0 + 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, + 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, + 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, + 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, + 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, + 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, + 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, + 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, + 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, + 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, + 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, + 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, + 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, + 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, + 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, + 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, + 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, + 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, + 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, + 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, + 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, + 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, + 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 
0x1adce054289a0ec6, + 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, + 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, + 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, + 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, + 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, + 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, + 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, + 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, + 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, + 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, + 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, + 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, + 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, + 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, + 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, + 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, + 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, + 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, + 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, + 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, + 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, + 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, + 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, + 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, + 
0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, + 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, + 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, + 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, + 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, + 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, + 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, + 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, + 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, + 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, + 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, + 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, + 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, + 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, + 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, + 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, + 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, + {// seed = 1 + 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, + 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, + 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, + 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, + 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, + 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, + 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, + 
0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, + 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, + 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, + 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, + 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, + 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, + 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, + 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, + 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, + 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, + 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, + 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, + 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, + 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, + 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, + 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, + 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, + 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, + 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, + 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, + 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, + 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, + 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, + 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, + 0xa43b3763e7d4c902, 
0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, + 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, + 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, + 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, + 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, + 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, + 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, + 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, + 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, + 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, + 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, + 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, + 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, + 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, + 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, + 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, + 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, + 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, + 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, + 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, + 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, + 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, + 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, + 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, + 0xeef28ced23894cc2, 0x753fdd3352aefd68, 
0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, + 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, + 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, + 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, + 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, + 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, + 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, + 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, + 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, + {// seed = 2 + 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, + 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, + 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, + 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, + 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, + 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, + 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, + 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, + 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, + 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, + 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, + 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, + 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, + 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, + 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, + 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 
0xc8f484d1dbc38427, 0xae9c510c463574c0, + 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, + 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, + 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, + 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, + 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, + 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, + 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, + 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, + 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, + 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, + 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, + 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, + 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, + 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, + 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, + 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, + 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, + 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, + 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, + 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, + 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, + 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, + 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, + 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 
0x9b2f7bb03d836a79, + 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, + 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, + 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, + 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, + 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, + 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, + 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, + 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, + 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, + 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, + 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, + 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, + 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, + 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, + 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, + 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, + 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, + 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, + 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, + 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, + 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, + 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, + 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, + 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, + {// seed 
= 3 + 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, + 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, + 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, + 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, + 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, + 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, + 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, + 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, + 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, + 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, + 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, + 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, + 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, + 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, + 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, + 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, + 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, + 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, + 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, + 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, + 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, + 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, + 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, + 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, + 0x0ce2db5e8f9553d6, 
0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, + 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, + 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, + 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, + 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, + 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, + 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, + 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, + 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, + 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, + 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, + 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, + 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, + 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, + 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, + 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, + 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, + 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, + 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, + 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, + 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, + 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, + 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, + 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, + 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 
0x5e6d66141e8d632a, 0x7638285124d5d602, + 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, + 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, + 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, + 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, + 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, + 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, + 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, + 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, + 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, + 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, + 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, + 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, + 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, + 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, + 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, + {// seed = 4 + 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, + 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, + 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, + 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, + 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, + 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, + 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, + 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, + 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 
0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, + 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, + 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, + 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, + 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, + 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, + 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, + 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, + 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, + 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, + 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, + 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, + 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, + 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, + 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, + 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, + 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, + 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, + 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, + 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, + 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, + 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, + 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, + 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, + 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 
0xb7862594da5336fc, + 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, + 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, + 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, + 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, + 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, + 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, + 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, + 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, + 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, + 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, + 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, + 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, + 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, + 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, + 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, + 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, + 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, + 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, + 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, + 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, + 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, + 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, + 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, + 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, + 
0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, + 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, + 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, + 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, + 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, + 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, + 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, + {// seed = 5 + 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, + 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, + 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, + 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, + 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, + 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, + 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, + 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, + 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, + 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, + 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, + 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, + 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, + 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, + 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, + 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, + 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, + 
0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, + 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, + 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, + 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, + 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, + 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, + 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, + 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, + 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, + 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, + 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, + 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, + 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, + 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, + 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, + 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, + 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, + 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, + 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, + 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, + 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, + 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, + 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, + 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, + 0x24ca9c8092105ad5, 
0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, + 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, + 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, + 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, + 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, + 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, + 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, + 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, + 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, + 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, + 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, + 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, + 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, + 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, + 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, + 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, + 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, + 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, + 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, + 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, + 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, + 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, + 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, + {// seed = 6 + 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, + 0x983fde47b3c3847e, 
0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, + 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, + 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, + 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, + 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, + 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, + 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, + 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, + 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, + 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, + 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, + 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, + 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, + 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, + 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, + 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, + 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, + 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, + 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, + 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, + 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, + 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, + 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, + 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, + 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 
0x05602be7df625702, 0x11f1ac73790f7a9f, + 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, + 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, + 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, + 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, + 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, + 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, + 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, + 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, + 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, + 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, + 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, + 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, + 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, + 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, + 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, + 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, + 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, + 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, + 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, + 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, + 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, + 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, + 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, + 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 
0x7094579d1000ec84, + 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, + 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, + 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, + 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, + 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, + 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, + 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, + 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, + 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, + 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, + 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, + 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, + 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, + 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, + {// seed = 7 + 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, + 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, + 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, + 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, + 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, + 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, + 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, + 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, + 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, + 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 
0x8ed27abfa8cfcb05, + 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, + 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, + 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, + 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, + 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, + 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, + 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, + 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, + 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, + 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, + 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, + 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, + 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, + 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, + 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, + 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, + 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, + 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, + 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, + 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, + 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, + 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, + 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, + 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, + 
0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, + 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, + 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, + 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, + 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, + 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, + 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, + 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, + 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, + 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, + 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, + 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, + 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, + 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, + 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, + 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, + 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, + 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, + 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, + 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, + 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, + 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, + 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, + 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, + 0x9c4386cf0a07f3b2, 
0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, + 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, + 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, + 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, + 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, + 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}}; + +} // namespace parquet::internal diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 824b1db9d4ef3..af3fbe3ce8289 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -754,7 +754,8 @@ class ColumnWriterImpl { fallback_(false), definition_levels_sink_(allocator_), repetition_levels_sink_(allocator_), - content_defined_chunker_(level_info_, properties->cdc_size_range(), + content_defined_chunker_(level_info_, properties->cdc_size_range().first, + properties->cdc_size_range().second, properties->cdc_norm_factor()) { definition_levels_rle_ = std::static_pointer_cast(AllocateBuffer(allocator_, 0)); From 6e34d031e676f79acec45637b5e43ba8bde7560a Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 6 Mar 2025 19:54:03 +0100 Subject: [PATCH 33/48] fix generated header name --- cpp/src/parquet/chunker_internal_codegen.py | 2 +- cpp/src/parquet/column_chunker_generated.h | 545 -------------------- 2 files changed, 1 insertion(+), 546 deletions(-) delete mode 100644 cpp/src/parquet/column_chunker_generated.h diff --git a/cpp/src/parquet/chunker_internal_codegen.py b/cpp/src/parquet/chunker_internal_codegen.py index 063eda4b92f84..29cd856f3c47f 100644 --- a/cpp/src/parquet/chunker_internal_codegen.py +++ b/cpp/src/parquet/chunker_internal_codegen.py @@ -75,7 +75,7 @@ def generate_hashtable(seed: int, length=256): return out.getvalue() -def generate_header(ntables=8, relative_path="column_chunker_generated.h"): +def 
generate_header(ntables=8, relative_path="chunker_internal_generated.h"): path = pathlib.Path(__file__).parent / relative_path tables = [generate_hashtable(seed) for seed in range(ntables)] diff --git a/cpp/src/parquet/column_chunker_generated.h b/cpp/src/parquet/column_chunker_generated.h deleted file mode 100644 index 13a47984b7415..0000000000000 --- a/cpp/src/parquet/column_chunker_generated.h +++ /dev/null @@ -1,545 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once -#include - -namespace parquet::internal { - -constexpr uint64_t kGearhashTable[8][256] = { - {// seed = 0 - 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, - 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, - 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, - 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, - 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, - 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, - 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, - 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, - 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, - 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, - 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, - 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, - 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, - 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, - 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, - 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, - 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, - 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, - 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, - 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, - 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, - 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, - 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 
0x1adce054289a0ec6, - 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, - 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, - 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, - 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, - 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, - 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, - 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, - 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, - 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, - 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, - 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, - 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, - 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 0x1d0a34e55e0f369f, - 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, - 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, - 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, - 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, - 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, - 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, - 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, - 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, - 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, - 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, - 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, - 
0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, - 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, - 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, - 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, - 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, - 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, - 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, - 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, - 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, - 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, - 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, - 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, - 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, - 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, - 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, - 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, - 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211}, - {// seed = 1 - 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, - 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, - 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, - 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, - 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, - 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, - 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, - 
0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, - 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, - 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, - 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, - 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, - 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, - 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, - 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 0x2a617680f481cb94, - 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, - 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, - 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, - 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, - 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, - 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, - 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, - 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, - 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, - 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, - 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, - 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, - 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, - 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, - 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, - 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, - 0xa43b3763e7d4c902, 
0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, - 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, - 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, - 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, - 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, - 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, - 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, - 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, - 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, - 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, - 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, - 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, - 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, - 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, - 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, - 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, - 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, - 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, - 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, - 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, - 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, - 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, - 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, - 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, - 0xeef28ced23894cc2, 0x753fdd3352aefd68, 
0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, - 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, - 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, - 0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, - 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, - 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, - 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, - 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, - 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3}, - {// seed = 2 - 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, - 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, - 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, - 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, - 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, - 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, - 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, - 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, - 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, - 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, - 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, - 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, - 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, - 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, - 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, - 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 
0xc8f484d1dbc38427, 0xae9c510c463574c0, - 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, - 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, - 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, - 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, - 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, - 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, - 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, - 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, - 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, - 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, - 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, - 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, - 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, - 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, - 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, - 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, - 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, - 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, - 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, - 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, - 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, - 0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, - 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, - 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 
0x9b2f7bb03d836a79, - 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, - 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, - 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, - 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, - 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, - 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, - 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, - 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, - 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, - 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, - 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, - 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, - 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, - 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, - 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, - 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, - 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, - 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, - 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, - 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, - 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, - 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, - 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, - 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e}, - {// seed 
= 3 - 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, - 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, - 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, - 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, - 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, - 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, - 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, - 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, - 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, - 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, - 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, - 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, - 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, - 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, - 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, - 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, - 0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, - 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, - 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, - 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, - 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, - 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, - 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, - 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, - 0x0ce2db5e8f9553d6, 
0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, - 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, - 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, - 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, - 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, - 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, - 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, - 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, - 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, - 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, - 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, - 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, - 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, - 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, - 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, - 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, - 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, - 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, - 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, - 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, - 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, - 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, - 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, - 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, - 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 
0x5e6d66141e8d632a, 0x7638285124d5d602, - 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, - 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, - 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, - 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, - 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, - 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, - 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, - 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, - 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, - 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, - 0x19ebc13816898be2, 0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, - 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, - 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, - 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, - 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad}, - {// seed = 4 - 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, - 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, - 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, - 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, - 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, - 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, - 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, - 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, - 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 
0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, - 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, - 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, - 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, - 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, - 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, - 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, - 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, - 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, - 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, - 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, - 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, - 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, - 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, - 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, - 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, - 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, - 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, - 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, - 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, - 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, - 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, - 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, - 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, - 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 
0xb7862594da5336fc, - 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, - 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, - 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, - 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, - 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, - 0x0875ef5c2c56b164, 0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, - 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, - 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, - 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, - 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, - 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, - 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, - 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, - 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, - 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, - 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, - 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, - 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, - 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, - 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, - 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, - 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, - 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, - 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, - 
0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, - 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, - 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, - 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, - 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, - 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, - 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822}, - {// seed = 5 - 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, - 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, - 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, - 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, - 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, - 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, - 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, - 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, - 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, - 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, - 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, - 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, - 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, - 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, - 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, - 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, - 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, - 
0xab98785aefe9efe3, 0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, - 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, - 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, - 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, - 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, - 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, - 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, - 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, - 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, - 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, - 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, - 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, - 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, - 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, - 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, - 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, - 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, - 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, - 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, - 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, - 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, - 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, - 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, - 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, - 0x24ca9c8092105ad5, 
0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, - 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, - 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, - 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, - 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, - 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, - 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, - 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, - 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, - 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, - 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, - 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, - 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, - 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, - 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, - 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, - 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, - 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, - 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, - 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, - 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, - 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, - 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59}, - {// seed = 6 - 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, - 0x983fde47b3c3847e, 
0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, - 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, - 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, - 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, - 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, - 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, - 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, - 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, - 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, - 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, - 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, - 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, - 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, - 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, - 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, - 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, - 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, - 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, - 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, - 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, - 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, - 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, - 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, - 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, - 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 
0x05602be7df625702, 0x11f1ac73790f7a9f, - 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, - 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, - 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, - 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, - 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, - 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, - 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, - 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, - 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, - 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, - 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, - 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, - 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, - 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 0x976417c06aeb6267, 0x8161c684a6bd468c, - 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, - 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, - 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, - 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, - 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, - 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, - 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, - 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, - 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, - 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 
0x7094579d1000ec84, - 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, - 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, - 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, - 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, - 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, - 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, - 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, - 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, - 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, - 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, - 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, - 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, - 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, - 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec}, - {// seed = 7 - 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, - 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, - 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, - 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, - 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, - 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, - 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, - 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, - 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, - 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 
0x8ed27abfa8cfcb05, - 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, - 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, - 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, - 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, - 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, - 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, - 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, - 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, - 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 0xa2595cd208dbdc38, 0xae93cb264f940c09, - 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, - 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, - 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, - 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, - 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, - 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, - 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, - 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, - 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, - 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, - 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, - 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, - 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, - 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, - 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, - 
0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, - 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, - 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, - 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, - 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, - 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, - 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, - 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, - 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, - 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, - 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, - 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, - 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, - 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, - 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, - 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, - 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, - 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, - 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, - 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, - 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, - 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, - 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, - 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, - 0x9c4386cf0a07f3b2, 
0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, - 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, - 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, - 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 0x89bb2dc76b2b624a, - 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, - 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e}}; - -} // namespace parquet::internal From d98fe258f7ebfb69129f47fbc2eb3189ac3bfedc Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 6 Mar 2025 20:06:25 +0100 Subject: [PATCH 34/48] more docstring for CDC arguments --- cpp/src/parquet/chunker_internal.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index b7334d3f6a535..77ab1b7784a5a 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -99,11 +99,24 @@ class ContentDefinedChunker { /// Create a new ContentDefinedChunker instance /// /// @param level_info Information about definition and repetition levels - /// @param size_range Min/max chunk size as pair, the chunker will - /// attempt to uniformly distribute the chunks between these extremes. + /// @param min_size Minimum chunk size in bytes, the rolling hash will not be updated + /// until this size is reached for each chunk. Note that all data sent + /// through the hash function is counted towards the chunk size, + /// including definition and repetition levels if present. + /// @param max_size Maximum chunk size in bytes, the chunker will create a new chunk + /// whenever the chunk size exceeds this value. The chunker will + /// attempt to uniformly distribute the chunks between min_size and + /// max_size. /// @param norm_factor Normalization factor to center the chunk size around the average /// size more aggressively. 
By increasing the normalization factor, - /// probability of finding a chunk boundary increases. + /// probability of finding a chunk boundary increases improving the + /// deduplication ratio, but also increases the number of small + /// chunks resulting in small parquet data pages. The default value + /// provides a good balance between deduplication ratio and + /// fragmentation. Use norm_factor=1 or norm_factor=2 if a higher + /// deduplication ratio is required at the expense of fragmentation, + /// norm_factor>2 is typically not increasing the deduplication + /// ratio. ContentDefinedChunker(const LevelInfo& level_info, uint64_t min_size, uint64_t max_size, uint8_t norm_factor = 0); From 7af7e7877e0ec4b1ec97e95ad21bfc231dfcb049 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 6 Mar 2025 21:02:56 +0100 Subject: [PATCH 35/48] prefer templated GenerateArray rather than macro --- cpp/src/parquet/chunker_internal_test.cc | 79 ++++++++++++++---------- 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/cpp/src/parquet/chunker_internal_test.cc b/cpp/src/parquet/chunker_internal_test.cc index 99301179ebd49..c4d963217f688 100644 --- a/cpp/src/parquet/chunker_internal_test.cc +++ b/cpp/src/parquet/chunker_internal_test.cc @@ -65,34 +65,38 @@ inline uint64_t hash(uint64_t seed, uint64_t index) { return h; } -#define GENERATE_CASE_BODY(BUILDER_TYPE, VALUE_EXPR) \ - { \ - BUILDER_TYPE builder(type, default_memory_pool()); \ - if (nullable) { \ - for (int64_t i = 0; i < length; ++i) { \ - uint64_t val = hash(seed, i); \ - if (val % 10 == 0) { \ - RETURN_NOT_OK(builder.AppendNull()); \ - } else { \ - RETURN_NOT_OK(builder.Append(VALUE_EXPR)); \ - } \ - } \ - } else { \ - for (int64_t i = 0; i < length; ++i) { \ - uint64_t val = hash(seed, i); \ - RETURN_NOT_OK(builder.Append(VALUE_EXPR)); \ - } \ - } \ - std::shared_ptr array; \ - RETURN_NOT_OK(builder.Finish(&array)); \ - RETURN_NOT_OK(array->ValidateFull()); \ - return array; \ - } - -// Macro to 
generate a case for a given scalar type. -#define GENERATE_CASE(TYPE_ID, BUILDER_TYPE, VALUE_EXPR) \ - case ::arrow::Type::TYPE_ID: { \ - GENERATE_CASE_BODY(BUILDER_TYPE, VALUE_EXPR) \ +template +Result> GenerateArray(const std::shared_ptr& type, + bool nullable, int64_t length, uint64_t seed, + ValueFunc value_func) { + BuilderType builder(type, default_memory_pool()); + + if (nullable) { + for (int64_t i = 0; i < length; ++i) { + uint64_t val = hash(seed, i); + if (val % 10 == 0) { + RETURN_NOT_OK(builder.AppendNull()); + } else { + RETURN_NOT_OK(builder.Append(value_func(val))); + } + } + } else { + for (int64_t i = 0; i < length; ++i) { + uint64_t val = hash(seed, i); + RETURN_NOT_OK(builder.Append(value_func(val))); + } + } + + std::shared_ptr array; + RETURN_NOT_OK(builder.Finish(&array)); + RETURN_NOT_OK(array->ValidateFull()); + return array; +} + +#define GENERATE_CASE(TYPE_ID, BUILDER_TYPE, VALUE_EXPR) \ + case ::arrow::Type::TYPE_ID: { \ + auto value_func = [](uint64_t val) { return VALUE_EXPR; }; \ + return GenerateArray(type, nullable, length, seed, value_func); \ } Result> GenerateArray(const std::shared_ptr& field, @@ -122,7 +126,11 @@ Result> GenerateArray(const std::shared_ptr& field // Limit the value to fit within the specified precision int32_t max_exponent = decimal_type.precision() - decimal_type.scale(); int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); - GENERATE_CASE_BODY(::arrow::Decimal128Builder, ::arrow::Decimal128(val % max_value)) + auto value_func = [&](uint64_t val) { + return ::arrow::Decimal128(val % max_value); + }; + return GenerateArray<::arrow::Decimal128Builder>(type, nullable, length, seed, + value_func); } case ::arrow::Type::DECIMAL256: { const auto& decimal_type = static_cast(*type); @@ -130,7 +138,11 @@ Result> GenerateArray(const std::shared_ptr& field // int64_t overflow int32_t max_exponent = std::min(9, decimal_type.precision() - decimal_type.scale()); int64_t max_value = static_cast(std::pow(10, 
max_exponent) - 1); - GENERATE_CASE_BODY(::arrow::Decimal256Builder, ::arrow::Decimal256(val % max_value)) + auto value_func = [&](uint64_t val) { + return ::arrow::Decimal256(val % max_value); + }; + return GenerateArray<::arrow::Decimal256Builder>(type, nullable, length, seed, + value_func); } // Temporal types @@ -151,8 +163,11 @@ Result> GenerateArray(const std::shared_ptr& field std::string("bin_") + std::to_string(val)) case ::arrow::Type::FIXED_SIZE_BINARY: { auto size = static_cast<::arrow::FixedSizeBinaryType*>(type.get())->byte_width(); - GENERATE_CASE_BODY(::arrow::FixedSizeBinaryBuilder, - std::string("bin_") + std::to_string(val).substr(0, size - 4)) + auto value_func = [size](uint64_t val) { + return std::string("bin_") + std::to_string(val).substr(0, size - 4); + }; + return GenerateArray<::arrow::FixedSizeBinaryBuilder>(type, nullable, length, seed, + value_func); } case ::arrow::Type::STRUCT: { From f80265709b63368fc3419a91d1e48c5af7dcc4a2 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 13:25:57 +0100 Subject: [PATCH 36/48] don't hash undefined null values; reduce generated code size by dispatching based on type width; use pointers when calculating rolling hash --- cpp/src/parquet/chunker_internal.cc | 172 ++++++++++++++++------------ cpp/src/parquet/chunker_internal.h | 10 +- 2 files changed, 107 insertions(+), 75 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index 2ded58c7bdd41..a85e7ccfaa30f 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -28,16 +28,6 @@ namespace parquet::internal { -// create a fake null array class with a GetView method returning 0 always -class FakeNullArray { - public: - uint8_t GetView(int64_t i) const { return 0; } - - std::shared_ptr<::arrow::DataType> type() const { return ::arrow::null(); } - - int64_t null_count() const { return 0; } -}; - static uint64_t GetMask(uint64_t min_size, uint64_t 
max_size, uint8_t norm_factor) { // we aim for gaussian-like distribution of chunk sizes between min_size and max_size uint64_t avg_size = (min_size + max_size) / 2; @@ -59,8 +49,18 @@ ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& level_info, max_size_(max_size), hash_mask_(GetMask(min_size, max_size, norm_factor)) {} +void ContentDefinedChunker::Roll(const bool value) { + if (chunk_size_++ < min_size_) { + // short-circuit if we haven't reached the minimum chunk size, this speeds up the + // chunking process since the gearhash doesn't need to be updated + return; + } + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value]; + has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); +} + template -void ContentDefinedChunker::Roll(const T value) { +void ContentDefinedChunker::Roll(const T* value) { constexpr size_t BYTE_WIDTH = sizeof(T); chunk_size_ += BYTE_WIDTH; if (chunk_size_ < min_size_) { @@ -68,23 +68,22 @@ void ContentDefinedChunker::Roll(const T value) { // chunking process since the gearhash doesn't need to be updated return; } - auto bytes = reinterpret_cast(&value); + auto bytes = reinterpret_cast(value); for (size_t i = 0; i < BYTE_WIDTH; ++i) { rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][bytes[i]]; has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } -void ContentDefinedChunker::Roll(std::string_view value) { - chunk_size_ += value.size(); +void ContentDefinedChunker::Roll(const uint8_t* value, int64_t num_bytes) { + chunk_size_ += num_bytes; if (chunk_size_ < min_size_) { // short-circuit if we haven't reached the minimum chunk size, this speeds up the // chunking process since the gearhash doesn't need to be updated return; } - for (char c : value) { - rolling_hash_ = - (rolling_hash_ << 1) + kGearhashTable[nth_run_][static_cast(c)]; + for (int64_t i = 0; i < num_bytes; ++i) { + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; has_matched_ = 
has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } @@ -114,22 +113,20 @@ bool ContentDefinedChunker::NeedNewChunk() { return false; } -template +template const std::vector ContentDefinedChunker::Calculate(const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, - const T& leaf_array) { + const RollFunc& RollValue) { std::vector chunks; bool has_def_levels = level_info_.def_level > 0; bool has_rep_levels = level_info_.rep_level > 0; if (!has_rep_levels && !has_def_levels) { // fastest path for non-nested non-null data - int64_t offset = 0; int64_t prev_offset = 0; - while (offset < num_levels) { - Roll(leaf_array.GetView(offset)); - ++offset; + for (int64_t offset = 0; offset < num_levels; ++offset) { + RollValue(offset); if (NeedNewChunk()) { chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; @@ -140,12 +137,15 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev } } else if (!has_rep_levels) { // non-nested data with nulls - int64_t offset = 0; + int16_t def_level; int64_t prev_offset = 0; - while (offset < num_levels) { - Roll(def_levels[offset]); - Roll(leaf_array.GetView(offset)); - ++offset; + for (int64_t offset = 0; offset < num_levels; ++offset) { + def_level = def_levels[offset]; + + Roll(&def_level); + if (def_level == level_info_.def_level) { + RollValue(offset); + } if (NeedNewChunk()) { chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; @@ -161,8 +161,8 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev int16_t def_level; int16_t rep_level; int64_t value_offset = 0; - int64_t record_level_offset = 0; - int64_t record_value_offset = 0; + int64_t prev_level_offset = 0; + int64_t prev_value_offset = 0; for (int64_t level_offset = 0; level_offset < num_levels; ++level_offset) { def_level = def_levels[level_offset]; @@ -171,18 +171,18 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* 
def_lev has_leaf_value = def_level >= level_info_.repeated_ancestor_def_level; is_record_boundary = rep_level == 0; - Roll(def_level); - Roll(rep_level); + Roll(&def_level); + Roll(&rep_level); if (has_leaf_value) { - Roll(leaf_array.GetView(value_offset)); + RollValue(value_offset); } if (is_record_boundary && NeedNewChunk()) { - auto levels_to_write = level_offset - record_level_offset; + auto levels_to_write = level_offset - prev_level_offset; if (levels_to_write > 0) { - chunks.emplace_back(record_level_offset, record_value_offset, levels_to_write); - record_level_offset = level_offset; - record_value_offset = value_offset; + chunks.emplace_back(prev_level_offset, prev_value_offset, levels_to_write); + prev_level_offset = level_offset; + prev_value_offset = value_offset; } } @@ -191,57 +191,87 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev } } - auto levels_to_write = num_levels - record_level_offset; + auto levels_to_write = num_levels - prev_level_offset; if (levels_to_write > 0) { - chunks.emplace_back(record_level_offset, record_value_offset, levels_to_write); + chunks.emplace_back(prev_level_offset, prev_value_offset, levels_to_write); } } return chunks; } -#define PRIMITIVE_CASE(TYPE_ID, ArrowType) \ - case ::arrow::Type::TYPE_ID: \ - return Calculate(def_levels, rep_levels, num_levels, \ - static_cast(values)); +#define FIXED_WIDTH_CASE(CType) \ + { \ + const auto raw_values = values.data()->GetValues(1); \ + return Calculate(def_levels, rep_levels, num_levels, \ + [&](int64_t i) { return Roll(raw_values + i); }); \ + } + +#define BINARY_LIKE_CASE(OffsetCType) \ + { \ + const auto raw_offsets = values.data()->GetValues(1); \ + const auto raw_values = values.data()->GetValues(2); \ + return Calculate(def_levels, rep_levels, num_levels, [&](int64_t i) { \ + const OffsetCType pos = raw_offsets[i]; \ + const OffsetCType length = raw_offsets[i + 1] - pos; \ + Roll(raw_values + pos, length); \ + }); \ + } const std::vector 
ContentDefinedChunker::GetBoundaries( const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, const ::arrow::Array& values) { auto type_id = values.type()->id(); switch (type_id) { - PRIMITIVE_CASE(BOOL, Boolean) - PRIMITIVE_CASE(INT8, Int8) - PRIMITIVE_CASE(INT16, Int16) - PRIMITIVE_CASE(INT32, Int32) - PRIMITIVE_CASE(INT64, Int64) - PRIMITIVE_CASE(UINT8, UInt8) - PRIMITIVE_CASE(UINT16, UInt16) - PRIMITIVE_CASE(UINT32, UInt32) - PRIMITIVE_CASE(UINT64, UInt64) - PRIMITIVE_CASE(HALF_FLOAT, HalfFloat) - PRIMITIVE_CASE(FLOAT, Float) - PRIMITIVE_CASE(DOUBLE, Double) - PRIMITIVE_CASE(STRING, String) - PRIMITIVE_CASE(LARGE_STRING, LargeString) - PRIMITIVE_CASE(BINARY, Binary) - PRIMITIVE_CASE(LARGE_BINARY, LargeBinary) - PRIMITIVE_CASE(FIXED_SIZE_BINARY, FixedSizeBinary) - PRIMITIVE_CASE(DATE32, Date32) - PRIMITIVE_CASE(DATE64, Date64) - PRIMITIVE_CASE(TIME32, Time32) - PRIMITIVE_CASE(TIME64, Time64) - PRIMITIVE_CASE(TIMESTAMP, Timestamp) - PRIMITIVE_CASE(DURATION, Duration) - PRIMITIVE_CASE(DECIMAL128, Decimal128) - PRIMITIVE_CASE(DECIMAL256, Decimal256) + case ::arrow::Type::NA: { + return Calculate(def_levels, rep_levels, num_levels, [](int64_t) {}); + } + case ::arrow::Type::BOOL: { + const auto& bool_array = static_cast(values); + return Calculate(def_levels, rep_levels, num_levels, + [&](int64_t i) { return Roll(bool_array.Value(i)); }); + } + case ::arrow::Type::INT8: + case ::arrow::Type::UINT8: + FIXED_WIDTH_CASE(uint8_t) + case ::arrow::Type::INT16: + case ::arrow::Type::UINT16: + case ::arrow::Type::HALF_FLOAT: + FIXED_WIDTH_CASE(uint16_t) + case ::arrow::Type::INT32: + case ::arrow::Type::UINT32: + case ::arrow::Type::FLOAT: + case ::arrow::Type::DATE32: + case ::arrow::Type::TIME32: + FIXED_WIDTH_CASE(uint32_t) + case ::arrow::Type::INT64: + case ::arrow::Type::UINT64: + case ::arrow::Type::DOUBLE: + case ::arrow::Type::DATE64: + case ::arrow::Type::TIME64: + case ::arrow::Type::TIMESTAMP: + case ::arrow::Type::DURATION: + 
FIXED_WIDTH_CASE(uint64_t) + case ::arrow::Type::BINARY: + case ::arrow::Type::STRING: + BINARY_LIKE_CASE(int32_t) + case ::arrow::Type::LARGE_BINARY: + case ::arrow::Type::LARGE_STRING: + BINARY_LIKE_CASE(int64_t) + case ::arrow::Type::DECIMAL128: + case ::arrow::Type::DECIMAL256: + case ::arrow::Type::FIXED_SIZE_BINARY: { + const auto raw_values = values.data()->GetValues(1); + const auto byte_width = + static_cast(values).byte_width(); + return Calculate(def_levels, rep_levels, num_levels, [&](int64_t i) { + return Roll(raw_values + i * byte_width, byte_width); + }); + } case ::arrow::Type::DICTIONARY: return GetBoundaries( def_levels, rep_levels, num_levels, *static_cast(values).indices()); - case ::arrow::Type::NA: - FakeNullArray fake_null_array; - return Calculate(def_levels, rep_levels, num_levels, fake_null_array); default: throw ParquetException("Unsupported Arrow array type " + values.type()->ToString()); } diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 77ab1b7784a5a..cc550e1a4ddaf 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -132,23 +132,25 @@ class ContentDefinedChunker { const ::arrow::Array& values); private: + void Roll(const bool value); + // Update the rolling hash with a compile-time known sized value, set has_matched_ to // true if the hash matches the mask. template - void Roll(const T value); + void Roll(const T* value); // Update the rolling hash with a binary-like value, set has_matched_ to true if the // hash matches the mask. - void Roll(std::string_view value); + void Roll(const uint8_t* value, int64_t num_bytes); // Evaluate whether a new chunk should be created based on the has_matched_, nth_run_ // and chunk_size_ state. inline bool NeedNewChunk(); // Calculate the chunk boundaries for typed Arrow arrays. 
- template + template const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, - int64_t num_levels, const T& leaf_array); + int64_t num_levels, const RollFunc& RollValue); // Reference to the column's level information const internal::LevelInfo& level_info_; From ccfaa6fcb56edaf57187f755d0959db8434d94a0 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 14:34:56 +0100 Subject: [PATCH 37/48] only hash non-null values in the nested case as well --- cpp/src/parquet/chunker_internal.cc | 54 ++++++++++++----------------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index a85e7ccfaa30f..874ff6f47fabf 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -119,26 +119,26 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev int64_t num_levels, const RollFunc& RollValue) { std::vector chunks; + int64_t offset; + int64_t prev_offset = 0; + int64_t prev_value_offset = 0; bool has_def_levels = level_info_.def_level > 0; bool has_rep_levels = level_info_.rep_level > 0; if (!has_rep_levels && !has_def_levels) { // fastest path for non-nested non-null data - int64_t prev_offset = 0; - for (int64_t offset = 0; offset < num_levels; ++offset) { + for (offset = 0; offset < num_levels; ++offset) { RollValue(offset); if (NeedNewChunk()) { chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); prev_offset = offset; } } - if (prev_offset < num_levels) { - chunks.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); - } + // set the previous value offset to add the last chunk + prev_value_offset = prev_offset; } else if (!has_rep_levels) { // non-nested data with nulls int16_t def_level; - int64_t prev_offset = 0; for (int64_t offset = 0; offset < num_levels; ++offset) { def_level = def_levels[offset]; @@ -151,52 +151,44 @@ const std::vector 
ContentDefinedChunker::Calculate(const int16_t* def_lev prev_offset = offset; } } - if (prev_offset < num_levels) { - chunks.emplace_back(prev_offset, prev_offset, num_levels - prev_offset); - } + // set the previous value offset to add the last chunk + prev_value_offset = prev_offset; } else { // nested data with nulls - bool has_leaf_value; - bool is_record_boundary; int16_t def_level; int16_t rep_level; int64_t value_offset = 0; - int64_t prev_level_offset = 0; - int64_t prev_value_offset = 0; - for (int64_t level_offset = 0; level_offset < num_levels; ++level_offset) { - def_level = def_levels[level_offset]; - rep_level = rep_levels[level_offset]; - - has_leaf_value = def_level >= level_info_.repeated_ancestor_def_level; - is_record_boundary = rep_level == 0; + for (offset = 0; offset < num_levels; ++offset) { + def_level = def_levels[offset]; + rep_level = rep_levels[offset]; Roll(&def_level); Roll(&rep_level); - if (has_leaf_value) { + if (def_level == level_info_.def_level) { RollValue(value_offset); } - if (is_record_boundary && NeedNewChunk()) { - auto levels_to_write = level_offset - prev_level_offset; + if ((rep_level == 0) && NeedNewChunk()) { + // if we are at a record boundary and need a new chunk, we create a new chunk + auto levels_to_write = offset - prev_offset; if (levels_to_write > 0) { - chunks.emplace_back(prev_level_offset, prev_value_offset, levels_to_write); - prev_level_offset = level_offset; + chunks.emplace_back(prev_offset, prev_value_offset, levels_to_write); + prev_offset = offset; prev_value_offset = value_offset; } } - - if (has_leaf_value) { + if (def_level >= level_info_.repeated_ancestor_def_level) { + // we only increment the value offset if we have a leaf value ++value_offset; } } - - auto levels_to_write = num_levels - prev_level_offset; - if (levels_to_write > 0) { - chunks.emplace_back(prev_level_offset, prev_value_offset, levels_to_write); - } } + // add the last chunk if we have any levels left + if (prev_offset < 
num_levels) { + chunks.emplace_back(prev_offset, prev_value_offset, num_levels - prev_offset); + } return chunks; } From 35ef2f98560cfc3adb80f039494d35886b71e73c Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 15:32:24 +0100 Subject: [PATCH 38/48] add docstrings to the hashtable generating pythons script --- cpp/src/parquet/chunker_internal_codegen.py | 34 +++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/chunker_internal_codegen.py b/cpp/src/parquet/chunker_internal_codegen.py index 29cd856f3c47f..f01e1a56d26d8 100644 --- a/cpp/src/parquet/chunker_internal_codegen.py +++ b/cpp/src/parquet/chunker_internal_codegen.py @@ -17,6 +17,32 @@ # specific language governing permissions and limitations # under the License. +""" +Produce the given number gearhash tables for rolling hash calculations. + +Each table consists of 256 64-bit integer values and by default 8 tables are +produced. The tables are written to a header file that can be included in the +C++ code. + +The generated numbers are deterministic "random" numbers created by MD5 hashing +a fixed seed and the table index. This ensures that the tables are the same +across different runs and platforms. The function of generating the numbers is +less important as long as they have sufficiently uniform distribution. + +Reference implementations: +- https://github.com/Borelset/destor/blob/master/src/chunking/fascdc_chunking.c +- https://github.com/nlfiedler/fastcdc-rs/blob/master/examples/table64.rs + +Usage: + python chunker_internal_codegen.py [ntables] + + ntables: Number of gearhash tables to generate (default 8), the + the C++ implementation expects 8 tables so this should not be + changed unless the C++ code is also updated. 
+ + The generated header file is written to ./chunker_internal_generated.h +""" + import hashlib import pathlib import sys @@ -54,18 +80,20 @@ def generate_hash(n: int, seed: int): + """Produce predictable hash values for a given seed and n using MD5.""" value = bytes([seed] * 64 + [n] * 64) hasher = hashlib.md5(value) return hasher.hexdigest()[:16] def generate_hashtable(seed: int, length=256): + """Generate and render a single gearhash table.""" table = [generate_hash(n, seed=seed) for n in range(length)] out = StringIO() out.write(f" {{// seed = {seed}\n") for i in range(0, length, 4): - values = [f"0x{value}" for value in table[i:i + 4]] + values = [f"0x{value}" for value in table[i : i + 4]] values = ", ".join(values) out.write(f" {values}") if i < length - 4: @@ -76,8 +104,8 @@ def generate_hashtable(seed: int, length=256): def generate_header(ntables=8, relative_path="chunker_internal_generated.h"): + """Generate a header file with multiple gearhash tables.""" path = pathlib.Path(__file__).parent / relative_path - tables = [generate_hashtable(seed) for seed in range(ntables)] text = template.format(content=",\n".join(tables)) path.write_text(text) @@ -85,4 +113,4 @@ def generate_header(ntables=8, relative_path="chunker_internal_generated.h"): if __name__ == "__main__": ntables = int(sys.argv[1]) if len(sys.argv) > 1 else 8 - generate_header(ntables) \ No newline at end of file + generate_header(ntables) From 64171f80a92368caaeb0015b99d74c6df485bd69 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 16:17:30 +0100 Subject: [PATCH 39/48] prefer to use signed integers as size arguments --- cpp/src/parquet/chunker_internal.cc | 22 +++- cpp/src/parquet/chunker_internal.h | 12 +- cpp/src/parquet/chunker_internal_test.cc | 148 +++++++++++------------ cpp/src/parquet/properties.h | 20 +-- 4 files changed, 104 insertions(+), 98 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index 
874ff6f47fabf..d13186ec91d4c 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -28,12 +28,12 @@ namespace parquet::internal { -static uint64_t GetMask(uint64_t min_size, uint64_t max_size, uint8_t norm_factor) { +static uint64_t GetMask(int64_t min_size, int64_t max_size, uint8_t norm_factor) { // we aim for gaussian-like distribution of chunk sizes between min_size and max_size - uint64_t avg_size = (min_size + max_size) / 2; + int64_t avg_size = (min_size + max_size) / 2; // we skip calculating gearhash for the first `min_size` bytes, so we are looking for // a smaller chunk as the average size - uint64_t target_size = avg_size - min_size; + int64_t target_size = avg_size - min_size; size_t mask_bits = static_cast(std::floor(std::log2(target_size))); // -3 because we are using 8 hash tables to have more gaussian-like distribution // `norm_factor` narrows the chunk size distribution aroun avg_size @@ -42,12 +42,22 @@ static uint64_t GetMask(uint64_t min_size, uint64_t max_size, uint8_t norm_facto } ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& level_info, - uint64_t min_size, uint64_t max_size, - uint8_t norm_factor) + int64_t min_size, int64_t max_size, + int8_t norm_factor) : level_info_(level_info), min_size_(min_size), max_size_(max_size), - hash_mask_(GetMask(min_size, max_size, norm_factor)) {} + hash_mask_(GetMask(min_size, max_size, norm_factor)) { + if (min_size_ < 0) { + throw ParquetException("min_size must be non-negative"); + } + if (max_size_ < 0) { + throw ParquetException("max_size must be non-negative"); + } + if (min_size_ > max_size_) { + throw ParquetException("min_size must be less than or equal to max_size"); + } +} void ContentDefinedChunker::Roll(const bool value) { if (chunk_size_++ < min_size_) { diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index cc550e1a4ddaf..5299e1c8ec853 100644 --- a/cpp/src/parquet/chunker_internal.h +++ 
b/cpp/src/parquet/chunker_internal.h @@ -117,8 +117,8 @@ class ContentDefinedChunker { /// deduplication ratio is required at the expense of fragmentation, /// norm_factor>2 is typically not increasing the deduplication /// ratio. - ContentDefinedChunker(const LevelInfo& level_info, uint64_t min_size, uint64_t max_size, - uint8_t norm_factor = 0); + ContentDefinedChunker(const LevelInfo& level_info, int64_t min_size, int64_t max_size, + int8_t norm_factor = 0); /// Get the chunk boundaries for the given column data /// @@ -157,8 +157,8 @@ class ContentDefinedChunker { // Minimum chunk size in bytes, the rolling hash will not be updated until this size is // reached for each chunk. Note that all data sent through the hash function is counted // towards the chunk size, including definition and repetition levels. - const uint64_t min_size_; - const uint64_t max_size_; + const int64_t min_size_; + const int64_t max_size_; // The mask to match the rolling hash against to determine if a new chunk should be // created. The mask is calculated based on min/max chunk size and the normalization // factor. @@ -170,9 +170,9 @@ class ContentDefinedChunker { bool has_matched_ = false; // The current run of the rolling hash, used to normalize the chunk size distribution // by requiring multiple consecutive matches to create a new chunk. - uint64_t nth_run_ = 0; + int8_t nth_run_ = 0; // Current chunk size in bytes, reset to 0 when a new chunk is created. - uint64_t chunk_size_ = 0; + int64_t chunk_size_ = 0; // Rolling hash state, never reset only initialized once for the entire column. 
uint64_t rolling_hash_ = 0; }; diff --git a/cpp/src/parquet/chunker_internal_test.cc b/cpp/src/parquet/chunker_internal_test.cc index c4d963217f688..9062b74d53688 100644 --- a/cpp/src/parquet/chunker_internal_test.cc +++ b/cpp/src/parquet/chunker_internal_test.cc @@ -67,13 +67,13 @@ inline uint64_t hash(uint64_t seed, uint64_t index) { template Result> GenerateArray(const std::shared_ptr& type, - bool nullable, int64_t length, uint64_t seed, + bool nullable, int64_t length, int64_t seed, ValueFunc value_func) { BuilderType builder(type, default_memory_pool()); if (nullable) { for (int64_t i = 0; i < length; ++i) { - uint64_t val = hash(seed, i); + int64_t val = hash(seed, i); if (val % 10 == 0) { RETURN_NOT_OK(builder.AppendNull()); } else { @@ -82,7 +82,7 @@ Result> GenerateArray(const std::shared_ptr& ty } } else { for (int64_t i = 0; i < length; ++i) { - uint64_t val = hash(seed, i); + int64_t val = hash(seed, i); RETURN_NOT_OK(builder.Append(value_func(val))); } } @@ -95,12 +95,12 @@ Result> GenerateArray(const std::shared_ptr& ty #define GENERATE_CASE(TYPE_ID, BUILDER_TYPE, VALUE_EXPR) \ case ::arrow::Type::TYPE_ID: { \ - auto value_func = [](uint64_t val) { return VALUE_EXPR; }; \ + auto value_func = [](int64_t val) { return VALUE_EXPR; }; \ return GenerateArray(type, nullable, length, seed, value_func); \ } Result> GenerateArray(const std::shared_ptr& field, - int64_t length, uint64_t seed) { + int64_t length, int64_t seed) { const std::shared_ptr& type = field->type(); bool nullable = field->nullable(); @@ -126,9 +126,7 @@ Result> GenerateArray(const std::shared_ptr& field // Limit the value to fit within the specified precision int32_t max_exponent = decimal_type.precision() - decimal_type.scale(); int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); - auto value_func = [&](uint64_t val) { - return ::arrow::Decimal128(val % max_value); - }; + auto value_func = [&](int64_t val) { return ::arrow::Decimal128(val % max_value); }; return 
GenerateArray<::arrow::Decimal128Builder>(type, nullable, length, seed, value_func); } @@ -138,9 +136,7 @@ Result> GenerateArray(const std::shared_ptr& field // int64_t overflow int32_t max_exponent = std::min(9, decimal_type.precision() - decimal_type.scale()); int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); - auto value_func = [&](uint64_t val) { - return ::arrow::Decimal256(val % max_value); - }; + auto value_func = [&](int64_t val) { return ::arrow::Decimal256(val % max_value); }; return GenerateArray<::arrow::Decimal256Builder>(type, nullable, length, seed, value_func); } @@ -163,7 +159,7 @@ Result> GenerateArray(const std::shared_ptr& field std::string("bin_") + std::to_string(val)) case ::arrow::Type::FIXED_SIZE_BINARY: { auto size = static_cast<::arrow::FixedSizeBinaryType*>(type.get())->byte_width(); - auto value_func = [size](uint64_t val) { + auto value_func = [size](int64_t val) { return std::string("bin_") + std::to_string(val).substr(0, size - 4); }; return GenerateArray<::arrow::FixedSizeBinaryBuilder>(type, nullable, length, seed, @@ -176,7 +172,7 @@ Result> GenerateArray(const std::shared_ptr& field for (auto i = 0; i < struct_type->num_fields(); i++) { ARROW_ASSIGN_OR_RAISE(auto child_array, GenerateArray(struct_type->field(i), length, - seed + static_cast(i + 300))); + seed + static_cast(i + 300))); child_arrays.push_back(child_array); } auto struct_array = @@ -239,7 +235,7 @@ Result> GenerateArray(const std::shared_ptr& field } Result> GenerateTable( - const std::shared_ptr<::arrow::Schema>& schema, int64_t size, uint64_t seed = 0) { + const std::shared_ptr<::arrow::Schema>& schema, int64_t size, int64_t seed = 0) { std::vector> arrays; for (const auto& field : schema->fields()) { ARROW_ASSIGN_OR_RAISE(auto array, GenerateArray(field, size, seed)); @@ -257,8 +253,8 @@ Result> ConcatAndCombine( } Result> WriteTableToBuffer(const std::shared_ptr
& table, - uint64_t min_chunk_size, - uint64_t max_chunk_size, + int64_t min_chunk_size, + int64_t max_chunk_size, bool enable_dictionary = false, int64_t row_group_size = 1024 * 1024) { @@ -293,8 +289,8 @@ Result> ReadTableFromBuffer(const std::shared_ptr } struct PageSizes { - std::vector lengths; - std::vector sizes; + std::vector lengths; + std::vector sizes; }; PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_index = 0) { @@ -322,7 +318,7 @@ PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_ind } Result WriteAndGetPageSizes(const std::shared_ptr
& table, - uint64_t min_chunk_size, uint64_t max_chunk_size, + int64_t min_chunk_size, int64_t max_chunk_size, bool enable_dictionary = false, int column_index = 0) { // Write the table to a buffer and read it back to get the page sizes @@ -339,7 +335,7 @@ Result WriteAndGetPageSizes(const std::shared_ptr
& table, return GetColumnPageSizes(buffer, column_index); } -void AssertAllBetween(const std::vector& values, uint64_t min, uint64_t max, +void AssertAllBetween(const std::vector& values, int64_t min, int64_t max, bool expect_dictionary_fallback = false) { // expect the last chunk since it is not guaranteed to be within the range if (expect_dictionary_fallback) { @@ -365,8 +361,8 @@ void AssertAllBetween(const std::vector& values, uint64_t min, uint64_ ASSERT_LE(values.back(), max); } -std::vector, std::vector>> FindDifferences( - const std::vector& first, const std::vector& second) { +std::vector, std::vector>> FindDifferences( + const std::vector& first, const std::vector& second) { // Compute LCS table. size_t n = first.size(), m = second.size(); std::vector> dp(n + 1, std::vector(m + 1, 0)); @@ -395,7 +391,7 @@ std::vector, std::vector>> FindDiffere std::reverse(common.begin(), common.end()); // Build raw differences. - std::vector, std::vector>> result; + std::vector, std::vector>> result; size_t last_i = 0, last_j = 0; for (auto& c : common) { auto ci = c.first; @@ -414,7 +410,7 @@ std::vector, std::vector>> FindDiffere // Merge adjacent diffs if one side is empty in the first diff and the other side // is empty in the next diff, to avoid splitting single changes into two parts. 
- std::vector, std::vector>> merged; + std::vector, std::vector>> merged; for (auto& diff : result) { if (!merged.empty()) { auto& prev = merged.back(); @@ -438,8 +434,8 @@ std::vector, std::vector>> FindDiffere } void PrintDifferences( - const std::vector& original, const std::vector& modified, - std::vector, std::vector>>& diffs) { + const std::vector& original, const std::vector& modified, + std::vector, std::vector>>& diffs) { std::cout << "Original: "; for (const auto& val : original) { std::cout << val << " "; @@ -468,60 +464,60 @@ void PrintDifferences( } TEST(TestFindDifferences, Basic) { - std::vector first = {1, 2, 3, 4, 5}; - std::vector second = {1, 7, 8, 4, 5}; + std::vector first = {1, 2, 3, 4, 5}; + std::vector second = {1, 7, 8, 4, 5}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({2, 3})); - ASSERT_EQ(diffs[0].second, std::vector({7, 8})); + ASSERT_EQ(diffs[0].first, std::vector({2, 3})); + ASSERT_EQ(diffs[0].second, std::vector({7, 8})); } TEST(TestFindDifferences, MultipleDifferences) { - std::vector first = {1, 2, 3, 4, 5, 6, 7}; - std::vector second = {1, 8, 9, 4, 10, 6, 11}; + std::vector first = {1, 2, 3, 4, 5, 6, 7}; + std::vector second = {1, 8, 9, 4, 10, 6, 11}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 3); - ASSERT_EQ(diffs[0].first, std::vector({2, 3})); - ASSERT_EQ(diffs[0].second, std::vector({8, 9})); + ASSERT_EQ(diffs[0].first, std::vector({2, 3})); + ASSERT_EQ(diffs[0].second, std::vector({8, 9})); - ASSERT_EQ(diffs[1].first, std::vector({5})); - ASSERT_EQ(diffs[1].second, std::vector({10})); + ASSERT_EQ(diffs[1].first, std::vector({5})); + ASSERT_EQ(diffs[1].second, std::vector({10})); - ASSERT_EQ(diffs[2].first, std::vector({7})); - ASSERT_EQ(diffs[2].second, std::vector({11})); + ASSERT_EQ(diffs[2].first, std::vector({7})); + ASSERT_EQ(diffs[2].second, std::vector({11})); } TEST(TestFindDifferences, DifferentLengths) { - std::vector 
first = {1, 2, 3}; - std::vector second = {1, 2, 3, 4, 5}; + std::vector first = {1, 2, 3}; + std::vector second = {1, 2, 3, 4, 5}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); ASSERT_TRUE(diffs[0].first.empty()); - ASSERT_EQ(diffs[0].second, std::vector({4, 5})); + ASSERT_EQ(diffs[0].second, std::vector({4, 5})); } TEST(TestFindDifferences, EmptyArrays) { - std::vector first = {}; - std::vector second = {}; + std::vector first = {}; + std::vector second = {}; auto diffs = FindDifferences(first, second); ASSERT_TRUE(diffs.empty()); } TEST(TestFindDifferences, LongSequenceWithSingleDifference) { - std::vector first = { + std::vector first = { 1994, 2193, 2700, 1913, 2052, }; - std::vector second = {2048, 43, 2080, 2700, 1913, 2052}; + std::vector second = {2048, 43, 2080, 2700, 1913, 2052}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({1994, 2193})); - ASSERT_EQ(diffs[0].second, std::vector({2048, 43, 2080})); + ASSERT_EQ(diffs[0].first, std::vector({1994, 2193})); + ASSERT_EQ(diffs[0].second, std::vector({2048, 43, 2080})); // Verify that elements after the difference are identical for (size_t i = 3; i < second.size(); i++) { @@ -530,15 +526,15 @@ TEST(TestFindDifferences, LongSequenceWithSingleDifference) { } TEST(TestFindDifferences, LongSequenceWithMiddleChanges) { - std::vector first = {2169, 1976, 2180, 2147, 1934, 1772, - 1914, 2075, 2154, 1940, 1934, 1970}; - std::vector second = {2169, 1976, 2180, 2147, 2265, 1804, - 1717, 1925, 2122, 1940, 1934, 1970}; + std::vector first = {2169, 1976, 2180, 2147, 1934, 1772, + 1914, 2075, 2154, 1940, 1934, 1970}; + std::vector second = {2169, 1976, 2180, 2147, 2265, 1804, + 1717, 1925, 2122, 1940, 1934, 1970}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({1934, 1772, 1914, 2075, 2154})); - ASSERT_EQ(diffs[0].second, std::vector({2265, 1804, 1717, 1925, 
2122})); + ASSERT_EQ(diffs[0].first, std::vector({1934, 1772, 1914, 2075, 2154})); + ASSERT_EQ(diffs[0].second, std::vector({2265, 1804, 1717, 1925, 2122})); // Verify elements before and after the difference are identical for (size_t i = 0; i < 4; i++) { @@ -550,14 +546,14 @@ TEST(TestFindDifferences, LongSequenceWithMiddleChanges) { } TEST(TestFindDifferences, AdditionalCase) { - std::vector original = {445, 312, 393, 401, 410, 138, 558, 457}; - std::vector modified = {445, 312, 393, 393, 410, 138, 558, 457}; + std::vector original = {445, 312, 393, 401, 410, 138, 558, 457}; + std::vector modified = {445, 312, 393, 393, 410, 138, 558, 457}; auto diffs = FindDifferences(original, modified); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({401})); - ASSERT_EQ(diffs[0].second, std::vector({393})); + ASSERT_EQ(diffs[0].first, std::vector({401})); + ASSERT_EQ(diffs[0].second, std::vector({393})); // Verify elements before and after the difference are identical for (size_t i = 0; i < 3; i++) { @@ -569,8 +565,8 @@ TEST(TestFindDifferences, AdditionalCase) { } void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, - const std::vector& original, - const std::vector& modified, uint8_t n_modifications) { + const std::vector& original, + const std::vector& modified, uint8_t n_modifications) { auto diffs = FindDifferences(original, modified); if (diffs.size() > n_modifications) { PrintDifferences(original, modified, diffs); @@ -595,9 +591,9 @@ void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, } void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, - const std::vector& original, - const std::vector& modified, uint8_t n_modifications, - uint64_t edit_length) { + const std::vector& original, + const std::vector& modified, uint8_t n_modifications, + int64_t edit_length) { auto diffs = FindDifferences(original, modified); if (diffs.size() != n_modifications) { PrintDifferences(original, modified, diffs); @@ 
-606,7 +602,7 @@ void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, for (const auto& diff : diffs) { if (!::arrow::is_list_like(dtype->id())) { - uint64_t left_sum = 0, right_sum = 0; + int64_t left_sum = 0, right_sum = 0; for (const auto& val : diff.first) left_sum += val; for (const auto& val : diff.second) right_sum += val; ASSERT_EQ(left_sum, right_sum + edit_length); @@ -617,9 +613,9 @@ void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, } void AssertInsertCase(const std::shared_ptr<::arrow::DataType>& dtype, - const std::vector& original, - const std::vector& modified, uint8_t n_modifications, - uint64_t edit_length) { + const std::vector& original, + const std::vector& modified, uint8_t n_modifications, + int64_t edit_length) { auto diffs = FindDifferences(original, modified); if (diffs.size() != n_modifications) { PrintDifferences(original, modified, diffs); @@ -638,8 +634,8 @@ void AssertInsertCase(const std::shared_ptr<::arrow::DataType>& dtype, } } -void AssertAppendCase(const std::vector& original, - const std::vector& modified) { +void AssertAppendCase(const std::vector& original, + const std::vector& modified) { ASSERT_GE(modified.size(), original.size()); for (size_t i = 0; i < original.size() - 1; i++) { ASSERT_EQ(original[i], modified[i]); @@ -647,7 +643,7 @@ void AssertAppendCase(const std::vector& original, ASSERT_GT(modified[original.size() - 1], original.back()); } -uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { +uint64_t ElementCount(int64_t size, int32_t byte_width, bool nullable) { if (nullable) { // in case of nullable types the def_levels are also fed through the chunker // to identify changes in the null bitmap, this will increase the byte width @@ -659,9 +655,9 @@ uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, PageSizes base_result, PageSizes modified_result, bool nullable, - 
bool enable_dictionary, uint64_t min_chunk_size, - uint64_t max_chunk_size) { - max_chunk_size = static_cast(max_chunk_size * 1.2); + bool enable_dictionary, int64_t min_chunk_size, + int64_t max_chunk_size) { + max_chunk_size = static_cast(max_chunk_size * 1.2); if (::arrow::is_fixed_width(dtype->id())) { auto min_length = ElementCount(min_chunk_size, dtype->byte_width(), nullable); auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); @@ -676,10 +672,10 @@ void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, } } -constexpr uint64_t kMinChunkSize = 8 * 1024; -constexpr uint64_t kMaxChunkSize = 32 * 1024; -constexpr uint64_t kPartSize = 128 * 1024; -constexpr uint64_t kEditSize = 128; +constexpr int64_t kMinChunkSize = 8 * 1024; +constexpr int64_t kMaxChunkSize = 32 * 1024; +constexpr int64_t kPartSize = 128 * 1024; +constexpr int64_t kEditSize = 128; class TestColumnCDC : public ::testing::TestWithParam< std::tuple, bool, size_t>> { diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 2674fadb05044..327bcb2127fea 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -167,9 +167,9 @@ static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOM static constexpr bool DEFAULT_IS_PAGE_INDEX_ENABLED = true; static constexpr SizeStatisticsLevel DEFAULT_SIZE_STATISTICS_LEVEL = SizeStatisticsLevel::PageAndColumnChunk; -static constexpr std::pair DEFAULT_CDC_SIZE_RANGE = +static constexpr std::pair DEFAULT_CDC_SIZE_RANGE = std::make_pair(256 * 1024, 1024 * 1024); -static constexpr uint8_t DEFAULT_CDC_NORM_FACTOR = 0; +static constexpr int8_t DEFAULT_CDC_NORM_FACTOR = 0; class PARQUET_EXPORT ColumnProperties { public: @@ -298,12 +298,12 @@ class PARQUET_EXPORT WriterProperties { return this; } - Builder* cdc_size_range(uint64_t min_size, uint64_t max_size) { + Builder* cdc_size_range(int64_t min_size, int64_t max_size) { cdc_size_range_ = 
std::make_pair(min_size, max_size); return this; } - Builder* cdc_norm_factor(uint8_t norm_factor) { + Builder* cdc_norm_factor(int8_t norm_factor) { cdc_norm_factor_ = norm_factor; return this; } @@ -762,8 +762,8 @@ class PARQUET_EXPORT WriterProperties { std::unordered_map page_index_enabled_; bool cdc_enabled_; - std::pair cdc_size_range_; - uint8_t cdc_norm_factor_; + std::pair cdc_size_range_; + int8_t cdc_norm_factor_; }; inline MemoryPool* memory_pool() const { return pool_; } @@ -789,8 +789,8 @@ class PARQUET_EXPORT WriterProperties { inline bool page_checksum_enabled() const { return page_checksum_enabled_; } inline bool cdc_enabled() const { return cdc_enabled_; } - inline std::pair cdc_size_range() const { return cdc_size_range_; } - inline uint8_t cdc_norm_factor() const { return cdc_norm_factor_; } + inline std::pair cdc_size_range() const { return cdc_size_range_; } + inline int8_t cdc_norm_factor() const { return cdc_norm_factor_; } inline SizeStatisticsLevel size_statistics_level() const { return size_statistics_level_; @@ -895,7 +895,7 @@ class PARQUET_EXPORT WriterProperties { const std::unordered_map& column_properties, ParquetDataPageVersion data_page_version, bool store_short_decimal_as_integer, std::vector sorting_columns, bool cdc_enabled, - std::pair cdc_size_range, uint8_t cdc_norm_factor) + std::pair cdc_size_range, int8_t cdc_norm_factor) : pool_(pool), dictionary_pagesize_limit_(dictionary_pagesize_limit), write_batch_size_(write_batch_size), @@ -936,7 +936,7 @@ class PARQUET_EXPORT WriterProperties { bool cdc_enabled_; std::pair cdc_size_range_; - uint8_t cdc_norm_factor_; + int8_t cdc_norm_factor_; }; PARQUET_EXPORT const std::shared_ptr& default_writer_properties(); From 786e6a046c73a9dc7e5c773e32710e3413740882 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 16:54:50 +0100 Subject: [PATCH 40/48] use type aliases for better readability in tests --- cpp/src/parquet/chunker_internal_test.cc | 230 
++++++++++++----------- 1 file changed, 118 insertions(+), 112 deletions(-) diff --git a/cpp/src/parquet/chunker_internal_test.cc b/cpp/src/parquet/chunker_internal_test.cc index 9062b74d53688..740d16c7e2e17 100644 --- a/cpp/src/parquet/chunker_internal_test.cc +++ b/cpp/src/parquet/chunker_internal_test.cc @@ -67,13 +67,13 @@ inline uint64_t hash(uint64_t seed, uint64_t index) { template Result> GenerateArray(const std::shared_ptr& type, - bool nullable, int64_t length, int64_t seed, + bool nullable, int64_t length, uint64_t seed, ValueFunc value_func) { BuilderType builder(type, default_memory_pool()); if (nullable) { for (int64_t i = 0; i < length; ++i) { - int64_t val = hash(seed, i); + uint64_t val = hash(seed, i); if (val % 10 == 0) { RETURN_NOT_OK(builder.AppendNull()); } else { @@ -82,7 +82,7 @@ Result> GenerateArray(const std::shared_ptr& ty } } else { for (int64_t i = 0; i < length; ++i) { - int64_t val = hash(seed, i); + uint64_t val = hash(seed, i); RETURN_NOT_OK(builder.Append(value_func(val))); } } @@ -95,7 +95,7 @@ Result> GenerateArray(const std::shared_ptr& ty #define GENERATE_CASE(TYPE_ID, BUILDER_TYPE, VALUE_EXPR) \ case ::arrow::Type::TYPE_ID: { \ - auto value_func = [](int64_t val) { return VALUE_EXPR; }; \ + auto value_func = [](uint64_t val) { return VALUE_EXPR; }; \ return GenerateArray(type, nullable, length, seed, value_func); \ } @@ -126,7 +126,9 @@ Result> GenerateArray(const std::shared_ptr& field // Limit the value to fit within the specified precision int32_t max_exponent = decimal_type.precision() - decimal_type.scale(); int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); - auto value_func = [&](int64_t val) { return ::arrow::Decimal128(val % max_value); }; + auto value_func = [&](uint64_t val) { + return ::arrow::Decimal128(val % max_value); + }; return GenerateArray<::arrow::Decimal128Builder>(type, nullable, length, seed, value_func); } @@ -136,7 +138,9 @@ Result> GenerateArray(const std::shared_ptr& field // int64_t 
overflow int32_t max_exponent = std::min(9, decimal_type.precision() - decimal_type.scale()); int64_t max_value = static_cast(std::pow(10, max_exponent) - 1); - auto value_func = [&](int64_t val) { return ::arrow::Decimal256(val % max_value); }; + auto value_func = [&](uint64_t val) { + return ::arrow::Decimal256(val % max_value); + }; return GenerateArray<::arrow::Decimal256Builder>(type, nullable, length, seed, value_func); } @@ -159,7 +163,7 @@ Result> GenerateArray(const std::shared_ptr& field std::string("bin_") + std::to_string(val)) case ::arrow::Type::FIXED_SIZE_BINARY: { auto size = static_cast<::arrow::FixedSizeBinaryType*>(type.get())->byte_width(); - auto value_func = [size](int64_t val) { + auto value_func = [size](uint64_t val) { return std::string("bin_") + std::to_string(val).substr(0, size - 4); }; return GenerateArray<::arrow::FixedSizeBinaryBuilder>(type, nullable, length, seed, @@ -172,7 +176,7 @@ Result> GenerateArray(const std::shared_ptr& field for (auto i = 0; i < struct_type->num_fields(); i++) { ARROW_ASSIGN_OR_RAISE(auto child_array, GenerateArray(struct_type->field(i), length, - seed + static_cast(i + 300))); + seed + static_cast(i + 300))); child_arrays.push_back(child_array); } auto struct_array = @@ -256,7 +260,6 @@ Result> WriteTableToBuffer(const std::shared_ptr
& int64_t min_chunk_size, int64_t max_chunk_size, bool enable_dictionary = false, - int64_t row_group_size = 1024 * 1024) { auto sink = CreateOutputStream(); @@ -288,16 +291,20 @@ Result> ReadTableFromBuffer(const std::shared_ptr return result; } -struct PageSizes { - std::vector lengths; - std::vector sizes; +// Type to represent a list of chunks where each element is the size of the chunk. +using ChunkList = std::vector; + +// Type to represent the sizes and lengths of the data pages in a column. +struct PageInfo { + ChunkList lengths; + ChunkList sizes; }; -PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_index = 0) { +PageInfo GetColumnPageInfo(const std::shared_ptr& data, int column_index = 0) { // Read the parquet data out of the buffer and get the sizes and lengths of the // data pages in given column. We assert on the sizes and lengths of the pages // to ensure that the chunking is done correctly. - PageSizes result; + PageInfo result; auto buffer_reader = std::make_shared(data); auto parquet_reader = ParquetFileReader::Open(std::move(buffer_reader)); @@ -317,10 +324,10 @@ PageSizes GetColumnPageSizes(const std::shared_ptr& data, int column_ind return result; } -Result WriteAndGetPageSizes(const std::shared_ptr
& table, - int64_t min_chunk_size, int64_t max_chunk_size, - bool enable_dictionary = false, - int column_index = 0) { +Result WriteAndGetPageInfo(const std::shared_ptr
& table, + uint64_t min_chunk_size, uint64_t max_chunk_size, + bool enable_dictionary = false, + int column_index = 0) { // Write the table to a buffer and read it back to get the page sizes ARROW_ASSIGN_OR_RAISE( auto buffer, @@ -332,10 +339,10 @@ Result WriteAndGetPageSizes(const std::shared_ptr
& table, ARROW_RETURN_IF(!readback->Equals(*table), Status::Invalid("Readback table not equal to original")); } - return GetColumnPageSizes(buffer, column_index); + return GetColumnPageInfo(buffer, column_index); } -void AssertAllBetween(const std::vector& values, int64_t min, int64_t max, +void AssertAllBetween(const ChunkList& chunks, int64_t min, int64_t max, bool expect_dictionary_fallback = false) { // expect the last chunk since it is not guaranteed to be within the range if (expect_dictionary_fallback) { @@ -344,26 +351,29 @@ void AssertAllBetween(const std::vector& values, int64_t min, int64_t m // guarantee that all chunks are within the range in this case, but we // know that there can be at most 2 pages smaller than the min_chunk_size size_t smaller_count = 0; - for (size_t i = 0; i < values.size() - 1; i++) { - if (values[i] < min) { + for (size_t i = 0; i < chunks.size() - 1; i++) { + if (chunks[i] < min) { smaller_count++; } else { - ASSERT_LE(values[i], max); + ASSERT_LE(chunks[i], max); } } ASSERT_LE(smaller_count, 2); } else { - for (size_t i = 0; i < values.size() - 1; i++) { - ASSERT_GE(values[i], min); - ASSERT_LE(values[i], max); + for (size_t i = 0; i < chunks.size() - 1; i++) { + ASSERT_GE(chunks[i], min); + ASSERT_LE(chunks[i], max); } } - ASSERT_LE(values.back(), max); + ASSERT_LE(chunks.back(), max); } -std::vector, std::vector>> FindDifferences( - const std::vector& first, const std::vector& second) { - // Compute LCS table. +// A git-hunk like side-by-side data structure to represent the differences between two +// vectors of uint64_t values. +using ChunkDiff = std::pair; + +std::vector FindDifferences(const ChunkList& first, const ChunkList& second) { + // Compute longest-common-subsequence between the two vectors. 
size_t n = first.size(), m = second.size(); std::vector> dp(n + 1, std::vector(m + 1, 0)); for (size_t i = 0; i < n; i++) { @@ -391,7 +401,7 @@ std::vector, std::vector>> FindDifferenc std::reverse(common.begin(), common.end()); // Build raw differences. - std::vector, std::vector>> result; + std::vector result; size_t last_i = 0, last_j = 0; for (auto& c : common) { auto ci = c.first; @@ -410,7 +420,7 @@ std::vector, std::vector>> FindDifferenc // Merge adjacent diffs if one side is empty in the first diff and the other side // is empty in the next diff, to avoid splitting single changes into two parts. - std::vector, std::vector>> merged; + std::vector merged; for (auto& diff : result) { if (!merged.empty()) { auto& prev = merged.back(); @@ -433,9 +443,8 @@ std::vector, std::vector>> FindDifferenc return merged; } -void PrintDifferences( - const std::vector& original, const std::vector& modified, - std::vector, std::vector>>& diffs) { +void PrintDifferences(const ChunkList& original, const ChunkList& modified, + std::vector& diffs) { std::cout << "Original: "; for (const auto& val : original) { std::cout << val << " "; @@ -464,60 +473,60 @@ void PrintDifferences( } TEST(TestFindDifferences, Basic) { - std::vector first = {1, 2, 3, 4, 5}; - std::vector second = {1, 7, 8, 4, 5}; + ChunkList first = {1, 2, 3, 4, 5}; + ChunkList second = {1, 7, 8, 4, 5}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({2, 3})); - ASSERT_EQ(diffs[0].second, std::vector({7, 8})); + ASSERT_EQ(diffs[0].first, ChunkList({2, 3})); + ASSERT_EQ(diffs[0].second, ChunkList({7, 8})); } TEST(TestFindDifferences, MultipleDifferences) { - std::vector first = {1, 2, 3, 4, 5, 6, 7}; - std::vector second = {1, 8, 9, 4, 10, 6, 11}; + ChunkList first = {1, 2, 3, 4, 5, 6, 7}; + ChunkList second = {1, 8, 9, 4, 10, 6, 11}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 3); - ASSERT_EQ(diffs[0].first, std::vector({2, 
3})); - ASSERT_EQ(diffs[0].second, std::vector({8, 9})); + ASSERT_EQ(diffs[0].first, ChunkList({2, 3})); + ASSERT_EQ(diffs[0].second, ChunkList({8, 9})); - ASSERT_EQ(diffs[1].first, std::vector({5})); - ASSERT_EQ(diffs[1].second, std::vector({10})); + ASSERT_EQ(diffs[1].first, ChunkList({5})); + ASSERT_EQ(diffs[1].second, ChunkList({10})); - ASSERT_EQ(diffs[2].first, std::vector({7})); - ASSERT_EQ(diffs[2].second, std::vector({11})); + ASSERT_EQ(diffs[2].first, ChunkList({7})); + ASSERT_EQ(diffs[2].second, ChunkList({11})); } TEST(TestFindDifferences, DifferentLengths) { - std::vector first = {1, 2, 3}; - std::vector second = {1, 2, 3, 4, 5}; + ChunkList first = {1, 2, 3}; + ChunkList second = {1, 2, 3, 4, 5}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); ASSERT_TRUE(diffs[0].first.empty()); - ASSERT_EQ(diffs[0].second, std::vector({4, 5})); + ASSERT_EQ(diffs[0].second, ChunkList({4, 5})); } TEST(TestFindDifferences, EmptyArrays) { - std::vector first = {}; - std::vector second = {}; + ChunkList first = {}; + ChunkList second = {}; auto diffs = FindDifferences(first, second); ASSERT_TRUE(diffs.empty()); } TEST(TestFindDifferences, LongSequenceWithSingleDifference) { - std::vector first = { + ChunkList first = { 1994, 2193, 2700, 1913, 2052, }; - std::vector second = {2048, 43, 2080, 2700, 1913, 2052}; + ChunkList second = {2048, 43, 2080, 2700, 1913, 2052}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({1994, 2193})); - ASSERT_EQ(diffs[0].second, std::vector({2048, 43, 2080})); + ASSERT_EQ(diffs[0].first, ChunkList({1994, 2193})); + ASSERT_EQ(diffs[0].second, ChunkList({2048, 43, 2080})); // Verify that elements after the difference are identical for (size_t i = 3; i < second.size(); i++) { @@ -526,15 +535,15 @@ TEST(TestFindDifferences, LongSequenceWithSingleDifference) { } TEST(TestFindDifferences, LongSequenceWithMiddleChanges) { - std::vector first = {2169, 1976, 2180, 
2147, 1934, 1772, - 1914, 2075, 2154, 1940, 1934, 1970}; - std::vector second = {2169, 1976, 2180, 2147, 2265, 1804, - 1717, 1925, 2122, 1940, 1934, 1970}; + ChunkList first = {2169, 1976, 2180, 2147, 1934, 1772, + 1914, 2075, 2154, 1940, 1934, 1970}; + ChunkList second = {2169, 1976, 2180, 2147, 2265, 1804, + 1717, 1925, 2122, 1940, 1934, 1970}; auto diffs = FindDifferences(first, second); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({1934, 1772, 1914, 2075, 2154})); - ASSERT_EQ(diffs[0].second, std::vector({2265, 1804, 1717, 1925, 2122})); + ASSERT_EQ(diffs[0].first, ChunkList({1934, 1772, 1914, 2075, 2154})); + ASSERT_EQ(diffs[0].second, ChunkList({2265, 1804, 1717, 1925, 2122})); // Verify elements before and after the difference are identical for (size_t i = 0; i < 4; i++) { @@ -546,14 +555,14 @@ TEST(TestFindDifferences, LongSequenceWithMiddleChanges) { } TEST(TestFindDifferences, AdditionalCase) { - std::vector original = {445, 312, 393, 401, 410, 138, 558, 457}; - std::vector modified = {445, 312, 393, 393, 410, 138, 558, 457}; + ChunkList original = {445, 312, 393, 401, 410, 138, 558, 457}; + ChunkList modified = {445, 312, 393, 393, 410, 138, 558, 457}; auto diffs = FindDifferences(original, modified); ASSERT_EQ(diffs.size(), 1); - ASSERT_EQ(diffs[0].first, std::vector({401})); - ASSERT_EQ(diffs[0].second, std::vector({393})); + ASSERT_EQ(diffs[0].first, ChunkList({401})); + ASSERT_EQ(diffs[0].second, ChunkList({393})); // Verify elements before and after the difference are identical for (size_t i = 0; i < 3; i++) { @@ -565,8 +574,8 @@ TEST(TestFindDifferences, AdditionalCase) { } void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, - const std::vector& original, - const std::vector& modified, uint8_t n_modifications) { + const ChunkList& original, const ChunkList& modified, + uint8_t n_modifications) { auto diffs = FindDifferences(original, modified); if (diffs.size() > n_modifications) { 
PrintDifferences(original, modified, diffs); @@ -591,9 +600,8 @@ void AssertUpdateCase(const std::shared_ptr<::arrow::DataType>& dtype, } void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, - const std::vector& original, - const std::vector& modified, uint8_t n_modifications, - int64_t edit_length) { + const ChunkList& original, const ChunkList& modified, + uint8_t n_modifications, uint64_t edit_length) { auto diffs = FindDifferences(original, modified); if (diffs.size() != n_modifications) { PrintDifferences(original, modified, diffs); @@ -602,7 +610,7 @@ void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, for (const auto& diff : diffs) { if (!::arrow::is_list_like(dtype->id())) { - int64_t left_sum = 0, right_sum = 0; + uint64_t left_sum = 0, right_sum = 0; for (const auto& val : diff.first) left_sum += val; for (const auto& val : diff.second) right_sum += val; ASSERT_EQ(left_sum, right_sum + edit_length); @@ -613,9 +621,8 @@ void AssertDeleteCase(const std::shared_ptr<::arrow::DataType>& dtype, } void AssertInsertCase(const std::shared_ptr<::arrow::DataType>& dtype, - const std::vector& original, - const std::vector& modified, uint8_t n_modifications, - int64_t edit_length) { + const ChunkList& original, const ChunkList& modified, + uint8_t n_modifications, uint64_t edit_length) { auto diffs = FindDifferences(original, modified); if (diffs.size() != n_modifications) { PrintDifferences(original, modified, diffs); @@ -634,8 +641,7 @@ void AssertInsertCase(const std::shared_ptr<::arrow::DataType>& dtype, } } -void AssertAppendCase(const std::vector& original, - const std::vector& modified) { +void AssertAppendCase(const ChunkList& original, const ChunkList& modified) { ASSERT_GE(modified.size(), original.size()); for (size_t i = 0; i < original.size() - 1; i++) { ASSERT_EQ(original[i], modified[i]); @@ -643,7 +649,7 @@ void AssertAppendCase(const std::vector& original, ASSERT_GT(modified[original.size() - 1], original.back()); 
} -uint64_t ElementCount(int64_t size, int32_t byte_width, bool nullable) { +uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { if (nullable) { // in case of nullable types the def_levels are also fed through the chunker // to identify changes in the null bitmap, this will increase the byte width @@ -654,10 +660,10 @@ uint64_t ElementCount(int64_t size, int32_t byte_width, bool nullable) { } void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, - PageSizes base_result, PageSizes modified_result, bool nullable, - bool enable_dictionary, int64_t min_chunk_size, - int64_t max_chunk_size) { - max_chunk_size = static_cast(max_chunk_size * 1.2); + PageInfo base_result, PageInfo modified_result, bool nullable, + bool enable_dictionary, uint64_t min_chunk_size, + uint64_t max_chunk_size) { + max_chunk_size = static_cast(max_chunk_size * 1.2); if (::arrow::is_fixed_width(dtype->id())) { auto min_length = ElementCount(min_chunk_size, dtype->byte_width(), nullable); auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); @@ -710,11 +716,11 @@ TEST_P(TestColumnCDC, DeleteOnce) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ -734,11 +740,11 @@ TEST_P(TestColumnCDC, DeleteTwice) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, 
kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ -756,11 +762,11 @@ TEST_P(TestColumnCDC, UpdateOnce) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ -779,11 +785,11 @@ TEST_P(TestColumnCDC, UpdateTwice) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ 
-800,11 +806,11 @@ TEST_P(TestColumnCDC, InsertOnce) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ -823,11 +829,11 @@ TEST_P(TestColumnCDC, InsertTwice) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ -845,11 +851,11 @@ TEST_P(TestColumnCDC, Append) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageSizes(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageSizes(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(modified, kMinChunkSize, 
kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, kMinChunkSize, kMaxChunkSize); @@ -866,8 +872,8 @@ TEST_P(TestColumnCDC, EmptyTable) { for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto result, - WriteAndGetPageSizes(empty_table, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + WriteAndGetPageInfo(empty_table, kMinChunkSize, kMaxChunkSize, + /*enable_dictionary=*/enable_dictionary)); // An empty table should result in no data pages ASSERT_TRUE(result.lengths.empty()); From 683f87193d4d6e1f94b9fc85b5c5f2cab8e1348b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 17:26:12 +0100 Subject: [PATCH 41/48] use explicit struct instead of tuples for the test case configuration --- cpp/src/parquet/chunker_internal_test.cc | 230 +++++++++++------------ 1 file changed, 113 insertions(+), 117 deletions(-) diff --git a/cpp/src/parquet/chunker_internal_test.cc b/cpp/src/parquet/chunker_internal_test.cc index 740d16c7e2e17..4614d2511e2b4 100644 --- a/cpp/src/parquet/chunker_internal_test.cc +++ b/cpp/src/parquet/chunker_internal_test.cc @@ -649,7 +649,7 @@ void AssertAppendCase(const ChunkList& original, const ChunkList& modified) { ASSERT_GT(modified[original.size() - 1], original.back()); } -uint64_t ElementCount(uint64_t size, int32_t byte_width, bool nullable) { +uint64_t ElementCount(int64_t size, int32_t byte_width, bool nullable) { if (nullable) { // in case of nullable types the def_levels are also fed through the chunker // to identify changes in the null bitmap, this will increase the byte width @@ -683,20 +683,29 @@ constexpr int64_t kMaxChunkSize = 32 * 1024; constexpr int64_t kPartSize = 128 * 1024; constexpr int64_t kEditSize = 128; -class TestColumnCDC : public ::testing::TestWithParam< - std::tuple, bool, size_t>> { +struct CaseConfig { + // Arrow data type to generate the testing data for + 
std::shared_ptr<::arrow::DataType> dtype; + // Whether the data type is nullable + bool is_nullable; + // Approximate number of bytes per record to calculate the number of elements to + // generate + size_t bytes_per_record; +}; + +class TestColumnCDC : public ::testing::TestWithParam { protected: // Column random table parts for testing std::shared_ptr field_; std::shared_ptr
part1_, part2_, part3_, part4_, part5_, part6_, part7_; void SetUp() override { - auto [dtype, nullable, byte_per_record] = GetParam(); - auto field_ = ::arrow::field("f0", dtype, nullable); + const auto& param = GetParam(); + auto field_ = ::arrow::field("f0", param.dtype, param.is_nullable); auto schema = ::arrow::schema({field_}); - auto part_length = kPartSize / byte_per_record; - auto edit_length = kEditSize / byte_per_record; + auto part_length = kPartSize / param.bytes_per_record; + auto edit_length = kEditSize / param.bytes_per_record; ASSERT_OK_AND_ASSIGN(part1_, GenerateTable(schema, part_length, 0)); ASSERT_OK_AND_ASSIGN(part2_, GenerateTable(schema, edit_length, 1)); ASSERT_OK_AND_ASSIGN(part3_, GenerateTable(schema, part_length, part_length)); @@ -708,30 +717,30 @@ class TestColumnCDC : public ::testing::TestWithParam< }; TEST_P(TestColumnCDC, DeleteOnce) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_})); ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part3_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); + AssertChunkSizes(param.dtype, base_result, modified_result, param.is_nullable, + enable_dictionary, kMinChunkSize, kMaxChunkSize); - 
AssertDeleteCase(dtype, base_result.lengths, modified_result.lengths, 1, + AssertDeleteCase(param.dtype, base_result.lengths, modified_result.lengths, 1, part2_->num_rows()); } } TEST_P(TestColumnCDC, DeleteTwice) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_, part4_, part5_})); @@ -739,43 +748,43 @@ TEST_P(TestColumnCDC, DeleteTwice) { ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertDeleteCase(dtype, base_result.lengths, modified_result.lengths, 2, + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + + AssertChunkSizes(param.dtype, base_result, modified_result, param.is_nullable, + enable_dictionary, kMinChunkSize, kMaxChunkSize); + AssertDeleteCase(param.dtype, base_result.lengths, modified_result.lengths, 2, part2_->num_rows()); } } TEST_P(TestColumnCDC, UpdateOnce) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_})); ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part4_, part3_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - 
/*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertUpdateCase(dtype, base_result.lengths, modified_result.lengths, 1); + AssertChunkSizes(param.dtype, base_result, modified_result, param.is_nullable, + enable_dictionary, kMinChunkSize, kMaxChunkSize); + AssertUpdateCase(param.dtype, base_result.lengths, modified_result.lengths, 1); } } TEST_P(TestColumnCDC, UpdateTwice) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_, part4_, part5_})); @@ -784,43 +793,43 @@ TEST_P(TestColumnCDC, UpdateTwice) { ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertUpdateCase(dtype, base_result.lengths, modified_result.lengths, 2); + AssertChunkSizes(param.dtype, 
base_result, modified_result, param.is_nullable, + enable_dictionary, kMinChunkSize, kMaxChunkSize); + AssertUpdateCase(param.dtype, base_result.lengths, modified_result.lengths, 2); } } TEST_P(TestColumnCDC, InsertOnce) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part3_})); ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part2_, part3_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertInsertCase(dtype, base_result.lengths, modified_result.lengths, 1, + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + + AssertChunkSizes(param.dtype, base_result, modified_result, param.is_nullable, + enable_dictionary, kMinChunkSize, kMaxChunkSize); + AssertInsertCase(param.dtype, base_result.lengths, modified_result.lengths, 1, part2_->num_rows()); } } TEST_P(TestColumnCDC, InsertTwice) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part3_, part5_})); ASSERT_OK_AND_ASSIGN(auto modified, @@ -828,52 +837,52 @@ TEST_P(TestColumnCDC, InsertTwice) { ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - 
/*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); - AssertInsertCase(dtype, base_result.lengths, modified_result.lengths, 2, + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + + AssertChunkSizes(param.dtype, base_result, modified_result, param.is_nullable, + enable_dictionary, kMinChunkSize, kMaxChunkSize); + AssertInsertCase(param.dtype, base_result.lengths, modified_result.lengths, 2, part2_->num_rows()); } } TEST_P(TestColumnCDC, Append) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); ASSERT_OK_AND_ASSIGN(auto base, ConcatAndCombine({part1_, part2_, part3_})); ASSERT_OK_AND_ASSIGN(auto modified, ConcatAndCombine({part1_, part2_, part3_, part4_})); ASSERT_FALSE(base->Equals(*modified)); for (bool enable_dictionary : {false, true}) { - ASSERT_OK_AND_ASSIGN(auto base_result, - WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - ASSERT_OK_AND_ASSIGN(auto modified_result, - WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); - - AssertChunkSizes(dtype, base_result, modified_result, nullable, enable_dictionary, - kMinChunkSize, kMaxChunkSize); + ASSERT_OK_AND_ASSIGN( + auto base_result, + WriteAndGetPageInfo(base, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + ASSERT_OK_AND_ASSIGN( + auto modified_result, + WriteAndGetPageInfo(modified, kMinChunkSize, kMaxChunkSize, enable_dictionary)); + + AssertChunkSizes(param.dtype, base_result, modified_result, param.is_nullable, + 
enable_dictionary, kMinChunkSize, kMaxChunkSize); AssertAppendCase(base_result.lengths, modified_result.lengths); } } TEST_P(TestColumnCDC, EmptyTable) { - auto [dtype, nullable, _] = GetParam(); + const auto& param = GetParam(); - auto schema = ::arrow::schema({::arrow::field("f0", dtype, nullable)}); + auto schema = ::arrow::schema({::arrow::field("f0", param.dtype, param.is_nullable)}); ASSERT_OK_AND_ASSIGN(auto empty_table, GenerateTable(schema, 0, 0)); ASSERT_EQ(empty_table->num_rows(), 0); for (bool enable_dictionary : {false, true}) { ASSERT_OK_AND_ASSIGN(auto result, WriteAndGetPageInfo(empty_table, kMinChunkSize, kMaxChunkSize, - /*enable_dictionary=*/enable_dictionary)); + enable_dictionary)); // An empty table should result in no data pages ASSERT_TRUE(result.lengths.empty()); @@ -881,43 +890,30 @@ TEST_P(TestColumnCDC, EmptyTable) { } } -// TODO(kszucs): add extension type and dictionary type INSTANTIATE_TEST_SUITE_P( FixedSizedTypes, TestColumnCDC, testing::Values( - // Numeric - std::make_tuple(::arrow::uint8(), false, 1), - std::make_tuple(::arrow::uint16(), false, 2), - std::make_tuple(::arrow::uint32(), false, 4), - std::make_tuple(::arrow::uint64(), true, 8), - std::make_tuple(::arrow::int8(), false, 1), - std::make_tuple(::arrow::int16(), false, 2), - std::make_tuple(::arrow::int32(), false, 4), - std::make_tuple(::arrow::int64(), true, 8), - std::make_tuple(::arrow::float16(), false, 2), - std::make_tuple(::arrow::float32(), false, 4), - std::make_tuple(::arrow::float64(), true, 8), - std::make_tuple(::arrow::decimal128(18, 6), false, 16), - std::make_tuple(::arrow::decimal256(40, 6), false, 32), - // Binary-like - std::make_tuple(::arrow::utf8(), false, 16), - std::make_tuple(::arrow::binary(), true, 16), - std::make_tuple(::arrow::fixed_size_binary(16), true, 16), - - // Temporal - std::make_tuple(::arrow::date32(), false, 4), - std::make_tuple(::arrow::time32(::arrow::TimeUnit::MILLI), true, 4), - 
std::make_tuple(::arrow::time64(::arrow::TimeUnit::NANO), false, 8), - std::make_tuple(::arrow::timestamp(::arrow::TimeUnit::NANO), true, 8), - std::make_tuple(::arrow::duration(::arrow::TimeUnit::NANO), false, 8), - // Nested types - std::make_tuple(::arrow::list(::arrow::int32()), false, 16), - std::make_tuple(::arrow::list(::arrow::int32()), true, 18), - std::make_tuple(::arrow::list(::arrow::utf8()), true, 18), - std::make_tuple(::arrow::struct_({::arrow::field("f0", ::arrow::int32())}), false, - 8), - std::make_tuple(::arrow::struct_({::arrow::field("f0", ::arrow::float64())}), - true, 10))); + CaseConfig{::arrow::uint8(), false, 1}, CaseConfig{::arrow::uint16(), false, 2}, + CaseConfig{::arrow::uint32(), false, 4}, CaseConfig{::arrow::uint64(), true, 8}, + CaseConfig{::arrow::int8(), false, 1}, CaseConfig{::arrow::int16(), false, 2}, + CaseConfig{::arrow::int32(), false, 4}, CaseConfig{::arrow::int64(), true, 8}, + CaseConfig{::arrow::float16(), false, 2}, + CaseConfig{::arrow::float32(), false, 4}, CaseConfig{::arrow::float64(), true, 8}, + CaseConfig{::arrow::decimal128(18, 6), false, 16}, + CaseConfig{::arrow::decimal256(40, 6), false, 32}, + CaseConfig{::arrow::utf8(), false, 16}, CaseConfig{::arrow::binary(), true, 16}, + CaseConfig{::arrow::fixed_size_binary(16), true, 16}, + CaseConfig{::arrow::date32(), false, 4}, + CaseConfig{::arrow::time32(::arrow::TimeUnit::MILLI), true, 4}, + CaseConfig{::arrow::time64(::arrow::TimeUnit::NANO), false, 8}, + CaseConfig{::arrow::timestamp(::arrow::TimeUnit::NANO), true, 8}, + CaseConfig{::arrow::duration(::arrow::TimeUnit::NANO), false, 8}, + CaseConfig{::arrow::list(::arrow::int32()), false, 16}, + CaseConfig{::arrow::list(::arrow::int32()), true, 18}, + CaseConfig{::arrow::list(::arrow::utf8()), true, 18}, + CaseConfig{::arrow::struct_({::arrow::field("f0", ::arrow::int32())}), false, 8}, + CaseConfig{::arrow::struct_({::arrow::field("f0", ::arrow::float64())}), true, + 10})); } // namespace parquet From 
6e20b4c52bc81f01a748e524d2ebe0d51c83a712 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 18:04:38 +0100 Subject: [PATCH 42/48] add a boolean test case --- cpp/src/parquet/chunker_internal_test.cc | 81 ++++++++++++++---------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/cpp/src/parquet/chunker_internal_test.cc b/cpp/src/parquet/chunker_internal_test.cc index 4614d2511e2b4..f998dc016751e 100644 --- a/cpp/src/parquet/chunker_internal_test.cc +++ b/cpp/src/parquet/chunker_internal_test.cc @@ -342,32 +342,6 @@ Result WriteAndGetPageInfo(const std::shared_ptr
& table, return GetColumnPageInfo(buffer, column_index); } -void AssertAllBetween(const ChunkList& chunks, int64_t min, int64_t max, - bool expect_dictionary_fallback = false) { - // expect the last chunk since it is not guaranteed to be within the range - if (expect_dictionary_fallback) { - // if dictionary encoding is enabled, the writer can fallback to plain - // encoding splitting within a content defined chunk, so we can't - // guarantee that all chunks are within the range in this case, but we - // know that there can be at most 2 pages smaller than the min_chunk_size - size_t smaller_count = 0; - for (size_t i = 0; i < chunks.size() - 1; i++) { - if (chunks[i] < min) { - smaller_count++; - } else { - ASSERT_LE(chunks[i], max); - } - } - ASSERT_LE(smaller_count, 2); - } else { - for (size_t i = 0; i < chunks.size() - 1; i++) { - ASSERT_GE(chunks[i], min); - ASSERT_LE(chunks[i], max); - } - } - ASSERT_LE(chunks.back(), max); -} - // A git-hunk like side-by-side data structure to represent the differences between two // vectors of uint64_t values. 
using ChunkDiff = std::pair; @@ -659,14 +633,42 @@ uint64_t ElementCount(int64_t size, int32_t byte_width, bool nullable) { return size / byte_width; } +void AssertAllBetween(const ChunkList& chunks, int64_t min, int64_t max, + bool expect_dictionary_fallback = false) { + // expect the last chunk since it is not guaranteed to be within the range + if (expect_dictionary_fallback) { + // if dictionary encoding is enabled, the writer can fallback to plain + // encoding splitting within a content defined chunk, so we can't + // guarantee that all chunks are within the range in this case, but we + // know that there can be at most 2 pages smaller than the min_chunk_size + size_t smaller_count = 0; + for (size_t i = 0; i < chunks.size() - 1; i++) { + if (chunks[i] < min) { + smaller_count++; + } else { + ASSERT_LE(chunks[i], max); + } + } + ASSERT_LE(smaller_count, 2); + } else { + for (size_t i = 0; i < chunks.size() - 1; i++) { + ASSERT_GE(chunks[i], min); + ASSERT_LE(chunks[i], max); + } + } + ASSERT_LE(chunks.back(), max); +} + void AssertChunkSizes(const std::shared_ptr<::arrow::DataType>& dtype, PageInfo base_result, PageInfo modified_result, bool nullable, - bool enable_dictionary, uint64_t min_chunk_size, - uint64_t max_chunk_size) { - max_chunk_size = static_cast(max_chunk_size * 1.2); - if (::arrow::is_fixed_width(dtype->id())) { - auto min_length = ElementCount(min_chunk_size, dtype->byte_width(), nullable); - auto max_length = ElementCount(max_chunk_size, dtype->byte_width(), nullable); + bool enable_dictionary, int64_t min_chunk_size, + int64_t max_chunk_size) { + if (::arrow::is_fixed_width(dtype->id()) && !nullable) { + // for nullable types we cannot calculate the exact number of elements because + // not all elements are fed through the chunker (null elements are skipped) + auto byte_width = (dtype->id() == ::arrow::Type::BOOL) ? 
1 : dtype->byte_width(); + auto min_length = ElementCount(min_chunk_size, byte_width, nullable); + auto max_length = ElementCount(max_chunk_size, byte_width, nullable); AssertAllBetween(base_result.lengths, min_length, max_length, /*expect_dictionary_fallback=*/enable_dictionary); AssertAllBetween(modified_result.lengths, min_length, max_length, @@ -693,6 +695,15 @@ struct CaseConfig { size_t bytes_per_record; }; +// Define PrintTo for MyStruct +void PrintTo(const CaseConfig& param, std::ostream* os) { + *os << "{ " << param.dtype->ToString(); + if (param.is_nullable) { + *os << " nullable"; + } + *os << " }"; +} + class TestColumnCDC : public ::testing::TestWithParam { protected: // Column random table parts for testing @@ -893,6 +904,9 @@ TEST_P(TestColumnCDC, EmptyTable) { INSTANTIATE_TEST_SUITE_P( FixedSizedTypes, TestColumnCDC, testing::Values( + // Boolean + CaseConfig{::arrow::boolean(), false, 1}, + // Numeric CaseConfig{::arrow::uint8(), false, 1}, CaseConfig{::arrow::uint16(), false, 2}, CaseConfig{::arrow::uint32(), false, 4}, CaseConfig{::arrow::uint64(), true, 8}, CaseConfig{::arrow::int8(), false, 1}, CaseConfig{::arrow::int16(), false, 2}, @@ -901,13 +915,16 @@ INSTANTIATE_TEST_SUITE_P( CaseConfig{::arrow::float32(), false, 4}, CaseConfig{::arrow::float64(), true, 8}, CaseConfig{::arrow::decimal128(18, 6), false, 16}, CaseConfig{::arrow::decimal256(40, 6), false, 32}, + // Binary-like CaseConfig{::arrow::utf8(), false, 16}, CaseConfig{::arrow::binary(), true, 16}, CaseConfig{::arrow::fixed_size_binary(16), true, 16}, + // Temporal CaseConfig{::arrow::date32(), false, 4}, CaseConfig{::arrow::time32(::arrow::TimeUnit::MILLI), true, 4}, CaseConfig{::arrow::time64(::arrow::TimeUnit::NANO), false, 8}, CaseConfig{::arrow::timestamp(::arrow::TimeUnit::NANO), true, 8}, CaseConfig{::arrow::duration(::arrow::TimeUnit::NANO), false, 8}, + // Nested types CaseConfig{::arrow::list(::arrow::int32()), false, 16}, CaseConfig{::arrow::list(::arrow::int32()), true, 
18}, CaseConfig{::arrow::list(::arrow::utf8()), true, 18}, From 3242099aae44f7854c666d7af7081711f45ecf73 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 7 Mar 2025 19:49:45 +0100 Subject: [PATCH 43/48] describe test utilities in more details --- cpp/src/parquet/chunker_internal_test.cc | 42 ++++++++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/chunker_internal_test.cc b/cpp/src/parquet/chunker_internal_test.cc index f998dc016751e..d5c625a093586 100644 --- a/cpp/src/parquet/chunker_internal_test.cc +++ b/cpp/src/parquet/chunker_internal_test.cc @@ -346,79 +346,107 @@ Result WriteAndGetPageInfo(const std::shared_ptr
& table, // vectors of uint64_t values. using ChunkDiff = std::pair; +/** + * Finds the differences between two sequences of chunk lengths or sizes. + * Uses a longest common subsequence algorithm to identify matching elements + * and extract the differences between the sequences. + * + * @param first The first sequence of chunk values + * @param second The second sequence of chunk values + * @return A vector of differences, where each difference is a pair of + * subsequences (one from each input) that differ + */ std::vector FindDifferences(const ChunkList& first, const ChunkList& second) { - // Compute longest-common-subsequence between the two vectors. + // Compute the longest common subsequence using dynamic programming size_t n = first.size(), m = second.size(); std::vector> dp(n + 1, std::vector(m + 1, 0)); + + // Fill the dynamic programming table for (size_t i = 0; i < n; i++) { for (size_t j = 0; j < m; j++) { if (first[i] == second[j]) { + // If current elements match, extend the LCS dp[i + 1][j + 1] = dp[i][j] + 1; } else { + // If current elements don't match, take the best option dp[i + 1][j + 1] = std::max(dp[i + 1][j], dp[i][j + 1]); } } } - // Backtrack to get common indices. + // Backtrack through the dynamic programming table to reconstruct the common + // parts and their positions in the original sequences std::vector> common; for (size_t i = n, j = m; i > 0 && j > 0;) { if (first[i - 1] == second[j - 1]) { + // Found a common element, add to common list common.emplace_back(i - 1, j - 1); i--, j--; } else if (dp[i - 1][j] >= dp[i][j - 1]) { + // Move in the direction of the larger LCS value i--; } else { j--; } } + // Reverse to get indices in ascending order std::reverse(common.begin(), common.end()); - // Build raw differences. 
+ // Build the differences by finding sequences between common elements std::vector result; size_t last_i = 0, last_j = 0; for (auto& c : common) { auto ci = c.first; auto cj = c.second; + // If there's a gap between the last common element and this one, + // record the difference if (ci > last_i || cj > last_j) { result.push_back({{first.begin() + last_i, first.begin() + ci}, {second.begin() + last_j, second.begin() + cj}}); } + // Move past this common element last_i = ci + 1; last_j = cj + 1; } + + // Handle any remaining elements after the last common element if (last_i < n || last_j < m) { result.push_back( {{first.begin() + last_i, first.end()}, {second.begin() + last_j, second.end()}}); } - // Merge adjacent diffs if one side is empty in the first diff and the other side - // is empty in the next diff, to avoid splitting single changes into two parts. + // Post-process: merge adjacent diffs to avoid splitting single changes into multiple + // parts std::vector merged; for (auto& diff : result) { if (!merged.empty()) { auto& prev = merged.back(); + // Check if we can merge with the previous diff bool can_merge_a = prev.first.empty() && !prev.second.empty() && !diff.first.empty() && diff.second.empty(); bool can_merge_b = prev.second.empty() && !prev.first.empty() && !diff.second.empty() && diff.first.empty(); + if (can_merge_a) { - // Combine into one change + // Combine into one diff: keep prev's second, use diff's first prev.first = std::move(diff.first); continue; } else if (can_merge_b) { + // Combine into one diff: keep prev's first, use diff's second prev.second = std::move(diff.second); continue; } } + // If we can't merge, add this diff to the result merged.push_back(std::move(diff)); } - return merged; } void PrintDifferences(const ChunkList& original, const ChunkList& modified, std::vector& diffs) { + // Utility function to print the original and modified sequences, and the diffs + // between them. 
Used in case of failing assertions to display the differences. std::cout << "Original: "; for (const auto& val : original) { std::cout << val << " "; From 50884d0adf14484a38a6f319548f24c9c004bc0d Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 10 Mar 2025 22:28:41 +0100 Subject: [PATCH 44/48] fix use .getValue() for binary arrays --- cpp/src/parquet/chunker_internal.cc | 76 ++++++++++++++++------------- cpp/src/parquet/chunker_internal.h | 9 ++-- 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index d13186ec91d4c..09375154e3dcc 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -69,30 +69,33 @@ void ContentDefinedChunker::Roll(const bool value) { has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } -template -void ContentDefinedChunker::Roll(const T* value) { - constexpr size_t BYTE_WIDTH = sizeof(T); - chunk_size_ += BYTE_WIDTH; +template +void ContentDefinedChunker::Roll(const uint8_t* value) { + chunk_size_ += ByteWidth; if (chunk_size_ < min_size_) { // short-circuit if we haven't reached the minimum chunk size, this speeds up the // chunking process since the gearhash doesn't need to be updated return; } - auto bytes = reinterpret_cast(value); - for (size_t i = 0; i < BYTE_WIDTH; ++i) { - rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][bytes[i]]; + for (size_t i = 0; i < ByteWidth; ++i) { + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } } -void ContentDefinedChunker::Roll(const uint8_t* value, int64_t num_bytes) { - chunk_size_ += num_bytes; +template +void ContentDefinedChunker::Roll(const T* value) { + return Roll(reinterpret_cast(value)); +} + +void ContentDefinedChunker::Roll(const uint8_t* value, int64_t length) { + chunk_size_ += length; if (chunk_size_ < min_size_) { // short-circuit 
if we haven't reached the minimum chunk size, this speeds up the // chunking process since the gearhash doesn't need to be updated return; } - for (int64_t i = 0; i < num_bytes; ++i) { + for (auto i = 0; i < length; ++i) { rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } @@ -202,21 +205,22 @@ const std::vector ContentDefinedChunker::Calculate(const int16_t* def_lev return chunks; } -#define FIXED_WIDTH_CASE(CType) \ - { \ - const auto raw_values = values.data()->GetValues(1); \ - return Calculate(def_levels, rep_levels, num_levels, \ - [&](int64_t i) { return Roll(raw_values + i); }); \ +#define FIXED_WIDTH_CASE(ByteWidth) \ + { \ + const auto raw_values = values.data()->GetValues(1); \ + return Calculate(def_levels, rep_levels, num_levels, [&](int64_t i) { \ + return Roll(raw_values + i * ByteWidth); \ + }); \ } -#define BINARY_LIKE_CASE(OffsetCType) \ +#define BINARY_LIKE_CASE(ArrayType) \ { \ - const auto raw_offsets = values.data()->GetValues(1); \ - const auto raw_values = values.data()->GetValues(2); \ + const auto& array = static_cast(values); \ + const uint8_t* value; \ + ArrayType::offset_type length; \ return Calculate(def_levels, rep_levels, num_levels, [&](int64_t i) { \ - const OffsetCType pos = raw_offsets[i]; \ - const OffsetCType length = raw_offsets[i + 1] - pos; \ - Roll(raw_values + pos, length); \ + value = array.GetValue(i, &length); \ + Roll(value, length); \ }); \ } @@ -235,17 +239,17 @@ const std::vector ContentDefinedChunker::GetBoundaries( } case ::arrow::Type::INT8: case ::arrow::Type::UINT8: - FIXED_WIDTH_CASE(uint8_t) + FIXED_WIDTH_CASE(1) case ::arrow::Type::INT16: case ::arrow::Type::UINT16: case ::arrow::Type::HALF_FLOAT: - FIXED_WIDTH_CASE(uint16_t) + FIXED_WIDTH_CASE(2) case ::arrow::Type::INT32: case ::arrow::Type::UINT32: case ::arrow::Type::FLOAT: case ::arrow::Type::DATE32: case ::arrow::Type::TIME32: - FIXED_WIDTH_CASE(uint32_t) + 
FIXED_WIDTH_CASE(4) case ::arrow::Type::INT64: case ::arrow::Type::UINT64: case ::arrow::Type::DOUBLE: @@ -253,22 +257,24 @@ const std::vector ContentDefinedChunker::GetBoundaries( case ::arrow::Type::TIME64: case ::arrow::Type::TIMESTAMP: case ::arrow::Type::DURATION: - FIXED_WIDTH_CASE(uint64_t) + FIXED_WIDTH_CASE(8) + case ::arrow::Type::DECIMAL128: + FIXED_WIDTH_CASE(16) + case ::arrow::Type::DECIMAL256: + FIXED_WIDTH_CASE(32) case ::arrow::Type::BINARY: + BINARY_LIKE_CASE(::arrow::BinaryArray) case ::arrow::Type::STRING: - BINARY_LIKE_CASE(int32_t) + BINARY_LIKE_CASE(::arrow::StringArray) case ::arrow::Type::LARGE_BINARY: + BINARY_LIKE_CASE(::arrow::LargeBinaryArray) case ::arrow::Type::LARGE_STRING: - BINARY_LIKE_CASE(int64_t) - case ::arrow::Type::DECIMAL128: - case ::arrow::Type::DECIMAL256: + BINARY_LIKE_CASE(::arrow::LargeStringArray) case ::arrow::Type::FIXED_SIZE_BINARY: { - const auto raw_values = values.data()->GetValues(1); - const auto byte_width = - static_cast(values).byte_width(); - return Calculate(def_levels, rep_levels, num_levels, [&](int64_t i) { - return Roll(raw_values + i * byte_width, byte_width); - }); + const auto& array = static_cast(values); + const auto byte_width = array.byte_width(); + return Calculate(def_levels, rep_levels, num_levels, + [&](int64_t i) { Roll(array.GetValue(i), byte_width); }); } case ::arrow::Type::DICTIONARY: return GetBoundaries( diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 5299e1c8ec853..235543fc70679 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -132,16 +132,19 @@ class ContentDefinedChunker { const ::arrow::Array& values); private: - void Roll(const bool value); + inline void Roll(const bool value); // Update the rolling hash with a compile-time known sized value, set has_matched_ to // true if the hash matches the mask. 
+ template + void inline Roll(const uint8_t* value); + template - void Roll(const T* value); + inline void Roll(const T* value); // Update the rolling hash with a binary-like value, set has_matched_ to true if the // hash matches the mask. - void Roll(const uint8_t* value, int64_t num_bytes); + inline void Roll(const uint8_t* value, int64_t length); // Evaluate whether a new chunk should be created based on the has_matched_, nth_run_ // and chunk_size_ state. From d64ee0b6357ac6dd9f388d761e436118fb4258ce Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 10 Mar 2025 23:38:13 +0100 Subject: [PATCH 45/48] add more details about calculating the mask --- cpp/src/parquet/chunker_internal.cc | 41 +++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index 09375154e3dcc..bb4da9a0d3a8d 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -28,15 +28,46 @@ namespace parquet::internal { +/// Calculate the mask to use for the rolling hash, the mask is used to determine if a +/// new chunk should be created based on the rolling hash value. The mask is calculated +/// based on the min_size, max_size and norm_factor parameters. +/// +/// Assuming that the gear hash hash random values with a uniform distribution, then each +/// bit in the actual value of rolling_hash_ has even probability of being set so a mask +/// with the top N bits set has a probability of 1/2^N of matching the rolling hash. This +/// is the judgment criteria for the original gear hash based content-defined chunking. +/// The main drawback of this approach is the non-uniform distribution of the chunk sizes. 
+/// +/// Later on the FastCDC has improved the process by introducing: +/// - sub-minimum chunk cut-point skipping (not hashing the first `min_size` bytes) +/// - chunk size normalization (using two masks) +/// +/// This implementation uses cut-point skipping because it improves the overall +/// performance and a more accurate alternative to have less skewed chunk size +/// distribution. Instead of using two different masks (one with a lower and one with a +/// probability of matching and switching them based on the actual chunk size), we rather +/// use 8 different gear hash tables and require having 8 consecutive matches while +/// switching between the used hashtables. This approach is based on central limit theorem +/// and approximates normal distribution of the chunk sizes. +// +// @param min_size The minimum chunk size (default 256KiB) +// @param max_size The maximum chunk size (default 1MiB) +// @param norm_factor Normalization factor (default 0) +// @return The mask used to compare against the rolling hash static uint64_t GetMask(int64_t min_size, int64_t max_size, uint8_t norm_factor) { - // we aim for gaussian-like distribution of chunk sizes between min_size and max_size + // calculate the average size of the chunks int64_t avg_size = (min_size + max_size) / 2; - // we skip calculating gearhash for the first `min_size` bytes, so we are looking for - // a smaller chunk as the average size + // since we are skipping the first `min_size` bytes for each chunk, we need to + // target a smaller chunk size to reach the average size after skipping the first + // `min_size` bytes int64_t target_size = avg_size - min_size; + // assuming that the gear hash has a uniform distribution, we can calculate the mask + // by taking the log2 of the target size size_t mask_bits = static_cast(std::floor(std::log2(target_size))); - // -3 because we are using 8 hash tables to have more gaussian-like distribution - // `norm_factor` narrows the chunk size distribution aroun 
avg_size + // -3 because we are using 8 hash tables to have more gaussian-like distribution, + // a user defined `norm_factor` can be used to adjust the mask size, hence the matching + // probability, by increasing the norm_factor we increase the probability of matching + // the mask, forcing the distribution closer to the average size size_t effective_bits = mask_bits - 3 - norm_factor; return std::numeric_limits::max() << (64 - effective_bits); } From 534c8e320274b0dc7e64c6c181277c76ee8a2066 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 13 Mar 2025 09:30:43 +0100 Subject: [PATCH 46/48] Address review comments --- cpp/src/parquet/chunker_internal.cc | 17 +++++++++-------- cpp/src/parquet/chunker_internal.h | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index bb4da9a0d3a8d..b597806330620 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -45,8 +45,8 @@ namespace parquet::internal { /// This implementation uses cut-point skipping because it improves the overall /// performance and a more accurate alternative to have less skewed chunk size /// distribution. Instead of using two different masks (one with a lower and one with a -/// probability of matching and switching them based on the actual chunk size), we rather -/// use 8 different gear hash tables and require having 8 consecutive matches while +/// higher probability of matching and switching them based on the actual chunk size), we +/// rather use 8 different gear hash tables and require having 8 consecutive matches while /// switching between the used hashtables. This approach is based on central limit theorem /// and approximates normal distribution of the chunk sizes. 
// @@ -139,8 +139,9 @@ bool ContentDefinedChunker::NeedNewChunk() { has_matched_ = false; // in order to have a normal distribution of chunk sizes, we only create a new chunk // if the adjused mask matches the rolling hash 8 times in a row, each run uses a - // different gearhash table (gearhash's chunk size has exponential distribution, and - // we use central limit theorem to approximate normal distribution) + // different gearhash table (gearhash's chunk size has geometric distribution, and + // we use central limit theorem to approximate normal distribution, see section 6.2.1 + // in paper https://www.cidrdb.org/cidr2023/papers/p43-low.pdf) if (ARROW_PREDICT_FALSE(++nth_run_ >= 7)) { nth_run_ = 0; chunk_size_ = 0; @@ -158,10 +159,10 @@ bool ContentDefinedChunker::NeedNewChunk() { } template -const std::vector ContentDefinedChunker::Calculate(const int16_t* def_levels, - const int16_t* rep_levels, - int64_t num_levels, - const RollFunc& RollValue) { +std::vector ContentDefinedChunker::Calculate(const int16_t* def_levels, + const int16_t* rep_levels, + int64_t num_levels, + const RollFunc& RollValue) { std::vector chunks; int64_t offset; int64_t prev_offset = 0; diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 235543fc70679..53830d41a5b58 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -152,8 +152,8 @@ class ContentDefinedChunker { // Calculate the chunk boundaries for typed Arrow arrays. 
template - const std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, - int64_t num_levels, const RollFunc& RollValue); + std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, + int64_t num_levels, const RollFunc& RollValue); // Reference to the column's level information const internal::LevelInfo& level_info_; From 82723130b16bee676a339ce7cc028a64754337fc Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Thu, 13 Mar 2025 09:32:10 +0100 Subject: [PATCH 47/48] Separate include groups with a new line --- cpp/src/parquet/chunker_internal.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index b597806330620..2aaaf4dc0e267 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -20,6 +20,7 @@ #include #include #include + #include "arrow/array.h" #include "arrow/util/logging.h" #include "parquet/chunker_internal_generated.h" From 2968d9bed3643e064e4f8edd95c11b7470f4baff Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Fri, 14 Mar 2025 16:12:03 +0100 Subject: [PATCH 48/48] Remove Chunk constructor and hide implementation using PIMPL --- cpp/src/parquet/chunker_internal.cc | 433 +++++++++++++++------------- cpp/src/parquet/chunker_internal.h | 59 +--- cpp/src/parquet/column_writer.cc | 4 +- 3 files changed, 246 insertions(+), 250 deletions(-) diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc index 2aaaf4dc0e267..f3c64a9d42297 100644 --- a/cpp/src/parquet/chunker_internal.cc +++ b/cpp/src/parquet/chunker_internal.cc @@ -73,170 +73,176 @@ static uint64_t GetMask(int64_t min_size, int64_t max_size, uint8_t norm_factor) return std::numeric_limits::max() << (64 - effective_bits); } -ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& level_info, - int64_t min_size, int64_t max_size, - int8_t norm_factor) - : level_info_(level_info), - min_size_(min_size), - 
max_size_(max_size), - hash_mask_(GetMask(min_size, max_size, norm_factor)) { - if (min_size_ < 0) { - throw ParquetException("min_size must be non-negative"); - } - if (max_size_ < 0) { - throw ParquetException("max_size must be non-negative"); - } - if (min_size_ > max_size_) { - throw ParquetException("min_size must be less than or equal to max_size"); +class ContentDefinedChunker::Impl { + public: + Impl(const LevelInfo& level_info, int64_t min_size, int64_t max_size, + int8_t norm_factor) + : level_info_(level_info), + min_size_(min_size), + max_size_(max_size), + hash_mask_(GetMask(min_size, max_size, norm_factor)) { + if (min_size_ < 0) { + throw ParquetException("min_size must be non-negative"); + } + if (max_size_ < 0) { + throw ParquetException("max_size must be non-negative"); + } + if (min_size_ > max_size_) { + throw ParquetException("min_size must be less than or equal to max_size"); + } } -} -void ContentDefinedChunker::Roll(const bool value) { - if (chunk_size_++ < min_size_) { - // short-circuit if we haven't reached the minimum chunk size, this speeds up the - // chunking process since the gearhash doesn't need to be updated - return; + void Roll(const bool value) { + if (chunk_size_++ < min_size_) { + // short-circuit if we haven't reached the minimum chunk size, this speeds up the + // chunking process since the gearhash doesn't need to be updated + return; + } + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value]; + has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); } - rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value]; - has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); -} -template -void ContentDefinedChunker::Roll(const uint8_t* value) { - chunk_size_ += ByteWidth; - if (chunk_size_ < min_size_) { - // short-circuit if we haven't reached the minimum chunk size, this speeds up the - // chunking process since the gearhash doesn't need to be updated - return; + template 
+ void Roll(const uint8_t* value) { + // Update the rolling hash with a compile-time known sized value, set has_matched_ to + // true if the hash matches the mask. + + chunk_size_ += ByteWidth; + if (chunk_size_ < min_size_) { + // short-circuit if we haven't reached the minimum chunk size, this speeds up the + // chunking process since the gearhash doesn't need to be updated + return; + } + for (size_t i = 0; i < ByteWidth; ++i) { + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; + has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); + } } - for (size_t i = 0; i < ByteWidth; ++i) { - rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; - has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); + + template + void Roll(const T* value) { + return Roll(reinterpret_cast(value)); } -} -template -void ContentDefinedChunker::Roll(const T* value) { - return Roll(reinterpret_cast(value)); -} + void Roll(const uint8_t* value, int64_t length) { + // Update the rolling hash with a binary-like value, set has_matched_ to true if the + // hash matches the mask. 
-void ContentDefinedChunker::Roll(const uint8_t* value, int64_t length) { - chunk_size_ += length; - if (chunk_size_ < min_size_) { - // short-circuit if we haven't reached the minimum chunk size, this speeds up the - // chunking process since the gearhash doesn't need to be updated - return; - } - for (auto i = 0; i < length; ++i) { - rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; - has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); + chunk_size_ += length; + if (chunk_size_ < min_size_) { + // short-circuit if we haven't reached the minimum chunk size, this speeds up the + // chunking process since the gearhash doesn't need to be updated + return; + } + for (auto i = 0; i < length; ++i) { + rolling_hash_ = (rolling_hash_ << 1) + kGearhashTable[nth_run_][value[i]]; + has_matched_ = has_matched_ || ((rolling_hash_ & hash_mask_) == 0); + } } -} -bool ContentDefinedChunker::NeedNewChunk() { - // decide whether to create a new chunk based on the rolling hash; has_matched_ is - // set to true if we encountered a match since the last NeedNewChunk() call - if (ARROW_PREDICT_FALSE(has_matched_)) { - has_matched_ = false; - // in order to have a normal distribution of chunk sizes, we only create a new chunk - // if the adjused mask matches the rolling hash 8 times in a row, each run uses a - // different gearhash table (gearhash's chunk size has geometric distribution, and - // we use central limit theorem to approximate normal distribution, see section 6.2.1 - // in paper https://www.cidrdb.org/cidr2023/papers/p43-low.pdf) - if (ARROW_PREDICT_FALSE(++nth_run_ >= 7)) { - nth_run_ = 0; + bool NeedNewChunk() { + // decide whether to create a new chunk based on the rolling hash; has_matched_ is + // set to true if we encountered a match since the last NeedNewChunk() call + if (ARROW_PREDICT_FALSE(has_matched_)) { + has_matched_ = false; + // in order to have a normal distribution of chunk sizes, we only create a new chunk + // if 
the adjusted mask matches the rolling hash 8 times in a row, each run uses a + // different gearhash table (gearhash's chunk size has geometric distribution, and + // we use central limit theorem to approximate normal distribution, see + // section 6.2.1 in paper https://www.cidrdb.org/cidr2023/papers/p43-low.pdf) + if (ARROW_PREDICT_FALSE(++nth_run_ >= 7)) { + nth_run_ = 0; + chunk_size_ = 0; + return true; + } + } + if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { + // we have a hard limit on the maximum chunk size, note that we don't reset the + // rolling hash state here, so the next NeedNewChunk() call will continue from the + // current state chunk_size_ = 0; return true; } + return false; } - if (ARROW_PREDICT_FALSE(chunk_size_ >= max_size_)) { - // we have a hard limit on the maximum chunk size, note that we don't reset the - // rolling hash state here, so the next NeedNewChunk() call will continue from the - // current state - chunk_size_ = 0; - return true; - } - return false; -} -template -std::vector ContentDefinedChunker::Calculate(const int16_t* def_levels, - const int16_t* rep_levels, - int64_t num_levels, - const RollFunc& RollValue) { - std::vector chunks; - int64_t offset; - int64_t prev_offset = 0; - int64_t prev_value_offset = 0; - bool has_def_levels = level_info_.def_level > 0; - bool has_rep_levels = level_info_.rep_level > 0; + template + std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, + int64_t num_levels, const RollFunc& RollValue) { + // Calculate the chunk boundaries for typed Arrow arrays.
+ std::vector chunks; + int64_t offset; + int64_t prev_offset = 0; + int64_t prev_value_offset = 0; + bool has_def_levels = level_info_.def_level > 0; + bool has_rep_levels = level_info_.rep_level > 0; - if (!has_rep_levels && !has_def_levels) { - // fastest path for non-nested non-null data - for (offset = 0; offset < num_levels; ++offset) { - RollValue(offset); - if (NeedNewChunk()) { - chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); - prev_offset = offset; - } - } - // set the previous value offset to add the last chunk - prev_value_offset = prev_offset; - } else if (!has_rep_levels) { - // non-nested data with nulls - int16_t def_level; - for (int64_t offset = 0; offset < num_levels; ++offset) { - def_level = def_levels[offset]; - - Roll(&def_level); - if (def_level == level_info_.def_level) { + if (!has_rep_levels && !has_def_levels) { + // fastest path for non-nested non-null data + for (offset = 0; offset < num_levels; ++offset) { RollValue(offset); + if (NeedNewChunk()) { + chunks.push_back({prev_offset, prev_offset, offset - prev_offset}); + prev_offset = offset; + } } - if (NeedNewChunk()) { - chunks.emplace_back(prev_offset, prev_offset, offset - prev_offset); - prev_offset = offset; + // set the previous value offset to add the last chunk + prev_value_offset = prev_offset; + } else if (!has_rep_levels) { + // non-nested data with nulls + int16_t def_level; + for (int64_t offset = 0; offset < num_levels; ++offset) { + def_level = def_levels[offset]; + + Roll(&def_level); + if (def_level == level_info_.def_level) { + RollValue(offset); + } + if (NeedNewChunk()) { + chunks.push_back({prev_offset, prev_offset, offset - prev_offset}); + prev_offset = offset; + } } - } - // set the previous value offset to add the last chunk - prev_value_offset = prev_offset; - } else { - // nested data with nulls - int16_t def_level; - int16_t rep_level; - int64_t value_offset = 0; + // set the previous value offset to add the last chunk + 
prev_value_offset = prev_offset; + } else { + // nested data with nulls + int16_t def_level; + int16_t rep_level; + int64_t value_offset = 0; - for (offset = 0; offset < num_levels; ++offset) { - def_level = def_levels[offset]; - rep_level = rep_levels[offset]; + for (offset = 0; offset < num_levels; ++offset) { + def_level = def_levels[offset]; + rep_level = rep_levels[offset]; - Roll(&def_level); - Roll(&rep_level); - if (def_level == level_info_.def_level) { - RollValue(value_offset); - } + Roll(&def_level); + Roll(&rep_level); + if (def_level == level_info_.def_level) { + RollValue(value_offset); + } - if ((rep_level == 0) && NeedNewChunk()) { - // if we are at a record boundary and need a new chunk, we create a new chunk - auto levels_to_write = offset - prev_offset; - if (levels_to_write > 0) { - chunks.emplace_back(prev_offset, prev_value_offset, levels_to_write); - prev_offset = offset; - prev_value_offset = value_offset; + if ((rep_level == 0) && NeedNewChunk()) { + // if we are at a record boundary and need a new chunk, we create a new chunk + auto levels_to_write = offset - prev_offset; + if (levels_to_write > 0) { + chunks.push_back({prev_offset, prev_value_offset, levels_to_write}); + prev_offset = offset; + prev_value_offset = value_offset; + } + } + if (def_level >= level_info_.repeated_ancestor_def_level) { + // we only increment the value offset if we have a leaf value + ++value_offset; } - } - if (def_level >= level_info_.repeated_ancestor_def_level) { - // we only increment the value offset if we have a leaf value - ++value_offset; } } - } - // add the last chunk if we have any levels left - if (prev_offset < num_levels) { - chunks.emplace_back(prev_offset, prev_value_offset, num_levels - prev_offset); + // add the last chunk if we have any levels left + if (prev_offset < num_levels) { + chunks.push_back({prev_offset, prev_value_offset, num_levels - prev_offset}); + } + return chunks; } - return chunks; -} #define FIXED_WIDTH_CASE(ByteWidth) \ { 
\ @@ -257,65 +263,104 @@ std::vector ContentDefinedChunker::Calculate(const int16_t* def_levels, }); \ } -const std::vector ContentDefinedChunker::GetBoundaries( - const int16_t* def_levels, const int16_t* rep_levels, int64_t num_levels, - const ::arrow::Array& values) { - auto type_id = values.type()->id(); - switch (type_id) { - case ::arrow::Type::NA: { - return Calculate(def_levels, rep_levels, num_levels, [](int64_t) {}); - } - case ::arrow::Type::BOOL: { - const auto& bool_array = static_cast(values); - return Calculate(def_levels, rep_levels, num_levels, - [&](int64_t i) { return Roll(bool_array.Value(i)); }); - } - case ::arrow::Type::INT8: - case ::arrow::Type::UINT8: - FIXED_WIDTH_CASE(1) - case ::arrow::Type::INT16: - case ::arrow::Type::UINT16: - case ::arrow::Type::HALF_FLOAT: - FIXED_WIDTH_CASE(2) - case ::arrow::Type::INT32: - case ::arrow::Type::UINT32: - case ::arrow::Type::FLOAT: - case ::arrow::Type::DATE32: - case ::arrow::Type::TIME32: - FIXED_WIDTH_CASE(4) - case ::arrow::Type::INT64: - case ::arrow::Type::UINT64: - case ::arrow::Type::DOUBLE: - case ::arrow::Type::DATE64: - case ::arrow::Type::TIME64: - case ::arrow::Type::TIMESTAMP: - case ::arrow::Type::DURATION: - FIXED_WIDTH_CASE(8) - case ::arrow::Type::DECIMAL128: - FIXED_WIDTH_CASE(16) - case ::arrow::Type::DECIMAL256: - FIXED_WIDTH_CASE(32) - case ::arrow::Type::BINARY: - BINARY_LIKE_CASE(::arrow::BinaryArray) - case ::arrow::Type::STRING: - BINARY_LIKE_CASE(::arrow::StringArray) - case ::arrow::Type::LARGE_BINARY: - BINARY_LIKE_CASE(::arrow::LargeBinaryArray) - case ::arrow::Type::LARGE_STRING: - BINARY_LIKE_CASE(::arrow::LargeStringArray) - case ::arrow::Type::FIXED_SIZE_BINARY: { - const auto& array = static_cast(values); - const auto byte_width = array.byte_width(); - return Calculate(def_levels, rep_levels, num_levels, - [&](int64_t i) { Roll(array.GetValue(i), byte_width); }); + std::vector GetChunks(const int16_t* def_levels, const int16_t* rep_levels, + int64_t num_levels, 
const ::arrow::Array& values) { + auto type_id = values.type()->id(); + switch (type_id) { + case ::arrow::Type::NA: { + return Calculate(def_levels, rep_levels, num_levels, [](int64_t) {}); + } + case ::arrow::Type::BOOL: { + const auto& bool_array = static_cast(values); + return Calculate(def_levels, rep_levels, num_levels, + [&](int64_t i) { return Roll(bool_array.Value(i)); }); + } + case ::arrow::Type::INT8: + case ::arrow::Type::UINT8: + FIXED_WIDTH_CASE(1) + case ::arrow::Type::INT16: + case ::arrow::Type::UINT16: + case ::arrow::Type::HALF_FLOAT: + FIXED_WIDTH_CASE(2) + case ::arrow::Type::INT32: + case ::arrow::Type::UINT32: + case ::arrow::Type::FLOAT: + case ::arrow::Type::DATE32: + case ::arrow::Type::TIME32: + FIXED_WIDTH_CASE(4) + case ::arrow::Type::INT64: + case ::arrow::Type::UINT64: + case ::arrow::Type::DOUBLE: + case ::arrow::Type::DATE64: + case ::arrow::Type::TIME64: + case ::arrow::Type::TIMESTAMP: + case ::arrow::Type::DURATION: + FIXED_WIDTH_CASE(8) + case ::arrow::Type::DECIMAL128: + FIXED_WIDTH_CASE(16) + case ::arrow::Type::DECIMAL256: + FIXED_WIDTH_CASE(32) + case ::arrow::Type::BINARY: + BINARY_LIKE_CASE(::arrow::BinaryArray) + case ::arrow::Type::STRING: + BINARY_LIKE_CASE(::arrow::StringArray) + case ::arrow::Type::LARGE_BINARY: + BINARY_LIKE_CASE(::arrow::LargeBinaryArray) + case ::arrow::Type::LARGE_STRING: + BINARY_LIKE_CASE(::arrow::LargeStringArray) + case ::arrow::Type::FIXED_SIZE_BINARY: { + const auto& array = static_cast(values); + const auto byte_width = array.byte_width(); + return Calculate(def_levels, rep_levels, num_levels, + [&](int64_t i) { Roll(array.GetValue(i), byte_width); }); + } + case ::arrow::Type::DICTIONARY: + return GetChunks(def_levels, rep_levels, num_levels, + *static_cast(values).indices()); + default: + throw ParquetException("Unsupported Arrow array type " + + values.type()->ToString()); } - case ::arrow::Type::DICTIONARY: - return GetBoundaries( - def_levels, rep_levels, num_levels, - 
*static_cast(values).indices()); - default: - throw ParquetException("Unsupported Arrow array type " + values.type()->ToString()); } + + private: + // Reference to the column's level information + const internal::LevelInfo& level_info_; + // Minimum chunk size in bytes, the rolling hash will not be updated until this size is + // reached for each chunk. Note that all data sent through the hash function is counted + // towards the chunk size, including definition and repetition levels. + const int64_t min_size_; + const int64_t max_size_; + // The mask to match the rolling hash against to determine if a new chunk should be + // created. The mask is calculated based on min/max chunk size and the normalization + // factor. + const uint64_t hash_mask_; + + // Whether the rolling hash has matched the mask since the last chunk creation. This + // flag is set true by the Roll() function when the mask is matched and reset to false + // by NeedNewChunk() method. + bool has_matched_ = false; + // The current run of the rolling hash, used to normalize the chunk size distribution + // by requiring multiple consecutive matches to create a new chunk. + int8_t nth_run_ = 0; + // Current chunk size in bytes, reset to 0 when a new chunk is created. + int64_t chunk_size_ = 0; + // Rolling hash state, never reset, only initialized once for the entire column.
+ uint64_t rolling_hash_ = 0; +}; + +ContentDefinedChunker::ContentDefinedChunker(const LevelInfo& level_info, + int64_t min_size, int64_t max_size, + int8_t norm_factor) + : impl_(new Impl(level_info, min_size, max_size, norm_factor)) {} + +ContentDefinedChunker::~ContentDefinedChunker() = default; + +std::vector ContentDefinedChunker::GetChunks(const int16_t* def_levels, + const int16_t* rep_levels, + int64_t num_levels, + const ::arrow::Array& values) { + return impl_->GetChunks(def_levels, rep_levels, num_levels, values); } } // namespace parquet::internal diff --git a/cpp/src/parquet/chunker_internal.h b/cpp/src/parquet/chunker_internal.h index 53830d41a5b58..505300517544d 100644 --- a/cpp/src/parquet/chunker_internal.h +++ b/cpp/src/parquet/chunker_internal.h @@ -31,11 +31,6 @@ struct Chunk { int64_t level_offset; int64_t value_offset; int64_t levels_to_write; - - Chunk(int64_t level_offset, int64_t value_offset, int64_t levels_to_write) - : level_offset(level_offset), - value_offset(value_offset), - levels_to_write(levels_to_write) {} }; /// CDC (Content-Defined Chunking) is a technique that divides data into variable-sized @@ -119,6 +114,7 @@ class ContentDefinedChunker { /// ratio. 
ContentDefinedChunker(const LevelInfo& level_info, int64_t min_size, int64_t max_size, int8_t norm_factor = 0); + ~ContentDefinedChunker(); /// Get the chunk boundaries for the given column data /// @@ -127,57 +123,12 @@ class ContentDefinedChunker { /// @param num_levels Number of levels /// @param values Column values as an Arrow array /// @return Vector of Chunk objects representing the chunk boundaries - const std::vector GetBoundaries(const int16_t* def_levels, - const int16_t* rep_levels, int64_t num_levels, - const ::arrow::Array& values); + std::vector GetChunks(const int16_t* def_levels, const int16_t* rep_levels, + int64_t num_levels, const ::arrow::Array& values); private: - inline void Roll(const bool value); - - // Update the rolling hash with a compile-time known sized value, set has_matched_ to - // true if the hash matches the mask. - template - void inline Roll(const uint8_t* value); - - template - inline void Roll(const T* value); - - // Update the rolling hash with a binary-like value, set has_matched_ to true if the - // hash matches the mask. - inline void Roll(const uint8_t* value, int64_t length); - - // Evaluate whether a new chunk should be created based on the has_matched_, nth_run_ - // and chunk_size_ state. - inline bool NeedNewChunk(); - - // Calculate the chunk boundaries for typed Arrow arrays. - template - std::vector Calculate(const int16_t* def_levels, const int16_t* rep_levels, - int64_t num_levels, const RollFunc& RollValue); - - // Reference to the column's level information - const internal::LevelInfo& level_info_; - // Minimum chunk size in bytes, the rolling hash will not be updated until this size is - // reached for each chunk. Note that all data sent through the hash function is counted - // towards the chunk size, including definition and repetition levels. - const int64_t min_size_; - const int64_t max_size_; - // The mask to match the rolling hash against to determine if a new chunk should be - // created. 
The mask is calculated based on min/max chunk size and the normalization - // factor. - const uint64_t hash_mask_; - - // Whether the rolling hash has matched the mask since the last chunk creation. This - // flag is set true by the Roll() function when the mask is matched and reset to false - // by NeedNewChunk() method. - bool has_matched_ = false; - // The current run of the rolling hash, used to normalize the chunk size distribution - // by requiring multiple consecutive matches to create a new chunk. - int8_t nth_run_ = 0; - // Current chunk size in bytes, reset to 0 when a new chunk is created. - int64_t chunk_size_ = 0; - // Rolling hash state, never reset only initialized once for the entire column. - uint64_t rolling_hash_ = 0; + class Impl; + std::unique_ptr impl_; }; } // namespace parquet::internal diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index af3fbe3ce8289..5dec9f324675f 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -1341,8 +1341,8 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< } if (properties_->cdc_enabled()) { - auto boundaries = content_defined_chunker_.GetBoundaries(def_levels, rep_levels, - num_levels, leaf_array); + auto boundaries = content_defined_chunker_.GetChunks(def_levels, rep_levels, + num_levels, leaf_array); for (auto chunk : boundaries) { auto chunk_array = leaf_array.Slice(chunk.value_offset); auto chunk_def_levels = AddIfNotNull(def_levels, chunk.level_offset);