From a32863660488eaeeb12f89fa6d7c9862b0e00ef3 Mon Sep 17 00:00:00 2001 From: William Dealtry Date: Tue, 5 Nov 2024 09:48:48 +0000 Subject: [PATCH] Add some lightweight encodings --- cpp/arcticdb/CMakeLists.txt | 10 +- cpp/arcticdb/codec/bitpack_fused.hpp | 150 +++++++++++++++ cpp/arcticdb/codec/codec.cpp | 7 + cpp/arcticdb/codec/codec.hpp | 2 + cpp/arcticdb/codec/constant_encoding.hpp | 57 ++++++ cpp/arcticdb/codec/delta.hpp | 37 ++++ cpp/arcticdb/codec/fastlanes_common.hpp | 68 +++++++ cpp/arcticdb/codec/ffor.hpp | 28 +++ cpp/arcticdb/codec/frequency_encoding.hpp | 174 ++++++++++++++++++ cpp/arcticdb/codec/statistics.hpp | 174 ++++++++++++++++++ .../codec/test/encoding_test_common.hpp | 54 ++++++ .../test/rapidcheck_frequency_encoding.cpp | 97 ++++++++++ .../codec/test/test_constant_encoding.cpp | 45 +++++ cpp/arcticdb/codec/test/test_ffor.cpp | 140 ++++++++++++++ .../codec/test/test_frequency_encoding.cpp | 119 ++++++++++++ .../codec/test/test_fused_bitpack.cpp | 119 ++++++++++++ cpp/arcticdb/codec/test/test_stats.cpp | 123 +++++++++++++ .../test/test_index_filtering.cpp | 2 - cpp/arcticdb/entity/metrics.cpp | 3 +- cpp/arcticdb/pipeline/frame_slice.hpp | 1 - cpp/arcticdb/pipeline/read_pipeline.hpp | 6 +- cpp/arcticdb/storage/library.hpp | 3 - cpp/arcticdb/util/bitset.hpp | 32 ++++ cpp/arcticdb/util/preprocess.hpp | 2 + .../util/test/test_tracing_allocator.cpp | 1 - 25 files changed, 1438 insertions(+), 16 deletions(-) create mode 100644 cpp/arcticdb/codec/bitpack_fused.hpp create mode 100644 cpp/arcticdb/codec/constant_encoding.hpp create mode 100644 cpp/arcticdb/codec/delta.hpp create mode 100644 cpp/arcticdb/codec/fastlanes_common.hpp create mode 100644 cpp/arcticdb/codec/ffor.hpp create mode 100644 cpp/arcticdb/codec/frequency_encoding.hpp create mode 100644 cpp/arcticdb/codec/statistics.hpp create mode 100644 cpp/arcticdb/codec/test/encoding_test_common.hpp create mode 100644 cpp/arcticdb/codec/test/rapidcheck_frequency_encoding.cpp create mode 100644 
cpp/arcticdb/codec/test/test_constant_encoding.cpp create mode 100644 cpp/arcticdb/codec/test/test_ffor.cpp create mode 100644 cpp/arcticdb/codec/test/test_frequency_encoding.cpp create mode 100644 cpp/arcticdb/codec/test/test_fused_bitpack.cpp create mode 100644 cpp/arcticdb/codec/test/test_stats.cpp diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index 86436146d7..63851f9d64 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -511,7 +511,7 @@ set(arcticdb_srcs version/symbol_list.cpp version/version_map_batch_methods.cpp storage/s3/ec2_utils.cpp - util/buffer_holder.cpp) + util/buffer_holder.cpp codec/frequency_encoding.hpp codec/constant_encoding.hpp codec/statistics.hpp codec/fastlanes_common.hpp codec/delta.hpp codec/ffor.hpp codec/bitpack_fused.hpp) add_library(arcticdb_core_object OBJECT ${arcticdb_srcs}) @@ -653,7 +653,7 @@ set (arcticdb_core_libraries arcticdb_proto xxHash::xxHash prometheus-cpp::push - prometheus-cpp::pull + #prometheus-cpp::pull unordered_dense::unordered_dense ${standard_libraries} fmt::fmt @@ -872,6 +872,7 @@ else() ${CMAKE_COMMAND} -E copy $ ${CMAKE_INSTALL_PREFIX}) endif() + ## Unit Tests ## if(${TEST}) unset(Python_USE_STATIC_LIBS) @@ -956,12 +957,11 @@ if(${TEST}) version/test/version_map_model.hpp python/python_handlers.cpp storage/test/common.hpp - version/test/test_sort_index.cpp) + version/test/test_sort_index.cpp codec/test/test_frequency_encoding.cpp codec/test/test_constant_encoding.cpp codec/test/encoding_test_common.hpp codec/test/test_stats.cpp codec/test/test_ffor.cpp codec/test/test_fused_bitpack.cpp) set(EXECUTABLE_PERMS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) # 755 add_executable(test_unit_arcticdb ${unit_test_srcs}) - install(TARGETS test_unit_arcticdb RUNTIME DESTINATION . 
PERMISSIONS ${EXECUTABLE_PERMS} @@ -1063,7 +1063,7 @@ if(${TEST}) util/test/rapidcheck_string_pool.cpp util/test/rapidcheck_main.cpp util/test/rapidcheck_lru_cache.cpp - version/test/rapidcheck_version_map.cpp) + version/test/rapidcheck_version_map.cpp codec/test/rapidcheck_frequency_encoding.cpp) add_executable(arcticdb_rapidcheck_tests ${rapidcheck_srcs}) install(TARGETS arcticdb_rapidcheck_tests RUNTIME diff --git a/cpp/arcticdb/codec/bitpack_fused.hpp b/cpp/arcticdb/codec/bitpack_fused.hpp new file mode 100644 index 0000000000..e0f5b6c5aa --- /dev/null +++ b/cpp/arcticdb/codec/bitpack_fused.hpp @@ -0,0 +1,150 @@ +#include +#include + + +#include +#include + +namespace arcticdb { + + +template +constexpr T construct_mask() { + if constexpr (bit_width == type_bits()) + return T(-1); + else + return (T(1) << bit_width) - 1; +} + +template +struct BitPackHelper { + static constexpr size_t bit_width = width; + static constexpr size_t num_bits = Helper::num_bits; + static constexpr size_t num_lanes = Helper::num_lanes; + static_assert(bit_width <= num_bits); + + static constexpr T mask = construct_mask(); + + static constexpr size_t remaining_bits(size_t row) { + return ((row + 1) * bit_width) % num_bits; + }; + + static constexpr size_t current_bits(size_t row) { + return bit_width - remaining_bits(row); + } + + static constexpr size_t current_word (size_t row) { + return (row * bit_width) / num_bits; + } + + static constexpr size_t next_word (size_t row) { + return ((row + 1) * bit_width) / num_bits; + } + + static constexpr bool at_end(size_t row) { + return next_word(row) > current_word(row); + } + + static constexpr size_t shift(size_t row) { + return (row * bit_width) % num_bits; + } +}; + +static_assert(BitPackHelper::mask == 7); +static_assert(BitPackHelper::at_end(2)); +static_assert(!BitPackHelper::at_end(3)); + +template +void bitpack_lane( + const size_t lane, + const T* __restrict in, + T* __restrict out, + Kernel& kernel) { + static constexpr auto 
num_bits = p::num_bits; + static constexpr auto num_lanes = p::num_lanes; + static constexpr auto mask = p::mask; + + T tmp = 0; + loop([lane, in, out, &tmp, &kernel](auto r) { + constexpr size_t row = r; + size_t idx = index(row, lane); + T src = kernel(in[idx]); + src &= mask; + + if constexpr(row == 0) { + tmp = src; + } else { + tmp |= src << ((row * bit_width) & (num_bits - 1)); + } + + if constexpr(p::at_end(row)) { + constexpr auto current_word = p::current_word(row); + constexpr auto remaining_bits = p::remaining_bits(row); + out[num_lanes * current_word + lane] = tmp; + //log::version().info("Writing to index {}", num_bits * current_word + lane); + tmp = src >> (bit_width - remaining_bits); + } + }); +} + +template +struct BitPackFused : public BitPackHelper { + using Parent = BitPackHelper; + static constexpr auto num_lanes = Parent::num_lanes; + + template + static void go(const T *__restrict in, T *__restrict out, Kernel &&kernel) { + for(auto lane = 0UL; lane < num_lanes; ++lane) { + bitpack_lane(lane, in, out, kernel); + }; + } +}; + +template +void bitunpack_lane( + size_t lane, + const T *__restrict in, + T *__restrict out, + Kernel &kernel) { + static constexpr auto num_bits = Parent::num_bits; + static constexpr auto num_lanes = Parent::num_lanes; + static constexpr auto mask = Parent::mask; + using p = Parent; + + T src = in[lane]; + T tmp; + loop([lane, in, out, &tmp, &kernel, &src](auto row) { + constexpr auto shift = p::shift(row); + if constexpr (p::at_end(row)) { + constexpr auto current_bits = p::current_bits(row); + constexpr auto current_bits_mask = construct_mask(); + tmp = (src >> shift) & current_bits_mask; + if constexpr (p::next_word(row) < bit_width) { + constexpr auto next_word = p::next_word(row); + constexpr auto remaining_bits_mask = construct_mask(); + src = in[num_lanes * next_word + lane]; + tmp |= (src & remaining_bits_mask) << current_bits; + } + } else { + tmp = (src >> shift) & mask; + } + + size_t idx = index(row, lane); 
+ out[idx] = kernel(tmp); + }); +} + +template +struct BitUnpackFused : public BitPackHelper { + using Parent = BitPackHelper; + static constexpr auto num_lanes = Parent::num_lanes; + + template + static void go(const T *__restrict in, T *__restrict out, Kernel &&kernel) { + for(auto lane = 0UL; lane < num_lanes; ++lane) { + bitunpack_lane(lane, in, out, kernel); + } + } +}; + +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/codec.cpp b/cpp/arcticdb/codec/codec.cpp index 30f3c0e8d7..a55e75e398 100644 --- a/cpp/arcticdb/codec/codec.cpp +++ b/cpp/arcticdb/codec/codec.cpp @@ -634,6 +634,13 @@ void add_bitmagic_compressed_size( } } +bm::serializer >::buffer encode_bitmap(const util::BitSet& sparse_map) { + bm::serializer > bvs; + bm::serializer >::buffer buffer; + bvs.serialize(sparse_map, buffer); + return buffer; +} + /// @brief Write the sparse map to the out buffer /// Bitmagic achieves the theoretical best compression for booleans. Adding additional encoding (lz4, zstd, etc...) /// will not improve anything and in fact it might worsen the encoding. 
diff --git a/cpp/arcticdb/codec/codec.hpp b/cpp/arcticdb/codec/codec.hpp index 446bacaed2..1588a4d5e5 100644 --- a/cpp/arcticdb/codec/codec.hpp +++ b/cpp/arcticdb/codec/codec.hpp @@ -89,6 +89,8 @@ HashedValue get_segment_hash(Segment& seg); SegmentDescriptorImpl read_segment_descriptor(const uint8_t*& data); +bm::serializer >::buffer encode_bitmap(const util::BitSet& sparse_map); + } // namespace arcticdb #define ARCTICDB_SEGMENT_ENCODER_H_ diff --git a/cpp/arcticdb/codec/constant_encoding.hpp b/cpp/arcticdb/codec/constant_encoding.hpp new file mode 100644 index 0000000000..d2aa1ebfd2 --- /dev/null +++ b/cpp/arcticdb/codec/constant_encoding.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +#include + +namespace arcticdb { +template +struct ConstantEncoding { + +#pragma pack(push, 1) + struct Data { + uint64_t size_; + T value_; + }; +#pragma pack(pop) + + std::optional max_required_bytes(const T* data_in, size_t num_rows) { + if (num_rows == 0) + return 0; + + const auto *pos = data_in; + const auto *end = pos + num_rows; + T first = *pos; + ++pos; + do { + if (*pos != first) + return std::nullopt; + + ++pos; + } while (pos != end); + + return sizeof(Data); + } + + size_t encode(const T *data_in, size_t num_rows, uint8_t *data_out) { + if (num_rows == 0) + return 0; + + auto *state = reinterpret_cast(data_out); + state->size_ = num_rows; + state->value_ = *data_in; + return sizeof(Data); + } + + size_t decode(const uint8_t *data_in, size_t bytes, T *data_out) { + util::check(bytes == sizeof(Data), "Not enough bytes in constant encoding"); + + const auto *state = reinterpret_cast(data_in); + auto *target = data_out; + auto *target_end = target + state->size_; + std::fill(target, target_end, state->value_); + return state->size_; + } +}; +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/delta.hpp b/cpp/arcticdb/codec/delta.hpp new file mode 100644 index 0000000000..0b80969dd7 --- /dev/null +++ b/cpp/arcticdb/codec/delta.hpp @@ -0,0 +1,37 @@ +/* 
Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#include +#include +#include + +#include + +namespace arcticdb { + +template +void rsum(const uint8_t *__restrict a_in_p, uint8_t* __restrict a_out_p, const uint8_t* __restrict a_base_p) { + auto out = reinterpret_cast(a_out_p); + const auto in = reinterpret_cast(a_in_p); + const auto base = reinterpret_cast(a_base_p); + + for (auto lane = 0U; lane < Helper::num_lanes; ++lane) { + uint8_t register_0; + uint8_t tmp; + tmp = base[lane]; + loop::num_bits>([lane, base, in, &tmp, &out, &register_0](auto j) { + register_0 = in[index(j, lane)]; + tmp = tmp + register_0; + out[index(j, lane)] = tmp; + }); + } +} + + + + +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/fastlanes_common.hpp b/cpp/arcticdb/codec/fastlanes_common.hpp new file mode 100644 index 0000000000..a11e4486fe --- /dev/null +++ b/cpp/arcticdb/codec/fastlanes_common.hpp @@ -0,0 +1,68 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ + +#pragma once + +#include +#include + +namespace arcticdb { +namespace detail { + +constexpr std::size_t FastLanesWidth = 1024; + +template +constexpr void loop(std::integer_sequence, F &&f) { + (f(std::integral_constant{}), ...); +} +} + +template +constexpr void loop(F &&f) { + detail::loop(std::make_integer_sequence{}, std::forward(f)); +} + +template +constexpr size_t type_bits() { + return sizeof(T) * std::numeric_limits::digits; +} + +template +struct Helper { + static constexpr size_t num_bits = type_bits(); + static constexpr size_t register_width = detail::FastLanesWidth; + static constexpr size_t num_lanes = register_width / num_bits; +}; + +static_assert(Helper::num_lanes == 16); +static_assert(Helper::num_lanes == 128); +static_assert(Helper::num_bits == 16); + +constexpr std::array FL_ORDER = { 0, 4, 2, 6, 1, 5, 3, 7 }; + +constexpr size_t transposed_index(size_t index) { + auto lane = index % 16; + auto order = (index / 16) % 8; + auto row = index / 128; + + return (lane * 64) + (FL_ORDER[order] * 8) + row; +} + +constexpr size_t index(size_t row, size_t lane) { + const auto o = row / 8; + const auto s = row % 8; + return (FL_ORDER[o] * 16) + (s * 128) + lane; +} + +static_assert(transposed_index(1) == 64); +static_assert(transposed_index(57) == 624); +static_assert(transposed_index(1022) == 959); + +static_assert(index(1, 0) == 128); +static_assert(transposed_index(57) == 624); +static_assert(transposed_index(1022) == 959); +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/ffor.hpp b/cpp/arcticdb/codec/ffor.hpp new file mode 100644 index 0000000000..e4272f388a --- /dev/null +++ b/cpp/arcticdb/codec/ffor.hpp @@ -0,0 +1,28 @@ +#include +#include +#include + +#include + +namespace arcticdb { + +template +void rsum(const uint8_t *__restrict a_in_p, uint8_t* __restrict a_out_p, const uint8_t* __restrict a_base_p) { + + auto out = reinterpret_cast(a_out_p); + const auto in = reinterpret_cast(a_in_p); + const auto 
base = reinterpret_cast(a_base_p); + + for (auto lane = 0U; lane < Helper::num_lanes; ++lane) { + uint8_t register_0; + uint8_t tmp; + tmp = base[lane]; + loop::num_bits>([lane, base, in, &tmp, &out, &register_0](auto j) { + register_0 = in[index(j, lane)]; + tmp = tmp + register_0; + out[index(j, lane)] = tmp; + }); + } +} + +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/frequency_encoding.hpp b/cpp/arcticdb/codec/frequency_encoding.hpp new file mode 100644 index 0000000000..6cb2d7aa0b --- /dev/null +++ b/cpp/arcticdb/codec/frequency_encoding.hpp @@ -0,0 +1,174 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace arcticdb { + +template +void batch_apply(T* data, size_t num_rows, F functor) { + if(num_rows == 0) + return; + + constexpr size_t CHUNK_SIZE = 64; + + auto loops = num_rows / CHUNK_SIZE; + for(auto i = 0UL; i < loops; ++i) { +#pragma clang loop vectorize(enable) interleave(enable) + for(size_t j = 0; j < CHUNK_SIZE; ++j) { + functor(data[i * CHUNK_SIZE + j]); + } + } + + auto remainder = num_rows - (loops * CHUNK_SIZE); + for(auto i = num_rows - remainder; i < num_rows; ++i) { + functor(data[i]); + } +} + +template +void fill(T* data, size_t num_rows, const T value) { + auto setter = [value](T& x) { x = value; }; + batch_apply(data, num_rows, setter); +} + +template +struct FrequencyEncoding { + static_assert(required_percentage != 0); + T value_; + uint32_t count_ = 0; + std::pair leader_; + std::optional bitset_; + std::optional expected_bytes_; + + struct Data { + T leader_; + uint32_t exceptions_; + uint32_t 
bitset_bytes_; + uint32_t num_rows_; + }; + + size_t bitset_max_bytes() { + bm::serializer::statistics_type stat{}; + bitset_->calc_stat(&stat); + ARCTICDB_DEBUG(log::version(), "Bitset predicted bytes: {}", stat.max_serialize_mem); + return stat.max_serialize_mem; + } + + void scan(const T* data_in, size_t num_rows) { + for(auto k = 0UL; k < num_rows; ++k) { + if (count_ == 0) { + ++count_; + value_ = data_in[k]; + } else { + if(value_ != data_in[k]) + --count_; + else + ++count_; + } + } + } + + size_t max_exceptions(size_t num_rows) const { + return num_rows * double(100 / required_percentage); + } + + void fill_bitset(const T* begin, size_t num_rows) { + const T* pos = begin; + const auto* end = begin + num_rows; + bitset_.emplace(util::BitSet(num_rows)); + util::BitSet::bulk_insert_iterator inserter(*bitset_); + do { + if (*pos != value_) { + inserter = std::distance(begin, pos); + } + ++pos; + } while (pos != end); + inserter.flush(); + } + + + std::optional max_required_bytes(const T *data_in, size_t num_rows) { + if (num_rows == 0) + return 0; + + fill_bitset(data_in, num_rows); + auto leader = num_rows - bitset_->count(); + auto percent = double(leader) / num_rows * 100; + if(percent > required_percentage) { + leader_.first = value_; + leader_.second = leader; + auto num_exceptions = num_rows - leader; + util::check(leader <= num_rows, "Count of leader {} cannot be more than num_rows {} in frequency encoding", count_, num_rows); + + expected_bytes_ = sizeof(Data) + (num_exceptions * sizeof(T)) + bitset_max_bytes(); + ARCTICDB_DEBUG(log::version(), "Frequency encoding max required bytes: {}", *expected_bytes_); + util::check(*expected_bytes_ != 0, "Frequency encoding expects non-zero output bytes"); + return expected_bytes_; + } else { + return std::nullopt; + } + } + + size_t encode(const T *data_in, size_t num_rows, uint8_t *data_out) { + if (num_rows == 0) + return 0; + + const auto *pos = data_in; + auto *target = data_out; + auto* data = 
reinterpret_cast(target); + const auto leader = leader_.first; + data->leader_ = leader; + data->exceptions_ = num_rows - leader_.second; + data->num_rows_ = num_rows; + target += sizeof(Data); + auto *exception_ptr = reinterpret_cast(target); + BitVisitorFunctor visitor{[&exception_ptr, pos] (util::BitSetSizeType offset, uint64_t) { + *exception_ptr++ = pos[offset]; + }}; + bm::for_each_bit(*bitset_, visitor); + + auto buffer = encode_bitmap(*bitset_); + ARCTICDB_DEBUG(log::version(), "Bitset actual bytes: {}", buffer.size()); + target += data->exceptions_ * sizeof(T); + memcpy(target, buffer.data(), buffer.size()); + data->bitset_bytes_ = buffer.size(); + target += buffer.size(); + ARCTICDB_DEBUG(log::version(), "Frequency encoding actual bytes: {}", target - data_out); + return target - data_out; + } + + size_t decode(const uint8_t *data_in, size_t bytes, T *data_out) { + auto* data = reinterpret_cast(data_in); + const auto exceptions_bytes = data->exceptions_ * sizeof(T); + util::check(sizeof(Data) + exceptions_bytes + data->bitset_bytes_ == bytes, "Size mismatch, expected {} + {} + {} = {}", sizeof(Data), exceptions_bytes, data->bitset_bytes_, bytes); + + const auto bitset_offset = sizeof(Data) + (data->exceptions_ * sizeof(T)); + auto bitmap_ptr = &data_in[bitset_offset]; + auto bitmap = util::deserialize_bytes_to_bitmap(bitmap_ptr, data->bitset_bytes_); + auto *target = reinterpret_cast(data_out); + const auto num_rows = data->num_rows_; + std::fill(target, target + num_rows, data->leader_); + //fill(target, num_rows, data->leader_); + auto* exceptions = reinterpret_cast(data_in + sizeof(Data)); + BitVisitorFunctor visitor{[target, exceptions] (util::BitSetSizeType offset, uint64_t rank) { + target[offset] = exceptions[rank]; + }}; + bm::for_each_bit(bitmap, visitor); + return num_rows; + } +}; +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/statistics.hpp b/cpp/arcticdb/codec/statistics.hpp new file mode 100644 index 0000000000..8eb2a37be4 
// [patch metadata] --- /dev/null +++ b/cpp/arcticdb/codec/statistics.hpp @@ -0,0 +1,174 @@
/* Copyright 2023 Man Group Operations Limited
 *
 * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt.
 *
 * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0.
 */
#pragma once

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <type_traits>
#include <utility>

#if defined(__AVX2__)
#include <immintrin.h>
#endif

#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h>
#endif

namespace arcticdb {

/// @brief Returns true when every one of the `num_rows` values equals the first one.
///
/// The bulk of the input is compared in fixed chunks of 64 values. Each chunk ORs
/// its per-element mismatches into one flag and branches once per chunk, which
/// keeps the inner loop branch-free. The tail that does not fill a chunk is
/// compared element by element.
///
/// @param data     Pointer to at least `num_rows` readable values.
/// @param num_rows Number of values to inspect; an empty input is constant.
template <typename T>
bool is_constant(const T* data, size_t num_rows) {
    if (num_rows == 0)
        return true;

    const T value = data[0];
    constexpr size_t CHUNK_SIZE = 64;

    const size_t loops = num_rows / CHUNK_SIZE;
    for (size_t i = 0; i < loops; ++i) {
        // Each outer iteration owns its own chunk. Indexing with `i + j` would
        // re-read overlapping windows and never reach most of the input.
        const T* chunk = data + i * CHUNK_SIZE;
        size_t chunk_mismatch = 0;
#pragma clang loop vectorize(enable) interleave(enable)
        for (size_t j = 0; j < CHUNK_SIZE; ++j) {
            chunk_mismatch |= (chunk[j] != value);
        }

        if (chunk_mismatch)
            return false;
    }

    for (size_t i = loops * CHUNK_SIZE; i < num_rows; ++i) {
        if (data[i] != value)
            return false;
    }

    return true;
}

/// @brief Same contract as is_constant(), using AVX2 for 64-bit integers when available.
///
/// The vector path compares four 64-bit lanes per iteration. It is compiled only
/// when the translation unit is built with AVX2 and is taken only for 8-byte
/// integral `T`. Every other case defers to is_constant().
///
/// @param data     Pointer to at least `num_rows` readable values.
/// @param num_rows Number of values to inspect; an empty input is constant.
template <typename T>
bool is_constant_simd(const T* data, size_t num_rows) {
    if (num_rows == 0)
        return true;

#if defined(__AVX2__)
    if constexpr (sizeof(T) == 8 && std::is_integral_v<T>) {
        const T value = *data;
        // Broadcast the reference value into all four lanes.
        const __m256i val_vec = _mm256_set1_epi64x(static_cast<long long>(value));

        // Largest multiple of four that fits. The loop bound is an element index,
        // not a vector count, so the whole vectorisable prefix is visited.
        const size_t vec_end = num_rows - (num_rows % 4);
        for (size_t i = 0; i < vec_end; i += 4) {
            const __m256i data_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(data + i));
            const __m256i cmp = _mm256_cmpeq_epi64(data_vec, val_vec);
            // One sign bit per 64-bit lane; 0xF means all four lanes matched.
            if (_mm256_movemask_pd(_mm256_castsi256_pd(cmp)) != 0xF)
                return false;
        }

        for (size_t i = vec_end; i < num_rows; ++i) {
            if (data[i] != value)
                return false;
        }
        return true;
    }
#endif
    return is_constant(data, num_rows);
}

/// @brief Zero-based position of the highest set bit of `t`, within the width of `T`.
///
/// Returns 0 for an input of 0, so 0 and 1 are indistinguishable to callers.
/// The value is reinterpreted as unsigned at its own width before widening, so
/// a negative value reports its sign-bit position and is not sign-extended.
template <typename T>
uint64_t leftmost_bit(T t) {
    if (t == 0)
        return 0;

    using Unsigned = std::make_unsigned_t<T>;
    const auto widened = static_cast<uint64_t>(static_cast<Unsigned>(t));
#if defined(_MSC_VER) && !defined(__clang__)
    unsigned long position = 0;
    _BitScanReverse64(&position, widened);
    return position;
#else
    // Always count in 64 bits. Subtracting a 32-bit clz from a narrower width
    // underflows for 8- and 16-bit types.
    return 63U - static_cast<uint64_t>(__builtin_clzll(widened));
#endif
}

/// @brief Highest set-bit position across a block of 1024 values.
///
/// Eight independent running maxima are kept, so consecutive elements do not
/// form one long dependency chain.
///
/// @param data Pointer to at least 1024 readable values.
template <typename T>
uint8_t msb(T* data) {
    constexpr size_t BLOCK_SIZE = 1024;
    constexpr size_t NUM_ACCUMULATORS = 8;

    // Value-initialised: the running maxima must start from zero.
    std::array<uint8_t, NUM_ACCUMULATORS> bits{};
    for (size_t i = 0; i < BLOCK_SIZE; i += NUM_ACCUMULATORS) {
        for (size_t k = 0; k < NUM_ACCUMULATORS; ++k) {
            bits[k] = std::max(bits[k], static_cast<uint8_t>(leftmost_bit(data[i + k])));
        }
    }

    return *std::max_element(bits.begin(), bits.end());
}

/// @brief Highest set-bit position across a block of 1024 values, single accumulator.
/// @param data Pointer to at least 1024 readable values.
template <typename T>
uint8_t msb_single(T* data) {
    constexpr size_t BLOCK_SIZE = 1024;

    uint8_t bit = 0;
    for (size_t i = 0; i < BLOCK_SIZE; ++i) {
        bit = std::max(bit, static_cast<uint8_t>(leftmost_bit(data[i])));
    }

    return bit;
}

/// @brief Highest set-bit position of the largest value (floored at 0) in a block of 1024.
/// @param data Pointer to at least 1024 readable values.
template <typename T>
uint8_t msb_max(T* data) {
    constexpr size_t BLOCK_SIZE = 1024;

    T max = 0;
    for (size_t i = 0; i < BLOCK_SIZE; ++i) {
        max = std::max(max, data[i]);
    }

    return static_cast<uint8_t>(leftmost_bit(max));
}

/// @brief Smallest and largest value in a block of 1024 values.
///
/// Both bounds are seeded from the first element. Seeding with 0 would pin the
/// minimum at 0 for unsigned data and give a wrong maximum for all-negative data.
///
/// NOTE(review): the exact type constraint of the original declaration is not
/// recoverable from the patch text; arithmetic types are accepted here - confirm.
///
/// @param data Pointer to at least 1024 readable values.
/// @return `{min, max}`.
template <typename T>
std::enable_if_t<std::is_arithmetic<T>::value, std::pair<T, T>> min_max(T* data) {
    constexpr size_t BLOCK_SIZE = 1024;

    T min = data[0];
    T max = data[0];
    for (size_t i = 1; i < BLOCK_SIZE; ++i) {
        min = std::min(min, data[i]);
        max = std::max(max, data[i]);
    }

    return {min, max};
}

/// @brief Combines two `{min, max}` ranges into the range that covers both.
template <typename T>
std::pair<T, T> min_max_pair(std::pair<T, T> left, std::pair<T, T> right) {
    return {std::min(left.first, right.first), std::max(left.second, right.second)};
}

} //namespace arcticdb
// [patch metadata] diff --git a/cpp/arcticdb/codec/test/encoding_test_common.hpp b/cpp/arcticdb/codec/test/encoding_test_common.hpp new file mode 100644 index 0000000000..fa02a9a3fa --- /dev/null +++
b/cpp/arcticdb/codec/test/encoding_test_common.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include +#include + +namespace arcticdb { + +template +std::vector random_numbers_with_runs(std::size_t count, uint64_t seed, size_t max_run_length=100) { + std::vector numbers; + numbers.reserve(count); + std::mt19937_64 rng(seed); + std::uniform_int_distribution rand_num_dist(0, std::numeric_limits::max()); + std::uniform_int_distribution run_length_dist(1, max_run_length); + + while (numbers.size() < count) { + uint64_t num = rand_num_dist(rng); + std::size_t run_length = run_length_dist(rng); + for (std::size_t i = 0; i < run_length && numbers.size() < count; ++i) { + numbers.push_back(num); + } + } + + return numbers; +} + +// https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/ +inline uint32_t reduce(uint32_t x, uint32_t N) { + return ((uint64_t)x * (uint64_t)N) >> 32 ; +} + +template +std::vector random_numbers_with_leader(size_t length, T leader, double percentage, unsigned int seed = 42) { + if (percentage < 0.0 || percentage > 1.0 || length <= 0) { + throw std::invalid_argument("Invalid length or percentage"); + } + + std::mt19937 gen(seed); + std::uniform_int_distribution<> dis(1, 100); + + std::vector vec(length); + auto num_leaders = static_cast(std::ceil(length * percentage)); + std::generate(vec.begin(), vec.end(), [&]() { return dis(gen); }); + std::fill_n(vec.begin(), num_leaders, leader); + // std::shuffle is very slow and we would mostly be swapping numbers that are the same. 
+ for(auto i = num_leaders; i < vec.size(); ++i) { + auto rnd = dis(gen); + auto pos = reduce(rnd, num_leaders); + std::swap(vec[i], vec[pos]); + } + return vec; +} + +} //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/rapidcheck_frequency_encoding.cpp b/cpp/arcticdb/codec/test/rapidcheck_frequency_encoding.cpp new file mode 100644 index 0000000000..081871a0ab --- /dev/null +++ b/cpp/arcticdb/codec/test/rapidcheck_frequency_encoding.cpp @@ -0,0 +1,97 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#include +#include +#include + +#include +#include + +#include + +// The first test checks that data will actually be encoded and +// decoded correctly, and the second test checks that we +// generally fail gracefully with unencodable data + +template +void test_encoder() { + using namespace arcticdb; + FrequencyEncoding encoder; + + rc::check("Encode and decode should be reversible", [&] { + int length = *rc::gen::inRange(1, 1000); + T leader = *rc::gen::arbitrary(); + uint64_t percentage = *rc::gen::inRange(91, 100); + double ratio = static_cast(percentage) / 100; + unsigned int seed = *rc::gen::arbitrary(); + + std::vector input = random_numbers_with_leader(length, leader, ratio, seed); + size_t num_rows = input.size(); + + auto max_bytes_opt = encoder.max_required_bytes(input.data(), num_rows); + + RC_ASSERT(max_bytes_opt.has_value()); + + size_t max_bytes = max_bytes_opt.value(); + std::vector encoded_data(max_bytes); + + size_t encoded_size = encoder.encode(input.data(), num_rows, encoded_data.data()); + + std::vector decoded_data(num_rows); + encoder.decode(encoded_data.data(), encoded_size, decoded_data.data()); + + RC_ASSERT(input 
== decoded_data); + }); +} + +RC_GTEST_PROP(FrequencyEncoding, ReversibleEncodingDecoding, ()) { + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); + test_encoder(); +} + +template +void test_encoder_random_data() { + using namespace arcticdb; + rc::check("frequency_encode random data", + [](const std::vector &input) { + FrequencyEncoding encoding; + auto required_bytes = encoding.max_required_bytes(input.data(), input.size()); + if (!required_bytes.has_value()) + RC_SUCCEED("No single value comprises more than 90% of the array"); + + std::vector encoded(*required_bytes); + size_t encoded_size = encoding.encode(input.data(), input.size(), encoded.data()); + + std::vector decoded(input.size()); + size_t decoded_size = encoding.decode(encoded.data(), encoded_size, decoded.data()); + + RC_ASSERT(decoded_size == input.size()); + RC_ASSERT(decoded == input); + }); +} + +RC_GTEST_PROP(FrequencyEncoding, GeneratedData, ()) { + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); + test_encoder_random_data(); +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_constant_encoding.cpp b/cpp/arcticdb/codec/test/test_constant_encoding.cpp new file mode 100644 index 0000000000..a0131902ef --- /dev/null +++ b/cpp/arcticdb/codec/test/test_constant_encoding.cpp @@ -0,0 +1,45 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ +#include +#include + +#include + +TEST(ConstantEncoding, Basic) { + using namespace arcticdb; + using InputType = uint32_t; + std::vector data(30); + std::fill(std::begin(data), std::end(data), 23); + ConstantEncoding encoding; + auto estimated_size = encoding.max_required_bytes(data.data(), data.size()); + ASSERT_EQ(estimated_size.has_value(), true); + std::vector output(*estimated_size); + + auto bytes = encoding.encode(data.data(), data.size(), output.data()); + ASSERT_EQ(bytes, 12); + std::vector decompressed(data.size()); + //(void)run_length_decode(output.data(), bytes, decompressed.data()); + auto num_rows = encoding.decode(output.data(), bytes, decompressed.data()); + ASSERT_EQ(num_rows, data.size()); + ASSERT_EQ(decompressed, data); +} + +TEST(ConstantEncoding, Scan) { + using namespace arcticdb; + using InputType = uint32_t; + std::vector data {1, 1, 1, 2, 3, 1, 2, 2, 2, 2, 5}; + + ConstantEncoding encoding; + auto estimated_size = encoding.max_required_bytes(data.data(), data.size()); + ASSERT_EQ(estimated_size.has_value(), false); + + data.clear(); + data.resize(42); + std::fill(std::begin(data), std::end(data), 23); + estimated_size = encoding.max_required_bytes(data.data(), data.size()); + ASSERT_EQ(estimated_size.has_value(), true); +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_ffor.cpp b/cpp/arcticdb/codec/test/test_ffor.cpp new file mode 100644 index 0000000000..744876bb64 --- /dev/null +++ b/cpp/arcticdb/codec/test/test_ffor.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +TEST(FFor, PrintIndex) { + using namespace arcticdb; + for(auto i = 0UL; i < Helper::num_lanes; ++i) { + for(auto j = 0UL; j < Helper::register_width; ++j) { + log::version().info("{}", index(j, i)); + } + } +} + +namespace arcticdb { + +template +struct FForCompress { + const T reference_; + + explicit FForCompress(T reference) : + reference_(reference) { + } + + ARCTICDB_ALWAYS_INLINE T 
operator()(const T t) { + return t - reference_; + } +}; + +template +struct FForUncompress { + const T reference_; + + FForUncompress(T reference) : + reference_(reference) { + } + + ARCTICDB_ALWAYS_INLINE T operator()(T value) { + return value + reference_; + } +}; + +} + +template +std::vector random_vector( + size_t size, + T min = std::numeric_limits::min(), + T max = std::numeric_limits::max()) { + const unsigned int seed = 12345; + std::mt19937 generator(seed); + std::uniform_int_distribution distribution(min, max); + + std::vector output(size); + std::generate(output.begin(), output.end(), [&]() { + return distribution(generator); + }); + + return output; +} + +TEST(FFor, SimpleRoundtrip) { + using namespace arcticdb; + auto data = random_vector(1024, 21UL, 1UL << 10); + auto compressed = std::vector(1024); + BitPackFused::go(data.data(), compressed.data(), [] (auto t) { return t + 20; }); + + std::vector uncompressed(1024); + + BitUnpackFused::go(compressed.data(), uncompressed.data(), FForUncompress{20UL}); + for(auto i = 0U; i < 1024; ++i) { + ASSERT_EQ(data[i], uncompressed[i]); + } +} + +TEST(FForStress, fused) { + using namespace arcticdb; + auto data = random_vector(1024 * 100, 21UL, 1UL << 10); + auto compressed = std::vector(1024 * 100); + + size_t num_runs = 1000000; + interval_timer timer; + timer.start_timer("pack"); + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) + //BitPackFused::go(data.data() + 1024 * i, compressed.data() + 176 * i, FForCompress{20UL}); + BitPackFused::go(data.data() + 1024 * i, compressed.data() + 176 * i, [] (auto t) { return t + 20; }); + } + timer.stop_timer("pack"); + std::vector uncompressed(1024 * 100); + timer.start_timer("unpack"); + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) + BitUnpackFused::go(compressed.data() + 176 * i, uncompressed.data() + 1024 * i, [] (auto t) { return t - 20; }); + } + + timer.stop_timer("unpack"); + log::version().info("\n{}", 
timer.display_all()); + for(auto i = 0; i < 100 * 1024; ++i) { + ASSERT_EQ(data[i], uncompressed[i]); + } +} + +TEST(FForStress, FusedWithScan) { + using namespace arcticdb; + auto data = random_vector(1024 * 100, 21UL, 1UL << 10); + auto compressed = std::vector(1024 * 100); + + auto result = std::pair(0, 0); + for (auto i = 0; i < 100; ++i) { + result = min_max_pair(result, min_max(data.data() + i * 1024)); + } + const auto min = result.first; + size_t num_runs = 1000000; + interval_timer timer; + timer.start_timer("pack"); + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) + //BitPackFused::go(data.data() + 1024 * i, compressed.data() + 176 * i, FForCompress{20UL}); + BitPackFused::go(data.data() + 1024 * i, compressed.data() + 176 * i, [min] (auto t) { return t + min; }); + } + timer.stop_timer("pack"); + std::vector uncompressed(1024 * 100); + timer.start_timer("unpack"); + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) + BitUnpackFused::go(compressed.data() + 176 * i, uncompressed.data() + 1024 * i, [min] (auto t) { return t - min; }); + } + + timer.stop_timer("unpack"); + log::version().info("\n{}", timer.display_all()); + for(auto i = 0; i < 100 * 1024; ++i) { + ASSERT_EQ(data[i], uncompressed[i]); + } +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_frequency_encoding.cpp b/cpp/arcticdb/codec/test/test_frequency_encoding.cpp new file mode 100644 index 0000000000..f8f4c7f642 --- /dev/null +++ b/cpp/arcticdb/codec/test/test_frequency_encoding.cpp @@ -0,0 +1,119 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ + +#include +#include +#include +#include + +#include + +TEST(FrequencyEncoding, Basic) { + using namespace arcticdb; + using InputType = uint32_t; + std::vector data {1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 5}; + FrequencyEncoding encoding; + encoding.scan(data.data(), data.size()); + auto estimated_size = encoding.max_required_bytes(data.data(), data.size()); + ASSERT_EQ(estimated_size.has_value(), true); + std::vector output(*estimated_size); + + auto bytes = encoding.encode(data.data(), data.size(), output.data()); + ASSERT_EQ(bytes, 49); + std::vector decompressed(data.size()); + //(void)run_length_decode(output.data(), bytes, decompressed.data()); + auto num_rows = encoding.decode(output.data(), bytes, decompressed.data()); + ASSERT_EQ(num_rows, data.size()); + ASSERT_EQ(decompressed, data); +} + +TEST(FrequencyEncoding, Scan) { + using namespace arcticdb; + using InputType = uint32_t; + std::vector data {1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 5, 1, 1, 1}; + + FrequencyEncoding encoding; + encoding.scan(data.data(), data.size()); + auto estimated_size = encoding.max_required_bytes(data.data(), data.size()); + ASSERT_EQ(estimated_size.has_value(), true); + ASSERT_EQ(*estimated_size, 9066); // TODO this value is too large +} + +TEST(FrequencyEncoding, Stress) { + using namespace arcticdb; + using InputType = uint32_t; + auto data = random_numbers_with_leader(100000, 23, 0.9); + FrequencyEncoding encoding; + interval_timer timer; + encoding.scan(data.data(), data.size()); + auto estimated_size = encoding.max_required_bytes(data.data(), data.size()); + ASSERT_EQ(estimated_size.has_value(), true); + std::vector output(*estimated_size); + timer.start_timer("Compress"); + size_t bytes; + const auto num_runs = 1000000UL; + for(auto i = 0UL; i < num_runs; ++i) { + bytes = encoding.encode(data.data(), data.size(), output.data()); + } + timer.stop_timer("Compress"); + ASSERT_LT(bytes, estimated_size.value()); + std::vector decompressed(data.size()); + timer.start_timer("Decompress"); 
+ for(auto i = 0UL; i < num_runs; ++i) { + (void) encoding.decode(output.data(), bytes, decompressed.data()); + } + timer.stop_timer("Decompress"); + log::version().info("{}", timer.display_all()); + ASSERT_EQ(decompressed, data); +} + + +TEST(FrequencyEncoding, StressFill) { + using namespace arcticdb; + using InputType = uint64_t; + std::vector data(100'000); + interval_timer timer; + FrequencyEncoding encoding; + timer.start_timer("Fill"); + const auto num_runs = 1000'000UL; + for(auto i = 0UL; i < num_runs; ++i) { + fill(data.data(), 100'000, 27UL); + } + timer.stop_timer("Fill"); + log::version().info("{}", timer.display_all()); +} + +TEST(FrequencyEncoding, StressScan) { + using namespace arcticdb; + using InputType = uint64_t; + auto data = random_numbers_with_leader(100000, 23, 0.9); + interval_timer timer; + FrequencyEncoding encoding; + timer.start_timer("Scan"); + const auto num_runs = 1000'000UL; + for(auto i = 0UL; i < num_runs; ++i) { + encoding.scan(data.data(), 100'000); + } + timer.stop_timer("Scan"); + log::version().info("{}", timer.display_all()); +} + +TEST(FrequencyEncoding, StressMaxRequired) { + using namespace arcticdb; + using InputType = uint64_t; + auto data = random_numbers_with_leader(100000, 23, 0.9); + interval_timer timer; + FrequencyEncoding encoding; + timer.start_timer("MaxRequired"); + const auto num_runs = 1000'000UL; + std::optional max_bytes; + for(auto i = 0UL; i < num_runs; ++i) { + max_bytes = encoding.max_required_bytes(data.data(), 100'000); + } + timer.stop_timer("MaxRequired"); + log::version().info("{}", timer.display_all()); +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_fused_bitpack.cpp b/cpp/arcticdb/codec/test/test_fused_bitpack.cpp new file mode 100644 index 0000000000..d018e5c86a --- /dev/null +++ b/cpp/arcticdb/codec/test/test_fused_bitpack.cpp @@ -0,0 +1,119 @@ +#include +#include + +static uint64_t rand_arr_70_b11_w64_arr[1024] = + {403UL, 1272UL, 863UL, 2026UL, 1646UL, 1274UL, 
1737UL, 1105UL, 939UL, 311UL, 292UL, 183UL, 938UL, 1939UL, 1185UL, + 1228UL, 1714UL, 1908UL, 1155UL, 219UL, 1642UL, 1067UL, 1333UL, 70UL, 306UL, 470UL, 843UL, 1328UL, 97UL, 1135UL, + 1920UL, 800UL, 502UL, 1619UL, 190UL, 1270UL, 222UL, 1367UL, 1431UL, 632UL, 182UL, 245UL, 995UL, 1964UL, 1790UL, + 1265UL, 1998UL, 1217UL, 1444UL, 1323UL, 940UL, 881UL, 650UL, 1030UL, 442UL, 621UL, 1251UL, 654UL, 574UL, 1200UL, + 174UL, 774UL, 282UL, 1746UL, 1823UL, 1513UL, 521UL, 805UL, 1063UL, 225UL, 1970UL, 590UL, 1150UL, 1748UL, 1347UL, + 1192UL, 864UL, 1212UL, 1445UL, 262UL, 1484UL, 1589UL, 1739UL, 440UL, 1219UL, 1556UL, 1771UL, 202UL, 1UL, 1311UL, + 1377UL, 695UL, 1076UL, 1575UL, 836UL, 251UL, 1766UL, 637UL, 1919UL, 1503UL, 510UL, 218UL, 94UL, 1066UL, 671UL, + 141UL, 593UL, 1987UL, 1786UL, 962UL, 298UL, 1742UL, 579UL, 1823UL, 1868UL, 1631UL, 751UL, 378UL, 1768UL, 401UL, + 1211UL, 637UL, 330UL, 395UL, 1840UL, 190UL, 336UL, 734UL, 1848UL, 399UL, 1849UL, 2019UL, 977UL, 398UL, 747UL, + 1753UL, 856UL, 981UL, 2015UL, 1263UL, 1077UL, 8UL, 1895UL, 579UL, 2041UL, 13UL, 1850UL, 352UL, 1963UL, 1761UL, + 555UL, 872UL, 825UL, 720UL, 1442UL, 1922UL, 1924UL, 816UL, 1903UL, 244UL, 745UL, 1007UL, 358UL, 25UL, 1359UL, + 1176UL, 778UL, 1381UL, 2027UL, 294UL, 1525UL, 49UL, 1351UL, 1460UL, 959UL, 802UL, 1370UL, 679UL, 302UL, 31UL, + 1907UL, 945UL, 1991UL, 357UL, 220UL, 1483UL, 952UL, 431UL, 981UL, 1407UL, 16UL, 1286UL, 725UL, 1872UL, 790UL, + 1635UL, 440UL, 245UL, 403UL, 950UL, 72UL, 807UL, 1329UL, 2003UL, 1363UL, 403UL, 355UL, 172UL, 171UL, 1509UL, 837UL, + 249UL, 1535UL, 1653UL, 1425UL, 1404UL, 597UL, 309UL, 1433UL, 1370UL, 1934UL, 1681UL, 956UL, 2009UL, 544UL, 220UL, + 408UL, 1930UL, 120UL, 1547UL, 602UL, 229UL, 1931UL, 1638UL, 441UL, 990UL, 881UL, 1752UL, 989UL, 764UL, 620UL, + 1163UL, 497UL, 425UL, 366UL, 750UL, 1753UL, 1970UL, 1805UL, 729UL, 1029UL, 1694UL, 327UL, 1194UL, 1130UL, 1557UL, + 1352UL, 1203UL, 865UL, 420UL, 487UL, 1387UL, 1751UL, 474UL, 1730UL, 1002UL, 1121UL, 1537UL, 1141UL, 1391UL, 
1449UL, + 387UL, 1972UL, 596UL, 1351UL, 1462UL, 72UL, 1602UL, 777UL, 960UL, 365UL, 866UL, 1202UL, 524UL, 1972UL, 868UL, + 106UL, 2036UL, 1011UL, 1228UL, 1607UL, 1027UL, 966UL, 1992UL, 1874UL, 1521UL, 2016UL, 1959UL, 488UL, 829UL, 1358UL, + 1398UL, 56UL, 1261UL, 777UL, 1880UL, 84UL, 295UL, 378UL, 715UL, 1471UL, 745UL, 1704UL, 571UL, 627UL, 1211UL, 904UL, + 1101UL, 1708UL, 634UL, 1346UL, 1709UL, 493UL, 612UL, 1605UL, 1621UL, 1404UL, 1810UL, 533UL, 1907UL, 1502UL, 1903UL, + 1124UL, 860UL, 1199UL, 886UL, 1686UL, 463UL, 1785UL, 1308UL, 1369UL, 767UL, 1388UL, 1950UL, 788UL, 388UL, 1502UL, + 1451UL, 393UL, 1825UL, 2022UL, 384UL, 1543UL, 897UL, 552UL, 1670UL, 1033UL, 1260UL, 612UL, 58UL, 160UL, 1347UL, + 385UL, 904UL, 888UL, 389UL, 1341UL, 134UL, 766UL, 1718UL, 1946UL, 233UL, 1587UL, 1096UL, 1991UL, 1314UL, 1009UL, + 514UL, 953UL, 1952UL, 236UL, 992UL, 1164UL, 123UL, 1802UL, 717UL, 1885UL, 1933UL, 914UL, 1962UL, 1945UL, 1537UL, + 508UL, 1433UL, 924UL, 1381UL, 1019UL, 1166UL, 1505UL, 1732UL, 1513UL, 1278UL, 593UL, 352UL, 271UL, 1528UL, 1885UL, + 545UL, 668UL, 1319UL, 1942UL, 1530UL, 1617UL, 530UL, 1905UL, 109UL, 1122UL, 719UL, 1370UL, 1208UL, 1079UL, 384UL, + 1545UL, 1258UL, 39UL, 1502UL, 1622UL, 1002UL, 2034UL, 1352UL, 1573UL, 885UL, 352UL, 1797UL, 1430UL, 476UL, 752UL, + 1641UL, 138UL, 1818UL, 176UL, 730UL, 945UL, 1312UL, 288UL, 306UL, 881UL, 1586UL, 1251UL, 1075UL, 605UL, 298UL, + 713UL, 1308UL, 267UL, 272UL, 407UL, 30UL, 1810UL, 1202UL, 1364UL, 1650UL, 1165UL, 1098UL, 235UL, 133UL, 789UL, + 1639UL, 897UL, 541UL, 282UL, 1136UL, 564UL, 186UL, 1777UL, 1843UL, 230UL, 1292UL, 1171UL, 1899UL, 4UL, 750UL, + 1276UL, 1720UL, 1615UL, 529UL, 296UL, 1106UL, 1722UL, 1355UL, 781UL, 1302UL, 1616UL, 1022UL, 850UL, 1319UL, 1054UL, + 1724UL, 1833UL, 1939UL, 639UL, 2030UL, 227UL, 261UL, 1342UL, 99UL, 1673UL, 1040UL, 241UL, 1668UL, 2033UL, 154UL, + 518UL, 1291UL, 1379UL, 966UL, 711UL, 1364UL, 1484UL, 975UL, 840UL, 812UL, 499UL, 143UL, 424UL, 1703UL, 1433UL, + 1384UL, 856UL, 1117UL, 632UL, 
53UL, 523UL, 1398UL, 350UL, 1887UL, 1956UL, 532UL, 1189UL, 1874UL, 451UL, 1293UL, + 1587UL, 1798UL, 1832UL, 673UL, 808UL, 1495UL, 998UL, 704UL, 1130UL, 1586UL, 1450UL, 1399UL, 1263UL, 1288UL, 1109UL, + 909UL, 1813UL, 285UL, 893UL, 300UL, 221UL, 1683UL, 1305UL, 1801UL, 38UL, 1449UL, 1627UL, 1150UL, 1564UL, 1334UL, + 7UL, 1935UL, 491UL, 1462UL, 590UL, 1448UL, 1323UL, 1856UL, 1242UL, 558UL, 1499UL, 306UL, 984UL, 1558UL, 1268UL, + 350UL, 376UL, 1539UL, 1449UL, 1052UL, 617UL, 1537UL, 21UL, 1322UL, 1582UL, 957UL, 1383UL, 791UL, 575UL, 181UL, + 1053UL, 1837UL, 607UL, 734UL, 589UL, 826UL, 128UL, 1759UL, 1425UL, 71UL, 1924UL, 607UL, 1486UL, 1269UL, 391UL, + 1073UL, 647UL, 56UL, 776UL, 552UL, 439UL, 864UL, 984UL, 1272UL, 1546UL, 1102UL, 496UL, 648UL, 1315UL, 1182UL, + 720UL, 372UL, 1559UL, 707UL, 468UL, 1885UL, 1182UL, 1649UL, 1153UL, 2038UL, 1570UL, 1954UL, 1539UL, 21UL, 76UL, + 1395UL, 250UL, 1008UL, 977UL, 1234UL, 1832UL, 1890UL, 1020UL, 591UL, 2016UL, 106UL, 702UL, 1204UL, 2003UL, 1161UL, + 357UL, 1242UL, 578UL, 1406UL, 902UL, 788UL, 630UL, 1621UL, 1708UL, 1810UL, 214UL, 132UL, 1856UL, 1702UL, 98UL, + 1121UL, 1219UL, 161UL, 262UL, 752UL, 1550UL, 573UL, 885UL, 319UL, 466UL, 97UL, 468UL, 2026UL, 1512UL, 1518UL, + 518UL, 219UL, 556UL, 1396UL, 1751UL, 603UL, 1622UL, 1453UL, 1233UL, 769UL, 241UL, 346UL, 251UL, 1248UL, 1206UL, + 981UL, 1562UL, 912UL, 1267UL, 87UL, 866UL, 1364UL, 1940UL, 1019UL, 61UL, 1662UL, 326UL, 1633UL, 1451UL, 569UL, + 973UL, 746UL, 527UL, 1227UL, 1122UL, 6UL, 773UL, 739UL, 1002UL, 2029UL, 1521UL, 1941UL, 604UL, 688UL, 129UL, 615UL, + 507UL, 537UL, 817UL, 595UL, 1637UL, 1535UL, 1856UL, 1056UL, 1317UL, 1047UL, 137UL, 402UL, 358UL, 1270UL, 787UL, + 169UL, 517UL, 767UL, 907UL, 539UL, 576UL, 1959UL, 1890UL, 1828UL, 1714UL, 250UL, 1420UL, 1013UL, 1067UL, 1012UL, + 1539UL, 966UL, 1170UL, 1336UL, 505UL, 245UL, 1266UL, 1280UL, 1739UL, 1673UL, 671UL, 1533UL, 1140UL, 500UL, 605UL, + 135UL, 516UL, 699UL, 1161UL, 160UL, 502UL, 759UL, 1414UL, 236UL, 767UL, 638UL, 
807UL, 1537UL, 1683UL, 1516UL, + 1538UL, 1926UL, 1696UL, 1508UL, 1434UL, 568UL, 459UL, 1527UL, 86UL, 182UL, 1280UL, 34UL, 1703UL, 1557UL, 62UL, + 361UL, 584UL, 430UL, 1552UL, 1657UL, 1525UL, 763UL, 2033UL, 1590UL, 1930UL, 762UL, 920UL, 51UL, 1885UL, 727UL, + 2037UL, 124UL, 1323UL, 1677UL, 1583UL, 1283UL, 1443UL, 1741UL, 22UL, 1412UL, 223UL, 1912UL, 1124UL, 814UL, 822UL, + 924UL, 938UL, 685UL, 1365UL, 894UL, 366UL, 956UL, 1535UL, 776UL, 902UL, 1612UL, 1196UL, 1288UL, 1055UL, 1458UL, + 1175UL, 1755UL, 749UL, 760UL, 749UL, 79UL, 1090UL, 975UL, 641UL, 1457UL, 809UL, 1570UL, 1965UL, 754UL, 29UL, 637UL, + 1146UL, 511UL, 1747UL, 166UL, 1904UL, 562UL, 1077UL, 1047UL, 1160UL, 328UL, 1336UL, 243UL, 767UL, 739UL, 161UL, + 1673UL, 1179UL, 205UL, 1417UL, 1214UL, 81UL, 991UL, 463UL, 250UL, 1750UL, 2005UL, 597UL, 527UL, 1850UL, 1079UL, + 983UL, 134UL, 1006UL, 1197UL, 1491UL, 1937UL, 811UL, 629UL, 878UL, 428UL, 596UL, 43UL, 450UL, 1454UL, 1582UL, + 1847UL, 724UL, 210UL, 197UL, 1257UL, 1231UL, 985UL, 602UL, 210UL, 434UL, 1428UL, 46UL, 1187UL, 1420UL, 1354UL, + 257UL, 1525UL, 659UL, 1001UL, 1993UL, 970UL, 1423UL, 102UL, 326UL, 1221UL, 1345UL, 505UL, 1381UL, 765UL, 916UL, + 1499UL, 765UL, 1810UL, 1632UL, 456UL, 117UL, 113UL, 437UL, 1910UL, 1311UL, 513UL, 945UL, 716UL, 620UL, 1673UL, + 502UL, 213UL, 1008UL, 478UL, 845UL, 1487UL, 229UL, 1683UL, 355UL, 564UL, 655UL, 68UL, 1428UL, 276UL, 65UL, 1396UL, + 872UL, 1332UL, 1966UL, 1443UL, 2001UL, 642UL, 1762UL, 561UL, 1305UL, 495UL, 412UL, 1334UL, 1834UL, 154UL, 479UL, + 1297UL, 1482UL, 1047UL, 469UL, 743UL, 1819UL, 942UL, 631UL, 670UL, 2042UL, 728UL, 1279UL, 325UL, 1997UL, 1281UL, + 2018UL, 1476UL, 866UL, 1210UL, 1358UL, 851UL, 826UL, 632UL, 1573UL, 1401UL, 1891UL, 297UL, 1015UL, 1743UL,}; + + +struct CompressIdentity{ + uint64_t operator()(const uint64_t t) { return t; } +}; + +struct UncompressIdentity{ + uint64_t operator()(uint64_t value) { return value; } +}; + +TEST(BitPackFused, Roundtrip64to11) { + using namespace arcticdb; + auto 
*base64 = new uint8_t[1](); + *base64 = 0; + std::vector local_packed64(1024); + struct CompressIdentity{ + uint64_t operator()(const uint64_t t) { return t; } + }; + + BitPackFused::go(rand_arr_70_b11_w64_arr, local_packed64.data(), CompressIdentity{}); + std::vector local_unpacked64(1024); + + struct UncompressIdentity{ + uint64_t operator()(uint64_t value) { return value; } + }; + + BitUnpackFused::go(local_packed64.data(), local_unpacked64.data(), UncompressIdentity{}); + for(auto i = 0U; i < 1024; ++i) { + ASSERT_EQ(rand_arr_70_b11_w64_arr[i], local_unpacked64[i]); + } +} + +uint8_t rand_arr_3_b3_w8_arr[1024] = + {2UL,4UL,2UL,5UL,4UL,2UL,2UL,7UL,7UL,4UL,5UL,0UL,0UL,2UL,7UL,6UL,2UL,5UL,7UL,5UL,2UL,1UL,1UL,3UL,5UL,5UL,0UL,3UL,7UL,0UL,0UL,7UL,3UL,6UL,6UL,0UL,3UL,4UL,3UL,6UL,3UL,1UL,6UL,0UL,6UL,2UL,2UL,5UL,2UL,3UL,3UL,5UL,0UL,1UL,5UL,3UL,3UL,6UL,6UL,5UL,7UL,0UL,6UL,1UL,0UL,0UL,2UL,7UL,4UL,5UL,2UL,6UL,4UL,7UL,4UL,3UL,6UL,6UL,0UL,7UL,2UL,4UL,5UL,4UL,3UL,7UL,4UL,0UL,0UL,3UL,4UL,3UL,1UL,4UL,6UL,3UL,7UL,0UL,7UL,7UL,5UL,7UL,4UL,5UL,3UL,6UL,4UL,2UL,6UL,7UL,7UL,6UL,7UL,1UL,1UL,7UL,6UL,0UL,6UL,3UL,6UL,6UL,5UL,6UL,3UL,1UL,4UL,7UL,1UL,1UL,2UL,3UL,3UL,3UL,0UL,4UL,0UL,3UL,6UL,5UL,6UL,2UL,6UL,2UL,2UL,1UL,3UL,7UL,6UL,7UL,1UL,0UL,0UL,6UL,1UL,3UL,4UL,3UL,3UL,7UL,3UL,5UL,4UL,3UL,5UL,3UL,4UL,3UL,0UL,0UL,3UL,3UL,5UL,6UL,1UL,5UL,0UL,6UL,4UL,0UL,6UL,3UL,3UL,2UL,7UL,6UL,1UL,1UL,4UL,4UL,7UL,7UL,5UL,4UL,1UL,7UL,0UL,7UL,1UL,2UL,3UL,4UL,6UL,7UL,2UL,7UL,0UL,1UL,1UL,3UL,3UL,1UL,7UL,6UL,4UL,1UL,2UL,6UL,6UL,4UL,1UL,5UL,7UL,0UL,7UL,7UL,1UL,3UL,1UL,5UL,7UL,0UL,7UL,0UL,1UL,0UL,7UL,4UL,5UL,6UL,3UL,7UL,6UL,7UL,0UL,6UL,6UL,0UL,5UL,1UL,7UL,5UL,0UL,6UL,2UL,4UL,6UL,5UL,3UL,5UL,1UL,1UL,7UL,2UL,4UL,1UL,0UL,6UL,4UL,6UL,1UL,2UL,3UL,3UL,7UL,2UL,6UL,4UL,2UL,3UL,4UL,1UL,3UL,7UL,0UL,4UL,0UL,4UL,3UL,2UL,2UL,0UL,3UL,1UL,1UL,2UL,6UL,3UL,4UL,3UL,5UL,0UL,7UL,6UL,3UL,2UL,5UL,3UL,0UL,4UL,2UL,6UL,4UL,0UL,2UL,2UL,4UL,0UL,6UL,6UL,4UL,2UL,0UL,3UL,7UL,2UL,2UL,0UL,4UL,4UL,6UL,1UL,0UL,0UL,7UL,3UL,1UL,7UL,4UL,1UL,7UL,1UL,0UL,1
UL,6UL,2UL,1UL,1UL,6UL,6UL,3UL,5UL,0UL,0UL,3UL,7UL,5UL,6UL,5UL,5UL,3UL,2UL,3UL,5UL,6UL,0UL,4UL,5UL,2UL,7UL,3UL,0UL,7UL,5UL,2UL,2UL,1UL,7UL,3UL,6UL,1UL,7UL,1UL,5UL,2UL,0UL,6UL,2UL,3UL,0UL,6UL,3UL,3UL,5UL,5UL,5UL,1UL,5UL,0UL,3UL,7UL,7UL,6UL,2UL,4UL,1UL,6UL,0UL,6UL,0UL,0UL,0UL,4UL,2UL,4UL,1UL,1UL,7UL,1UL,6UL,1UL,2UL,2UL,3UL,1UL,1UL,5UL,7UL,6UL,7UL,1UL,1UL,2UL,5UL,5UL,5UL,6UL,5UL,6UL,4UL,5UL,3UL,5UL,5UL,2UL,7UL,2UL,6UL,1UL,1UL,0UL,2UL,5UL,5UL,4UL,7UL,6UL,1UL,7UL,3UL,6UL,0UL,6UL,7UL,6UL,4UL,1UL,6UL,7UL,2UL,6UL,1UL,5UL,2UL,0UL,0UL,7UL,5UL,1UL,6UL,6UL,3UL,6UL,4UL,4UL,0UL,2UL,0UL,3UL,1UL,5UL,4UL,6UL,1UL,3UL,6UL,7UL,6UL,6UL,2UL,0UL,4UL,0UL,4UL,3UL,3UL,7UL,4UL,0UL,5UL,0UL,0UL,5UL,2UL,4UL,4UL,6UL,2UL,0UL,2UL,1UL,2UL,1UL,7UL,2UL,6UL,1UL,7UL,2UL,7UL,4UL,0UL,7UL,2UL,6UL,2UL,0UL,4UL,4UL,5UL,3UL,4UL,6UL,7UL,0UL,3UL,3UL,1UL,0UL,3UL,5UL,7UL,5UL,6UL,7UL,5UL,5UL,2UL,2UL,4UL,6UL,3UL,7UL,0UL,0UL,0UL,6UL,7UL,5UL,6UL,5UL,0UL,7UL,3UL,6UL,2UL,1UL,7UL,1UL,5UL,5UL,7UL,6UL,2UL,0UL,4UL,4UL,3UL,2UL,2UL,3UL,4UL,5UL,3UL,1UL,0UL,4UL,6UL,2UL,6UL,1UL,7UL,1UL,4UL,1UL,5UL,5UL,3UL,5UL,7UL,7UL,0UL,4UL,3UL,7UL,1UL,1UL,1UL,7UL,2UL,5UL,1UL,3UL,5UL,0UL,6UL,3UL,1UL,3UL,0UL,2UL,2UL,6UL,6UL,6UL,2UL,5UL,6UL,7UL,2UL,2UL,1UL,3UL,5UL,3UL,1UL,5UL,1UL,3UL,4UL,3UL,5UL,2UL,7UL,0UL,1UL,5UL,0UL,7UL,0UL,1UL,2UL,6UL,6UL,6UL,0UL,6UL,0UL,4UL,4UL,4UL,0UL,4UL,3UL,2UL,3UL,5UL,5UL,0UL,1UL,1UL,0UL,1UL,2UL,1UL,3UL,4UL,6UL,4UL,7UL,0UL,0UL,0UL,5UL,6UL,5UL,0UL,7UL,2UL,1UL,7UL,7UL,2UL,5UL,7UL,0UL,7UL,2UL,7UL,7UL,1UL,3UL,4UL,5UL,0UL,5UL,5UL,4UL,3UL,0UL,1UL,5UL,1UL,3UL,5UL,3UL,6UL,4UL,0UL,7UL,1UL,6UL,5UL,1UL,2UL,6UL,7UL,2UL,7UL,4UL,3UL,0UL,6UL,3UL,6UL,4UL,2UL,0UL,0UL,3UL,2UL,0UL,7UL,7UL,1UL,5UL,6UL,1UL,5UL,7UL,2UL,4UL,1UL,3UL,3UL,3UL,0UL,2UL,5UL,4UL,3UL,5UL,5UL,6UL,5UL,4UL,5UL,0UL,5UL,4UL,1UL,2UL,5UL,6UL,5UL,4UL,1UL,6UL,4UL,6UL,0UL,0UL,2UL,6UL,5UL,7UL,4UL,1UL,0UL,1UL,2UL,3UL,1UL,0UL,2UL,5UL,5UL,7UL,6UL,1UL,2UL,2UL,2UL,4UL,2UL,2UL,5UL,5UL,0UL,6UL,2UL,3UL,6UL,1UL,1UL,2UL,6UL,7UL,5UL,2UL,4UL,1UL,3UL,1UL,1UL,4UL,2UL,2UL,5UL,4UL,2UL,0UL,7UL,0
UL,4UL,1UL,0UL,4UL,0UL,3UL,1UL,4UL,1UL,1UL,3UL,2UL,5UL,5UL,6UL,5UL,2UL,2UL,7UL,7UL,6UL,7UL,2UL,7UL,1UL,1UL,3UL,0UL,3UL,3UL,7UL,2UL,1UL,4UL,3UL,4UL,3UL,0UL,6UL,5UL,0UL,3UL,0UL,6UL,0UL,0UL,0UL,4UL,4UL,5UL,0UL,6UL,3UL,4UL,3UL,6UL,4UL,2UL,7UL,7UL,1UL,1UL,4UL,2UL,0UL,0UL,1UL,7UL,3UL,6UL,4UL,5UL,0UL,2UL,2UL,3UL,5UL,6UL,0UL,5UL,5UL,4UL,2UL,0UL,0UL,0UL,4UL,4UL,1UL,6UL,7UL,6UL,4UL,4UL,3UL,7UL,3UL,3UL,6UL,2UL,1UL,6UL,6UL,1UL,0UL,4UL,3UL,1UL,6UL,1UL,7UL,0UL,5UL,1UL,3UL,3UL,7UL,3UL,0UL,2UL,2UL,5UL,4UL,4UL,3UL,7UL,0UL,7UL,7UL,0UL,2UL,0UL,1UL,3UL,7UL,7UL,2UL,2UL,1UL,6UL,2UL,1UL,0UL,4UL,5UL,1UL,2UL,6UL,5UL,6UL,6UL,4UL,0UL,4UL,3UL,3UL,7UL,3UL,7UL,0UL,7UL,7UL,3UL,5UL,0UL,1UL,6UL,3UL,3UL,7UL,2UL,5UL,0UL,3UL,1UL,5UL,2UL,4UL,3UL,3UL,}; + +TEST(BitPack, Roundtrip8to3) { + using namespace arcticdb; + auto *base64 = new uint8_t[1](); + *base64 = 0; + std::vector local_packed8(1024); + BitPackFused::go(rand_arr_3_b3_w8_arr, local_packed8.data(), CompressIdentity{}); + std::vector local_unpacked8(1024); + BitUnpackFused::go(local_packed8.data(), local_unpacked8.data(), UncompressIdentity{}); + for(auto i = 0U; i < 1024; ++i) { + ASSERT_EQ(rand_arr_3_b3_w8_arr[i], local_unpacked8[i]); + } +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_stats.cpp b/cpp/arcticdb/codec/test/test_stats.cpp new file mode 100644 index 0000000000..2f2a226eae --- /dev/null +++ b/cpp/arcticdb/codec/test/test_stats.cpp @@ -0,0 +1,123 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ +#include +#include + +#include +#include + +#include +#include + +template +std::vector random_vector( + size_t size, + T min = std::numeric_limits::min(), + T max = std::numeric_limits::max()) { + const unsigned int seed = 12345; + std::mt19937 generator(seed); + std::uniform_int_distribution distribution(min, max); + + std::vector output(size); + std::generate(output.begin(), output.end(), [&]() { + return distribution(generator); + }); + + return output; +} + +TEST(ConstantStats, Stress) { + using namespace arcticdb; + std::random_device rd; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution size_dist(0, 100); + std::vector vec(100'000 + size_dist(gen), 42); + interval_timer timer; + timer.start_timer("ConstantScan"); + const auto num_runs = 1000000UL; + bool constant; + for(auto i = 0UL; i < num_runs; ++i) { + constant = is_constant(vec.data(), vec.size()); + } + timer.stop_timer("Compress"); + ASSERT_EQ(constant, true); + log::version().info("{}", timer.display_all()); +} + +TEST(LeftmostBit, Simple) { + using namespace arcticdb; + std::vector vec(1024); + std::iota(std::begin(vec), std::end(vec), 0); + auto result = msb(vec.data()); + ASSERT_EQ(result, 9); +} + +TEST(LeftmostBit, Stress) { + using namespace arcticdb; + auto data = random_vector(1024 * 100, 21UL, 1UL << 10); + auto compressed = std::vector(1024 * 100); + + size_t num_runs = 1000000; + + interval_timer timer; + timer.start_timer("Scan"); + uint8_t result = 0; + auto count = 0; + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) { + ++count; + result = std::max(result, msb_max(data.data() + i * 1024)); + } + } + ASSERT_EQ(count, 100000000); + timer.stop_timer("Scan"); + log::version().info("{}\n{}", result, timer.display_all()); +} + +TEST(Max, Stress) { + using namespace arcticdb; + auto data = random_vector(1024 * 100, 21UL, 1UL << 10); + auto compressed = std::vector(1024 * 100); + + size_t num_runs = 1000000; + + interval_timer timer; + 
timer.start_timer("Scan"); + uint8_t result = 0; + auto count = 0; + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) { + ++count; + result = std::max(result, msb_max(data.data() + i * 1024)); + } + } + ASSERT_EQ(count, 100000000); + timer.stop_timer("Scan"); + log::version().info("{}\n{}", result, timer.display_all()); +} + +TEST(MinMax, Stress) { + using namespace arcticdb; + auto data = random_vector(1024 * 100, 21UL, 1UL << 10); + auto compressed = std::vector(1024 * 100); + + size_t num_runs = 1000000; + + interval_timer timer; + timer.start_timer("Scan"); + auto result = std::pair(0, 0); + auto count = 0; + for(auto k = 0UL; k < num_runs; ++k) { + for (auto i = 0; i < 100; ++i) { + ++count; + result = min_max_pair(result, min_max(data.data() + i * 1024)); + } + } + ASSERT_EQ(count, 100000000); + timer.stop_timer("Scan"); + log::version().info("{} - {}\n{}", result.first, result.second, timer.display_all()); +} \ No newline at end of file diff --git a/cpp/arcticdb/column_store/test/test_index_filtering.cpp b/cpp/arcticdb/column_store/test/test_index_filtering.cpp index 4dc1e5bf16..8e51d3f087 100644 --- a/cpp/arcticdb/column_store/test/test_index_filtering.cpp +++ b/cpp/arcticdb/column_store/test/test_index_filtering.cpp @@ -8,12 +8,10 @@ #include #include -#include #include #include #include #include -#include namespace arcticdb { using namespace arcticdb::pipelines; diff --git a/cpp/arcticdb/entity/metrics.cpp b/cpp/arcticdb/entity/metrics.cpp index 94ecdb8918..0fd669448c 100644 --- a/cpp/arcticdb/entity/metrics.cpp +++ b/cpp/arcticdb/entity/metrics.cpp @@ -69,7 +69,7 @@ namespace arcticdb { arcticdb::log::version().info("Prometheus Push created with settings {}", cfg_); } else if (cfg_.model_ == MetricsConfig::Model::PULL) { - +/* // create an http server ie "http://hostname:"+port()+"/metrics" std::string endpoint = cfg_.host + ":" + cfg_.port; @@ -88,6 +88,7 @@ namespace arcticdb { exposer_->RegisterCollectable(registry_, 
"/metrics"); arcticdb::log::version().info("Prometheus endpoint created on {}/metrics", endpoint); + */ } else { arcticdb::log::version().info("Prometheus not configured {}", cfg_); diff --git a/cpp/arcticdb/pipeline/frame_slice.hpp b/cpp/arcticdb/pipeline/frame_slice.hpp index 51f21c1892..bc3f954429 100644 --- a/cpp/arcticdb/pipeline/frame_slice.hpp +++ b/cpp/arcticdb/pipeline/frame_slice.hpp @@ -41,7 +41,6 @@ struct AxisRange : std::pair { template std::enable_if_t>, std::size_t> operator()(const T &r) const { - // try to make better use of msb lsb given how F14 is implemented #ifdef _WIN32 return r.first ^ _byteswap_uint64(r.second); #else diff --git a/cpp/arcticdb/pipeline/read_pipeline.hpp b/cpp/arcticdb/pipeline/read_pipeline.hpp index 00ed5c207a..d29c53a10a 100644 --- a/cpp/arcticdb/pipeline/read_pipeline.hpp +++ b/cpp/arcticdb/pipeline/read_pipeline.hpp @@ -84,7 +84,7 @@ std::vector filter_index(const ContainerType &container, std::optio return output; } -inline util::BitSet build_column_bitset(const StreamDescriptor& desc, const folly::F14FastSet& columns) { +inline util::BitSet build_column_bitset(const StreamDescriptor& desc, const ankerl::unordered_dense::set& columns) { util::BitSet col_bitset(static_cast(desc.fields().size())); for (std::size_t c = 0; c < static_cast(desc.fields().size()); ++c) { auto& f = desc.fields(static_cast(c)); @@ -96,7 +96,7 @@ inline util::BitSet build_column_bitset(const StreamDescriptor& desc, const foll } inline util::BitSet build_column_bitset(const StreamDescriptor& desc, const std::vector& columns) { - folly::F14FastSet col_set{columns.begin(), columns.end()}; + ankerl::unordered_dense::set col_set{columns.begin(), columns.end()}; return build_column_bitset(desc, col_set); } @@ -128,7 +128,7 @@ inline std::optional requested_column_bitset_including_index(const inline std::optional clause_column_bitset( const StreamDescriptor& desc, const std::vector>& clauses) { - folly::F14FastSet column_set; + 
ankerl::unordered_dense::set column_set; for (const auto& clause: clauses) { auto opt_columns = clause->clause_info().input_columns_; if (opt_columns.has_value()) { diff --git a/cpp/arcticdb/storage/library.hpp b/cpp/arcticdb/storage/library.hpp index 39542b03d6..588a576e73 100644 --- a/cpp/arcticdb/storage/library.hpp +++ b/cpp/arcticdb/storage/library.hpp @@ -21,12 +21,9 @@ #include #include -#include #include #include - - #ifdef _WIN32 //Windows #defines DELETE in winnt.h which clashes with OpenMode.DELETE #undef DELETE diff --git a/cpp/arcticdb/util/bitset.hpp b/cpp/arcticdb/util/bitset.hpp index f23fdd45f9..cb210889d2 100644 --- a/cpp/arcticdb/util/bitset.hpp +++ b/cpp/arcticdb/util/bitset.hpp @@ -32,4 +32,36 @@ using BitIndex = bm::bvector<>::rs_index_type; constexpr bm::bvector<>::size_type bv_size(uint64_t val) { return static_cast::size_type>(val); } + +// adapted from bm_algo.h +template +struct BitVisitorFunctor { + Func func_; + uint64_t rank_ = 0; + + BitVisitorFunctor(Func&& func) : + func_(func) { + } + + using size_type = util::BitSetSizeType; + + int add_bits( + size_type offset, + const unsigned char* bits, + unsigned size) { + for (unsigned i = 0; i < size; ++i) { + func_(offset + bits[i], rank_); + ++rank_; + } + return 0; + } + + int add_range(size_type offset, size_type size) { + for (size_type i = 0; i < size; ++i){ + func_(offset + i, rank_); + ++rank_; + } + return 0; + } +}; } diff --git a/cpp/arcticdb/util/preprocess.hpp b/cpp/arcticdb/util/preprocess.hpp index 416bd4e924..7d0b7c7af1 100644 --- a/cpp/arcticdb/util/preprocess.hpp +++ b/cpp/arcticdb/util/preprocess.hpp @@ -16,12 +16,14 @@ #define ARCTICDB_LIKELY(condition) __builtin_expect(condition, 1) #define ARCTICDB_UNLIKELY(condition) __builtin_expect(condition, 0) +#define ARCTICDB_ALWAYS_INLINE inline __attribute__((__always_inline__)) #else #define ARCTICDB_UNUSED [[maybe_unused]] #define ARCTICDB_UNREACHABLE __assume(0); #define ARCTICDB_VISIBILITY_HIDDEN #define 
ARCTICDB_VISIBILITY_DEFAULT +#define ARCTICDB_ALWAYS_INLINE __forceinline #define ARCTICDB_LIKELY #define ARCTICDB_UNLIKELY diff --git a/cpp/arcticdb/util/test/test_tracing_allocator.cpp b/cpp/arcticdb/util/test/test_tracing_allocator.cpp index 28869d0361..4b39d6c89b 100644 --- a/cpp/arcticdb/util/test/test_tracing_allocator.cpp +++ b/cpp/arcticdb/util/test/test_tracing_allocator.cpp @@ -7,7 +7,6 @@ #include #include -#include #include TEST(Allocator, Tracing) {