diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index 8f8acf55df..22382fcbbd 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -564,6 +564,7 @@ std::optional SegmentInMemoryImpl::string_at(position_t row, p const auto& col_ref = column(col); if(is_fixed_string_type(td.data_type()) && col_ref.is_inflated()) { + auto string_size = col_ref.bytes() / row_count(); auto ptr = col_ref.data().buffer().ptr_cast(row * string_size, string_size); return std::string_view(ptr, string_size); diff --git a/cpp/arcticdb/version/symbol_list.cpp b/cpp/arcticdb/version/symbol_list.cpp index 6298a76c60..a4ae5de32e 100644 --- a/cpp/arcticdb/version/symbol_list.cpp +++ b/cpp/arcticdb/version/symbol_list.cpp @@ -128,17 +128,35 @@ MaybeCompaction last_compaction(const std::vector& keys) { } } +// The below string_at and scalar_at functions should be used for symbol list cache segments instead of the ones +// provided in SegmentInMemory, because the symbol list structure is the only place where columns can have more entries +// than the segment has rows. Hence, we need to bypass the checks inside SegmentInMemory's function and directly call the +// Column's string_at and scalar_at. +std::string string_at(const SegmentInMemory& seg, position_t row, position_t col){ + auto offset = seg.column(col).scalar_at(row); + util::check(offset.has_value(), "Symbol list trying to call string_at for missing row {}, column {}", row, col); + return std::string(seg.string_pool_ptr()->get_view(offset.value())); +} + +template +T scalar_at(const SegmentInMemory& seg, position_t row, position_t col){ + auto scalar = seg.column(col).scalar_at(row); + util::check(scalar.has_value(), "Symbol list trying to call scalar_at for missing row {}, column {}", row, col); + return scalar.value(); +} + + StreamId stream_id_from_segment( DataType data_type, const SegmentInMemory& seg, position_t row_id, position_t column) { if (data_type == DataType::UINT64) { - auto num_id = seg.scalar_at(row_id, column).value(); + auto num_id = scalar_at(seg, row_id, column); ARCTICDB_DEBUG(log::symbol(), "Reading numeric symbol {}", num_id); return safe_convert_to_numeric_id(num_id); } else { - auto sym = std::string(seg.string_at(row_id, column).value()); + auto sym = string_at(seg, row_id, column); ARCTICDB_DEBUG(log::symbol(), "Reading string symbol '{}'", sym); return {std::move(sym)}; } @@ -183,8 +201,8 @@ std::vector read_new_style_list_from_storage(const SegmentInMem for(auto i = 0L; i < seg.column(0).row_count(); ++i) { auto stream_id = stream_id_from_segment(data_type, seg, i, 0); - auto reference_id = VersionId{seg.scalar_at(i, 1).value()}; - auto reference_time = timestamp{seg.scalar_at(i, 2).value()}; + auto reference_id = VersionId{scalar_at(seg, i, 1)}; + auto reference_time = timestamp{scalar_at(seg, i, 2)}; ARCTICDB_RUNTIME_DEBUG(log::symbol(), "Reading added symbol {}: {}@{}", stream_id, reference_id, reference_time); output.emplace_back(stream_id, reference_id, reference_time, ActionType::ADD); } @@ -195,8 +213,8 @@ std::vector read_new_style_list_from_storage(const SegmentInMem for (auto i = 0L; i < seg.column(3).row_count(); ++i) { auto stream_id = stream_id_from_segment(data_type, seg, i, 3); - auto reference_id = VersionId{seg.scalar_at(i, 4).value()}; - auto reference_time = timestamp{seg.scalar_at(i, 5).value()}; + auto reference_id = VersionId{scalar_at(seg, i, 4)}; + auto reference_time = timestamp{scalar_at(seg, i, 5)}; ARCTICDB_RUNTIME_DEBUG(log::symbol(), "Reading deleted symbol {}: {}@{}", stream_id, reference_id, reference_time); output.emplace_back(stream_id, reference_id, reference_time, ActionType::DELETE); }