diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index 62c0759778..71fdc999e1 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -176,6 +176,7 @@ endif () ## Core library without python bindings ## set(arcticdb_srcs + storage/memory_layout.hpp # header files async/async_store.hpp async/batch_read_args.hpp @@ -188,7 +189,10 @@ set(arcticdb_srcs codec/lz4.hpp codec/magic_words.hpp codec/passthrough.hpp + codec/protobuf_mappings.hpp codec/slice_data_sink.hpp + codec/segment_header.hpp + codec/segment_identifier.hpp codec/typed_block_encoder_impl.hpp codec/zstd.hpp column_store/block.hpp @@ -277,6 +281,7 @@ set(arcticdb_srcs storage/azure/azure_mock_client.hpp storage/azure/azure_real_client.hpp storage/azure/azure_storage.hpp + storage/lmdb/lmdb.hpp storage/lmdb/lmdb_client_wrapper.hpp storage/lmdb/lmdb_mock_client.hpp storage/lmdb/lmdb_real_client.hpp @@ -310,7 +315,7 @@ set(arcticdb_srcs stream/index.hpp stream/merge.hpp stream/merge.hpp - stream/merge.hpp util/ref_counted_map.hpp + stream/merge.hpp stream/protobuf_mappings.hpp stream/row_builder.hpp stream/schema.hpp @@ -352,8 +357,6 @@ set(arcticdb_srcs util/preconditions.hpp util/preprocess.hpp util/ranges_from_future.hpp - util/ref_counted_map.hpp - util/ref_counted_map.hpp util/regex_filter.hpp util/simple_string_hash.hpp util/slab_allocator.hpp @@ -390,9 +393,10 @@ set(arcticdb_srcs codec/codec.cpp codec/encode_v1.cpp codec/encode_v2.cpp - codec/encoding_sizes.cpp + codec/encoded_field.cpp + codec/protobuf_mappings.cpp codec/segment.cpp - codec/variant_encoded_field_collection.cpp + codec/segment_header.cpp column_store/chunked_buffer.cpp column_store/column.cpp column_store/column_data.cpp @@ -406,6 +410,7 @@ set(arcticdb_srcs entity/merge_descriptors.cpp entity/metrics.cpp entity/performance_tracing.cpp + entity/protobuf_mappings.cpp entity/types.cpp entity/type_utils.cpp entity/types_proto.cpp @@ -455,6 +460,7 @@ set(arcticdb_srcs storage/mongo/mongo_mock_client.cpp storage/mongo/mongo_storage.cpp storage/s3/nfs_backed_storage.cpp + storage/s3/ec2_utils.cpp storage/s3/s3_api.cpp storage/s3/s3_real_client.cpp storage/s3/s3_mock_client.cpp @@ -465,6 +471,7 @@ set(arcticdb_srcs stream/append_map.cpp stream/index.cpp stream/piloted_clock.cpp + stream/protobuf_mappings.cpp toolbox/library_tool.cpp util/allocator.cpp util/buffer_pool.cpp @@ -490,7 +497,7 @@ set(arcticdb_srcs version/version_map_batch_methods.cpp storage/s3/ec2_utils.cpp storage/lmdb/lmdb.hpp -) + ) if(${ARCTICDB_INCLUDE_ROCKSDB}) list (APPEND arcticdb_srcs @@ -741,6 +748,9 @@ if(${TEST}) set(unit_test_srcs async/test/test_async.cpp codec/test/test_codec.cpp + codec/test/test_encode_field_collection.cpp + codec/test/test_segment_header.cpp + codec/test/test_encoded_field.cpp column_store/test/ingestion_stress_test.cpp column_store/test/test_column.cpp column_store/test/test_column_data_random_accessor.cpp @@ -808,7 +818,7 @@ if(${TEST}) version/test/test_sorting_info_state_machine.cpp version/test/version_map_model.hpp storage/test/common.hpp - ) + ) set(EXECUTABLE_PERMS OWNER_WRITE OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) # 755 diff --git a/cpp/arcticdb/async/async_store.hpp b/cpp/arcticdb/async/async_store.hpp index fc403347ae..65bb2e8a42 100644 --- a/cpp/arcticdb/async/async_store.hpp +++ b/cpp/arcticdb/async/async_store.hpp @@ -338,7 +338,7 @@ std::vector> batch_key_exists( } - folly::Future async_write( +folly::Future async_write( folly::Future> &&input_fut, const std::shared_ptr 
&de_dup_map) override { using KeyOptSegment = std::pair>; diff --git a/cpp/arcticdb/async/task_scheduler.hpp b/cpp/arcticdb/async/task_scheduler.hpp index b47bc48cb0..1321c7e82a 100644 --- a/cpp/arcticdb/async/task_scheduler.hpp +++ b/cpp/arcticdb/async/task_scheduler.hpp @@ -150,15 +150,13 @@ class TaskScheduler { explicit TaskScheduler(const std::optional& cpu_thread_count = std::nullopt, const std::optional& io_thread_count = std::nullopt) : cpu_thread_count_(cpu_thread_count ? *cpu_thread_count : ConfigsMap::instance()->get_int("VersionStore.NumCPUThreads", get_default_num_cpus())), - io_thread_count_(io_thread_count ? *io_thread_count : ConfigsMap::instance()->get_int("VersionStore.NumIOThreads", std::min(100, (int) (cpu_thread_count_ * 1.5)))), + io_thread_count_(io_thread_count ? *io_thread_count : ConfigsMap::instance()->get_int("VersionStore.NumIOThreads", (int) (cpu_thread_count_ * 1.5))), cpu_exec_(cpu_thread_count_, std::make_shared("CPUPool")) , io_exec_(io_thread_count_, std::make_shared("IOPool")){ util::check(cpu_thread_count_ > 0 && io_thread_count_ > 0, "Zero IO or CPU threads: {} {}", io_thread_count_, cpu_thread_count_); ARCTICDB_RUNTIME_DEBUG(log::schedule(), "Task scheduler created with {:d} {:d}", cpu_thread_count_, io_thread_count_); } - ~TaskScheduler() = default; - template auto submit_cpu_task(Task &&t) { auto task = std::forward(t); diff --git a/cpp/arcticdb/async/tasks.cpp b/cpp/arcticdb/async/tasks.cpp index b99bcf91e6..2a8d4e8731 100644 --- a/cpp/arcticdb/async/tasks.cpp +++ b/cpp/arcticdb/async/tasks.cpp @@ -27,10 +27,10 @@ namespace arcticdb::async { } } - return index_descriptor(StreamDescriptor::id_from_proto(desc.proto()), idx, fields); + return index_descriptor_from_range(desc.id(), idx, fields); } else { - return index_descriptor(StreamDescriptor::id_from_proto(desc.proto()), idx, desc.fields()); + return index_descriptor_from_range(desc.id(), idx, desc.fields()); } }); } @@ -39,10 +39,10 @@ namespace arcticdb::async { auto key = std::move(key_segment_pair.atom_key()); auto seg = std::move(key_segment_pair.release_segment()); ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeAtomTask decoding segment of size {} with key {}", - seg.total_segment_size(), + seg.size(), key); auto &hdr = seg.header(); - auto desc = StreamDescriptor(std::make_shared(std::move(*hdr.mutable_stream_descriptor())), seg.fields_ptr()); + const auto& desc = seg.descriptor(); auto descriptor = async::get_filtered_descriptor(desc, columns_to_decode_); ranges_and_key_.col_range_.second = ranges_and_key_.col_range_.first + (descriptor.field_count() - descriptor.index().field_count()); ARCTICDB_TRACE(log::codec(), "Creating segment"); @@ -53,12 +53,11 @@ namespace arcticdb::async { pipelines::SliceAndKey DecodeSlicesTask::decode_into_slice(std::pair&& sk_pair) const { auto [seg, sk] = std::move(sk_pair); - ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeAtomTask decoding segment of size {} with key {}", - seg.total_segment_size(), + ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeAtomTask decoding segment with key {}", variant_key_view(sk.key())); auto &hdr = seg.header(); - auto desc = StreamDescriptor(std::make_shared(std::move(*hdr.mutable_stream_descriptor())), seg.fields_ptr()); + const auto& desc = seg.descriptor(); auto descriptor = async::get_filtered_descriptor(desc, filter_columns_); sk.slice_.adjust_columns(descriptor.field_count() - descriptor.index().field_count()); diff --git a/cpp/arcticdb/async/tasks.hpp b/cpp/arcticdb/async/tasks.hpp index 0dd40c610a..28aca35433 100644 --- 
a/cpp/arcticdb/async/tasks.hpp +++ b/cpp/arcticdb/async/tasks.hpp @@ -89,7 +89,7 @@ struct EncodeAtomTask : BaseTask { storage::KeySegmentPair encode() { ARCTICDB_DEBUG(log::codec(), "Encoding object with partial key {}", partial_key_); auto enc_seg = ::arcticdb::encode_dispatch(std::move(segment_), *codec_meta_, encoding_version_); - auto content_hash = hash_segment_header(enc_seg.header()); + auto content_hash = get_segment_hash(enc_seg); AtomKey k = partial_key_.build_key(creation_ts_, content_hash); return {std::move(k), std::move(enc_seg)}; @@ -208,7 +208,9 @@ struct KeySegmentContinuation { }; inline storage::KeySegmentPair read_dispatch(const entity::VariantKey& variant_key, const std::shared_ptr& lib, const storage::ReadKeyOpts& opts) { - return util::variant_match(variant_key, [&lib, &opts](const auto &key) { return lib->read(key, opts); }); + return util::variant_match(variant_key, [&lib, &opts](const auto &key) { + return lib->read(key, opts); + }); } template @@ -322,8 +324,7 @@ struct DecodeSegmentTask : BaseTask { ARCTICDB_SAMPLE(DecodeAtomTask, 0) auto key_seg = std::move(ks); - ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeAtomTask decoding segment of size {} with key {}", - key_seg.segment().total_segment_size(), + ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeAtomTask decoding segment with key {}", variant_key_view(key_seg.variant_key())); return {key_seg.variant_key(), decode_segment(std::move(key_seg.segment()))}; @@ -365,10 +366,10 @@ struct DecodeSlicesTask : BaseTask { Composite operator()(Composite> && skp) const { ARCTICDB_SAMPLE(DecodeSlicesTask, 0) auto sk_pairs = std::move(skp); - return sk_pairs.transform([that=this] (auto&& ssp){ - auto seg_slice_pair = std::forward(ssp); + return sk_pairs.transform([this] (auto&& ssp){ + auto seg_slice_pair = std::move(ssp); ARCTICDB_DEBUG(log::version(), "Decoding slice {}", seg_slice_pair.second.key()); - return that->decode_into_slice(std::move(seg_slice_pair)); + return decode_into_slice(std::move(seg_slice_pair)); }); } @@ -442,8 +443,7 @@ struct DecodeMetadataTask : BaseTask { std::pair, std::optional> operator()(storage::KeySegmentPair &&ks) const { ARCTICDB_SAMPLE(ReadMetadataTask, 0) auto key_seg = std::move(ks); - ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeMetadataTask decoding segment of size {} with key {}", - key_seg.segment().total_segment_size(), variant_key_view(key_seg.variant_key())); + ARCTICDB_DEBUG(log::storage(), "ReadAndDecodeMetadataTask decoding segment with key {}", variant_key_view(key_seg.variant_key())); auto meta = decode_metadata_from_segment(key_seg.segment()); std::pair> output; @@ -463,18 +463,14 @@ struct DecodeTimeseriesDescriptorTask : BaseTask { std::pair operator()(storage::KeySegmentPair &&ks) const { ARCTICDB_SAMPLE(DecodeTimeseriesDescriptorTask, 0) auto key_seg = std::move(ks); - ARCTICDB_DEBUG(log::storage(), "DecodeTimeseriesDescriptorTask decoding segment of size {} with key {}", - key_seg.segment().total_segment_size(), variant_key_view(key_seg.variant_key())); + ARCTICDB_DEBUG(log::storage(), "DecodeTimeseriesDescriptorTask decoding segment with key {}", variant_key_view(key_seg.variant_key())); auto maybe_desc = decode_timeseries_descriptor(key_seg.segment()); util::check(static_cast(maybe_desc), "Failed to decode timeseries descriptor"); return std::make_pair( std::move(key_seg.variant_key()), - TimeseriesDescriptor{ - std::make_shared(std::move(std::get<1>(*maybe_desc))), - std::make_shared(std::move(std::get<2>(*maybe_desc)))} - ); + std::move(*maybe_desc)); } }; @@ -486,8 
+482,7 @@ struct DecodeMetadataAndDescriptorTask : BaseTask { std::tuple, StreamDescriptor> operator()(storage::KeySegmentPair &&ks) const { ARCTICDB_SAMPLE(ReadMetadataAndDescriptorTask, 0) auto key_seg = std::move(ks); - ARCTICDB_DEBUG(log::storage(), "DecodeMetadataAndDescriptorTask decoding segment of size {} with key {}", - key_seg.segment().total_segment_size(), variant_key_view(key_seg.variant_key())); + ARCTICDB_DEBUG(log::storage(), "DecodeMetadataAndDescriptorTask decoding segment with key {}", variant_key_view(key_seg.variant_key())); auto [any, descriptor] = decode_metadata_and_descriptor_fields(key_seg.segment()); return std::make_tuple( diff --git a/cpp/arcticdb/async/test/test_async.cpp b/cpp/arcticdb/async/test/test_async.cpp index 0e7bf169c7..ee28d3dc3a 100644 --- a/cpp/arcticdb/async/test/test_async.cpp +++ b/cpp/arcticdb/async/test/test_async.cpp @@ -43,7 +43,7 @@ TEST(Async, SinkBasic) { auto seg = ac::SegmentInMemory(); aa::EncodeAtomTask enc{ - ac::entity::KeyType::GENERATION, ac::entity::VersionId{6}, ac::entity::NumericId{123}, ac::entity::NumericId{456}, ac::timestamp{457}, ac::entity::NumericIndex{999}, std::move(seg), codec_opt, ac::EncodingVersion::V2 + ac::entity::KeyType::GENERATION, ac::entity::VersionId{6}, ac::NumericId{123}, ac::NumericId{456}, ac::timestamp{457}, ac::entity::NumericIndex{999}, std::move(seg), codec_opt, ac::EncodingVersion::V2 }; auto v = sched.submit_cpu_task(std::move(enc)).via(&aa::io_executor()).thenValue(aa::WriteSegmentTask{lib}).get(); @@ -52,7 +52,7 @@ TEST(Async, SinkBasic) { auto default_content_hash = h.digest(); ASSERT_EQ(ac::entity::atom_key_builder().gen_id(6).start_index(456).end_index(457).creation_ts(999) - .content_hash(default_content_hash).build(ac::entity::NumericId{123}, ac::entity::KeyType::GENERATION), + .content_hash(default_content_hash).build(ac::NumericId{123}, ac::entity::KeyType::GENERATION), to_atom(v) ); } @@ -129,10 +129,10 @@ TEST(Async, CollectWithThrow) { } auto vec_fut = folly::collectAll(stuff).get(); } catch(std::exception&) { - log::version().info("Caught something"); + ARCTICDB_DEBUG(log::version(), "Caught something"); } - log::version().info("Collect returned"); + ARCTICDB_DEBUG(log::version(), "Collect returned"); } using IndexSegmentReader = int; diff --git a/cpp/arcticdb/codec/codec-inl.hpp b/cpp/arcticdb/codec/codec-inl.hpp index 4adcfc6f76..ac0368d1f4 100644 --- a/cpp/arcticdb/codec/codec-inl.hpp +++ b/cpp/arcticdb/codec/codec-inl.hpp @@ -15,8 +15,9 @@ #include #include #include - #include +#include +#include #include @@ -32,23 +33,23 @@ void decode_block(const BlockType &block, const std::uint8_t *input, T *output) arcticdb::detail::PassthroughDecoder::decode_block(input, size_to_decode, output, decoded_size); } else { std::uint32_t encoder_version = block.encoder_version(); - switch (block.codec().codec_case()) { - case arcticdb::proto::encoding::VariantCodec::kZstd: - arcticdb::detail::ZstdDecoder::decode_block(encoder_version, - input, - size_to_decode, - output, - decoded_size); - break; - case arcticdb::proto::encoding::VariantCodec::kLz4: - arcticdb::detail::Lz4Decoder::decode_block(encoder_version, - input, - size_to_decode, - output, - decoded_size); - break; - default: - util::raise_error_msg("Unsupported block codec {}", block); + switch (block.codec().codec_type()) { + case arcticdb::Codec::ZSTD: + arcticdb::detail::ZstdDecoder::decode_block(encoder_version, + input, + size_to_decode, + output, + decoded_size); + break; + case arcticdb::Codec::LZ4: + 
arcticdb::detail::Lz4Decoder::decode_block(encoder_version, + input, + size_to_decode, + output, + decoded_size); + break; + default: + util::raise_rte("Unsupported block codec {}", codec_type_to_string(block.codec().codec_type())); } } } @@ -78,6 +79,7 @@ std::size_t decode_ndarray( EncodingVersion encoding_version ) { ARCTICDB_SUBSAMPLE_AGG(DecodeNdArray) + std::size_t read_bytes = 0; td.visit_tag([&](auto type_desc_tag) { using TD = std::decay_t; @@ -85,7 +87,7 @@ std::size_t decode_ndarray( const auto data_size = encoding_sizes::data_uncompressed_size(field); const bool is_empty_array = (data_size == 0) && type_desc_tag.dimension() > Dimension::Dim0; - // Empty array will not contain actual data, however, its sparse map should be loaded + // Empty array types will not contain actual data, however, its sparse map should be loaded // so that we can distinguish None from [] if(data_size == 0 && !is_empty_array) { util::check(type_desc_tag.data_type() == DataType::EMPTYVAL, @@ -152,25 +154,25 @@ std::size_t decode_ndarray( return read_bytes; } -template +template std::size_t decode_field( const TypeDescriptor &td, - const EncodedFieldType &field, + const EncodedFieldImpl &field, const std::uint8_t *input, DataSink &data_sink, std::optional& bv, EncodingVersion encoding_version) { size_t magic_size = 0u; - if constexpr(std::is_same_v) { + if (encoding_version != EncodingVersion::V1) { magic_size += sizeof(ColumnMagic); util::check_magic(input); } switch (field.encoding_case()) { - case EncodedFieldType::kNdarray: + case EncodedFieldType::NDARRAY: return decode_ndarray(td, field.ndarray(), input, data_sink, bv, encoding_version) + magic_size; default: - util::raise_error_msg("Unsupported encoding {}", field); + util::raise_rte("Unsupported encoding {}", field); } } diff --git a/cpp/arcticdb/codec/codec.cpp b/cpp/arcticdb/codec/codec.cpp index f19391d7e5..0f2d1486e8 100644 --- a/cpp/arcticdb/codec/codec.cpp +++ b/cpp/arcticdb/codec/codec.cpp @@ -8,41 +8,26 @@ #include #include #include +#include #include #include #include +#include +#include +#include - -#include #include - - #include +#include namespace arcticdb { -Segment encode_v2( - SegmentInMemory&& in_mem_seg, - const arcticdb::proto::encoding::VariantCodec& codec_opts -); - -Segment encode_v1( - SegmentInMemory&& in_mem_seg, - const arcticdb::proto::encoding::VariantCodec& codec_opts -); - constexpr TypeDescriptor metadata_type_desc() { return TypeDescriptor{ DataType::UINT8, Dimension::Dim1 }; } -constexpr TypeDescriptor encoded_blocks_type_desc() { - return TypeDescriptor{ - DataType::UINT8, Dimension::Dim1 - }; -} - SizeResult max_compressed_size_dispatch( const SegmentInMemory& in_mem_seg, const arcticdb::proto::encoding::VariantCodec &codec_opts, @@ -105,15 +90,17 @@ class MetaBuffer { } std::optional decode_metadata( - const arcticdb::proto::encoding::SegmentHeader& hdr, + const SegmentHeader& hdr, const uint8_t*& data, const uint8_t* begin ARCTICDB_UNUSED ) { if (hdr.has_metadata_field()) { + hdr.metadata_field().validate(); auto meta_type_desc = metadata_type_desc(); MetaBuffer meta_buf; std::optional bv; - data += decode_field(meta_type_desc, hdr.metadata_field(), data, meta_buf, bv, to_encoding_version(hdr.encoding_version())); + ARCTICDB_DEBUG(log::codec(), "Decoding metadata at position {}: {}", data - begin, dump_bytes(data, 10)); + data += decode_ndarray(meta_type_desc, hdr.metadata_field().ndarray(), data, meta_buf, bv, hdr.encoding_version()); ARCTICDB_TRACE(log::codec(), "Decoded metadata to position {}", data - 
begin); google::protobuf::io::ArrayInputStream ais(meta_buf.buffer().data(), static_cast(meta_buf.buffer().bytes())); @@ -127,13 +114,17 @@ std::optional decode_metadata( } void decode_metadata( - const arcticdb::proto::encoding::SegmentHeader& hdr, + const SegmentHeader& hdr, const uint8_t*& data, const uint8_t* begin ARCTICDB_UNUSED, SegmentInMemory& res) { auto maybe_any = decode_metadata(hdr, data, begin); - if(maybe_any) + if(maybe_any) { + ARCTICDB_DEBUG(log::version(), "Found metadata on segment"); res.set_metadata(std::move(*maybe_any)); + } else { + ARCTICDB_DEBUG(log::version(), "No metadata on segment"); + } } std::optional decode_metadata_from_segment(const Segment &segment) { @@ -147,44 +138,68 @@ std::optional decode_metadata_from_segment(const Segment return decode_metadata(hdr, data, begin); } -Buffer decode_encoded_fields( - const arcticdb::proto::encoding::SegmentHeader& hdr, - const uint8_t* data, - const uint8_t* begin ARCTICDB_UNUSED) { - ARCTICDB_TRACE(log::codec(), "Decoding encoded fields"); - MetaBuffer meta_buffer; - std::optional bv; - if(hdr.has_column_fields()) { - constexpr auto type_desc = encoded_blocks_type_desc(); - decode_field(type_desc, hdr.column_fields(), data, meta_buffer, bv, to_encoding_version(hdr.encoding_version())); - } - ARCTICDB_TRACE(log::codec(), "Decoded encoded fields at position {}", data-begin); - return meta_buffer.detach_buffer(); +EncodedFieldCollection decode_encoded_fields( + const SegmentHeader& hdr, + const uint8_t* data, + const uint8_t* begin ARCTICDB_UNUSED) { + ARCTICDB_TRACE(log::codec(), "Decoding encoded fields"); + + util::check(hdr.has_column_fields() && hdr.column_fields().has_ndarray(), "Expected encoded field description to be set in header"); + std::optional bv; + const auto uncompressed_size = encoding_sizes::uncompressed_size(hdr.column_fields()); + constexpr auto type_desc = encoded_fields_type_desc(); + Column encoded_column(type_desc, uncompressed_size, false, false); + decode_ndarray(type_desc, hdr.column_fields().ndarray(), data, encoded_column, bv, hdr.encoding_version()); + + ARCTICDB_TRACE(log::codec(), "Decoded encoded fields at position {}", data-begin); + return {std::move(encoded_column.release_buffer()), std::move(encoded_column.release_shapes())}; +} + +std::shared_ptr extract_frame_metadata( + SegmentInMemory& res) { + auto output = std::make_shared(); + util::check(res.has_metadata(), "Cannot extract frame metadata as it is null"); + res.metadata()->UnpackTo(output.get()); + return output; +} + +FrameDescriptorImpl read_frame_descriptor( + const uint8_t*& data) { + auto* frame_descriptor = reinterpret_cast(data); + data += sizeof(FrameDescriptorImpl); + return *frame_descriptor; +} + +SegmentDescriptorImpl read_segment_descriptor( + const uint8_t*& data) { + util::check_magic(data); + auto* frame_descriptor = reinterpret_cast(data); + data += sizeof(SegmentDescriptorImpl); + return *frame_descriptor; } -std::optional decode_index_fields( - const arcticdb::proto::encoding::SegmentHeader& hdr, +std::shared_ptr decode_index_fields( + const SegmentHeader& hdr, const uint8_t*& data, const uint8_t* begin ARCTICDB_UNUSED, const uint8_t* end) { - //TODO append map sets the field but it's empty + auto fields = std::make_shared(); if(hdr.has_index_descriptor_field() && hdr.index_descriptor_field().has_ndarray()) { ARCTICDB_TRACE(log::codec(), "Decoding index fields"); util::check(data!=end, "Reached end of input block with index descriptor fields to decode"); std::optional bv; - FieldCollection fields; - data 
+= decode_field(FieldCollection::type(), - hdr.index_descriptor_field(), + + data += decode_ndarray(FieldCollection::type(), + hdr.index_descriptor_field().ndarray(), data, - fields, + *fields, bv, - to_encoding_version(hdr.encoding_version())); + hdr.encoding_version()); ARCTICDB_TRACE(log::codec(), "Decoded index descriptor to position {}", data-begin); - return std::make_optional(std::move(fields)); - } else { - return std::nullopt; } + fields->regenerate_offsets(); + return fields; } namespace { @@ -193,10 +208,16 @@ inline arcticdb::proto::descriptors::TimeSeriesDescriptor timeseries_descriptor_ any.UnpackTo(&tsd); return tsd; } + +inline arcticdb::proto::descriptors::FrameMetadata frame_metadata_from_any(const google::protobuf::Any& any) { + arcticdb::proto::descriptors::FrameMetadata frame_meta; + any.UnpackTo(&frame_meta); + return frame_meta; +} } std::optional decode_descriptor_fields( - const arcticdb::proto::encoding::SegmentHeader& hdr, + const SegmentHeader& hdr, const uint8_t*& data, const uint8_t* begin ARCTICDB_UNUSED, const uint8_t* end) { @@ -210,7 +231,7 @@ std::optional decode_descriptor_fields( data, fields, bv, - to_encoding_version(hdr.encoding_version())); + hdr.encoding_version()); ARCTICDB_TRACE(log::codec(), "Decoded descriptor fields to position {}", data-begin); return std::make_optional(std::move(fields)); @@ -219,72 +240,89 @@ std::optional decode_descriptor_fields( } } -std::optional> decode_timeseries_descriptor( - const arcticdb::proto::encoding::SegmentHeader& hdr, +TimeseriesDescriptor unpack_timeseries_descriptor_from_proto( + const google::protobuf::Any& any) { + + auto tsd = timeseries_descriptor_from_any(any); + auto frame_meta = std::make_shared(); + exchange_timeseries_proto(tsd, *frame_meta); + + auto segment_desc = std::make_shared(segment_descriptor_from_proto((tsd.stream_descriptor()))); + auto frame_desc = std::make_shared(frame_descriptor_from_proto(tsd)); + const auto& desc = tsd.stream_descriptor(); + auto old_fields = std::make_shared(fields_from_proto(tsd.stream_descriptor())); + StreamId stream_id = desc.id_case() == desc.kNumId ? 
StreamId(desc.num_id()) : StreamId(desc.str_id()); + return {frame_desc, segment_desc, frame_meta, old_fields, stream_id}; +} + +std::optional decode_timeseries_descriptor_v1( + const SegmentHeader& hdr, const uint8_t* data, const uint8_t* begin, - const uint8_t* end) { - util::check(data != nullptr, "Got null data ptr from segment"); - const auto has_magic_numbers = EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2; - if(has_magic_numbers) - util::check_magic(data); - + const uint8_t* end ARCTICDB_UNUSED) { auto maybe_any = decode_metadata(hdr, data, begin); if(!maybe_any) return std::nullopt; - auto tsd = timeseries_descriptor_from_any(*maybe_any); - - if(has_magic_numbers) - util::check_magic(data); + return unpack_timeseries_descriptor_from_proto(*maybe_any); +} +void skip_descriptor(const uint8_t*& data, const SegmentHeader& hdr) { + util::check_magic(data); + data += sizeof(SegmentDescriptor); + skip_identifier(data); + util::check_magic(data); if(hdr.has_descriptor_field() && hdr.descriptor_field().has_ndarray()) - data += encoding_sizes::ndarray_field_compressed_size(hdr.descriptor_field().ndarray()); + data += encoding_sizes::field_compressed_size(hdr.descriptor_field()); - if(has_magic_numbers) - util::check_magic(data); +} - auto maybe_fields = decode_index_fields(hdr, data, begin, end); - if(!maybe_fields) { - auto old_fields = fields_from_proto(tsd.stream_descriptor()); - return std::make_optional(std::make_tuple(std::move(*maybe_any), std::move(tsd), std::move(old_fields))); - } +std::optional decode_timeseries_descriptor_v2( + const SegmentHeader& hdr, + const uint8_t* data, + const uint8_t* begin, + const uint8_t* end) { + util::check_magic(data); - maybe_fields->regenerate_offsets(); - return std::make_tuple(std::move(*maybe_any), std::move(tsd), std::move(*maybe_fields)); -} + auto maybe_any = decode_metadata(hdr, data, begin); + if(!maybe_any) + return std::nullopt; -std::optional> decode_timeseries_descriptor( - Segment& segment) { - auto &hdr = segment.header(); - const uint8_t* data = segment.buffer().data(); + auto frame_meta = std::make_shared(frame_metadata_from_any(*maybe_any)); - util::check(data != nullptr, "Got null data ptr from segment"); - const uint8_t* begin = data; - const uint8_t* end = data + segment.buffer().bytes(); + skip_descriptor(data, hdr); - return decode_timeseries_descriptor(hdr, data, begin, end); + util::check_magic(data); + auto frame_desc = std::make_shared(read_frame_descriptor(data)); + auto segment_desc = std::make_shared(read_segment_descriptor(data)); + auto segment_id = read_identifier(data); + auto index_fields = decode_index_fields(hdr, data, begin, end); + return std::make_optional(frame_desc, segment_desc, frame_meta, std::move(index_fields), segment_id); } -std::pair, StreamDescriptor> decode_metadata_and_descriptor_fields( - arcticdb::proto::encoding::SegmentHeader& hdr, +std::optional decode_timeseries_descriptor( + const SegmentHeader& hdr, const uint8_t* data, const uint8_t* begin, const uint8_t* end) { util::check(data != nullptr, "Got null data ptr from segment"); - if(EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2) - util::check_magic(data); + auto encoding_version = EncodingVersion(hdr.encoding_version()); + if (encoding_version == EncodingVersion::V1) + return decode_timeseries_descriptor_v1(hdr, data, begin, end); + else + return decode_timeseries_descriptor_v2(hdr, data, begin, end); +} - auto maybe_any = decode_metadata(hdr, data, begin); - if(EncodingVersion(hdr.encoding_version()) == 
EncodingVersion::V2) - util::check_magic(data); +std::optional decode_timeseries_descriptor( + Segment& segment) { + const auto &hdr = segment.header(); + const uint8_t* data = segment.buffer().data(); - auto maybe_fields = decode_descriptor_fields(hdr, data, begin, end); - if(!maybe_fields) { - auto old_fields = std::make_shared(fields_from_proto(hdr.stream_descriptor())); - return std::make_pair(std::move(maybe_any),StreamDescriptor{std::make_shared(std::move(*hdr.mutable_stream_descriptor())), old_fields}); - } - return std::make_pair(std::move(maybe_any),StreamDescriptor{std::make_shared(std::move(*hdr.mutable_stream_descriptor())), std::make_shared(std::move(*maybe_fields))}); + util::check(data != nullptr, "Got null data ptr from segment"); + const uint8_t* begin = data; + const uint8_t* end = data + segment.buffer().bytes(); + + return decode_timeseries_descriptor(hdr, data, begin, end); } std::pair, StreamDescriptor> decode_metadata_and_descriptor_fields( @@ -294,26 +332,32 @@ std::pair, StreamDescriptor> decode_metadat util::check(data != nullptr, "Got null data ptr from segment"); const uint8_t* begin = data; - const uint8_t* end = data + segment.buffer().bytes(); + if(EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2) + util::check_magic(data); + + auto maybe_any = decode_metadata(hdr, data, begin); + if(EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2) + util::check_magic(data); - return decode_metadata_and_descriptor_fields(hdr, data, begin, end); + return std::make_pair(std::move(maybe_any), segment.descriptor()); } -void decode_string_pool( const arcticdb::proto::encoding::SegmentHeader& hdr, - const uint8_t*& data, - const uint8_t* begin ARCTICDB_UNUSED, - const uint8_t* end, - SegmentInMemory& res) { +void decode_string_pool( + const SegmentHeader& hdr, + const uint8_t*& data, + const uint8_t* begin ARCTICDB_UNUSED, + const uint8_t* end, + SegmentInMemory& res) { if (hdr.has_string_pool_field()) { ARCTICDB_TRACE(log::codec(), "Decoding string pool"); util::check(data!=end, "Reached end of input block with string pool fields to decode"); std::optional bv; - data += decode_field(string_pool_descriptor().type(), + data += decode_ndarray(string_pool_descriptor().type(), hdr.string_pool_field(), data, res.string_pool(), bv, - to_encoding_version(hdr.encoding_version())); + hdr.encoding_version()); ARCTICDB_TRACE(log::codec(), "Decoded string pool to position {}", data-begin); } @@ -332,49 +376,62 @@ ssize_t calculate_last_row(const Column& col) { return last_row; } -void decode_v2( - const Segment& segment, - arcticdb::proto::encoding::SegmentHeader& hdr, - SegmentInMemory& res, - const StreamDescriptor& desc -) { +void decode_v2(const Segment& segment, + const SegmentHeader& hdr, + SegmentInMemory& res, + const StreamDescriptor& desc) { ARCTICDB_SAMPLE(DecodeSegment, 0) + if(segment.buffer().data() == nullptr) { + ARCTICDB_DEBUG(log::codec(), "Segment contains no data in decode_v2"); + return; + } + const auto [begin, end] = get_segment_begin_end(segment, hdr); auto encoded_fields_ptr = end; auto data = begin; util::check_magic(data); decode_metadata(hdr, data, begin, res); - util::check(hdr.has_descriptor_field(), "Expected descriptor field in v2 encoding"); - util::check_magic(data); - if(hdr.has_descriptor_field() && hdr.descriptor_field().has_ndarray()) - data += encoding_sizes::field_compressed_size(hdr.descriptor_field()); + skip_descriptor(data, hdr); util::check_magic(data); - if(auto index_fields = decode_index_fields(hdr, data, begin, end); 
index_fields) - res.set_index_fields(std::make_shared(std::move(*index_fields))); + if(hdr.has_index_descriptor_field()) { + auto index_frame_descriptor = std::make_shared(read_frame_descriptor(data)); + auto frame_metadata = extract_frame_metadata(res); + auto index_segment_descriptor = std::make_shared(read_segment_descriptor(data)); + auto index_segment_identifier = read_identifier(data); + auto index_fields = decode_index_fields(hdr, data, begin, end); + TimeseriesDescriptor tsd{std::move(index_frame_descriptor), std::move(index_segment_descriptor), std::move(frame_metadata), std::move(index_fields), index_segment_identifier}; + res.set_timeseries_descriptor(tsd); + res.reset_metadata(); + } - util::check(hdr.has_column_fields(), "Expected column fields in v2 encoding"); - util::check_magic(encoded_fields_ptr); if (data!=end) { + util::check(hdr.has_column_fields(), "Expected column fields in v2 encoding"); + util::check_magic(encoded_fields_ptr); auto encoded_fields_buffer = decode_encoded_fields(hdr, encoded_fields_ptr, begin); const auto fields_size = desc.fields().size(); const auto start_row = res.row_count(); EncodedFieldCollection encoded_fields(std::move(encoded_fields_buffer)); - ssize_t seg_row_count = 0; + + auto encoded_field = encoded_fields.begin(); res.init_column_map(); + ssize_t seg_row_count = 0; for (std::size_t i = 0; i < fields_size; ++i) { - const auto& encoded_field = encoded_fields.at(i); +#ifdef DUMP_BYTES + log::version().debug("{}", dump_bytes(begin, (data - begin) + encoding_sizes::field_compressed_size(*encoded_field), 100u)); +#endif const auto& field_name = desc.fields(i).name(); util::check(data!=end, "Reached end of input block with {} fields to decode", fields_size-i); if(auto col_index = res.column_index(field_name)) { auto& col = res.column(static_cast(*col_index)); - data += decode_field(res.field(*col_index).type(), encoded_field, data, col, col.opt_sparse_map(), to_encoding_version(hdr.encoding_version())); + + data += decode_field(res.field(*col_index).type(), *encoded_field, data, col, col.opt_sparse_map(), hdr.encoding_version()); seg_row_count = std::max(seg_row_count, calculate_last_row(col)); } else { - data += encoding_sizes::field_compressed_size(encoded_field) + sizeof(ColumnMagic); + data += encoding_sizes::field_compressed_size(*encoded_field) + sizeof(ColumnMagic); } - + ++encoded_field; ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", i, data-begin); } @@ -386,53 +443,61 @@ void decode_v2( } } -void decode_v1( - const Segment& segment, - const arcticdb::proto::encoding::SegmentHeader& hdr, - SegmentInMemory& res, - StreamDescriptor::Proto& desc -) { +void decode_v1(const Segment& segment, + const SegmentHeader& hdr, + SegmentInMemory& res, + const StreamDescriptor& desc) { ARCTICDB_SAMPLE(DecodeSegment, 0) const uint8_t* data = segment.buffer().data(); - util::check(data != nullptr, "Got null data ptr from segment"); + if(data == nullptr) { + ARCTICDB_DEBUG(log::codec(), "Segment contains no data in decode_v1"); + return; + } + const uint8_t* begin = data; const uint8_t* end = begin + segment.buffer().bytes(); decode_metadata(hdr, data, begin, res); + if(res.has_metadata() && res.metadata()->Is()) { + ARCTICDB_DEBUG(log::version(), "Unpacking timeseries descriptor from metadata"); + auto tsd = unpack_timeseries_descriptor_from_proto(*res.metadata()); + res.set_timeseries_descriptor(tsd); + res.reset_metadata(); + } - if (data!=end) { + if (data != end) { const auto fields_size = desc.fields().size(); - 
util::check(fields_size == hdr.fields_size(), "Mismatch between descriptor and header field size: {} != {}", fields_size, hdr.fields_size()); - const auto start_row = res.row_count(); + const auto &column_fields = hdr.body_fields(); + util::check(fields_size == segment.fields_size(), + "Mismatch between descriptor and header field size: {} != {}", + fields_size, + column_fields.size()); + const auto start_row = res.row_count(); - ssize_t seg_row_count = 0; res.init_column_map(); - for (int i = 0; i < fields_size; ++i) { - const auto& field = hdr.fields(i); - const auto& field_name = desc.fields(i).name(); + ssize_t seg_row_count = 0; + for (std::size_t i = 0; i < fields_size; ++i) { + const auto &field = column_fields.at(i); + const auto& desc_field = desc.fields(i); + const auto &field_name = desc_field.name(); + util::check(data != end || is_empty_type(desc_field.type().data_type()), "Reached end of input block with {} fields to decode", fields_size - i); if (auto col_index = res.column_index(field_name)) { - auto& col = res.column(static_cast(*col_index)); - util::check( - data != end || is_empty_type(col.type().data_type()), - "Reached end of input block with {} fields to decode", - fields_size - i - ); + auto &col = res.column(static_cast(*col_index)); data += decode_field( res.field(*col_index).type(), field, data, col, col.opt_sparse_map(), - to_encoding_version(hdr.encoding_version()) + hdr.encoding_version() ); seg_row_count = std::max(seg_row_count, calculate_last_row(col)); + ARCTICDB_DEBUG(log::codec(), "Decoded column {} to position {}", i, data - begin); } else { - util::check(data != end, "Reached end of input block with {} fields to decode", fields_size - i); data += encoding_sizes::field_compressed_size(field); + ARCTICDB_DEBUG(log::codec(), "Skipped column {}, at position {}", i, data - begin); } - ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", i, data - begin); } - decode_string_pool(hdr, data, begin, end, res); res.set_row_data(static_cast(start_row + seg_row_count)); res.set_compacted(segment.header().compacted()); @@ -441,25 +506,21 @@ void decode_v1( void decode_into_memory_segment( const Segment& segment, - arcticdb::proto::encoding::SegmentHeader& hdr, + SegmentHeader& hdr, SegmentInMemory& res, - StreamDescriptor& desc) + const StreamDescriptor& desc) { if(EncodingVersion(segment.header().encoding_version()) == EncodingVersion::V2) decode_v2(segment, hdr, res, desc); else - decode_v1(segment, hdr, res, desc.mutable_proto()); + decode_v1(segment, hdr, res, desc); } SegmentInMemory decode_segment(Segment&& s) { auto segment = std::move(s); auto &hdr = segment.header(); - ARCTICDB_TRACE(log::codec(), "Decoding descriptor: {}", segment.header().stream_descriptor().DebugString()); - StreamDescriptor descriptor(std::make_shared(std::move(*segment.header().mutable_stream_descriptor())), segment.fields_ptr()); - - if(EncodingVersion(segment.header().encoding_version()) != EncodingVersion::V2) - descriptor.fields() = field_collection_from_proto(std::move(*descriptor.mutable_proto().mutable_fields())); - + ARCTICDB_TRACE(log::codec(), "Decoding descriptor: {}", segment.descriptor()); + auto descriptor = segment.descriptor(); descriptor.fields().regenerate_offsets(); ARCTICDB_TRACE(log::codec(), "Creating segment"); SegmentInMemory res(std::move(descriptor)); @@ -469,7 +530,8 @@ SegmentInMemory decode_segment(Segment&& s) { return res; } -static void hash_field(const arcticdb::proto::encoding::EncodedField &field, HashAccum &accum) { +template +void 
hash_field(const EncodedFieldType &field, HashAccum &accum) { auto &n = field.ndarray(); for(auto i = 0; i < n.shapes_size(); ++i) { auto v = n.shapes(i).hash(); @@ -482,17 +544,34 @@ static void hash_field(const arcticdb::proto::encoding::EncodedField &field, Has } } -HashedValue hash_segment_header(const arcticdb::proto::encoding::SegmentHeader &hdr) { +HashedValue get_segment_hash(Segment& seg) { HashAccum accum; - if (hdr.has_metadata_field()) { - hash_field(hdr.metadata_field(), accum); - } - for (int i = 0; i < hdr.fields_size(); ++i) { - hash_field(hdr.fields(i), accum); - } - if(hdr.has_string_pool_field()) { - hash_field(hdr.string_pool_field(), accum); + const auto& hdr = seg.header(); + if(hdr.encoding_version() == EncodingVersion::V1) { + // The hashes are part of the encoded fields protobuf in the v1 header, which is not + // ideal but needs to be maintained for consistency + const auto& proto = seg.generate_header_proto(); + if (proto.has_metadata_field()) { + hash_field(proto.metadata_field(), accum); + } + for (int i = 0; i < proto.fields_size(); ++i) { + hash_field(proto.fields(i), accum); + } + if (hdr.has_string_pool_field()) { + hash_field(proto.string_pool_field(), accum); + } + } else { + const auto& header_fields = hdr.header_fields(); + for(auto i = 0UL; i < header_fields.size(); ++i) { + hash_field(header_fields.at(i), accum); + } + + const auto& body_fields = hdr.body_fields(); + for(auto i = 0UL; i < body_fields.size(); ++i) { + hash_field(body_fields.at(i), accum); + } } + return accum.digest(); } @@ -532,7 +611,7 @@ void add_bitmagic_compressed_size( void encode_sparse_map( ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& field, Buffer& out, std::ptrdiff_t& pos ) { @@ -540,9 +619,7 @@ void encode_sparse_map( util::check(!is_empty_type(column_data.type().data_type()), "Empty typed columns should not have sparse maps"); ARCTICDB_DEBUG(log::codec(), "Sparse map count = {} pos = {}", column_data.bit_vector()->count(), pos); const size_t sparse_bm_bytes = encode_bitmap(*column_data.bit_vector(), out, pos); - util::variant_match(variant_field, [sparse_bm_bytes](auto field) { - field->mutable_ndarray()->set_sparse_map_bytes(static_cast(sparse_bm_bytes)); - }); + field.mutable_ndarray()->set_sparse_map_bytes(static_cast(sparse_bm_bytes)); } } } // namespace arcticdb diff --git a/cpp/arcticdb/codec/codec.hpp b/cpp/arcticdb/codec/codec.hpp index 28bc60c806..d8ff0c6db7 100644 --- a/cpp/arcticdb/codec/codec.hpp +++ b/cpp/arcticdb/codec/codec.hpp @@ -8,28 +8,51 @@ #pragma once #include -#include -#include #include #include +#include +#include +#include namespace arcticdb { -using ShapesBlockTDT = TypeDescriptorTag, DimensionTag>; +class Segment; +class SegmentInMemory; + +using ShapesBlockTDT = entity::TypeDescriptorTag, entity::DimensionTag>; Segment encode_dispatch( SegmentInMemory&& in_mem_seg, const arcticdb::proto::encoding::VariantCodec &codec_opts, EncodingVersion encoding_version); +Segment encode_v2( + SegmentInMemory&& in_mem_seg, + const arcticdb::proto::encoding::VariantCodec& codec_opts +); + +Segment encode_v1( + SegmentInMemory&& in_mem_seg, + const arcticdb::proto::encoding::VariantCodec& codec_opts +); + +void decode_v1(const Segment& segment, + const SegmentHeader& hdr, + SegmentInMemory& res, + const StreamDescriptor& desc); + +void decode_v2(const Segment& segment, + const SegmentHeader& hdr, + SegmentInMemory& res, + const StreamDescriptor& desc); SizeResult max_compressed_size_dispatch( const SegmentInMemory& in_mem_seg, const 
arcticdb::proto::encoding::VariantCodec &codec_opts, EncodingVersion encoding_version); -Buffer decode_encoded_fields( - const arcticdb::proto::encoding::SegmentHeader& hdr, +EncodedFieldCollection decode_encoded_fields( + const SegmentHeader& hdr, const uint8_t* data, const uint8_t* begin ARCTICDB_UNUSED); @@ -37,14 +60,14 @@ SegmentInMemory decode_segment(Segment&& segment); void decode_into_memory_segment( const Segment& segment, - arcticdb::proto::encoding::SegmentHeader& hdr, + SegmentHeader& hdr, SegmentInMemory& res, - StreamDescriptor& desc); + const entity::StreamDescriptor& desc); -template +template std::size_t decode_field( - const TypeDescriptor &td, - const EncodedFieldType &field, + const entity::TypeDescriptor &td, + const EncodedFieldImpl &field, const uint8_t *input, DataSink &data_sink, std::optional& bv, @@ -56,10 +79,15 @@ std::optional decode_metadata_from_segment( std::pair, StreamDescriptor> decode_metadata_and_descriptor_fields( Segment& segment); -std::optional> decode_timeseries_descriptor( +std::optional decode_timeseries_descriptor( Segment& segment); -HashedValue hash_segment_header(const arcticdb::proto::encoding::SegmentHeader &hdr); +HashedValue get_segment_hash(Segment& seg); + +SegmentDescriptorImpl read_segment_descriptor(const uint8_t*& data); + +TimeseriesDescriptor unpack_timeseries_descriptor_from_proto(const google::protobuf::Any& any); + } // namespace arcticdb #define ARCTICDB_SEGMENT_ENCODER_H_ diff --git a/cpp/arcticdb/codec/core.hpp b/cpp/arcticdb/codec/core.hpp index 5462c95300..d640908173 100644 --- a/cpp/arcticdb/codec/core.hpp +++ b/cpp/arcticdb/codec/core.hpp @@ -14,13 +14,14 @@ #include #include #include +#include #include namespace arcticdb::detail { using namespace arcticdb::entity; -struct BlockProtobufHelper { +struct BlockDataHelper { std::size_t count_; std::size_t bytes_; @@ -39,8 +40,8 @@ struct BlockProtobufHelper { struct NdArrayBlock { std::size_t item_count_; - BlockProtobufHelper shapes_; - BlockProtobufHelper values_; + BlockDataHelper shapes_; + BlockDataHelper values_; template void update_field_size(EncodedFieldType &field) const { @@ -103,12 +104,12 @@ class CodecHelper { return v; } - static BlockProtobufHelper scalar_block(std::size_t elt) { - return {elt, elt * sizeof(T)}; + static BlockDataHelper scalar_block(std::size_t row_count) { + return {row_count, row_count * sizeof(T)}; } - static NdArrayBlock nd_array_block(std::size_t elt, const shape_t *shape) { - std::size_t shape_count = static_cast(dim) * elt; + static NdArrayBlock nd_array_block(std::size_t row_count, const shape_t *shape) { + std::size_t shape_count = static_cast(dim) * row_count; std::size_t total_values_count = 0; if constexpr (dim == Dimension::Dim1) { for (std::size_t i = 0; i < shape_count; ++i, ++shape) { @@ -122,7 +123,7 @@ class CodecHelper { } std::size_t shape_bytes = shape_count * sizeof(shape_t); std::size_t data_bytes = total_values_count * sizeof(T); - return NdArrayBlock{elt, {shape_count, shape_bytes}, {total_values_count, data_bytes}}; + return NdArrayBlock{row_count, {shape_count, shape_bytes}, {total_values_count, data_bytes}}; } }; @@ -145,7 +146,7 @@ struct ShapeEncodingFromBlock { template static std::size_t encode_block( const T *in, - BlockProtobufHelper &block_utils, + BlockDataHelper &block_utils, HashAccum &hasher, T *out, std::size_t out_capacity, @@ -214,11 +215,10 @@ struct GenericBlockEncoder { } } - template static void encode( const typename EncoderType::Opts &opts, const BlockType& block, - EncodedFieldType& field, + 
EncodedFieldImpl& field, Buffer& out, std::ptrdiff_t& pos ) { @@ -245,7 +245,7 @@ struct GenericBlockEncoder { auto t_out = reinterpret_cast(out.data() + pos); const auto total_items_count = field_nd_array->items_count() + block_row_count; field_nd_array->set_items_count(total_items_count); - auto value_pb = field_nd_array->add_values(); + auto value_pb = field_nd_array->add_values(EncodingVersion::V1); const auto compressed_size = EncoderType::encode_block(opts, block.data(), helper_scalar_block, @@ -254,6 +254,7 @@ struct GenericBlockEncoder { max_compressed_size, pos, *value_pb->mutable_codec()); + helper_scalar_block.set_block_data(*value_pb, helper.hasher_.digest(), compressed_size); helper_scalar_block.set_version(*value_pb, EncoderType::VERSION); } else { @@ -281,7 +282,7 @@ struct GenericBlockEncoder { HashedValue shape_hash = helper.get_digest_and_reset(); // write values - auto value_pb = field_nd_array->add_values(); + auto value_pb = field_nd_array->add_values(EncodingVersion::V1); auto t_out = reinterpret_cast(out.data() + pos); const auto values_comp_size = EncoderType::encode_block( opts, @@ -339,7 +340,7 @@ struct GenericBlockEncoderV2 { Helper helper; helper.hasher_.reset(helper.seed); - auto helper_scalar_block = BlockProtobufHelper{block.nbytes() / sizeof(T), block.nbytes()}; + auto helper_scalar_block = BlockDataHelper{block.nbytes() / sizeof(T), block.nbytes()}; ARCTICDB_TRACE(log::codec(), "Generic block encode writing scalar of {} elements", block.row_count()); const auto uncompressed_size = helper_scalar_block.bytes_; diff --git a/cpp/arcticdb/codec/encode_common.hpp b/cpp/arcticdb/codec/encode_common.hpp index 10e25479a1..eaee2ab18f 100644 --- a/cpp/arcticdb/codec/encode_common.hpp +++ b/cpp/arcticdb/codec/encode_common.hpp @@ -33,6 +33,14 @@ struct EncodingPolicyType { using ColumnEncoder = VersionedColumnEncoder; }; +template +size_t calc_num_blocks(const ColumnData& column_data) { + if constexpr (EncodingPolicyType::version == EncodingVersion::V1) + return column_data.num_blocks() + (column_data.num_blocks() * !column_data.shapes()->empty()); + else + return column_data.num_blocks() + !column_data.shapes()->empty(); +} + template struct BytesEncoder { using Encoder = TypedBlockEncoderImpl; @@ -45,7 +53,7 @@ struct BytesEncoder { const arcticdb::proto::encoding::VariantCodec &codec_opts, Buffer &out_buffer, std::ptrdiff_t &pos, - EncodedFieldType *encoded_field + EncodedFieldType& encoded_field ) { if constexpr (EncodingPolicyType::version == EncodingVersion::V1) { const auto bytes_count = static_cast(data.bytes()); @@ -55,11 +63,9 @@ struct BytesEncoder { bytes_count, 1u, data.block_and_offset(0).block_); - Encoder::encode(codec_opts, typed_block, *encoded_field, out_buffer, pos); + Encoder::encode(codec_opts, typed_block, encoded_field, out_buffer, pos); } else if constexpr (EncodingPolicyType::version == EncodingVersion::V2) { - // On Man's Mac build servers size_t and ssize_t are long rather than long long but the shape TDT - // expects int64 (long long). 
- const size_t row_count = 1; + const shape_t row_count = 1; // BytesEncoder data is stored as an array with a single row const auto shapes_data = static_cast(data.bytes()); auto shapes_block = TypedBlockData(&shapes_data, nullptr, @@ -72,9 +78,9 @@ struct BytesEncoder { static_cast(bytes_count), row_count, data.block_and_offset(0).block_); - ShapesEncoder::encode_shapes(codec::default_shapes_codec(), shapes_block, *encoded_field, out_buffer, pos); - Encoder::encode_values(codec_opts, data_block, *encoded_field, out_buffer, pos); - auto *field_nd_array = encoded_field->mutable_ndarray(); + ShapesEncoder::encode_shapes(codec::default_shapes_codec(), shapes_block, encoded_field, out_buffer, pos); + Encoder::encode_values(codec_opts, data_block, encoded_field, out_buffer, pos); + auto *field_nd_array = encoded_field.mutable_ndarray(); const auto total_items_count = field_nd_array->items_count() + row_count; field_nd_array->set_items_count(total_items_count); } else { @@ -82,8 +88,7 @@ struct BytesEncoder { } } - static size_t - max_compressed_size(const arcticdb::proto::encoding::VariantCodec &codec_opts, shape_t data_size) { + static size_t max_compressed_size(const arcticdb::proto::encoding::VariantCodec &codec_opts, shape_t data_size) { const shape_t shapes_bytes = sizeof(shape_t); const auto values_block = BytesBlock(data_size, &data_size); if constexpr (EncodingPolicyType::version == EncodingVersion::V1) { @@ -98,6 +103,10 @@ struct BytesEncoder { static_assert(std::is_same_v, "Unknown encoding version"); } } + + static size_t num_encoded_blocks(const ChunkedBuffer& buffer) { + return buffer.num_blocks() + 1; + } }; struct SizeResult { @@ -108,10 +117,9 @@ struct SizeResult { template void calc_metadata_size( - const SegmentInMemory &in_mem_seg, - const arcticdb::proto::encoding::VariantCodec &codec_opts, - SizeResult &result -) { + const SegmentInMemory &in_mem_seg, + const arcticdb::proto::encoding::VariantCodec &codec_opts, + SizeResult &result) { if (in_mem_seg.metadata()) { const auto metadata_bytes = static_cast(in_mem_seg.metadata()->ByteSizeLong()); result.uncompressed_bytes_ += metadata_bytes + sizeof(shape_t); @@ -162,51 +170,51 @@ void calc_string_pool_size( template void encode_metadata( - const SegmentInMemory &in_mem_seg, - arcticdb::proto::encoding::SegmentHeader &segment_header, - const arcticdb::proto::encoding::VariantCodec &codec_opts, - Buffer &out_buffer, - std::ptrdiff_t &pos -) { + const SegmentInMemory& in_mem_seg, + SegmentHeader& segment_header, + const arcticdb::proto::encoding::VariantCodec& codec_opts, + Buffer &out_buffer, + std::ptrdiff_t& pos) { if (in_mem_seg.metadata()) { const auto bytes_count = static_cast(in_mem_seg.metadata()->ByteSizeLong()); ARCTICDB_TRACE(log::codec(), "Encoding {} bytes of metadata", bytes_count); - auto encoded_field = segment_header.mutable_metadata_field(); - constexpr int max_stack_alloc = 1 << 11; bool malloced{false}; - uint8_t *meta_ptr{nullptr}; + uint8_t* meta_ptr; if (bytes_count > max_stack_alloc) { - meta_ptr = reinterpret_cast(malloc(bytes_count)); + meta_ptr = reinterpret_cast(malloc(bytes_count)); malloced = true; } else { - meta_ptr = reinterpret_cast(alloca(bytes_count)); + meta_ptr = reinterpret_cast(alloca(bytes_count)); } ChunkedBuffer meta_buffer; meta_buffer.add_external_block(meta_ptr, bytes_count, 0u); + const auto num_encoded_fields = BytesEncoder::num_encoded_blocks(meta_buffer); + auto& encoded_field = segment_header.mutable_metadata_field(num_encoded_fields); google::protobuf::io::ArrayOutputStream 
aos(&meta_buffer[0], static_cast(bytes_count)); in_mem_seg.metadata()->SerializeToZeroCopyStream(&aos); + ARCTICDB_TRACE(log::codec(), "Encoding metadata to position {}", pos); BytesEncoder::encode(meta_buffer, codec_opts, out_buffer, pos, encoded_field); - ARCTICDB_DEBUG(log::codec(), "Encoded metadata to position {}", pos); + ARCTICDB_TRACE(log::codec(), "Encoded metadata to position {}", pos); if (malloced) free(meta_ptr); } else { - ARCTICDB_DEBUG(log::codec(), "Not encoding any metadata"); + ARCTICDB_TRACE(log::codec(), "Not encoding any metadata"); } } template void encode_string_pool( const SegmentInMemory &in_mem_seg, - arcticdb::proto::encoding::SegmentHeader &segment_header, + SegmentHeader &segment_header, const arcticdb::proto::encoding::VariantCodec &codec_opts, Buffer &out_buffer, std::ptrdiff_t &pos ) { if (in_mem_seg.has_string_pool()) { ARCTICDB_TRACE(log::codec(), "Encoding string pool to position {}", pos); - auto *encoded_field = segment_header.mutable_string_pool_field(); auto col = in_mem_seg.string_pool_data(); + auto& encoded_field = segment_header.mutable_string_pool_field(calc_num_blocks(col)); EncodingPolicyType::ColumnEncoder::encode(codec_opts, col, encoded_field, out_buffer, pos); ARCTICDB_TRACE(log::codec(), "Encoded string pool to position {}", pos); } diff --git a/cpp/arcticdb/codec/encode_v1.cpp b/cpp/arcticdb/codec/encode_v1.cpp index bd057bd8b7..8bda35aaa6 100644 --- a/cpp/arcticdb/codec/encode_v1.cpp +++ b/cpp/arcticdb/codec/encode_v1.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include namespace arcticdb { @@ -18,7 +19,7 @@ namespace arcticdb { void encode_sparse_map( ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& variant_field, Buffer& out, std::ptrdiff_t& pos ); @@ -32,15 +33,14 @@ namespace arcticdb { static void encode( const arcticdb::proto::encoding::VariantCodec &codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& variant_field, Buffer& out, std::ptrdiff_t& pos); }; std::pair ColumnEncoderV1::max_compressed_size( - const arcticdb::proto::encoding::VariantCodec& codec_opts, - ColumnData& column_data - ) { + const arcticdb::proto::encoding::VariantCodec& codec_opts, + ColumnData& column_data) { return column_data.type().visit_tag([&codec_opts, &column_data](auto type_desc_tag) { size_t max_compressed_bytes = 0; size_t uncompressed_bytes = 0; @@ -58,19 +58,18 @@ namespace arcticdb { // might be non-zero. 
max_compressed_bytes += Encoder::max_compressed_size(codec_opts, *block); } - add_bitmagic_compressed_size(column_data, uncompressed_bytes, max_compressed_bytes); + add_bitmagic_compressed_size(column_data, max_compressed_bytes, uncompressed_bytes); return std::make_pair(uncompressed_bytes, max_compressed_bytes); }); } void ColumnEncoderV1::encode( - const arcticdb::proto::encoding::VariantCodec& codec_opts, - ColumnData& column_data, - std::variant variant_field, - Buffer& out, - std::ptrdiff_t& pos - ) { - column_data.type().visit_tag([&](auto type_desc_tag) { + const arcticdb::proto::encoding::VariantCodec& codec_opts, + ColumnData& column_data, + EncodedFieldImpl& field, + Buffer& out, + std::ptrdiff_t& pos) { + column_data.type().visit_tag([&codec_opts, &column_data, &field, &out, &pos](auto type_desc_tag) { using TDT = decltype(type_desc_tag); using Encoder = TypedBlockEncoderImpl; ARCTICDB_TRACE(log::codec(), "Column data has {} blocks", column_data.num_blocks()); @@ -78,20 +77,17 @@ namespace arcticdb { if constexpr(must_contain_data(static_cast(type_desc_tag))) { util::check(block.value().nbytes() > 0, "Zero-sized block"); } - std::visit([&](auto field){ - Encoder::encode(codec_opts, block.value(), *field, out, pos); - }, variant_field); + Encoder::encode(codec_opts, block.value(), field, out, pos); } }); - encode_sparse_map(column_data, variant_field, out, pos); + encode_sparse_map(column_data, field, out, pos); } using EncodingPolicyV1 = EncodingPolicyType; [[nodiscard]] SizeResult max_compressed_size_v1( - const SegmentInMemory& in_mem_seg, - const arcticdb::proto::encoding::VariantCodec& codec_opts - ) { + const SegmentInMemory& in_mem_seg, + const arcticdb::proto::encoding::VariantCodec& codec_opts) { ARCTICDB_SAMPLE(GetSegmentCompressedSize, 0) SizeResult result{}; calc_metadata_size(in_mem_seg, codec_opts, result); @@ -104,60 +100,61 @@ namespace arcticdb { return result; } + /* + * This takes an in memory segment with all the metadata, column tensors etc., loops through each column + * and based on the type of the column, calls the typed block encoder for that column. + */ [[nodiscard]] Segment encode_v1(SegmentInMemory&& s, const arcticdb::proto::encoding::VariantCodec &codec_opts) { - /* - * This takes an in memory segment with all the metadata, column tensors etc., loops through each column - * and based on the type of the column, calls the typed block encoder for that column. 
- */ ARCTICDB_SAMPLE(EncodeSegment, 0) auto in_mem_seg = std::move(s); - auto arena = std::make_unique(); - auto segment_header = google::protobuf::Arena::CreateMessage(arena.get()); - *segment_header->mutable_stream_descriptor() = in_mem_seg.descriptor().copy_to_proto(); - segment_header->set_compacted(in_mem_seg.compacted()); + SegmentHeader segment_header{EncodingVersion::V1}; + segment_header.set_compacted(in_mem_seg.compacted()); + + if(in_mem_seg.has_index_descriptor()) { + ARCTICDB_TRACE(log::version(), "Memory segment has index descriptor, encoding to protobuf"); + util::check(!in_mem_seg.has_metadata(), "Metadata already set when trying to set index descriptor"); + auto proto = copy_time_series_descriptor_to_proto(in_mem_seg.index_descriptor()); + google::protobuf::Any any; + any.PackFrom(proto); + in_mem_seg.set_metadata(std::move(any)); + } + std::ptrdiff_t pos = 0; static auto block_to_header_ratio = ConfigsMap::instance()->get_int("Codec.EstimatedHeaderRatio", 75); const auto preamble = in_mem_seg.num_blocks() * block_to_header_ratio; - auto [max_compressed_size, uncompressed_size, encoded_blocks_bytes] = max_compressed_size_v1(in_mem_seg, codec_opts); + auto [max_compressed_size, uncompressed_size, encoded_buffer_size] = max_compressed_size_v1(in_mem_seg, codec_opts); ARCTICDB_TRACE(log::codec(), "Estimated max buffer requirement: {}", max_compressed_size); auto out_buffer = std::make_shared(max_compressed_size, preamble); ColumnEncoderV1 encoder; - ARCTICDB_TRACE(log::codec(), "Encoding descriptor: {}", segment_header->stream_descriptor().DebugString()); - auto *tsd = segment_header->mutable_stream_descriptor(); - tsd->set_in_bytes(uncompressed_size); - - encode_metadata(in_mem_seg, *segment_header, codec_opts, *out_buffer, pos); + encode_metadata(in_mem_seg, segment_header, codec_opts, *out_buffer, pos); + ARCTICDB_TRACE(log::codec(), "Encoding descriptor: {}", in_mem_seg.descriptor()); + auto descriptor_data = in_mem_seg.descriptor().data_ptr(); + descriptor_data->uncompressed_bytes_ = uncompressed_size; + EncodedFieldCollection encoded_fields; if(in_mem_seg.row_count() > 0) { + encoded_fields.reserve(encoded_buffer_size, in_mem_seg.num_columns()); ARCTICDB_TRACE(log::codec(), "Encoding fields"); for (std::size_t column_index = 0; column_index < in_mem_seg.num_columns(); ++column_index) { auto column_data = in_mem_seg.column_data(column_index); - auto *encoded_field = segment_header->mutable_fields()->Add(); + auto* column_field = encoded_fields.add_field(column_data.num_blocks()); if(column_data.num_blocks() > 0) { - encoder.encode(codec_opts, column_data, encoded_field, *out_buffer, pos); - ARCTICDB_TRACE(log::codec(), - "Encoded column {}: ({}) to position {}", - column_index, - in_mem_seg.descriptor().fields(column_index).name(), - pos); + encoder.encode(codec_opts, column_data, *column_field, *out_buffer, pos); + ARCTICDB_TRACE(log::codec(), "Encoded column {}: ({}) to position {}", column_index, in_mem_seg.descriptor().fields(column_index).name(),pos); } else { util::check(!must_contain_data(column_data.type()), "Column {} of type {} contains no blocks", column_index, column_data.type()); - auto* ndarray = encoded_field->mutable_ndarray(); + auto* ndarray = column_field->mutable_ndarray(); ndarray->set_items_count(0); } } - encode_string_pool(in_mem_seg, *segment_header, codec_opts, *out_buffer, pos); + encode_string_pool(in_mem_seg, segment_header, codec_opts, *out_buffer, pos); } - ARCTICDB_DEBUG(log::codec(), "Setting buffer bytes to {}", pos); + 
segment_header.set_body_fields(EncodedFieldCollection(std::move(encoded_fields))); + ARCTICDB_TRACE(log::codec(), "Encode setting buffer bytes to {}", pos); out_buffer->set_bytes(pos); - tsd->set_out_bytes(pos); - ARCTICDB_TRACE(log::codec(), "Encoded header: {}", tsd->DebugString()); - if(!segment_header->has_metadata_field()) - ARCTICDB_DEBUG(log::codec(), "No metadata field"); - ARCTICDB_DEBUG(log::codec(), "Block count {} header size {} ratio {}", - in_mem_seg.num_blocks(), segment_header->ByteSizeLong(), - in_mem_seg.num_blocks() ? segment_header->ByteSizeLong() / in_mem_seg.num_blocks() : 0); - return {std::move(arena), segment_header, std::move(out_buffer), in_mem_seg.descriptor().fields_ptr()}; + descriptor_data->compressed_bytes_ = pos; + descriptor_data->row_count_ = in_mem_seg.row_count(); + return Segment::initialize(std::move(segment_header), std::move(out_buffer), descriptor_data, in_mem_seg.descriptor().fields_ptr(), in_mem_seg.descriptor().id()); } } diff --git a/cpp/arcticdb/codec/encode_v2.cpp b/cpp/arcticdb/codec/encode_v2.cpp index 657e4f7ca7..56a1aa7259 100644 --- a/cpp/arcticdb/codec/encode_v2.cpp +++ b/cpp/arcticdb/codec/encode_v2.cpp @@ -10,346 +10,382 @@ #include #include #include -#include - +#include namespace arcticdb { - void add_bitmagic_compressed_size( - const ColumnData& column_data, - size_t& max_compressed_bytes, - size_t& uncompressed_bytes - ); +void add_bitmagic_compressed_size( + const ColumnData& column_data, + size_t& max_compressed_bytes, + size_t& uncompressed_bytes +); + +void encode_sparse_map( + ColumnData& column_data, + EncodedFieldImpl& field, + Buffer& out, + std::ptrdiff_t& pos +); + +template +void write_magic(Buffer& buffer, std::ptrdiff_t& pos) { + new (buffer.data() + pos) MagicType{}; + pos += sizeof(MagicType); +} + +void write_frame_data(Buffer& buffer, std::ptrdiff_t& pos, const FrameDescriptor& frame_desc) { + auto ptr = new (buffer.data() + pos) FrameDescriptor{}; + *ptr = frame_desc; + pos += sizeof(FrameDescriptor); +} + +void write_segment_descriptor(Buffer& buffer, std::ptrdiff_t& pos, const SegmentDescriptorImpl& segment_desc) { + auto ptr = new (buffer.data() + pos) SegmentDescriptorImpl{}; + *ptr = segment_desc; + pos += sizeof(SegmentDescriptorImpl); +} - void encode_sparse_map( +/// @brief Utility class used to encode and compute the max encoding size for regular data columns for V2 encoding +/// What differs this from the already existing ColumnEncoder is that ColumnEncoder encodes the shapes of +/// multidimensional data as part of each block. ColumnEncoder2 uses a better strategy and encodes the shapes for the +/// whole column upfront (before all blocks). +/// @note Although ArcticDB did not support multidimensional user data prior to creating ColumnEncoder2 some of the +/// internal data was multidimensional and used ColumnEncoder. More specifically: string pool and metadata. +/// @note This should be used for V2 encoding. V1 encoding can't use it as there is already data written the other +/// way and it will be hard to distinguish both. 
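The doc comment above describes the layout difference between the two column encoders: V1 writes each block's shapes immediately before that block's values, while V2 writes all of a column's shapes once, ahead of every value block. A toy sketch of the two layouts, with hypothetical types (ToyBlock) that are not ArcticDB code:

#include <cstdint>
#include <vector>

struct ToyBlock { std::vector<int64_t> shape; std::vector<uint8_t> values; };

static void append_shape(std::vector<uint8_t>& out, int64_t s) {
    const auto* p = reinterpret_cast<const uint8_t*>(&s);
    out.insert(out.end(), p, p + sizeof(s));
}

// V1-style: shapes and values interleaved per block: [shape0][values0][shape1][values1]...
std::vector<uint8_t> layout_interleaved(const std::vector<ToyBlock>& blocks) {
    std::vector<uint8_t> out;
    for (const auto& b : blocks) {
        for (auto s : b.shape) append_shape(out, s);
        out.insert(out.end(), b.values.begin(), b.values.end());
    }
    return out;
}

// V2-style: all shapes for the column up front, then the value blocks: [shapes...][values0][values1]...
std::vector<uint8_t> layout_shapes_first(const std::vector<ToyBlock>& blocks) {
    std::vector<uint8_t> out;
    for (const auto& b : blocks)
        for (auto s : b.shape) append_shape(out, s);
    for (const auto& b : blocks)
        out.insert(out.end(), b.values.begin(), b.values.end());
    return out;
}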
+struct ColumnEncoderV2 { +public: + static void encode( + const arcticdb::proto::encoding::VariantCodec &codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& field, Buffer& out, - std::ptrdiff_t& pos - ); + std::ptrdiff_t& pos); - template - static void write_magic(Buffer& buffer, std::ptrdiff_t& pos) { - new (buffer.data() + pos) MagicType{}; - pos += sizeof(MagicType); - } + static std::pair max_compressed_size( + const arcticdb::proto::encoding::VariantCodec& codec_opts, + ColumnData& column_data); +private: + static void encode_shapes( + const ColumnData& column_data, + EncodedFieldImpl& field, + Buffer& out, + std::ptrdiff_t& pos_in_buffer); - /// @brief Utility class used to encode and compute the max encoding size for regular data columns for V2 encoding - /// What differs this from the already existing ColumnEncoder is that ColumnEncoder encodes the shapes of - /// multidimensional data as part of each block. ColumnEncoder2 uses a better strategy and encodes the shapes for the - /// whole column upfront (before all blocks). - /// @note Although ArcticDB did not support multidimensional user data prior to creating ColumnEncoder2 some of the - /// internal data was multidimensional and used ColumnEncoder. More specifically: string pool and metadata. - /// @note This should be used for V2 encoding. V1 encoding can't use it as there is already data written the other - /// way and it will be hard to distinguish both. - struct ColumnEncoderV2 { - public: - static void encode( - const arcticdb::proto::encoding::VariantCodec &codec_opts, - ColumnData& column_data, - std::variant variant_field, - Buffer& out, - std::ptrdiff_t& pos); - static std::pair max_compressed_size( - const arcticdb::proto::encoding::VariantCodec& codec_opts, - ColumnData& column_data); - private: - static void encode_shapes( - const ColumnData& column_data, - std::variant variant_field, - Buffer& out, - std::ptrdiff_t& pos_in_buffer); - static void encode_blocks( - const arcticdb::proto::encoding::VariantCodec &codec_opts, - ColumnData& column_data, - std::variant variant_field, - Buffer& out, - std::ptrdiff_t& pos); - }; - - - [[nodiscard]] static TypedBlockData create_shapes_typed_block(const ColumnData& column_data) { - static_assert(sizeof(ssize_t) == sizeof(int64_t)); - const size_t row_count = column_data.shapes()->bytes() / sizeof(shape_t); - return {reinterpret_cast(column_data.shapes()->data()), - nullptr, - column_data.shapes()->bytes(), - row_count, - nullptr}; - } + static void encode_blocks( + const arcticdb::proto::encoding::VariantCodec &codec_opts, + ColumnData& column_data, + EncodedFieldImpl& field, + Buffer& out, + std::ptrdiff_t& pos); +}; + +[[nodiscard]] static TypedBlockData create_shapes_typed_block(const ColumnData& column_data) { + static_assert(sizeof(ssize_t) == sizeof(int64_t)); + const size_t row_count = column_data.shapes()->bytes() / sizeof(shape_t); + return {reinterpret_cast(column_data.shapes()->data()), + nullptr, + column_data.shapes()->bytes(), + row_count, + nullptr}; +} - void ColumnEncoderV2::encode( +void ColumnEncoderV2::encode( const arcticdb::proto::encoding::VariantCodec& codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& field, Buffer& out, - std::ptrdiff_t& pos - ) { - ARCTICDB_DEBUG(log::codec(), "Encoding field with codec {}", codec_opts.DebugString()); - encode_shapes(column_data, variant_field, out, pos); - encode_blocks(codec_opts, column_data, variant_field, out, pos); - 
encode_sparse_map(column_data, variant_field, out, pos); - } + std::ptrdiff_t& pos) { + encode_shapes(column_data, field, out, pos); + encode_blocks(codec_opts, column_data, field, out, pos); + encode_sparse_map(column_data, field, out, pos); +} - void ColumnEncoderV2::encode_shapes( +void ColumnEncoderV2::encode_shapes( const ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& field, Buffer& out, - std::ptrdiff_t& pos_in_buffer - ) { - // There is no need to store the shapes for a column of empty type as they will be all 0. The type handler will - // assign 0 for the shape upon reading. There is one edge case - when we have None in the column, as it should not - // have shape at all (since it's not an array). This is handled by the sparse map. - if(column_data.type().dimension() != Dimension::Dim0 && !is_empty_type(column_data.type().data_type())) { - TypedBlockData shapes_block = create_shapes_typed_block(column_data); - util::variant_match(variant_field, [&](auto field){ - using ShapesEncoder = TypedBlockEncoderImpl; - ShapesEncoder::encode_shapes(codec::default_shapes_codec(), shapes_block, *field, out, pos_in_buffer); - }); - } + std::ptrdiff_t& pos_in_buffer) { + // There is no need to store the shapes for a column of empty type as they will be all 0. The type handler will + // assign 0 for the shape upon reading. There is one edge case - when we have None in the column, as it should not + // have shape at all (since it's not an array). This is handled by the sparse map. + if(column_data.type().dimension() != Dimension::Dim0 && !is_empty_type(column_data.type().data_type())) { + TypedBlockData shapes_block = create_shapes_typed_block(column_data); + using ShapesEncoder = TypedBlockEncoderImpl; + ShapesEncoder::encode_shapes(codec::default_shapes_codec(), shapes_block, field, out, pos_in_buffer); } +} - void ColumnEncoderV2::encode_blocks( +void ColumnEncoderV2::encode_blocks( const arcticdb::proto::encoding::VariantCodec &codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& field, Buffer& out, - std::ptrdiff_t& pos - ) { - column_data.type().visit_tag([&](auto type_desc_tag) { - using TDT = decltype(type_desc_tag); - using Encoder = TypedBlockEncoderImpl; - ARCTICDB_TRACE(log::codec(), "Column data has {} blocks", column_data.num_blocks()); - while (auto block = column_data.next()) { - if constexpr(must_contain_data(static_cast(type_desc_tag))) { - util::check(block.value().nbytes() > 0, "Zero-sized block"); - } - util::variant_match(variant_field, [&](auto field) { - Encoder::encode_values(codec_opts, block.value(), *field, out, pos); - }); + std::ptrdiff_t& pos) { + column_data.type().visit_tag([&codec_opts, &column_data, &field, &out, &pos](auto type_desc_tag) { + using TDT = decltype(type_desc_tag); + using Encoder = TypedBlockEncoderImpl; + ARCTICDB_TRACE(log::codec(), "Column data has {} blocks", column_data.num_blocks()); + while (auto block = column_data.next()) { + if constexpr(must_contain_data(static_cast(type_desc_tag))) { + util::check(block->nbytes() > 0, "Zero-sized block"); + Encoder::encode_values(codec_opts, block.value(), field, out, pos); + } else { + if(block->nbytes() > 0) + Encoder::encode_values(codec_opts, block.value(), field, out, pos); } - }); - } + } + }); +} - std::pair ColumnEncoderV2::max_compressed_size( +std::pair ColumnEncoderV2::max_compressed_size( const arcticdb::proto::encoding::VariantCodec& codec_opts, - ColumnData& column_data - ) { - return column_data.type().visit_tag([&codec_opts, 
&column_data](auto type_desc_tag) { - size_t max_compressed_bytes = 0; - size_t uncompressed_bytes = 0; - using TDT = decltype(type_desc_tag); - using Encoder = TypedBlockEncoderImpl; - using ShapesEncoder = TypedBlockEncoderImpl; - ARCTICDB_TRACE(log::codec(), "Column data has {} blocks", column_data.num_blocks()); - const size_t shapes_byte_count = column_data.shapes()->bytes(); - const TypedBlockData shapes_block = create_shapes_typed_block(column_data); - max_compressed_bytes += ShapesEncoder::max_compressed_size(codec::default_shapes_codec(), shapes_block); - uncompressed_bytes += shapes_byte_count; - while (auto block = column_data.next()) { - const auto nbytes = block.value().nbytes(); - if constexpr(must_contain_data(static_cast(type_desc_tag))) { - util::check(nbytes > 0, "Zero-sized block"); - } + ColumnData& column_data) { + return column_data.type().visit_tag([&codec_opts, &column_data](auto type_desc_tag) { + size_t max_compressed_bytes = 0; + size_t uncompressed_bytes = 0; + using TDT = decltype(type_desc_tag); + using Encoder = TypedBlockEncoderImpl; + using ShapesEncoder = TypedBlockEncoderImpl; + ARCTICDB_TRACE(log::codec(), "Column data has {} blocks", column_data.num_blocks()); + const size_t shapes_byte_count = column_data.shapes()->bytes(); + const TypedBlockData shapes_block = create_shapes_typed_block(column_data); + max_compressed_bytes += ShapesEncoder::max_compressed_size(codec::default_shapes_codec(), shapes_block); + uncompressed_bytes += shapes_byte_count; + while (auto block = column_data.next()) { + const auto nbytes = block.value().nbytes(); + if constexpr(must_contain_data(static_cast(type_desc_tag))) { + util::check(nbytes > 0, "Zero-sized block"); + uncompressed_bytes += nbytes; + max_compressed_bytes += Encoder::max_compressed_size(codec_opts, block.value()); + } else if(nbytes > 0) { uncompressed_bytes += nbytes; - // For the empty type the column will contain 0 size of user data however the encoder might need add some - // encoder specific data to the buffer, thus the uncompressed size will be 0 but the max_compressed_bytes - // might be non-zero. 
max_compressed_bytes += Encoder::max_compressed_size(codec_opts, block.value()); } - add_bitmagic_compressed_size(column_data, uncompressed_bytes, max_compressed_bytes); - return std::make_pair(uncompressed_bytes, max_compressed_bytes); - }); - } + } + add_bitmagic_compressed_size(column_data, max_compressed_bytes, uncompressed_bytes); + return std::make_pair(uncompressed_bytes, max_compressed_bytes); + }); +} - using EncodingPolicyV2 = EncodingPolicyType; +using EncodingPolicyV2 = EncodingPolicyType; - static void encode_field_descriptors( +static void encode_field_descriptors( const SegmentInMemory& in_mem_seg, - arcticdb::proto::encoding::SegmentHeader& segment_header, + SegmentHeader& segment_header, const arcticdb::proto::encoding::VariantCodec& codec_opts, Buffer& out_buffer, - std::ptrdiff_t& pos - ) { - ARCTICDB_TRACE(log::codec(), "Encoding field descriptors to position {}", pos); - auto *encoded_field = segment_header.mutable_descriptor_field(); + std::ptrdiff_t& pos) { + ARCTICDB_TRACE(log::codec(), "Encoding field descriptors to position {}", pos); + if(!in_mem_seg.fields().empty()) { auto col = in_mem_seg.descriptor().fields().column_data(); + auto &encoded_field = segment_header.mutable_descriptor_field(calc_num_blocks(col)); + ColumnEncoderV2::encode(codec_opts, col, encoded_field, out_buffer, pos); ARCTICDB_TRACE(log::codec(), "Encoded field descriptors to position {}", pos); + } +} - write_magic(out_buffer, pos); - if (in_mem_seg.index_fields()) { - ARCTICDB_TRACE(log::codec(), "Encoding index fields descriptors to position {}", pos); - auto *index_field = segment_header.mutable_index_descriptor_field(); - auto index_col = in_mem_seg.index_fields()->column_data(); - ColumnEncoderV2::encode(codec_opts, index_col, index_field, out_buffer, pos); - ARCTICDB_TRACE(log::codec(), "Encoded index field descriptors to position {}", pos); - } +static void encode_index_descriptors( + const SegmentInMemory& in_mem_seg, + SegmentHeader& segment_header, + const arcticdb::proto::encoding::VariantCodec& codec_opts, + Buffer& out_buffer, + std::ptrdiff_t& pos) { + ARCTICDB_TRACE(log::codec(), "Encoding index descriptors to position {}", pos); + + if (in_mem_seg.has_index_descriptor()) { + const auto& tsd = in_mem_seg.index_descriptor(); + write_frame_data(out_buffer, pos, *tsd.frame_data_); + write_magic(out_buffer, pos); + write_segment_descriptor(out_buffer, pos, *tsd.segment_desc_); + write_identifier(out_buffer, pos, tsd.stream_id_); + + ARCTICDB_TRACE(log::codec(), "Encoding index fields descriptors to position {}", pos); + auto index_field_data = tsd.fields().column_data(); + auto& index_field = segment_header.mutable_index_descriptor_field(calc_num_blocks(index_field_data)); + + ColumnEncoderV2::encode(codec_opts, index_field_data, index_field, out_buffer, pos); + ARCTICDB_TRACE(log::codec(), "Encoded index field descriptors to position {}", pos); } +} - [[nodiscard]] size_t calc_column_blocks_size(const Column& col) { - size_t bytes = EncodedField::Size; - if(col.type().dimension() != entity::Dimension::Dim0) - bytes += sizeof(EncodedBlock); +[[nodiscard]] size_t calc_column_blocks_size(const Column& col) { + size_t bytes = EncodedFieldImpl::Size; + if(col.type().dimension() != entity::Dimension::Dim0) + bytes += sizeof(EncodedBlock); - bytes += sizeof(EncodedBlock) * col.num_blocks(); - ARCTICDB_TRACE(log::version(), "Encoded block size: {} + shapes({}) + {} * {} = {}", - EncodedField::Size, - col.type().dimension() != entity::Dimension::Dim0 ? 
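max_compressed_size above sums a per-block worst-case bound (plus the shapes block and the bitmagic sparse map) so that a single output buffer can be allocated up front. A standalone sketch of the same "sum the bounds, allocate once" pattern using LZ4's public compressBound as the per-block bound; this is not ArcticDB code:

#include <lz4.h>
#include <cstddef>
#include <vector>

std::size_t worst_case_output_bytes(const std::vector<std::size_t>& block_sizes) {
    std::size_t total = 0;
    for (auto nbytes : block_sizes)
        total += static_cast<std::size_t>(LZ4_compressBound(static_cast<int>(nbytes)));
    return total;  // size the destination buffer once with this bound
}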
sizeof(EncodedBlock) : 0u, - sizeof(EncodedBlock), - col.num_blocks(), - bytes); + bytes += sizeof(EncodedBlock) * col.num_blocks(); + ARCTICDB_TRACE(log::version(), "Encoded block size: {} + shapes({}) + {} * {} = {}", + EncodedFieldImpl::Size, + col.type().dimension() != entity::Dimension::Dim0 ? sizeof(EncodedBlock) : 0u, + sizeof(EncodedBlock), + col.num_blocks(), + bytes); - return bytes; - } + return bytes; +} - [[nodiscard]] static size_t encoded_blocks_size(const SegmentInMemory& in_mem_seg) { - size_t bytes = 0; - for (std::size_t c = 0; c < in_mem_seg.num_columns(); ++c) { - const auto& col = in_mem_seg.column(position_t(c)); - bytes += calc_column_blocks_size(col); - } - return bytes; +[[nodiscard]] static size_t encoded_blocks_size(const SegmentInMemory& in_mem_seg) { + size_t bytes = 0; + for (std::size_t c = 0; c < in_mem_seg.num_columns(); ++c) { + const auto& col = in_mem_seg.column(position_t(c)); + bytes += calc_column_blocks_size(col); } + bytes += sizeof(EncodedBlock); + return bytes; +} - static void calc_encoded_blocks_size( +static void calc_encoded_blocks_size( const SegmentInMemory& in_mem_seg, const arcticdb::proto::encoding::VariantCodec& codec_opts, - SizeResult& result - ) { - result.encoded_blocks_bytes_ = static_cast(encoded_blocks_size(in_mem_seg)); - result.uncompressed_bytes_ += result.encoded_blocks_bytes_; - result.max_compressed_bytes_ += BytesEncoder::max_compressed_size(codec_opts, result.encoded_blocks_bytes_); - } + SizeResult& result) { + result.encoded_blocks_bytes_ = static_cast(encoded_blocks_size(in_mem_seg)); + result.uncompressed_bytes_ += result.encoded_blocks_bytes_; + result.max_compressed_bytes_ += BytesEncoder::max_compressed_size(codec_opts, result.encoded_blocks_bytes_); +} + +static void add_stream_descriptor_data_size(SizeResult& result, const StreamId& stream_id) { + result.max_compressed_bytes_ += sizeof(FrameDescriptor); + result.uncompressed_bytes_ += sizeof(FrameDescriptor); + const auto identifier_size = identifier_bytes(stream_id); + result.max_compressed_bytes_ += identifier_size; + result.uncompressed_bytes_ += identifier_size; +} - static void calc_stream_descriptor_fields_size( +static void calc_stream_descriptor_fields_size( const SegmentInMemory& in_mem_seg, const arcticdb::proto::encoding::VariantCodec& codec_opts, - SizeResult& result - ) { - auto segment_fields = in_mem_seg.descriptor().fields().column_data(); - const auto [uncompressed, required] = ColumnEncoderV2::max_compressed_size(codec_opts, segment_fields); - result.uncompressed_bytes_ += uncompressed; - result.max_compressed_bytes_ += required; - - // Calculate index fields size - if(in_mem_seg.index_fields()) { - auto index_field_data = in_mem_seg.index_fields()->column_data(); - const auto [idx_uncompressed, idx_required] = ColumnEncoderV2::max_compressed_size(codec_opts, index_field_data); - result.uncompressed_bytes_ += idx_uncompressed; - result.max_compressed_bytes_ += idx_required; - } + SizeResult& result) { + auto segment_fields = in_mem_seg.descriptor().fields().column_data(); + const auto [uncompressed, required] = ColumnEncoderV2::max_compressed_size(codec_opts, segment_fields); + result.uncompressed_bytes_ += uncompressed; + result.max_compressed_bytes_ += required; + add_stream_descriptor_data_size(result, in_mem_seg.descriptor().id()); + + if(in_mem_seg.has_index_descriptor()) { + const auto& tsd = in_mem_seg.index_descriptor(); + auto index_field_data = tsd.fields().column_data(); + const auto [idx_uncompressed, idx_required] = 
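calc_column_blocks_size above reserves one fixed-size field header per column, one extra block descriptor when the column is multidimensional (for the shapes), and one block descriptor per data block. The same arithmetic as a standalone sketch; the constants are placeholders, not the real sizeof values:

#include <cstddef>

constexpr std::size_t kFieldHeaderBytes = 40;  // stands in for EncodedFieldImpl::Size
constexpr std::size_t kBlockBytes       = 48;  // stands in for sizeof(EncodedBlock)

constexpr std::size_t column_blocks_bytes(std::size_t num_blocks, bool multidimensional) {
    std::size_t bytes = kFieldHeaderBytes;
    if (multidimensional)
        bytes += kBlockBytes;                  // one shapes block for the whole column
    return bytes + kBlockBytes * num_blocks;   // one descriptor per data block
}

static_assert(column_blocks_bytes(3, false) == kFieldHeaderBytes + 3 * kBlockBytes);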
ColumnEncoderV2::max_compressed_size(codec_opts, index_field_data); + result.uncompressed_bytes_ += idx_uncompressed; + result.max_compressed_bytes_ += idx_required; + add_stream_descriptor_data_size(result, tsd.stream_id_); } +} - [[nodiscard]] SizeResult max_compressed_size_v2( +[[nodiscard]] SizeResult max_compressed_size_v2( const SegmentInMemory& in_mem_seg, - const arcticdb::proto::encoding::VariantCodec& codec_opts - ) { - ARCTICDB_SAMPLE(GetSegmentCompressedSize, 0) - SizeResult result{}; - result.max_compressed_bytes_ += sizeof(MetadataMagic); - calc_metadata_size(in_mem_seg, codec_opts, result); - result.max_compressed_bytes_ += sizeof(DescriptorMagic); - result.max_compressed_bytes_ += sizeof(IndexMagic); - calc_stream_descriptor_fields_size(in_mem_seg, codec_opts, result); - result.max_compressed_bytes_ += sizeof(EncodedMagic); - calc_encoded_blocks_size(in_mem_seg, codec_opts, result); - - // Calculate fields collection size - if(in_mem_seg.row_count() > 0) { - result.max_compressed_bytes_ += sizeof(ColumnMagic) * in_mem_seg.descriptor().field_count(); - calc_columns_size(in_mem_seg, codec_opts, result); - result.max_compressed_bytes_ += sizeof(StringPoolMagic); - calc_string_pool_size(in_mem_seg, codec_opts, result); - } - ARCTICDB_TRACE(log::codec(), "Max compressed size {}", result.max_compressed_bytes_); - return result; + const arcticdb::proto::encoding::VariantCodec& codec_opts) { + ARCTICDB_SAMPLE(GetSegmentCompressedSize, 0) + SizeResult result{}; + result.max_compressed_bytes_ += sizeof(MetadataMagic); + calc_metadata_size(in_mem_seg, codec_opts, result); + result.max_compressed_bytes_ += sizeof(DescriptorFieldsMagic); + result.max_compressed_bytes_ += sizeof(IndexMagic); + calc_stream_descriptor_fields_size(in_mem_seg, codec_opts, result); + result.max_compressed_bytes_ += sizeof(EncodedMagic); + calc_encoded_blocks_size(in_mem_seg, codec_opts, result); + + // Calculate fields collection size + if(in_mem_seg.row_count() > 0) { + result.max_compressed_bytes_ += sizeof(ColumnMagic) * in_mem_seg.descriptor().field_count(); + calc_columns_size(in_mem_seg, codec_opts, result); + result.max_compressed_bytes_ += sizeof(StringPoolMagic); + calc_string_pool_size(in_mem_seg, codec_opts, result); } + ARCTICDB_TRACE(log::codec(), "Max compressed size {}", result.max_compressed_bytes_); + return result; +} - static void encode_encoded_fields( - arcticdb::proto::encoding::SegmentHeader& segment_header, +static void encode_encoded_fields( + SegmentHeader& segment_header, const arcticdb::proto::encoding::VariantCodec& codec_opts, Buffer& out_buffer, std::ptrdiff_t& pos, - const ChunkedBuffer& encoded_blocks_buffer - ) { + EncodedFieldCollection&& encoded_fields) { + ARCTICDB_DEBUG(log::codec(), "Encoding encoded blocks to position {}", pos); + + segment_header.set_footer_offset(pos); + write_magic(out_buffer, pos); + Column encoded_fields_column(encoded_fields_type_desc(), false, encoded_fields.release_data()); + auto data = encoded_fields_column.data(); + auto& encoded_field = segment_header.mutable_column_fields(calc_num_blocks(data)); + ColumnEncoderV2::encode(codec_opts, data, encoded_field, out_buffer, pos); + ARCTICDB_DEBUG(log::codec(), "Encoded encoded blocks to position {}", pos); +} - ARCTICDB_TRACE(log::codec(), "Encoding encoded blocks to position {}", pos); - auto encoded_field = segment_header.mutable_column_fields(); - encoded_field->set_offset(static_cast(pos)); - write_magic(out_buffer, pos); - if(!encoded_blocks_buffer.empty()) - 
BytesEncoder::encode(encoded_blocks_buffer, codec_opts, out_buffer, pos, encoded_field); +[[nodiscard]] Segment encode_v2( + SegmentInMemory&& s, + const arcticdb::proto::encoding::VariantCodec &codec_opts) { + ARCTICDB_SAMPLE(EncodeSegment, 0) + auto in_mem_seg = std::move(s); - ARCTICDB_TRACE(log::codec(), "Encoded encoded blocks to position {}", pos); + if(in_mem_seg.has_index_descriptor()) { + google::protobuf::Any any; + util::pack_to_any(in_mem_seg.index_descriptor().proto(), any); + in_mem_seg.set_metadata(std::move(any)); } - [[nodiscard]] Segment encode_v2(SegmentInMemory&& s, const arcticdb::proto::encoding::VariantCodec &codec_opts) { - ARCTICDB_SAMPLE(EncodeSegment, 0) - - auto in_mem_seg = std::move(s); - auto arena = std::make_unique(); - auto segment_header = google::protobuf::Arena::CreateMessage(arena.get()); - auto& seg_descriptor = in_mem_seg.descriptor(); - *segment_header->mutable_stream_descriptor() = std::move(seg_descriptor.mutable_proto()); - segment_header->set_compacted(in_mem_seg.compacted()); - segment_header->set_encoding_version(static_cast(EncodingVersion::V2)); - - std::ptrdiff_t pos = 0; - static auto block_to_header_ratio = ConfigsMap::instance()->get_int("Codec.EstimatedHeaderRatio", 75); - const auto preamble = in_mem_seg.num_blocks() * block_to_header_ratio; - auto [max_compressed_size, uncompressed_size, encoded_buffer_size] = max_compressed_size_v2(in_mem_seg, codec_opts); - ARCTICDB_TRACE(log::codec(), "Estimated max buffer requirement: {}", max_compressed_size); - auto out_buffer = std::make_shared(max_compressed_size + encoded_buffer_size, preamble); - ARCTICDB_TRACE(log::codec(), "Encoding descriptor: {}", segment_header->stream_descriptor().DebugString()); - auto *tsd = segment_header->mutable_stream_descriptor(); - tsd->set_in_bytes(uncompressed_size); - - write_magic(*out_buffer, pos); - encode_metadata(in_mem_seg, *segment_header, codec_opts, *out_buffer, pos); - write_magic(*out_buffer, pos); - encode_field_descriptors(in_mem_seg, *segment_header, codec_opts, *out_buffer, pos); - - auto encoded_fields_buffer = ChunkedBuffer::presized(static_cast(encoded_buffer_size)); - auto encoded_field_pos = 0u; - ColumnEncoderV2 encoder; - if(in_mem_seg.row_count() > 0) { - ARCTICDB_TRACE(log::codec(), "Encoding fields"); - for (std::size_t column_index = 0; column_index < in_mem_seg.num_columns(); ++column_index) { - write_magic(*out_buffer, pos); - auto column_field = new(encoded_fields_buffer.data() + encoded_field_pos) EncodedField; - ARCTICDB_TRACE(log::codec(),"Beginning encoding of column {}: ({}) to position {}", column_index, in_mem_seg.descriptor().field(column_index).name(), pos); - auto column_data = in_mem_seg.column_data(column_index); - if(column_data.num_blocks() > 0) { - encoder.encode(codec_opts, column_data, column_field, *out_buffer, pos); - ARCTICDB_TRACE(log::codec(), - "Encoded column {}: ({}) to position {}", - column_index, - in_mem_seg.descriptor().field(column_index).name(), - pos); - } else { - util::check(!must_contain_data(column_data.type()), "Column {} of type {} contains no blocks", column_index, column_data.type()); - auto* ndarray = column_field->mutable_ndarray(); - ndarray->set_items_count(0); - } - encoded_field_pos += encoded_field_bytes(*column_field); - util::check(encoded_field_pos <= encoded_fields_buffer.bytes(), - "Encoded field buffer overflow {} > {}", - encoded_field_pos, - encoded_fields_buffer.bytes()); - + SegmentHeader segment_header{EncodingVersion::V2}; + 
segment_header.set_compacted(in_mem_seg.compacted()); + + std::ptrdiff_t pos = 0; + auto [max_compressed_size, uncompressed_size, encoded_buffer_size] = max_compressed_size_v2(in_mem_seg, codec_opts); + ARCTICDB_TRACE(log::codec(), "Estimated max buffer requirement: {}", max_compressed_size); + const auto preamble = segment_header.required_bytes(in_mem_seg); + auto out_buffer = std::make_shared(max_compressed_size + encoded_buffer_size, preamble); + ARCTICDB_TRACE(log::codec(), "Encoding descriptor: {}", in_mem_seg.descriptor()); + + auto descriptor_data = in_mem_seg.descriptor().data_ptr(); + descriptor_data->uncompressed_bytes_ = uncompressed_size; + + write_magic(*out_buffer, pos); + encode_metadata(in_mem_seg, segment_header, codec_opts, *out_buffer, pos); + write_magic(*out_buffer, pos); + write_segment_descriptor(*out_buffer, pos, in_mem_seg.descriptor().data()); + write_identifier(*out_buffer, pos, in_mem_seg.descriptor().id()); + write_magic(*out_buffer, pos); + encode_field_descriptors(in_mem_seg, segment_header, codec_opts, *out_buffer, pos); + write_magic(*out_buffer, pos); + encode_index_descriptors(in_mem_seg, segment_header, codec_opts, *out_buffer, pos); + + EncodedFieldCollection encoded_fields; + ColumnEncoderV2 encoder; + if(in_mem_seg.row_count() > 0) { + encoded_fields.reserve(encoded_buffer_size, in_mem_seg.num_columns()); + ARCTICDB_TRACE(log::codec(), "Encoding fields"); + for (std::size_t column_index = 0; column_index < in_mem_seg.num_columns(); ++column_index) { + write_magic(*out_buffer, pos); + auto column_data = in_mem_seg.column_data(column_index); + auto* column_field = encoded_fields.add_field(column_data.num_blocks()); + ARCTICDB_TRACE(log::codec(),"Beginning encoding of column {}: ({}) to position {}", column_index, in_mem_seg.descriptor().field(column_index).name(), pos); + + if(column_data.num_blocks() > 0) { + encoder.encode(codec_opts, column_data, *column_field, *out_buffer, pos); + ARCTICDB_TRACE(log::codec(), "Encoded column {}: ({}) to position {}", column_index, in_mem_seg.descriptor().field(column_index).name(), pos); + } else { + util::check(!must_contain_data(column_data.type()), "Column {} of type {} contains no blocks", column_index, column_data.type()); + auto* ndarray = column_field->mutable_ndarray(); + ndarray->set_items_count(0); } - write_magic(*out_buffer, pos); - encode_string_pool(in_mem_seg, *segment_header, codec_opts, *out_buffer, pos); } + write_magic(*out_buffer, pos); + encode_string_pool(in_mem_seg, segment_header, codec_opts, *out_buffer, pos); + encode_encoded_fields(segment_header, codec_opts, *out_buffer, pos, std::move(encoded_fields)); + } else { + segment_header.set_footer_offset(pos); + } - encode_encoded_fields(*segment_header, codec_opts, *out_buffer, pos, encoded_fields_buffer); - out_buffer->set_bytes(pos); - tsd->set_out_bytes(pos); + out_buffer->set_bytes(pos); + descriptor_data->compressed_bytes_ = pos; + descriptor_data->row_count_ = in_mem_seg.row_count(); - ARCTICDB_TRACE(log::codec(), "Encoded header: {}", tsd->DebugString()); - ARCTICDB_DEBUG(log::codec(), "Block count {} header size {} ratio {}", - in_mem_seg.num_blocks(), segment_header->ByteSizeLong(), - in_mem_seg.num_blocks() ? 
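encode_v2 brackets each section of the output buffer with a small magic marker so the decoder can assert it is reading the section it expects. A simplified stand-in for that pattern (the real MagicNum lives in ArcticDB's util code; this toy version only illustrates the write/check semantics and reuses the placement-new idiom of write_magic):

#include <cstdint>
#include <cstring>
#include <new>
#include <stdexcept>

template<char a, char b, char c, char d>
struct ToyMagic {
    uint32_t value_ = uint32_t(a) | (uint32_t(b) << 8) | (uint32_t(c) << 16) | (uint32_t(d) << 24);
    void check() const {
        ToyMagic expected;
        if (value_ != expected.value_)
            throw std::runtime_error("magic number mismatch");
    }
};

template<typename MagicType>
void write_toy_magic(uint8_t* buffer, std::ptrdiff_t& pos) {
    new (buffer + pos) MagicType{};   // stamp the marker in place, advance the cursor
    pos += sizeof(MagicType);
}

template<typename MagicType>
void check_toy_magic(const uint8_t* buffer, std::ptrdiff_t& pos) {
    MagicType m;
    std::memcpy(&m, buffer + pos, sizeof(MagicType));
    m.check();
    pos += sizeof(MagicType);
}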
segment_header->ByteSizeLong() / in_mem_seg.num_blocks() : 0); - return {std::move(arena), segment_header, std::move(out_buffer), seg_descriptor.fields_ptr()}; - } +#ifdef DEBUG_BUILD + segment_header.validate(); +#endif + + ARCTICDB_TRACE(log::codec(), "Encoded header: {}", segment_header); + const auto& desc = in_mem_seg.descriptor(); + return Segment::initialize(std::move(segment_header), std::move(out_buffer), descriptor_data, desc.fields_ptr(), desc.id()); } + +} //namespace arcticdb diff --git a/cpp/arcticdb/codec/encoded_field.cpp b/cpp/arcticdb/codec/encoded_field.cpp new file mode 100644 index 0000000000..e64a99ed69 --- /dev/null +++ b/cpp/arcticdb/codec/encoded_field.cpp @@ -0,0 +1,25 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ + +#include +#include +#include + +namespace arcticdb { + +std::pair get_segment_begin_end( + const Segment &segment, + const SegmentHeader &hdr) { + const uint8_t *data = segment.buffer().data(); + util::check(data != nullptr, "Got null data ptr from segment in get_segment_begin_end"); + const uint8_t *begin = data; + const auto fields_offset = hdr.footer_offset(); + const auto end = begin + fields_offset; + return {begin, end}; +} + +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/encoded_field.hpp b/cpp/arcticdb/codec/encoded_field.hpp index 01e5ec5004..067b609428 100644 --- a/cpp/arcticdb/codec/encoded_field.hpp +++ b/cpp/arcticdb/codec/encoded_field.hpp @@ -4,183 +4,123 @@ * * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
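get_segment_begin_end above uses the header's footer_offset to delimit the body portion of the segment buffer: everything before the offset is encoded body data, everything from the offset onwards belongs to the footer. A trivial sketch of consuming that split; the function names here are illustrative only:

#include <cstddef>
#include <cstdint>
#include <utility>

std::pair<const uint8_t*, const uint8_t*> body_range(const uint8_t* segment_data, std::size_t footer_offset) {
    return {segment_data, segment_data + footer_offset};  // mirrors get_segment_begin_end
}

std::size_t body_bytes(const uint8_t* segment_data, std::size_t footer_offset) {
    auto [begin, end] = body_range(segment_data, footer_offset);
    return static_cast<std::size_t>(end - begin);
}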
*/ - #pragma once -#include - -#pragma pack(push) -#pragma pack(1) +#include +#include +#include "arcticdb/storage/memory_layout.hpp" +#include +#include namespace arcticdb { -inline std::pair get_segment_begin_end(const Segment &segment, - const arcticdb::proto::encoding::SegmentHeader &hdr) { - const uint8_t *data = segment.buffer().data(); - util::check(data != nullptr, "Got null data ptr from segment"); - const uint8_t *begin = data; - - const auto fields_offset = hdr.column_fields().offset(); - const auto end = begin + fields_offset; - return {begin, end}; -} - -constexpr size_t encoding_size = 6; -enum class Codec : uint16_t { - Unknown = 0, - Zstd, - TurboPfor, - Lz4, - Passthrough -}; - -struct ZstdCodec { - static constexpr Codec type_ = Codec::Zstd; - - void MergeFrom(const arcticdb::proto::encoding::VariantCodec::Zstd &zstd) { - level_ = zstd.level(); - is_streaming = zstd.is_streaming(); +class Segment; +class SegmentHeader; + +std::pair get_segment_begin_end( + const Segment &segment, + const SegmentHeader& hdr); + +constexpr std::string_view codec_type_to_string(Codec codec) { + switch(codec) { + case Codec::LZ4: + return "LZ4"; + case Codec::ZSTD: + return "ZSTD"; + case Codec::PFOR: + return "PFOR"; + case Codec::PASS: + return "PASS"; + default: + return "Unknown"; } +} - int32_t level_ = 0; - bool is_streaming = false; - uint8_t padding_ = 0; -}; - -static_assert(sizeof(ZstdCodec) == encoding_size); - -struct TurboPforCodec { - static constexpr Codec type_ = Codec::TurboPfor; - - void MergeFrom(const arcticdb::proto::encoding::VariantCodec::TurboPfor &tp4) { - sub_codec_ = SubCodec(tp4.sub_codec()); +struct BlockCodecImpl : public BlockCodec { + uint8_t* data() { + return &data_[0]; } - enum class SubCodec : uint32_t { - UNKNOWN = 0, - P4 = 16, - P4_DELTA = 17, - P4_DELTA_RLE = 18, - P4_ZZ = 20, - - FP_DELTA = 32, // fpp - FP_DELTA2_ZZ = 33, // fpzz - FP_GORILLA_RLE = 34, // fpg - FP_ZZ = 36, // bvz - FP_ZZ_DELTA = 40, // bvz - }; - - SubCodec sub_codec_ = SubCodec::UNKNOWN; - uint16_t padding_ = 0; -}; - -static_assert(sizeof(TurboPforCodec) == encoding_size); - -struct Lz4Codec { - static constexpr Codec type_ = Codec::Lz4; - - void MergeFrom(const arcticdb::proto::encoding::VariantCodec::Lz4& lz4) { - acceleration_ = lz4.acceleration(); + [[nodiscard]] Codec codec_type() const { + return codec_; } - int32_t acceleration_ = 1; - int16_t padding_ = 0; -}; - -static_assert(sizeof(Lz4Codec) == encoding_size); - -struct PassthroughCodec { - static constexpr Codec type_ = Codec::Passthrough; - - uint32_t unused_ = 0; - uint16_t padding_ = 0; -}; - -static_assert(sizeof(PassthroughCodec) == encoding_size); - -struct BlockCodec { - Codec codec_ = Codec::Unknown; - constexpr static size_t DataSize = 24; - std::array data_; - - uint8_t* data() { + [[nodiscard]] const uint8_t* data() const { return &data_[0]; } - BlockCodec() { + BlockCodecImpl() { memset(data(), 0, DataSize); } ZstdCodec *mutable_zstd() { - codec_ = Codec::Zstd; + codec_ = Codec::ZSTD; auto zstd = new(data()) ZstdCodec{}; return zstd; } Lz4Codec *mutable_lz4() { - codec_ = Codec::Lz4; + codec_ = Codec::LZ4; auto lz4 = new(data()) Lz4Codec{}; return lz4; } - TurboPforCodec *mutable_turbopfor() { - codec_ = Codec::TurboPfor; - auto pfor = new(data()) TurboPforCodec{}; + PforCodec *mutable_pfor() { + codec_ = Codec::PFOR; + auto pfor = new(data()) PforCodec{}; return pfor; } PassthroughCodec *mutable_passthrough() { - codec_ = Codec::Passthrough; + codec_ = Codec::PASS; auto pass = new(data()) PassthroughCodec{}; 
return pass; } - arcticdb::proto::encoding::VariantCodec::CodecCase codec_case() const { - switch (codec_) { - case Codec::Zstd:return arcticdb::proto::encoding::VariantCodec::kLz4; - case Codec::Lz4:return arcticdb::proto::encoding::VariantCodec::kLz4; - case Codec::TurboPfor:return arcticdb::proto::encoding::VariantCodec::kTp4; - case Codec::Passthrough:return arcticdb::proto::encoding::VariantCodec::kPassthrough; - default:util::raise_rte("Unknown codec"); - } + [[nodiscard]] const ZstdCodec& zstd() const { + util::check(codec_ == Codec::ZSTD, "Not a zstd codec"); + return *reinterpret_cast(data()); } - template - explicit BlockCodec(const CodecType &codec) : - codec_(CodecType::type) { - memcpy(data_, &codec, encoding_size); + [[nodiscard]] const Lz4Codec& lz4() const { + util::check(codec_ == Codec::LZ4, "Not an lz4 codec"); + return *reinterpret_cast(data()); } -}; -struct EncodedBlock { - uint32_t in_bytes_ = 0; - uint32_t out_bytes_ = 0; - uint64_t hash_ = 0; - uint16_t encoder_version_ = 0; - bool is_shape_ = false; - uint8_t pad_ = 0; - BlockCodec codec_; + [[nodiscard]] const PforCodec& pfor() const { + util::check(codec_ == Codec::PFOR, "Not a pfor codec"); + return *reinterpret_cast(data()); + } - EncodedBlock() = default; + [[nodiscard]] const PassthroughCodec& passthrough() const { + util::check(codec_ == Codec::PASS, "Not a passthrough codec"); + return *reinterpret_cast(data()); + } - explicit EncodedBlock(bool is_shape) : - is_shape_(is_shape) { + template + explicit BlockCodecImpl(const CodecType &codec) { + codec_ = CodecType::type; + memcpy(data_, &codec, encoding_size); } +}; - std::string DebugString() const { - return ""; +struct EncodedBlock : Block { + explicit EncodedBlock(bool is_shape) { + is_shape_ = is_shape; } - bool has_codec() const { - return codec_.codec_ != Codec::Passthrough; + EncodedBlock() = default; + + [[nodiscard]] bool has_codec() const { + return codecs_[0].codec_ != Codec::PASS; } - auto encoder_version() const { + [[nodiscard]] auto encoder_version() const { return encoder_version_; } - auto codec() const { - return codec_; + [[nodiscard]] auto codec() const { + return *reinterpret_cast(&codecs_[0]); } void set_in_bytes(uint32_t bytes) { @@ -203,57 +143,42 @@ struct EncodedBlock { return hash_; } - uint32_t out_bytes() const { + [[nodiscard]] uint32_t out_bytes() const { return out_bytes_; } - uint32_t in_bytes() const { + [[nodiscard]] uint32_t in_bytes() const { return in_bytes_; } - BlockCodec *mutable_codec() { - return &codec_; + BlockCodecImpl *mutable_codec() { + return reinterpret_cast(&codecs_[0]); } }; -struct EncodedField { - - enum class EncodedFieldType : uint8_t { - Unknown, - kNdarray, - Dictionary - }; - - EncodedFieldType type_ = EncodedFieldType::Unknown; - google::protobuf::uint8 shapes_count_ = 0u; - uint16_t values_count_ = 0u; - uint32_t sparse_map_bytes_ = 0u; - uint64_t items_count_ = 0u; - std::array blocks_; - - static constexpr size_t MinimumSize = sizeof(type_) + sizeof(shapes_count_) + sizeof(values_count_) + sizeof(sparse_map_bytes_) + sizeof(items_count_); - - static constexpr EncodedFieldType kNdarray = EncodedFieldType::kNdarray; - +struct EncodedFieldImpl : public EncodedField { static constexpr size_t Size = sizeof(type_) + sizeof(shapes_count_) + sizeof(values_count_) + sizeof(sparse_map_bytes_) + - sizeof(items_count_); + sizeof(items_count_) + + sizeof(format_); - EncodedField() = default; + EncodedFieldImpl() = default; + + ARCTICDB_NO_MOVE_OR_COPY(EncodedFieldImpl) EncodedBlock *blocks() { - return 
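BlockCodecImpl above keeps a codec tag plus a raw byte array: each mutable_* accessor placement-news the corresponding codec struct into that array, and the typed const accessors check the tag before reinterpreting. A minimal sketch of this tag-plus-in-place-storage pattern with toy codec structs (not the real ones):

#include <cstdint>
#include <new>
#include <stdexcept>

enum class ToyCodec : uint16_t { UNKNOWN, LZ4, ZSTD };

struct ToyLz4  { int32_t acceleration = 1; };
struct ToyZstd { int32_t level = 0; };

struct ToyBlockCodec {
    ToyCodec codec_ = ToyCodec::UNKNOWN;
    alignas(8) uint8_t data_[24] = {};

    ToyLz4* mutable_lz4() {
        codec_ = ToyCodec::LZ4;
        return new (data_) ToyLz4{};   // construct the codec payload in the shared buffer
    }
    ToyZstd* mutable_zstd() {
        codec_ = ToyCodec::ZSTD;
        return new (data_) ToyZstd{};
    }
    const ToyLz4& lz4() const {
        if (codec_ != ToyCodec::LZ4)
            throw std::runtime_error("not an lz4 codec");
        return *reinterpret_cast<const ToyLz4*>(data_);
    }
};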
&blocks_[0]; + return reinterpret_cast(&blocks_[0]); } - const EncodedBlock* blocks() const { - return &blocks_[0]; + [[nodiscard]] const EncodedBlock* blocks() const { + return reinterpret_cast(&blocks_[0]); } struct EncodedBlockCollection { - EncodedBlockCollection(const EncodedField &field, bool is_shapes) : + EncodedBlockCollection(const EncodedFieldImpl &field, bool is_shapes) : field_(field), is_shapes_(is_shapes) { } @@ -263,19 +188,22 @@ struct EncodedField { ValueType, boost::forward_traversal_tag> { public: - explicit EncodedBlockCollectionIterator(EncodedBlock *blocks) : - blocks_(blocks) {} + EncodedBlockCollectionIterator(EncodedBlock *blocks, size_t increment) : + blocks_(blocks), + increment_(increment){} ~EncodedBlockCollectionIterator() = default; - explicit EncodedBlockCollectionIterator(EncodedBlock *blocks, size_t pos) : + EncodedBlockCollectionIterator(EncodedBlock *blocks, size_t pos, size_t increment) : blocks_(blocks), - pos_(pos) {} + pos_(pos), + increment_(increment){} template explicit EncodedBlockCollectionIterator(const EncodedBlockCollectionIterator &other) : blocks_(other.blocks_), - pos_(other.pos_) {} + pos_(other.pos_), + increment_(other.increment_){} EncodedBlockCollectionIterator() = default; @@ -283,6 +211,7 @@ struct EncodedField { if (&other != this) { pos_ = other.pos_; blocks_ = other.blocks_; + increment_ = other.increment_; } return *this; @@ -290,16 +219,17 @@ struct EncodedField { EncodedBlockCollectionIterator(const EncodedBlockCollectionIterator &other) : blocks_(other.blocks_), - pos_(other.pos_) { + pos_(other.pos_), + increment_(other.increment_) { } template - bool equal(const EncodedBlockCollectionIterator &other) const { - return pos_ == other.pos_ && blocks_ == other.blocks_; + [[nodiscard]] bool equal(const EncodedBlockCollectionIterator &other) const { + return pos_ == other.pos_ && blocks_ == other.blocks_ && increment_ == other.increment_; } void increment() { - ++pos_; + pos_ += increment_; } [[nodiscard]] ValueType &dereference() const { @@ -308,81 +238,125 @@ struct EncodedField { EncodedBlock *blocks_ = nullptr; size_t pos_ = 0; + size_t increment_ = 1; }; - EncodedBlock *blocks() const { - return const_cast(field_).blocks(); + [[nodiscard]] EncodedBlock *blocks() const { + return const_cast(field_).blocks(); + } + + [[nodiscard]] size_t increment() const { + return field_.is_scalar() || !field_.is_old_style_shapes() ? 1 : 2; } [[nodiscard]] auto begin() { - return EncodedBlockCollectionIterator(blocks(), first()); + return EncodedBlockCollectionIterator(blocks(), first(), increment()); } [[nodiscard]] auto end() { - return EncodedBlockCollectionIterator(blocks(), last()); + return EncodedBlockCollectionIterator(blocks(), last(), increment()); } [[nodiscard]] auto begin() const { - return EncodedBlockCollectionIterator(blocks(), first()); + return EncodedBlockCollectionIterator(blocks(), first(), increment()); } [[nodiscard]] auto end() const { - return EncodedBlockCollectionIterator(blocks(), last()); + return EncodedBlockCollectionIterator(blocks(), last(), increment()); } - size_t first() const { - return is_shapes_ ? 0u : field_.shapes_count_; + [[nodiscard]] size_t shape_value_offset() const { + return is_shapes_ || field_.is_scalar() ? 0U : 1U; } - size_t last() const { - return is_shapes_ ? field_.shapes_count_ : field_.shapes_count_ + field_.values_count_; + [[nodiscard]] size_t first() const { + return shape_value_offset(); + } + + [[nodiscard]] size_t last() const { + if(field_.is_scalar()) + return is_shapes_ ? 
0 : field_.values_count_; + + if(field_.is_old_style_shapes()) + return field_.values_count_ + field_.shapes_count_ + shape_value_offset(); + else + return is_shapes_ ? field_.shapes_count_ : field_.shapes_count_ + field_.values_count_; } [[nodiscard]] const EncodedBlock& operator[](const size_t idx) const { - // Shape blocks are located before values blocks in the field. In case this is a collection of value blocks - // we have to skip all shape blocks. In case this is a collection of shapes we can start from 0 index. - const size_t shape_offset = !is_shapes_ * field_.shapes_count_; + const size_t shape_offset = is_shapes_ ? 0 : field_.shapes_count_; return field_.blocks()[shape_offset + idx]; } - const EncodedField &field_; + + const EncodedFieldImpl& field_; bool is_shapes_; }; - EncodedFieldType encoding_case() const { + [[nodiscard]] bool is_scalar() const { + return shapes_count_ == 0; + } + + [[nodiscard]] bool is_old_style_shapes() const { + return shapes_size() == values_size(); + } + + [[nodiscard]] EncodedFieldType encoding_case() const { return type_; } - const EncodedBlock& shapes(size_t n) const { - util::check(n == 0, "Expected only one shape"); + [[nodiscard]] const EncodedBlock& shapes(size_t n) const { util::check(shapes_count_ != 0, "No shape allocated"); - return blocks_[0]; + if(!is_old_style_shapes()) + return *reinterpret_cast(&blocks_[0]); + else + return *reinterpret_cast(&blocks_[n * 2]); } - const EncodedBlock &values(size_t n) const { + [[nodiscard]] const EncodedBlock &values(size_t n) const { util::check(n < values_count_ + shapes_count_, "Cannot return block {} from {} blocks ({} shapes)", n, values_count_, shapes_count_); - return blocks()[shapes_count_ + n]; + if(is_scalar() || !is_old_style_shapes()) + return blocks()[shapes_count_ + n]; + else + return blocks()[(n * 2) + 1]; } - EncodedBlockCollection shapes() const { + [[nodiscard]] EncodedBlockCollection shapes() const { return {*this, true}; } - EncodedBlockCollection values() const { + [[nodiscard]] EncodedBlockCollection values() const { return {*this, false}; } + void validate() const { + size_t shapes_count = 0; + for(const auto& shape : shapes()) { + util::check(shape.is_shape_, "Expected shape to have is_shape_set"); + util::check(shape.codecs_[0].codec_ != Codec::UNKNOWN, "Unknown shape codec"); + ++shapes_count; + } + util::check(shapes_count == static_cast(shapes_size()), "Shape size mismatch: {} != {}", shapes_count, shapes_size()); + + size_t values_count = 0; + for(const auto& value : values()) { + util::check(!value.is_shape_, "Value has is_shape set"); + util::check(value.codec().codec_type() != Codec::UNKNOWN, "Unknown codec in block {}", values_count); + ++values_count; + } + util::check(values_count == static_cast(values_size()), "Shape size mismatch: {} != {}", values_count, values_size()); + } + EncodedBlock *add_shapes() { - util::check(shapes_count_ == 0, "Expected single shapes block"); - auto block = new(blocks() + items_count()) EncodedBlock{true}; + auto block = new(blocks() + (shapes_count_ * 2)) EncodedBlock{true}; ++shapes_count_; return block; } - int shapes_size() const { + [[nodiscard]] int shapes_size() const { return shapes_count_; } - int values_size() const { + [[nodiscard]] int values_size() const { return values_count_; } @@ -390,49 +364,94 @@ struct EncodedField { sparse_map_bytes_ = bytes; } - EncodedBlock *add_values() { - auto block = new(static_cast(blocks() + shapes_count_ + values_count_)) EncodedBlock{false}; + EncodedBlock *add_values(EncodingVersion 
encoding_version) { + const bool old_style = encoding_version == EncodingVersion::V1; + size_t pos; + if(!old_style || is_scalar()) + pos = shapes_count_ + values_count_; + else + pos = (values_count_ * 2) + 1; + + auto block = new(static_cast(blocks() + pos)) EncodedBlock{false}; ++values_count_; return block; } - EncodedField *mutable_ndarray() { - type_ = EncodedFieldType::kNdarray; + EncodedFieldImpl *mutable_ndarray() { + type_ = EncodedFieldType::NDARRAY; return this; } - const EncodedField &ndarray() const { + [[nodiscard]] const EncodedFieldImpl &ndarray() const { return *this; } - bool has_ndarray() const { - return type_ == EncodedFieldType::kNdarray; + [[nodiscard]] bool has_ndarray() const { + return type_ == EncodedFieldType::NDARRAY; } - std::string DebugString() const { - return ""; + [[nodiscard]] std::string DebugString() const { + return fmt::format("{}: {} shapes {} values", has_ndarray() ? "NDARRAY" : "DICT", shapes_size(), values_size()); } [[nodiscard]] size_t items_count() const { return items_count_; } - size_t sparse_map_bytes() const { + [[nodiscard]] size_t sparse_map_bytes() const { return sparse_map_bytes_; } - void set_items_count(size_t count) { + void set_items_count(uint32_t count) { items_count_ = count; } }; -static_assert(sizeof(EncodedField) - sizeof(EncodedBlock) == EncodedField::Size); +static_assert(EncodedFieldImpl::Size == sizeof(EncodedFieldImpl) - sizeof(EncodedBlock)); + +inline size_t calc_field_bytes(size_t num_blocks) { + return EncodedFieldImpl::Size + (sizeof(EncodedBlock) * num_blocks); +} inline size_t encoded_field_bytes(const EncodedField &encoded_field) { - return sizeof(EncodedField) - + (sizeof(EncodedBlock) * ((encoded_field.shapes_count_ + encoded_field.values_count_) - 1)); + return calc_field_bytes(encoded_field.shapes_count_ + encoded_field.values_count_); } -#pragma pack(pop) +} //namespace arcticdb + +namespace fmt { +template<> +struct formatter { + template + constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } + + template + auto format(arcticdb::BlockCodecImpl codec, FormatContext &ctx) const { + return format_to(ctx.out(), "{}", arcticdb::codec_type_to_string(codec.codec_type())); + } +}; + +template<> +struct formatter { + template + constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } + + template + auto format(const arcticdb::EncodedFieldImpl& field, FormatContext &ctx) const { + const char* label = field.has_ndarray() ? 
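With the V1 ("old-style") layout the shape and value blocks of a multidimensional field are interleaved, so value n sits at blocks[2n + 1]; the newer layout groups the column's shapes before every value block, so value n sits at blocks[shapes_count + n]. A standalone sketch of just that index arithmetic, mirroring the shapes()/values() accessors above:

#include <cstddef>

constexpr std::size_t shape_block_index(std::size_t n, bool old_style_interleaved) {
    // The newer layout writes the column's shapes as a single leading block,
    // so only the interleaved (V1) layout needs a per-shape index.
    return old_style_interleaved ? n * 2 : 0;
}

constexpr std::size_t value_block_index(std::size_t n, std::size_t shapes_count, bool old_style_interleaved) {
    return old_style_interleaved ? n * 2 + 1 : shapes_count + n;
}

static_assert(shape_block_index(2, true) == 4);
static_assert(value_block_index(2, 3, false) == 5);
static_assert(value_block_index(2, 3, true) == 5);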
"NDARRAY\n" : "DICT\n"; + fmt::format_to(ctx.out(), "{}", label); + + fmt::format_to(ctx.out(), "Shapes: {}\n", field.shapes_size()); + for(const auto& shape : field.shapes()) { + fmt::format_to(ctx.out(), "\tCodec: {} in_bytes: {}, out_bytes {}\n", arcticdb::codec_type_to_string(shape.codecs_[0].codec_), shape.in_bytes(), shape.out_bytes()); + } + + fmt::format_to(ctx.out(), "Values: {}\n", field.values_size()); + for(const auto& value : field.values()) { + fmt::format_to(ctx.out(), "\tCodec: {} in_bytes: {}, out_bytes {}\n", arcticdb::codec_type_to_string(value.codecs_[0].codec_), value.in_bytes(), value.out_bytes()); + } + return fmt::format_to(ctx.out(), "\n"); + } +}; -} //namespace arcticc +} // namespace fmt diff --git a/cpp/arcticdb/codec/encoded_field_collection.hpp b/cpp/arcticdb/codec/encoded_field_collection.hpp index e3028fa677..1fe0b434c9 100644 --- a/cpp/arcticdb/codec/encoded_field_collection.hpp +++ b/cpp/arcticdb/codec/encoded_field_collection.hpp @@ -7,55 +7,171 @@ #pragma once +#include #include +#include #include namespace arcticdb { +using namespace arcticdb::entity; + +constexpr TypeDescriptor encoded_fields_type_desc() { + using namespace arcticdb::entity; + return TypeDescriptor{ + DataType::UINT8, Dimension::Dim0 + }; +} + class EncodedFieldCollection { - Buffer buffer_; - std::vector offsets_; + ChunkedBuffer data_; + Buffer offsets_; + size_t count_ = 0U; + size_t offset_ = 0U; public: - explicit EncodedFieldCollection(Buffer &&buffer) : - buffer_(std::move(buffer)) { - regenerate_offsets(); + struct EncodedFieldCollectionIterator { + size_t pos_ = 0UL; + ChunkedBuffer* buffer_ = nullptr; + + explicit EncodedFieldCollectionIterator(ChunkedBuffer* buffer) : + buffer_(buffer) { + } + + [[nodiscard]] EncodedFieldImpl& current() const { + return *reinterpret_cast(buffer_->ptr_cast(pos_, EncodedFieldImpl::Size)); + } + + EncodedFieldImpl& operator*() { + return current(); + } + + void operator++() { + pos_ += encoded_field_bytes(current()); + } + + EncodedFieldImpl* operator->() { + return &(current()); + } + }; + + EncodedFieldCollection(ChunkedBuffer&& data, Buffer&& offsets) : + data_(std::move(data)), + offsets_(std::move(offsets)) { + } + + void reserve(size_t bytes, size_t num_fields) { + data_.reserve(bytes); + offsets_.reserve(num_fields * sizeof(uint64_t)); } EncodedFieldCollection() = default; + [[nodiscard]] EncodedFieldCollection clone() const { + return {data_.clone(), offsets_.clone()}; + } + ARCTICDB_MOVE_ONLY_DEFAULT(EncodedFieldCollection) + [[nodiscard]] EncodedFieldCollectionIterator begin() const { + return EncodedFieldCollectionIterator{const_cast(&data_)}; + } + + [[nodiscard]] size_t num_blocks() const { + return data_.num_blocks(); + } + [[nodiscard]] bool empty() const { - return buffer_.empty(); + return data_.empty(); + } + + [[nodiscard]] size_t data_bytes() const { + return data_.bytes(); + } + + [[nodiscard]] const uint8_t* data_buffer() const { + return data_.data(); + } + + [[nodiscard]] size_t offset_bytes() const { + return offsets_.bytes(); + } + + [[nodiscard]] const uint8_t* offsets_buffer() const { + return offsets_.data(); + } + + [[nodiscard]] uint64_t get_offset(size_t pos) const { + const auto offset = *offsets_.ptr_cast(pos * sizeof(uint64_t), sizeof(uint64_t)); + return offset; + } + + void write_offset(size_t pos, uint64_t value) { + *offsets_.ptr_cast(pos * sizeof(uint64_t), sizeof(uint64_t)) = value; } - [[nodiscard]] size_t get_offset(size_t pos) const { - util::check(pos < offsets_.size(), "Offset {} exceeds offsets 
size {}", pos, offsets_.size()); - return offsets_[pos]; + [[nodiscard]] const EncodedFieldImpl& to_field(size_t bytes_pos) const { + return *reinterpret_cast(data_.ptr_cast(bytes_pos, EncodedFieldImpl::Size)); } - [[nodiscard]] const EncodedField &at(size_t pos) const { - return *(buffer_.ptr_cast(get_offset(pos), EncodedField::MinimumSize)); + [[nodiscard]] EncodedFieldImpl& to_field(size_t bytes_pos) { + return *reinterpret_cast(data_.ptr_cast(bytes_pos, EncodedFieldImpl::Size)); } - [[nodiscard]] EncodedField &at(size_t pos) { - return *(buffer_.ptr_cast(get_offset(pos), EncodedField::MinimumSize)); + [[nodiscard]] const EncodedFieldImpl& at(size_t pos) const { + return to_field(get_offset(pos)); + } + + [[nodiscard]] EncodedFieldImpl &at(size_t pos) { + return to_field(get_offset(pos)); + } + + void write_data_to(uint8_t*& dst) const { + for(auto block : data_.blocks()) { + memcpy(dst, block->data(), block->bytes()); + dst += block->bytes(); + } } [[nodiscard]] size_t size() const { - return offsets_.size(); + return offsets_.bytes() / sizeof(uint64_t); } void regenerate_offsets() { - if (!offsets_.empty()) + if(!offsets_.empty()) return; - auto field_pos = 0u; - while (field_pos < buffer_.bytes()) { - offsets_.push_back(field_pos); - field_pos += encoded_field_bytes(*reinterpret_cast(buffer_.data() + field_pos)); + auto pos = 0UL; + count_ = 0UL; + while(pos < data_.bytes()) { + const auto& field = to_field(pos); + offsets_.ensure((count_ + 1) * sizeof(uint64_t)); + write_offset(count_, pos); + ++count_; + pos += encoded_field_bytes(field); } + util::check(pos == data_.bytes(), "Size mismatch in regenerate_offsets, {} != {}", pos, data_.bytes()); + } + + [[nodiscard]] EncodedFieldImpl* add_field(size_t num_blocks) { + offsets_.ensure((count_ + 1) * sizeof(uint64_t)); + write_offset(count_, offset_); + const auto required_bytes = calc_field_bytes(num_blocks); + util::check(required_bytes >= EncodedFieldImpl::Size, "Unexpectedly small allocation size: {}", required_bytes); + data_.ensure(offset_ + required_bytes); + auto* field = new (data_.ptr_cast(offset_, required_bytes)) EncodedFieldImpl; + ARCTICDB_DEBUG(log::codec(), "Adding encoded field with {} blocks at position {}, {} bytes required", num_blocks, offset_, required_bytes); + ++count_; + offset_ += required_bytes; + return field; + } + + Buffer&& release_offsets() { + return std::move(offsets_); + } + + ChunkedBuffer&& release_data() { + return std::move(data_); } }; diff --git a/cpp/arcticdb/codec/encoding_sizes.cpp b/cpp/arcticdb/codec/encoding_sizes.cpp deleted file mode 100644 index 393b3ba8f1..0000000000 --- a/cpp/arcticdb/codec/encoding_sizes.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* Copyright 2023 Man Group Operations Limited - * - * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
- */ - -#include -#include -#include - -namespace arcticdb::encoding_sizes { - -std::size_t represented_size(const arcticdb::proto::encoding::SegmentHeader& sh, size_t total_rows) { - std::size_t total = 0; - - for(const auto& field : sh.stream_descriptor().fields()) { - total += total_rows * get_type_size(entity::data_type_from_proto(field.type_desc())); - } - - return total; -} - -} \ No newline at end of file diff --git a/cpp/arcticdb/codec/encoding_sizes.hpp b/cpp/arcticdb/codec/encoding_sizes.hpp index ca29b6bf05..3eeb0dfb0d 100644 --- a/cpp/arcticdb/codec/encoding_sizes.hpp +++ b/cpp/arcticdb/codec/encoding_sizes.hpp @@ -11,54 +11,66 @@ #include #include #include +#include #include namespace arcticdb::encoding_sizes { template std::size_t shape_compressed_size(const NDArrayEncodedFieldType &nda) { - return std::accumulate(std::begin(nda.shapes()), std::end(nda.shapes()), size_t(0), - [] (size_t a, const auto& block) { return a + block.out_bytes(); }); - } + return std::accumulate( + std::begin(nda.shapes()), + std::end(nda.shapes()), + size_t(0), + [] (size_t a, const auto& block) { return a + block.out_bytes(); }); +} - template std::size_t data_compressed_size(const NDArrayEncodedFieldType &nda) { - return std::accumulate(std::begin(nda.values()), std::end(nda.values()), size_t(0), - [] (size_t a, const auto& block) { return a + block.out_bytes(); }); - } +template std::size_t data_compressed_size(const NDArrayEncodedFieldType &nda) { + return std::accumulate( + std::begin(nda.values()), + std::end(nda.values()), + size_t(0), + [] (size_t a, const auto& block) { return a + block.out_bytes(); }); +} - template std::size_t shape_uncompressed_size(const NDArrayEncodedFieldType &nda) { - return std::accumulate(std::begin(nda.shapes()), std::end(nda.shapes()), size_t(0), - [] (size_t a, const auto& block) { return a + block.in_bytes(); }); - } +template std::size_t shape_uncompressed_size(const NDArrayEncodedFieldType &nda) { + return std::accumulate( + std::begin(nda.shapes()), + std::end(nda.shapes()), + size_t(0), + [] (size_t a, const auto& block) { return a + block.in_bytes(); }); +} - template - std::size_t data_uncompressed_size(const NDArrayEncodedFieldType &nda) { - return std::accumulate(std::begin(nda.values()), std::end(nda.values()), size_t(0), - [] (size_t a, const auto& block) { return a + block.in_bytes(); }); - } +template +std::size_t data_uncompressed_size(const NDArrayEncodedFieldType &nda) { + return std::accumulate( + std::begin(nda.values()), + std::end(nda.values()), + size_t(0), + [] (size_t a, const auto& block) { return a + block.in_bytes(); }); +} - template - std::size_t bitmap_serialized_size(const NDArrayEncodedFieldType &nda) { - return nda.sparse_map_bytes(); - } +template +std::size_t bitmap_serialized_size(const NDArrayEncodedFieldType &nda) { + return nda.sparse_map_bytes(); +} - template - std::size_t ndarray_field_compressed_size(const NDArrayEncodedFieldType &nda) { - return shape_compressed_size(nda) + data_compressed_size(nda) + bitmap_serialized_size(nda); - } +template +std::size_t ndarray_field_compressed_size(const NDArrayEncodedFieldType &nda) { + return shape_compressed_size(nda) + data_compressed_size(nda) + bitmap_serialized_size(nda); +} - template - std::size_t uncompressed_size(const NDArrayEncodedFieldType &nda) { - return shape_uncompressed_size(nda) + data_uncompressed_size(nda) + bitmap_serialized_size(nda); - } +template +std::size_t uncompressed_size(const NDArrayEncodedFieldType &nda) { + return shape_uncompressed_size(nda) + 
data_uncompressed_size(nda) + bitmap_serialized_size(nda); +} - template - std::size_t field_compressed_size(const EncodedFieldType &field) { - switch (field.encoding_case()) { - case EncodedFieldType::kNdarray: - return ndarray_field_compressed_size(field.ndarray()); - default: - util::raise_error_msg("Unsupported encoding {}", field); - } +inline std::size_t field_compressed_size(const EncodedFieldImpl &field) { +switch (field.encoding_case()) { + case EncodedFieldType::NDARRAY: + return ndarray_field_compressed_size(field.ndarray()); + default: + util::raise_rte("Unsupported encoding {}", field.DebugString()); +} } template @@ -82,28 +94,4 @@ std::size_t segment_compressed_size(const FieldCollectionType &fields) { return total; } -template -std::size_t segment_uncompressed_size(const FieldCollectionType &fields) { - std::size_t total = 0; - for (auto &field : fields) { - switch (field.encoding_case()) { - case arcticdb::proto::encoding::EncodedField::kNdarray: { - auto uncompressed_sz = uncompressed_size(field.ndarray()); - ARCTICDB_TRACE(log::storage(), "From segment header: uncompressed: {}", uncompressed_sz); - total += uncompressed_sz; - break; - } - // case arcticdb::proto::encoding::EncodedField::kDictionary: - // total += uncompressed_size(field.dictionary()); - // break; - default: - util::raise_rte("Unsupported encoding in {}", util::format(field)); - } - } - return total; -} - - -std::size_t represented_size(const arcticdb::proto::encoding::SegmentHeader& sh, size_t total_rows); - } // namespace encoding_sizes diff --git a/cpp/arcticdb/codec/lz4.hpp b/cpp/arcticdb/codec/lz4.hpp index 61c261c243..bca2ab57f1 100644 --- a/cpp/arcticdb/codec/lz4.hpp +++ b/cpp/arcticdb/codec/lz4.hpp @@ -34,7 +34,7 @@ struct Lz4BlockEncoder { static std::size_t encode_block( const Opts& opts, const T *in, - BlockProtobufHelper &block_utils, + BlockDataHelper &block_utils, HashAccum &hasher, T *out, std::size_t out_capacity, @@ -53,28 +53,21 @@ struct Lz4BlockEncoder { ARCTICDB_TRACE(log::storage(), "Block of size {} compressed to {} bytes", block_utils.bytes_, compressed_bytes); hasher(in, block_utils.count_); pos += ssize_t(compressed_bytes); - out_codec.mutable_lz4()->MergeFrom(opts); + copy_codec(*out_codec.mutable_lz4(), opts); return std::size_t(compressed_bytes); } }; struct Lz4Decoder { -/* - * encoder_version is here to support multiple versions but won't be used before we have them - */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" - template static void decode_block( - [[maybe_unused]] std::uint32_t encoder_version, - const std::uint8_t* in, - std::size_t in_bytes, - T* t_out, - std::size_t out_bytes - ) { - + [[maybe_unused]] std::uint32_t encoder_version, //support multiple versions but won't be used before we have them + const std::uint8_t* in, + std::size_t in_bytes, + T* t_out, + std::size_t out_bytes) { ARCTICDB_TRACE(log::codec(), "Lz4 decoder reading block: {} {}", in_bytes, out_bytes); + // Decompressed size < 0 means an error occurred in LZ4 during the decompression. In case it's negative // the specific value is somewhat random and does not mean anything. Decompressed size of 0 is allowed and means // 0 bytes were passed for compression. In that case t_out is allowed to be null since it's not used at all. 
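// Editor's sketch (not ArcticDB code): the LZ4 return-value convention the comment above
// describes, using the public lz4 C API directly. A negative return from LZ4_decompress_safe
// signals a decode error (the exact value carries no meaning), zero is a legal result for an
// empty input, and on success the result must match the size the caller expected, mirroring
// the check_arg added in the hunk below. Compile with -llz4.
#include <lz4.h>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<char> lz4_roundtrip(const std::string& input) {
    std::vector<char> compressed(LZ4_compressBound(static_cast<int>(input.size())));
    const int compressed_bytes = LZ4_compress_default(
        input.data(), compressed.data(), static_cast<int>(input.size()),
        static_cast<int>(compressed.size()));
    if (compressed_bytes <= 0)
        throw std::runtime_error("LZ4 compression failed");

    std::vector<char> out(input.size());
    const int decompressed_bytes = LZ4_decompress_safe(
        compressed.data(), out.data(), compressed_bytes, static_cast<int>(out.size()));
    if (decompressed_bytes < 0)                                  // error: value itself is meaningless
        throw std::runtime_error("LZ4 decompression failed");
    if (static_cast<size_t>(decompressed_bytes) != out.size())   // expected vs. actual size check
        throw std::runtime_error("unexpected decompressed size");
    return out;
}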
@@ -89,13 +82,12 @@ struct Lz4Decoder { uintptr_t(in), in_bytes, decompressed_size); + util::check_arg(std::size_t(decompressed_size) == out_bytes, "expected out_bytes == lz4 decompressed bytes, actual {} != {}", out_bytes, decompressed_size); } - -#pragma GCC diagnostic pop }; } // namespace arcticdb::detail diff --git a/cpp/arcticdb/codec/magic_words.hpp b/cpp/arcticdb/codec/magic_words.hpp index c654c7e92c..a7b8f9ed81 100644 --- a/cpp/arcticdb/codec/magic_words.hpp +++ b/cpp/arcticdb/codec/magic_words.hpp @@ -7,11 +7,15 @@ #pragma once +#include + namespace arcticdb { - using DescriptorMagic = util::MagicNum<'D','e','s','c'>; + using DescriptorFieldsMagic = util::MagicNum<'D','e','s','c'>; using EncodedMagic = util::MagicNum<'E','n','c','d'>; using StringPoolMagic = util::MagicNum<'S','t','r','p'>; using MetadataMagic = util::MagicNum<'M','e','t','a'>; using IndexMagic = util::MagicNum<'I','n','d','x'>; using ColumnMagic = util::MagicNum<'C','l','m','n'>; + using FrameMetadataMagic = util::MagicNum<'F','r', 'a', 'm'>; + using SegmentDescriptorMagic = util::MagicNum<'S','D', 's', 'c'>; } diff --git a/cpp/arcticdb/codec/passthrough.hpp b/cpp/arcticdb/codec/passthrough.hpp index 0660a05651..7629e3a11d 100644 --- a/cpp/arcticdb/codec/passthrough.hpp +++ b/cpp/arcticdb/codec/passthrough.hpp @@ -19,7 +19,6 @@ namespace arcticdb::detail { template class BlockType, class TD> struct PassthroughEncoderV1 { - using Opts = arcticdb::proto::encoding::VariantCodec::Passthrough; static size_t max_compressed_size(const BlockType &block ) { @@ -35,7 +34,12 @@ struct PassthroughEncoderV1 { } template - static void encode(const Opts&, const BlockType& block, EncodedFieldType& field, Buffer& out, std::ptrdiff_t& pos) { + static void encode( + const Opts&, + const BlockType& block, + EncodedFieldType& field, + Buffer& out, + std::ptrdiff_t& pos) { using namespace arcticdb::entity; using Helper = CodecHelper; using T = typename Helper::T; @@ -46,19 +50,20 @@ struct PassthroughEncoderV1 { if constexpr (Helper::dim == entity::Dimension::Dim0) { // Only store data, no shapes since dimension is 0 - auto v_block = Helper::scalar_block(block_row_count); - helper.ensure_buffer(out, pos, v_block.bytes_); + auto scalar_block = Helper::scalar_block(block_row_count); + helper.ensure_buffer(out, pos, scalar_block.bytes_); // doing copy + hash in one pass, this might have a negative effect on perf // since the hashing is path dependent. 
This is a toy example though so not critical - T *t_out = out.ptr_cast(pos, v_block.bytes_); - encode_block(d, v_block, helper.hasher_, t_out, pos); + T *t_out = out.ptr_cast(pos, scalar_block.bytes_); + encode_block(d, scalar_block, helper.hasher_, t_out, pos); auto *nd_array = field.mutable_ndarray(); auto total_row_count = nd_array->items_count() + block_row_count; nd_array->set_items_count(total_row_count); - auto values_pb = nd_array->add_values(); - v_block.set_block_data(*values_pb, helper.hasher_.digest(), v_block.bytes_); + auto values = nd_array->add_values(EncodingVersion::V1); + (void)values->mutable_codec()->mutable_passthrough(); + scalar_block.set_block_data(*values, helper.hasher_.digest(), scalar_block.bytes_); } else { auto helper_array_block = Helper::nd_array_block(block_row_count, block.shapes()); helper.ensure_buffer(out, pos, helper_array_block.shapes_.bytes_ + helper_array_block.values_.bytes_); @@ -73,12 +78,16 @@ struct PassthroughEncoderV1 { encode_block(d, helper_array_block.values_, helper.hasher_, t_out, pos); auto field_nd_array = field.mutable_ndarray(); // Important: In case V2 EncodedField is used shapes must be added before values. - auto shapes_pb = field_nd_array->add_shapes(); - auto values_pb = field_nd_array->add_values(); + auto shapes = field_nd_array->add_shapes(); + (void)shapes->mutable_codec()->mutable_passthrough(); + + auto values = field_nd_array->add_values(EncodingVersion::V1); + (void)values->mutable_codec()->mutable_passthrough(); + helper_array_block.update_field_size(*field_nd_array); helper_array_block.set_block_data( - shapes_pb, - values_pb, + shapes, + values, shape_hash, helper_array_block.shapes_.bytes_, helper.hasher_.digest(), @@ -87,7 +96,7 @@ struct PassthroughEncoderV1 { } private: template - static void encode_block(const T *in, BlockProtobufHelper &block_utils, HashAccum &hasher, T *out, std::ptrdiff_t &pos) { + static void encode_block(const T *in, BlockDataHelper &block_utils, HashAccum &hasher, T *out, std::ptrdiff_t &pos) { memcpy(out, in, block_utils.bytes_); hasher(in, block_utils.bytes_ / sizeof(T)); pos += static_cast(block_utils.bytes_); @@ -100,7 +109,6 @@ struct PassthroughEncoderV1 { /// @see arcticdb::ColumnEncoder2 arcticdb::detail::GenericBlockEncoder2 template class BlockType, class TD> struct PassthroughEncoderV2 { - using Opts = arcticdb::proto::encoding::VariantCodec::Passthrough; static size_t max_compressed_size(const BlockType &block) { @@ -109,12 +117,11 @@ struct PassthroughEncoderV2 { template static void encode( - const Opts&, - const BlockType &block, - Buffer &out, - std::ptrdiff_t &pos, - EncodedBlockType* encoded_block - ) { + const Opts&, + const BlockType &block, + Buffer &out, + std::ptrdiff_t &pos, + EncodedBlockType* encoded_block) { using namespace arcticdb::entity; using Helper = CodecHelper; using T = typename Helper::T; @@ -131,16 +138,16 @@ struct PassthroughEncoderV2 { encoded_block->set_in_bytes(data_byte_size); encoded_block->set_out_bytes(data_byte_size); encoded_block->set_hash(helper.hasher_.digest()); + (void)encoded_block->mutable_codec()->mutable_passthrough(); } private: template static void encode_block( - const T* in, - size_t in_byte_size, - HashAccum& hasher, - T* out, - std::ptrdiff_t& pos - ) { + const T* in, + size_t in_byte_size, + HashAccum& hasher, + T* out, + std::ptrdiff_t& pos) { memcpy(out, in, in_byte_size); hasher(in, in_byte_size / sizeof(T)); pos += static_cast(in_byte_size); @@ -149,10 +156,12 @@ struct PassthroughEncoderV2 { struct PassthroughDecoder { 
template - static void decode_block(const std::uint8_t *in, std::size_t in_bytes, T *t_out, - std::size_t out_bytes) { - arcticdb::util::check_arg(in_bytes == out_bytes, "expected in_bytes==out_bytes, actual {} != {}", in_bytes, - out_bytes); + static void decode_block( + const std::uint8_t *in, + std::size_t in_bytes, + T *t_out, + std::size_t out_bytes) { + arcticdb::util::check_arg(in_bytes == out_bytes, "expected in_bytes==out_bytes, actual {} != {}", in_bytes,out_bytes); memcpy(t_out, in, in_bytes); } }; diff --git a/cpp/arcticdb/codec/protobuf_mappings.cpp b/cpp/arcticdb/codec/protobuf_mappings.cpp new file mode 100644 index 0000000000..4b21907f11 --- /dev/null +++ b/cpp/arcticdb/codec/protobuf_mappings.cpp @@ -0,0 +1,158 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#include +#include +#include +#include "arcticdb/storage/memory_layout.hpp" +#include +#include +#include + +namespace arcticdb { + +void block_from_proto(const arcticdb::proto::encoding::Block& input, EncodedBlock& output, bool is_shape) { + output.set_in_bytes(input.in_bytes()); + output.set_out_bytes(input.out_bytes()); + output.set_hash(input.hash()); + output.set_encoder_version(static_cast(input.encoder_version())); + output.is_shape_ = is_shape; + switch (input.codec().codec_case()) { + case arcticdb::proto::encoding::VariantCodec::kZstd: { + set_codec(input.codec().zstd(), *output.mutable_codec()->mutable_zstd()); + break; + } + case arcticdb::proto::encoding::VariantCodec::kLz4: { + set_codec(input.codec().lz4(), *output.mutable_codec()->mutable_lz4()); + break; + } + case arcticdb::proto::encoding::VariantCodec::kPassthrough : { + set_codec(input.codec().passthrough(), *output.mutable_codec()->mutable_passthrough()); + break; + } + default: + util::raise_rte("Unrecognized_codec"); + } +} + +void proto_from_block(const EncodedBlock& input, arcticdb::proto::encoding::Block& output) { + output.set_in_bytes(input.in_bytes()); + output.set_out_bytes(input.out_bytes()); + output.set_hash(input.hash()); + output.set_encoder_version(input.encoder_version()); + + switch (input.codec().codec_) { + case Codec::ZSTD: { + set_zstd(input.codec().zstd(), *output.mutable_codec()->mutable_zstd()); + break; + } + case Codec::LZ4: { + set_lz4(input.codec().lz4(), *output.mutable_codec()->mutable_lz4()); + break; + } + case Codec::PASS: { + set_passthrough(input.codec().passthrough(), *output.mutable_codec()->mutable_passthrough()); + break; + } + default: + util::raise_rte("Unrecognized_codec"); + } +} + +void encoded_field_from_proto(const arcticdb::proto::encoding::EncodedField& input, EncodedFieldImpl& output) { + util::check(input.has_ndarray(), "Only ndarray fields supported for v1 encoding"); + const auto& input_ndarray = input.ndarray(); + auto* output_ndarray = output.mutable_ndarray(); + output_ndarray->set_items_count(input_ndarray.items_count()); + output_ndarray->set_sparse_map_bytes(input_ndarray.sparse_map_bytes()); + + for(auto i = 0; i < input_ndarray.shapes_size(); ++i) { + auto* shape_block = output_ndarray->add_shapes(); + block_from_proto(input_ndarray.shapes(i), *shape_block, true); + } + + for(auto i = 0; i < input_ndarray.values_size(); ++i) { + auto* value_block = 
output_ndarray->add_values(EncodingVersion::V1); + block_from_proto(input_ndarray.values(i), *value_block, false); + } +} + +void copy_encoded_field_to_proto(const EncodedFieldImpl& input, arcticdb::proto::encoding::EncodedField& output) { + util::check(input.has_ndarray(), "Only ndarray fields supported for v1 encoding"); + ARCTICDB_TRACE(log::codec(), "Copying field to proto: {}", input); + const auto& input_ndarray = input.ndarray(); + auto* output_ndarray = output.mutable_ndarray(); + output_ndarray->set_items_count(input_ndarray.items_count()); + output_ndarray->set_sparse_map_bytes(input_ndarray.sparse_map_bytes()); + + for(auto i = 0; i < input_ndarray.shapes_size(); ++i) { + auto* shape_block = output_ndarray->add_shapes(); + proto_from_block(input_ndarray.shapes(i), *shape_block); + } + + for(auto i = 0; i < input_ndarray.values_size(); ++i) { + auto* value_block = output_ndarray->add_values(); + proto_from_block(input_ndarray.values(i), *value_block); + } +} + +size_t num_blocks(const arcticdb::proto::encoding::EncodedField& field) { + util::check(field.has_ndarray(), "Expected ndarray in segment header"); + return field.ndarray().shapes_size() + field.ndarray().values_size(); +} + +SegmentHeader deserialize_segment_header_from_proto(const arcticdb::proto::encoding::SegmentHeader& header) { + SegmentHeader output; + output.set_encoding_version(EncodingVersion(header.encoding_version())); + output.set_compacted(header.compacted()); + + if(header.has_metadata_field()) + encoded_field_from_proto(header.metadata_field(), output.mutable_metadata_field(num_blocks(header.metadata_field()))); + + if(header.has_string_pool_field()) + encoded_field_from_proto(header.string_pool_field(), output.mutable_string_pool_field(num_blocks(header.string_pool_field()))); + + auto fields_from_proto = encoded_fields_from_proto(header); + output.set_body_fields(std::move(fields_from_proto)); + return output; +} + +size_t calc_proto_encoded_blocks_size(const arcticdb::proto::encoding::SegmentHeader& hdr) { + size_t bytes{}; + for(const auto& field : hdr.fields()) { + bytes += EncodedFieldImpl::Size; + if(field.has_ndarray()) { + const auto& ndarray = field.ndarray(); + const auto shapes_size = sizeof(EncodedBlock) * ndarray.shapes_size(); + const auto values_size = sizeof(EncodedBlock) * ndarray.values_size(); + bytes += shapes_size + values_size; + } + } + return bytes; +} + +EncodedFieldCollection encoded_fields_from_proto(const arcticdb::proto::encoding::SegmentHeader& hdr) { + const auto encoded_buffer_size = calc_proto_encoded_blocks_size(hdr); + EncodedFieldCollection encoded_fields; + encoded_fields.reserve(encoded_buffer_size, hdr.fields_size()); + for(auto&& [index, in_field] : folly::enumerate(hdr.fields())) { + auto* out_field = encoded_fields.add_field(num_blocks(in_field)); + encoded_field_from_proto(in_field, *out_field); + } + return encoded_fields; +} + +void copy_encoded_fields_to_proto(const EncodedFieldCollection& fields, arcticdb::proto::encoding::SegmentHeader& hdr) { + auto& proto_fields = *hdr.mutable_fields(); + auto field = fields.begin(); + for(auto i = 0U; i < fields.size(); ++i) { + auto* proto_field = proto_fields.Add(); + copy_encoded_field_to_proto(field.current(), *proto_field); + ++field; + } +} + +} // namespace arcticdb diff --git a/cpp/arcticdb/codec/protobuf_mappings.hpp b/cpp/arcticdb/codec/protobuf_mappings.hpp new file mode 100644 index 0000000000..e36a0a179d --- /dev/null +++ b/cpp/arcticdb/codec/protobuf_mappings.hpp @@ -0,0 +1,79 @@ +/* Copyright 2023 Man Group 
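// Editor's sketch (not ArcticDB code): the shape of the two-way mapping performed in
// codec/protobuf_mappings.cpp above (block_from_proto / proto_from_block), with hypothetical
// stand-in types instead of the generated protobuf classes. Each direction is a switch over
// the codec tag, and an unrecognized tag is a hard error rather than a silent passthrough.
#include <cstdint>
#include <stdexcept>

enum class Codec : uint8_t { PASS, LZ4, ZSTD };               // native representation
enum class WireCodec : uint8_t { kPassthrough, kLz4, kZstd }; // stand-in for the proto codec case

struct NativeBlock { Codec codec; uint32_t in_bytes; uint32_t out_bytes; };
struct WireBlock   { WireCodec codec; uint32_t in_bytes; uint32_t out_bytes; };

NativeBlock block_from_wire(const WireBlock& in) {
    NativeBlock out{Codec::PASS, in.in_bytes, in.out_bytes};
    switch (in.codec) {
    case WireCodec::kPassthrough: out.codec = Codec::PASS; break;
    case WireCodec::kLz4:         out.codec = Codec::LZ4;  break;
    case WireCodec::kZstd:        out.codec = Codec::ZSTD; break;
    default: throw std::runtime_error("Unrecognized codec");
    }
    return out;
}

WireBlock wire_from_block(const NativeBlock& in) {
    WireBlock out{WireCodec::kPassthrough, in.in_bytes, in.out_bytes};
    switch (in.codec) {
    case Codec::PASS: out.codec = WireCodec::kPassthrough; break;
    case Codec::LZ4:  out.codec = WireCodec::kLz4;         break;
    case Codec::ZSTD: out.codec = WireCodec::kZstd;        break;
    default: throw std::runtime_error("Unrecognized codec");
    }
    return out;
}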
Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#pragma once + +#include +#include +#include +#include "arcticdb/storage/memory_layout.hpp" +#include + +namespace arcticdb { + +template +void copy_codec(T& out_codec, const U& in_codec) { + out_codec.MergeFrom(in_codec); +} + +inline void copy_codec(ZstdCodec& codec, const arcticdb::proto::encoding::VariantCodec::Zstd& zstd) { + codec.level_ = zstd.level(); + codec.is_streaming_ = zstd.is_streaming(); +} + +inline void copy_codec(Lz4Codec& codec, const arcticdb::proto::encoding::VariantCodec::Lz4& lz4) { + codec.acceleration_ = lz4.acceleration(); +} + +inline void copy_codec(PassthroughCodec&, const arcticdb::proto::encoding::VariantCodec::Passthrough&) { + // No data in passthrough +} + +[[nodiscard]] inline arcticdb::proto::encoding::VariantCodec::CodecCase codec_case(Codec codec) { + switch (codec) { + case Codec::ZSTD:return arcticdb::proto::encoding::VariantCodec::kZstd; + case Codec::LZ4:return arcticdb::proto::encoding::VariantCodec::kLz4; + case Codec::PFOR:return arcticdb::proto::encoding::VariantCodec::kTp4; + case Codec::PASS:return arcticdb::proto::encoding::VariantCodec::kPassthrough; + default:util::raise_rte("Unknown codec"); + } +} + +template +void set_codec(Input& in, Output& out) { + copy_codec(out, in); +} + +void block_from_proto(const arcticdb::proto::encoding::Block& input, EncodedBlock& output, bool is_shape); + +inline void set_lz4(const Lz4Codec& lz4_in, arcticdb::proto::encoding::VariantCodec::Lz4& lz4_out) { + lz4_out.set_acceleration(lz4_in.acceleration_); +} + +inline void set_zstd(const ZstdCodec& zstd_in, arcticdb::proto::encoding::VariantCodec::Zstd& zstd_out) { + zstd_out.set_is_streaming(zstd_in.is_streaming_); + zstd_out.set_level(zstd_in.level_); +} + +inline void set_passthrough(const PassthroughCodec& passthrough_in, arcticdb::proto::encoding::VariantCodec::Passthrough& passthrough_out) { + passthrough_out.set_mark(passthrough_in.unused_); +} + +void proto_from_block(const EncodedBlock& input, arcticdb::proto::encoding::Block& output); + +void encoded_field_from_proto(const arcticdb::proto::encoding::EncodedField& input, EncodedFieldImpl& output); + +void copy_encoded_field_to_proto(const EncodedFieldImpl& input, arcticdb::proto::encoding::EncodedField& output); + +SegmentHeader deserialize_segment_header_from_proto(const arcticdb::proto::encoding::SegmentHeader& header); + +size_t calc_proto_encoded_blocks_size(const arcticdb::proto::encoding::SegmentHeader& hdr); + +EncodedFieldCollection encoded_fields_from_proto(const arcticdb::proto::encoding::SegmentHeader& hdr); + +void copy_encoded_fields_to_proto(const EncodedFieldCollection& fields, arcticdb::proto::encoding::SegmentHeader& hdr); + +} //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/python_bindings.cpp b/cpp/arcticdb/codec/python_bindings.cpp index eeddaffd12..403ce2e783 100644 --- a/cpp/arcticdb/codec/python_bindings.cpp +++ b/cpp/arcticdb/codec/python_bindings.cpp @@ -102,7 +102,7 @@ Segment encode_segment(SegmentInMemory segment_in_memory, const py::object &opts return encode_dispatch(std::move(segment_in_memory), opts_cpp, encoding_version); } -SegmentInMemory decode_python_segment(Segment segment) { +SegmentInMemory 
decode_python_segment(Segment& segment) { return decode_segment(std::move(segment)); } @@ -188,8 +188,8 @@ void register_codec(py::module &m) { .def("fields_size", &Segment::fields_size) .def("fields", &Segment::fields_vector) .def_property_readonly("header", [](const Segment& self) { - return pb_to_python(self.header()); - }) + return self.header().clone(); + }, py::return_value_policy::move) .def_property_readonly("bytes", [](const Segment& self) { return py::bytes(reinterpret_cast(self.buffer().data()), self.buffer().bytes()); }); diff --git a/cpp/arcticdb/codec/segment.cpp b/cpp/arcticdb/codec/segment.cpp index 5eeab44b0d..a71ede7e65 100644 --- a/cpp/arcticdb/codec/segment.cpp +++ b/cpp/arcticdb/codec/segment.cpp @@ -8,247 +8,342 @@ #include #include #include +#include -#include #include #include -#include +#include namespace arcticdb { + +arcticdb::proto::encoding::SegmentHeader generate_v1_header(const SegmentHeader& header, const StreamDescriptor& desc) { + arcticdb::proto::encoding::SegmentHeader segment_header; + if(header.has_metadata_field()) + copy_encoded_field_to_proto(header.metadata_field(), *segment_header.mutable_metadata_field()); + + if(header.has_string_pool_field()) + copy_encoded_field_to_proto(header.string_pool_field(), *segment_header.mutable_string_pool_field()); + + copy_stream_descriptor_to_proto(desc, *segment_header.mutable_stream_descriptor()); + copy_encoded_fields_to_proto(header.body_fields(), segment_header); + + segment_header.set_compacted(header.compacted()); + segment_header.set_encoding_version(static_cast(header.encoding_version())); + + ARCTICDB_TRACE(log::codec(), "Encoded segment header {}", segment_header.DebugString()); + return segment_header; +} + namespace segment_size { -std::tuple compressed(const arcticdb::proto::encoding::SegmentHeader &seg_hdr) { + +size_t column_fields_size(const SegmentHeader& seg_hdr) { + if(!seg_hdr.has_column_fields()) + return 0; + + return encoding_sizes::ndarray_field_compressed_size(seg_hdr.column_fields().ndarray()); +} + +SegmentCompressedSize compressed(const SegmentHeader &seg_hdr, const std::optional& proto_wrapper) { size_t string_pool_size = 0; if (seg_hdr.has_string_pool_field()) string_pool_size = encoding_sizes::ndarray_field_compressed_size(seg_hdr.string_pool_field().ndarray()); - std::size_t buffer_size = 0; - if (EncodingVersion(seg_hdr.encoding_version()) == EncodingVersion::V1) { - size_t metadata_size = 0; - // If we have metadata it is part of the buffer size, otherwise the allocated buffer is much too small - if (seg_hdr.has_metadata_field()) - metadata_size = encoding_sizes::ndarray_field_compressed_size(seg_hdr.metadata_field().ndarray()); + size_t metadata_size = 0; + if (seg_hdr.has_metadata_field()) + metadata_size = encoding_sizes::ndarray_field_compressed_size(seg_hdr.metadata_field().ndarray()); + + size_t buffer_size; + size_t body_size; + if(seg_hdr.encoding_version() == EncodingVersion::V1) { + const auto fields_size = encoding_sizes::segment_compressed_size(proto_wrapper->proto().fields()); + ARCTICDB_DEBUG(log::codec(), "Calculating total size: {} fields + {} metadata + {} string pool = {}", fields_size, metadata_size, string_pool_size, fields_size + metadata_size + string_pool_size); + buffer_size = fields_size + metadata_size + string_pool_size; + body_size = buffer_size; + } else { + buffer_size = seg_hdr.footer_offset(); + if(seg_hdr.has_column_fields()) + buffer_size += sizeof(EncodedMagic) + column_fields_size(seg_hdr); - buffer_size = 
encoding_sizes::segment_compressed_size(seg_hdr.fields()) + metadata_size + string_pool_size; + body_size = seg_hdr.footer_offset(); + ARCTICDB_DEBUG(log::codec(), "V2 size buffer: {} body {}", buffer_size, body_size); } - else - buffer_size = seg_hdr.column_fields().offset() + sizeof(EncodedMagic) + encoding_sizes::ndarray_field_compressed_size(seg_hdr.column_fields().ndarray()); - return {string_pool_size, buffer_size}; + return {string_pool_size, buffer_size, body_size}; } } -FieldCollection decode_fields( - const arcticdb::proto::encoding::SegmentHeader& hdr, - const uint8_t* data) { - const auto begin ARCTICDB_UNUSED = data; +FieldCollection decode_descriptor_fields( + const SegmentHeader& hdr, + const uint8_t* data, + const uint8_t* begin ARCTICDB_UNUSED) { FieldCollection fields; if (hdr.has_descriptor_field()) { - ARCTICDB_TRACE(log::codec(), "Decoding string pool"); std::optional bv; - data += decode_field(FieldCollection::type(), + util::check(hdr.descriptor_field().has_ndarray(), "Expected descriptor field to be ndarray"); + (void)decode_ndarray(FieldCollection::type(), hdr.descriptor_field(), data, fields, bv, - to_encoding_version(hdr.encoding_version())); + hdr.encoding_version()); - ARCTICDB_TRACE(log::codec(), "Decoded string pool to position {}", data-begin); + ARCTICDB_TRACE(log::codec(), "Decoded descriptor to position {}", data-begin); } fields.regenerate_offsets(); return fields; } -std::optional decode_index_fields( - const arcticdb::proto::encoding::SegmentHeader& hdr, - const uint8_t*& data, - const uint8_t* const begin ARCTICDB_UNUSED - ) { - if(hdr.has_index_descriptor_field()) { - FieldCollection fields; - ARCTICDB_TRACE(log::codec(), "Decoding string pool"); - std::optional bv; - data += decode_field(FieldCollection::type(), - hdr.index_descriptor_field(), - data, - fields, - bv, - to_encoding_version(hdr.encoding_version())); +SegmentHeaderProtoWrapper decode_protobuf_header(const uint8_t* data, size_t header_bytes_size) { + google::protobuf::io::ArrayInputStream ais(data, static_cast(header_bytes_size)); - ARCTICDB_TRACE(log::codec(), "Decoded string pool to position {}", data-begin); - return std::make_optional(std::move(fields)); - } else { - return std::nullopt; + auto arena = std::make_unique(); + auto seg_hdr = google::protobuf::Arena::CreateMessage(arena.get()); + seg_hdr->ParseFromZeroCopyStream(&ais); + ARCTICDB_TRACE(log::codec(), "Decoded protobuf header: {}", seg_hdr->DebugString()); + return {seg_hdr, std::move(arena)}; +} + +void skip_metadata_field(const uint8_t*& src, const SegmentHeader& seg_hdr) { + util::check_magic(src); + if(seg_hdr.has_metadata_field()) { + const auto metadata_size = encoding_sizes::field_compressed_size(seg_hdr.metadata_field()); + ARCTICDB_TRACE(log::codec(), "Skipping {} bytes of metadata", metadata_size); + src += metadata_size; } } -Segment Segment::from_bytes(const std::uint8_t* src, std::size_t readable_size, bool copy_data /* = false */) { - ARCTICDB_SAMPLE(SegmentFromBytes, 0) - auto* fixed_hdr = reinterpret_cast(src); - util::check_arg(fixed_hdr->magic_number == MAGIC_NUMBER, "expected first 2 bytes: {}, actual {}", fixed_hdr->magic_number, MAGIC_NUMBER); +FieldCollection deserialize_descriptor_fields_collection(const uint8_t* src, const SegmentHeader& seg_hdr) { + FieldCollection fields; + util::check_magic(src); + if(seg_hdr.has_descriptor_field() && seg_hdr.descriptor_field().has_ndarray()) + fields = decode_descriptor_fields(seg_hdr, src, src); - ARCTICDB_SUBSAMPLE(ReadHeaderAndSegment, 0) - auto header_bytes 
ARCTICDB_UNUSED = arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes; - ARCTICDB_DEBUG(log::codec(), "Reading header: {} + {} = {}", - arcticdb::Segment::FIXED_HEADER_SIZE, - fixed_hdr->header_bytes, - header_bytes); - google::protobuf::io::ArrayInputStream ais(src + arcticdb::Segment::FIXED_HEADER_SIZE, static_cast(fixed_hdr->header_bytes)); - auto arena = std::make_unique(); - auto seg_hdr = google::protobuf::Arena::CreateMessage(arena.get()); - seg_hdr->ParseFromZeroCopyStream(&ais); - if(!seg_hdr->has_metadata_field()) - ARCTICDB_DEBUG(log::storage(), "Segment has no medatadata field"); + return fields; +} +EncodedFieldCollection deserialize_body_fields(const SegmentHeader& hdr, const uint8_t* data) { + const auto* encoded_fields_ptr = data; + util::check(hdr.has_column_fields(), "Expected column fields in v2 encoding"); + util::check_magic(encoded_fields_ptr); - src += arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes; + return EncodedFieldCollection{decode_encoded_fields(hdr, encoded_fields_ptr, data)}; +} - auto version = EncodingVersion(seg_hdr->encoding_version()); - util::check(version == EncodingVersion::V1 || version == EncodingVersion::V2 , - "expected encoding_version < 2, actual {}", - seg_hdr->encoding_version()); +struct DeserializedSegmentData { + SegmentHeader segment_header_; + std::shared_ptr fields_; + std::shared_ptr segment_desc_; + std::optional proto_wrapper_; + StreamId stream_id_; +}; - FieldCollection fields; - if(version == EncodingVersion::V1) - fields = fields_from_proto(seg_hdr->stream_descriptor()); - else { - const auto* fields_ptr = src; - util::check_magic(fields_ptr); - if(seg_hdr->has_metadata_field()) - fields_ptr += encoding_sizes::field_compressed_size(seg_hdr->metadata_field()); - - util::check_magic(fields_ptr); - if(seg_hdr->has_descriptor_field() && seg_hdr->descriptor_field().has_ndarray()) - fields = decode_fields(*seg_hdr, fields_ptr); +DeserializedSegmentData decode_header_and_fields(const uint8_t*& src, bool copy_data) { + auto* fixed_hdr = reinterpret_cast(src); + ARCTICDB_DEBUG(log::codec(), "Reading header: {} + {} = {}", FIXED_HEADER_SIZE, fixed_hdr->header_bytes, FIXED_HEADER_SIZE + fixed_hdr->header_bytes); + + util::check_arg(fixed_hdr->magic_number == MAGIC_NUMBER, "expected first 2 bytes: {}, actual {}", fixed_hdr->magic_number, MAGIC_NUMBER); + std::optional proto_wrapper; + + const auto* header_ptr = src + FIXED_HEADER_SIZE; + if(const auto header_version = fixed_hdr->encoding_version; header_version == HEADER_VERSION_V1) { + proto_wrapper = decode_protobuf_header(header_ptr, fixed_hdr->header_bytes); + auto data = std::make_shared(segment_descriptor_from_proto(proto_wrapper->proto().stream_descriptor())); + auto segment_header = deserialize_segment_header_from_proto(proto_wrapper->proto()); + util::check(segment_header.encoding_version() == EncodingVersion::V1, "Expected v1 header to contain legacy encoding version"); + auto fields = std::make_shared(field_collection_from_proto(proto_wrapper->proto().stream_descriptor().fields())); + src += FIXED_HEADER_SIZE + fixed_hdr->header_bytes; + auto stream_id = stream_id_from_proto(proto_wrapper->proto().stream_descriptor()); + return {std::move(segment_header), std::move(fields), std::move(data), std::move(proto_wrapper), stream_id}; + } else { + SegmentHeader segment_header; + const auto* fields_ptr = header_ptr + fixed_hdr->header_bytes; + segment_header.deserialize_from_bytes(header_ptr, copy_data); + skip_metadata_field(fields_ptr, segment_header); + auto 
segment_desc = std::make_shared(read_segment_descriptor(fields_ptr)); + auto stream_id = read_identifier(fields_ptr); + util::check(segment_header.encoding_version() == EncodingVersion::V2, "Expected V2 encoding in binary header"); + auto fields = std::make_shared(deserialize_descriptor_fields_collection(fields_ptr, segment_header)); + src += FIXED_HEADER_SIZE + fixed_hdr->header_bytes; + return {std::move(segment_header), std::move(fields), std::move(segment_desc), std::move(proto_wrapper), stream_id}; } +} + +void check_encoding(EncodingVersion encoding_version) { + util::check(encoding_version == EncodingVersion::V1 || encoding_version == EncodingVersion::V2 , + "expected encoding_version < 2, actual {}", + encoding_version); +} - const auto[string_pool_size, buffer_bytes] = segment_size::compressed(*seg_hdr); - ARCTICDB_DEBUG(log::codec(), "Reading string pool {} header {} + {} and buffer bytes {}", string_pool_size, arcticdb::Segment::FIXED_HEADER_SIZE, fixed_hdr->header_bytes, buffer_bytes); - util::check(arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes + buffer_bytes <= readable_size, +void check_size(const FixedHeader* fixed_hdr, size_t buffer_bytes, size_t readable_size, size_t string_pool_size) { + util::check(FIXED_HEADER_SIZE + fixed_hdr->header_bytes + buffer_bytes <= readable_size, "Size disparity, fixed header size {} + variable header size {} + buffer size {} (string pool size {}) >= total size {}", - arcticdb::Segment::FIXED_HEADER_SIZE, + FIXED_HEADER_SIZE, fixed_hdr->header_bytes, buffer_bytes, string_pool_size, - readable_size - ); + readable_size); +} + +void set_body_fields(SegmentHeader& seg_hdr, const uint8_t* src) { + if(seg_hdr.has_column_fields()) { + auto encoded_fields = deserialize_body_fields(seg_hdr, src + seg_hdr.footer_offset()); + seg_hdr.set_body_fields(std::move(encoded_fields)); + } +} +Segment Segment::from_bytes(const std::uint8_t* src, std::size_t readable_size, bool copy_data /* = false */) { + ARCTICDB_SAMPLE(SegmentFromBytes, 0) + util::check(src != nullptr, "Got null data ptr from segment"); + auto* fixed_hdr = reinterpret_cast(src); + auto [seg_hdr, fields, desc_data, proto_wrapper, stream_id] = decode_header_and_fields(src, copy_data); + check_encoding(seg_hdr.encoding_version()); + const auto[string_pool_size, buffer_bytes, body_bytes] = segment_size::compressed(seg_hdr, proto_wrapper); + check_size(fixed_hdr, buffer_bytes, readable_size, string_pool_size); + ARCTICDB_DEBUG(log::codec(), "Reading string pool {} header {} + {} and buffer bytes {}", string_pool_size, FIXED_HEADER_SIZE, fixed_hdr->header_bytes, buffer_bytes); ARCTICDB_SUBSAMPLE(CreateBufferView, 0) + VariantBuffer variant_buffer; if (copy_data) { auto buf = std::make_shared(); buf->ensure(buffer_bytes); memcpy(buf->data(), src, buffer_bytes); - return {std::move(arena), seg_hdr, std::move(buf), std::make_shared(std::move(fields))}; + variant_buffer = std::move(buf); } else { - BufferView bv{const_cast(src), buffer_bytes}; - return {std::move(arena), seg_hdr, std::move(bv), std::make_shared(std::move(fields))}; + variant_buffer = BufferView{const_cast(src), buffer_bytes}; } -} + set_body_fields(seg_hdr, src); + return {std::move(seg_hdr), std::move(variant_buffer), std::move(desc_data), std::move(fields), stream_id, readable_size}; +} -Segment Segment::from_buffer(std::shared_ptr&& buffer) { - ARCTICDB_SAMPLE(SegmentFromBytes, 0) - auto* fixed_hdr = reinterpret_cast(buffer->data()); +Segment Segment::from_buffer(const std::shared_ptr& buffer) { + 
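// Editor's sketch (not ArcticDB code): the read-side dispatch decode_header_and_fields performs
// above, reduced to hypothetical types. A small fixed header is read first; its magic number is
// validated and its version decides whether the variable-length header that follows is parsed as
// protobuf (HEADER_VERSION_V1) or as the flat binary layout (HEADER_VERSION_V2). The 0xFA57
// magic and version values come from the segment header definitions in this change.
#include <cstdint>
#include <cstring>
#include <stdexcept>

struct FixedHdr {              // stand-in for FixedHeader
    uint16_t magic;
    uint16_t version;          // 1 = protobuf header, 2 = binary header
    uint32_t header_bytes;     // size of the variable-length header that follows
};
constexpr uint16_t kMagic = 0xFA57;

enum class HeaderKind { Proto, Binary };

HeaderKind parse_fixed_header(const uint8_t* src, size_t readable, FixedHdr& out) {
    if (readable < sizeof(FixedHdr))
        throw std::runtime_error("segment too small for fixed header");
    std::memcpy(&out, src, sizeof(FixedHdr));
    if (out.magic != kMagic)
        throw std::runtime_error("bad magic number");
    if (sizeof(FixedHdr) + out.header_bytes > readable)
        throw std::runtime_error("declared header larger than readable bytes");
    switch (out.version) {
    case 1: return HeaderKind::Proto;   // decode with protobuf, as in the V1 path
    case 2: return HeaderKind::Binary;  // decode the flat binary header, as in the V2 path
    default: throw std::runtime_error("unknown header version");
    }
}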
ARCTICDB_SAMPLE(SegmentFromBuffer, 0) + auto* fixed_hdr = reinterpret_cast(buffer->data()); auto readable_size = buffer->bytes(); - util::check_arg(fixed_hdr->magic_number == MAGIC_NUMBER, "expected first 2 bytes: {}, actual {}", - MAGIC_NUMBER, fixed_hdr->magic_number); - util::check_arg(fixed_hdr->encoding_version == HEADER_VERSION_V1, - "expected encoding_version {}, actual {}", - HEADER_VERSION_V1 , fixed_hdr->encoding_version); + const auto* src = buffer->data(); + auto [seg_hdr, fields, desc_data, proto_wrapper, stream_id] = decode_header_and_fields(src, false); + check_encoding(seg_hdr.encoding_version()); ARCTICDB_SUBSAMPLE(ReadHeaderAndSegment, 0) - auto header_bytes ARCTICDB_UNUSED = arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes; - ARCTICDB_DEBUG(log::codec(), "Reading header: {} + {} = {}", - arcticdb::Segment::FIXED_HEADER_SIZE, - fixed_hdr->header_bytes, - header_bytes); - google::protobuf::io::ArrayInputStream ais(buffer->data() + arcticdb::Segment::FIXED_HEADER_SIZE, fixed_hdr->header_bytes); - auto arena = std::make_unique(); - auto seg_hdr = google::protobuf::Arena::CreateMessage(arena.get()); - seg_hdr->ParseFromZeroCopyStream(&ais); - - const auto[string_pool_size, buffer_bytes] = segment_size::compressed(*seg_hdr); - ARCTICDB_DEBUG(log::codec(), "Reading string pool {} and buffer bytes {}", string_pool_size, buffer_bytes); - util::check(arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes + buffer_bytes <= readable_size, - "Size disparity, fixed header size {} + variable header size {} + buffer size {} (string pool size {}) >= total size {}", - arcticdb::Segment::FIXED_HEADER_SIZE, - fixed_hdr->header_bytes, - buffer_bytes, - string_pool_size, - readable_size - ); + auto header_bytes ARCTICDB_UNUSED = FIXED_HEADER_SIZE + fixed_hdr->header_bytes; + ARCTICDB_TRACE(log::codec(), "Reading header: {} + {} = {}", FIXED_HEADER_SIZE, fixed_hdr->header_bytes, header_bytes); - auto version = EncodingVersion(seg_hdr->encoding_version()); - util::check(version == EncodingVersion::V1 || version == EncodingVersion::V2, - "expected encoding_version < 2, actual {}", - seg_hdr->encoding_version()); + const auto[string_pool_size, buffer_bytes, body_bytes] = segment_size::compressed(seg_hdr, proto_wrapper); + ARCTICDB_TRACE(log::codec(), "Reading string pool {} and buffer bytes {}", string_pool_size, buffer_bytes); + check_size(fixed_hdr, buffer_bytes, readable_size, string_pool_size); + set_body_fields(seg_hdr, src); + buffer->set_preamble(FIXED_HEADER_SIZE + fixed_hdr->header_bytes); + ARCTICDB_SUBSAMPLE(CreateSegment, 0) + return{std::move(seg_hdr), buffer, std::move(desc_data), std::move(fields), stream_id, readable_size}; +} - auto preamble_size = arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes; +size_t Segment::write_proto_header(uint8_t* dst) { + const auto& header = generate_header_proto(); + const auto hdr_size = proto_size(); + FixedHeader hdr = {MAGIC_NUMBER, HEADER_VERSION_V1, std::uint32_t(hdr_size)}; + write_fixed_header(dst, hdr); - FieldCollection fields; - if(version == EncodingVersion::V1) { - fields = fields_from_proto(seg_hdr->stream_descriptor()); - } - else { - const auto* fields_ptr = buffer->data() + preamble_size; - util::check_magic(fields_ptr); - if(seg_hdr->has_metadata_field()) - fields_ptr += encoding_sizes::field_compressed_size(seg_hdr->metadata_field()); - - util::check_magic(fields_ptr); - if(seg_hdr->has_descriptor_field() && seg_hdr->descriptor_field().has_ndarray()) - fields = decode_fields(*seg_hdr, 
fields_ptr); - } + google::protobuf::io::ArrayOutputStream aos(dst + FIXED_HEADER_SIZE, static_cast(hdr_size)); + header.SerializeToZeroCopyStream(&aos); + return hdr_size; +} - buffer->set_preamble(arcticdb::Segment::FIXED_HEADER_SIZE + fixed_hdr->header_bytes); - ARCTICDB_SUBSAMPLE(CreateSegment, 0) - return{std::move(arena), seg_hdr, std::move(buffer), std::make_shared(std::move(fields))}; +size_t Segment::write_binary_header(uint8_t* dst) const { + auto bytes_written = header_.serialize_to_bytes(dst + sizeof(FixedHeader)); + FixedHeader hdr = {MAGIC_NUMBER, HEADER_VERSION_V2, std::uint32_t(bytes_written)}; + write_fixed_header(dst, hdr); + return bytes_written; +} +std::pair Segment::serialize_header_v2(size_t expected_bytes) { + ARCTICDB_TRACE(log::codec(), "Calculating bytes for header {}", header_); + const auto header_bytes = header_.bytes() + sizeof(FixedHeader); + FixedHeader hdr = {MAGIC_NUMBER, HEADER_VERSION_V2, std::uint32_t(expected_bytes)}; + util::check(header_bytes == buffer_.preamble_bytes(), "Expected v2 header of size {} to fit exactly into buffer preamble of size {}", header_.bytes(), buffer_.preamble_bytes()); + const auto &buffer = buffer_.get_owning_buffer(); + auto* dst = buffer->preamble(); + write_fixed_header(dst, hdr); + header_.serialize_to_bytes(dst + FIXED_HEADER_SIZE, expected_bytes); + return std::make_pair(buffer->preamble(), calculate_size()); } -void Segment::write_header(uint8_t* dst, size_t hdr_size) const { - FixedHeader hdr = {MAGIC_NUMBER, HEADER_VERSION_V1, std::uint32_t(hdr_size)}; - hdr.write(dst); - if(!header_->has_metadata_field()) - ARCTICDB_DEBUG(log::codec(), "Expected metadata field"); +std::pair Segment::serialize_v1_header_in_place(size_t total_hdr_size) { + const auto &buffer = buffer_.get_owning_buffer(); + auto base_ptr = buffer->preamble() + (buffer->preamble_bytes() - total_hdr_size); + util::check(base_ptr + total_hdr_size == buffer->data(), "Expected base ptr to align with data ptr, {} != {}",fmt::ptr(base_ptr + total_hdr_size),fmt::ptr(buffer->data())); + write_proto_header(base_ptr); + ARCTICDB_TRACE(log::storage(), "Header fits in internal buffer {:x} with {} bytes space: {}", intptr_t (base_ptr), buffer->preamble_bytes() - total_hdr_size,dump_bytes(buffer->data(), buffer->bytes(), 100u)); + return std::make_pair(base_ptr, calculate_size()); +} - google::protobuf::io::ArrayOutputStream aos(dst + FIXED_HEADER_SIZE, static_cast(hdr_size)); - header_->SerializeToZeroCopyStream(&aos); +std::tuple> Segment::serialize_v1_header_to_buffer(size_t hdr_size) { + auto tmp = std::make_unique(); + ARCTICDB_TRACE(log::storage(), "Header doesn't fit in internal buffer, needed {} bytes but had {}, writing to temp buffer at {:x}", hdr_size, buffer_.preamble_bytes(),uintptr_t(tmp->data())); + tmp->ensure(calculate_size()); + auto* dst = tmp->preamble(); + write_proto_header(dst); + std::memcpy(dst + FIXED_HEADER_SIZE + hdr_size, + buffer().data(), + buffer().bytes()); + return std::make_tuple(tmp->preamble(), calculate_size(), std::move(tmp)); } -std::pair Segment::try_internal_write(std::shared_ptr& tmp, size_t hdr_size) { +std::tuple> Segment::serialize_header_v1() { + auto proto_header = generate_v1_header(header_, desc_); + const auto hdr_size = proto_header.ByteSizeLong(); auto total_hdr_size = hdr_size + FIXED_HEADER_SIZE; - if(std::holds_alternative>(buffer_) && std::get>(buffer_)->preamble_bytes() >= total_hdr_size) { - auto& buffer = std::get>(buffer_); - auto base_ptr = buffer->preamble() + (buffer->preamble_bytes() - 
total_hdr_size); - util::check(base_ptr + total_hdr_size == buffer->data(), "Expected base ptr to align with data ptr, {} != {}", fmt::ptr(base_ptr + total_hdr_size), fmt::ptr(buffer->data())); - ARCTICDB_TRACE(log::codec(), "Buffer contents before header write: {}", dump_bytes(buffer->data(), buffer->bytes(), 100u)); - write_header(base_ptr, hdr_size); - ARCTICDB_TRACE(log::storage(), "Header fits in internal buffer {:x} with {} bytes space: {}", uintptr_t (base_ptr), buffer->preamble_bytes() - total_hdr_size, dump_bytes(buffer->data(), buffer->bytes(), 100u)); - return std::make_pair(base_ptr, total_segment_size(hdr_size)); + + if (buffer_.is_owning_buffer() && buffer_.preamble_bytes() >= total_hdr_size) { + auto [dst, size] = serialize_v1_header_in_place(total_hdr_size); + return std::make_tuple(dst, size, std::unique_ptr()); + } else { + return serialize_v1_header_to_buffer(hdr_size); } - else { - tmp = std::make_shared(); - ARCTICDB_DEBUG(log::storage(), "Header doesn't fit in internal buffer, needed {} bytes but had {}, writing to temp buffer at {:x}", hdr_size, std::get>(buffer_)->preamble_bytes(), uintptr_t(tmp->data())); - tmp->ensure(total_segment_size(hdr_size)); - write_to(tmp->preamble(), hdr_size); - return std::make_pair(tmp->preamble(), total_segment_size(hdr_size)); +} + +std::tuple> Segment::serialize_header() { + if (header_.encoding_version() == EncodingVersion::V1) { + return serialize_header_v1(); + } else { + auto [dst, size] = serialize_header_v2(buffer_.preamble_bytes() - FIXED_HEADER_SIZE); + return std::make_tuple(dst, size, std::unique_ptr()); } } -void Segment::write_to(std::uint8_t* dst, std::size_t hdr_sz) { +[[nodiscard]] std::shared_ptr Segment::fields_ptr() const { + return desc_.fields_ptr(); +} + +[[nodiscard]] size_t Segment::fields_size() const { + return desc_.field_count(); +} + +[[nodiscard]] const Field& Segment::fields(size_t pos) const { + return desc_.fields(pos); +} + +const arcticdb::proto::encoding::SegmentHeader& Segment::generate_header_proto() { + if(!proto_) + proto_ = std::make_unique(generate_v1_header(header_, desc_)); + + return *proto_; +} + +void Segment::write_to(std::uint8_t* dst) { ARCTICDB_SAMPLE(SegmentWriteToStorage, RMTSF_Aggregate) ARCTICDB_SUBSAMPLE(SegmentWriteHeader, RMTSF_Aggregate) - write_header(dst, hdr_sz); - ARCTICDB_SUBSAMPLE(SegmentWriteBody, RMTSF_Aggregate) - ARCTICDB_DEBUG(log::codec(), "Writing {} bytes to body at offset {}", - buffer().bytes(), - arcticdb::Segment::FIXED_HEADER_SIZE + hdr_sz); - std::memcpy(dst + arcticdb::Segment::FIXED_HEADER_SIZE + hdr_sz, - buffer().data(), - buffer().bytes()); + size_t header_size; + if(header_.encoding_version() == EncodingVersion::V1) + header_size = write_proto_header(dst); + else + header_size = write_binary_header(dst); + + ARCTICDB_SUBSAMPLE(SegmentWriteBody, RMTSF_Aggregate) + ARCTICDB_DEBUG(log::codec(), "Writing {} bytes to body at offset {}", buffer().bytes(), FIXED_HEADER_SIZE + header_size); + std::memcpy(dst + FIXED_HEADER_SIZE + header_size, buffer().data(), buffer().bytes()); + ARCTICDB_DEBUG(log::codec(), "Wrote segment {} header {} body ({} bytes)", header_size + FIXED_HEADER_SIZE, buffer().bytes(), header_size + buffer().bytes() + FIXED_HEADER_SIZE); } } //namespace arcticdb diff --git a/cpp/arcticdb/codec/segment.hpp b/cpp/arcticdb/codec/segment.hpp index 2e8b344843..10174b826c 100644 --- a/cpp/arcticdb/codec/segment.hpp +++ b/cpp/arcticdb/codec/segment.hpp @@ -7,13 +7,14 @@ #pragma once -#include "util/buffer.hpp" #include -#include +#include 
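// Editor's sketch (not ArcticDB code): the on-the-wire layout Segment::write_to produces above,
// [fixed header][variable-length header][body bytes] packed back to back, using hypothetical
// stand-in types. The total size is the sum of the three parts, which is the same arithmetic
// calculate_size() performs on the real Segment.
#include <cstdint>
#include <cstring>
#include <vector>

struct FixedHdr { uint16_t magic; uint16_t version; uint32_t header_bytes; };

std::vector<uint8_t> write_segment(const std::vector<uint8_t>& var_header,
                                   const std::vector<uint8_t>& body,
                                   uint16_t version) {
    std::vector<uint8_t> out(sizeof(FixedHdr) + var_header.size() + body.size());
    const FixedHdr fixed{0xFA57, version, static_cast<uint32_t>(var_header.size())};

    uint8_t* dst = out.data();
    std::memcpy(dst, &fixed, sizeof(FixedHdr));              // fixed header first
    if (!var_header.empty())                                 // then the serialized header
        std::memcpy(dst + sizeof(FixedHdr), var_header.data(), var_header.size());
    if (!body.empty())                                       // then the compressed body
        std::memcpy(dst + sizeof(FixedHdr) + var_header.size(), body.data(), body.size());
    return out;
}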
+#include +#include +#include + #include #include -#include -#include #include #include @@ -22,33 +23,43 @@ namespace arcticdb { namespace segment_size { -std::tuple compressed(const arcticdb::proto::encoding::SegmentHeader& seg_hdr); -} -enum class EncodingVersion : uint16_t { - V1 = 0, - V2 = 1, - COUNT +struct SegmentCompressedSize { + size_t string_pool_size_ = 0U; + size_t total_buffer_size_ = 0U; + size_t body_size_ = 0U; }; -template>> -inline constexpr EncodingVersion to_encoding_version(T encoding_version) { - util::check(encoding_version >= 0 && encoding_version < uint16_t(EncodingVersion::COUNT), "Invalid encoding version"); - return static_cast(encoding_version); +SegmentCompressedSize compressed(const arcticdb::proto::encoding::SegmentHeader& seg_hdr); } +struct SegmentHeaderProtoWrapper { + arcticdb::proto::encoding::SegmentHeader* header_; + std::unique_ptr arena_; + + [[nodiscard]] const auto& proto() const { return *header_; } + + [[nodiscard]] auto& proto() { return *header_; } +}; + +SegmentHeaderProtoWrapper decode_protobuf_header(const uint8_t* data, size_t header_bytes_size); + +arcticdb::proto::encoding::SegmentHeader generate_v1_header(const SegmentHeader& header, const StreamDescriptor& desc); + static constexpr uint16_t HEADER_VERSION_V1 = 1; +static constexpr uint16_t HEADER_VERSION_V2 = 2; inline EncodingVersion encoding_version(const storage::LibraryDescriptor::VariantStoreConfig& cfg) { return util::variant_match(cfg, - [](const arcticdb::proto::storage::VersionStoreConfig &version_config) { - return EncodingVersion(version_config.encoding_version()); - }, - [](std::monostate) { - return EncodingVersion::V1; - } + [](const arcticdb::proto::storage::VersionStoreConfig &version_config) { + return EncodingVersion(version_config.encoding_version()); + }, + [](std::monostate) { + return EncodingVersion::V1; + } ); } +void set_body_fields(SegmentHeader& seg_hdr, const uint8_t* src); /* * Segment contains compressed data as returned from storage. 
When reading data the next step will usually be to @@ -58,98 +69,69 @@ inline EncodingVersion encoding_version(const storage::LibraryDescriptor::Varian */ class Segment { public: - constexpr static uint16_t MAGIC_NUMBER = 0xFA57; - - struct FixedHeader { - std::uint16_t magic_number; - std::uint16_t encoding_version; - std::uint32_t header_bytes; - - void write(std::uint8_t *dst) const { - ARCTICDB_DEBUG(log::codec(), "Writing header with size {}", header_bytes); - auto h = reinterpret_cast(dst); - *h = *this; - } - - void write(std::ostream &dst){ - dst.write(reinterpret_cast(this), sizeof(FixedHeader)); - } - }; - - constexpr static std::size_t FIXED_HEADER_SIZE = sizeof(FixedHeader); - - Segment() : - header_(google::protobuf::Arena::CreateMessage(arena_.get())) { + Segment() = default; + + Segment( + SegmentHeader&& header, + std::shared_ptr buffer, + std::shared_ptr data, + std::shared_ptr fields, + StreamId stream_id, + size_t size) : + header_(std::move(header)), + buffer_(std::move(buffer)), + desc_(std::move(data), std::move(fields), std::move(stream_id)), + size_(size) { } - Segment(std::unique_ptr&& arena, arcticdb::proto::encoding::SegmentHeader* header, std::shared_ptr buffer, std::shared_ptr fields) : - arena_(std::move(arena)), - header_(header), - buffer_(std::move(buffer)), - fields_(std::move(fields)){ + Segment( + SegmentHeader&& header, + BufferView buffer, + std::shared_ptr data, + std::shared_ptr fields, + StreamId stream_id, + size_t size) : + header_(std::move(header)), + buffer_(buffer), + desc_(std::move(data), std::move(fields), std::move(stream_id)), + size_(size) { } - Segment(std::unique_ptr&& arena, arcticdb::proto::encoding::SegmentHeader* header, BufferView &&buffer, std::shared_ptr fields) : - arena_(std::move(arena)), - header_(header), - buffer_(buffer), - fields_(std::move(fields)){} - - // for rvo only, go to solution should be to move - Segment(const Segment &that) : - header_(google::protobuf::Arena::CreateMessage(arena_.get())), - keepalive_(that.keepalive_) { - header_->CopyFrom(*that.header_); - auto b = std::make_shared(); - util::variant_match(that.buffer_, - [] (const std::monostate&) {/* Uninitialized buffer */}, - [&b](const BufferView& buf) { buf.copy_to(*b); }, - [&b](const std::shared_ptr& buf) { buf->copy_to(*b); } - ); - buffer_ = std::move(b); - if(that.fields_) - fields_ = std::make_shared(that.fields_->clone()); - } - - Segment &operator=(const Segment &that) { - if(this == &that) - return *this; - - header_->CopyFrom(*that.header_); - auto b = std::make_shared(); - util::variant_match(that.buffer_, - [] (const std::monostate&) {/* Uninitialized buffer */}, - [&b](const BufferView& buf) { buf.copy_to(*b); }, - [&b](const std::shared_ptr& buf) { buf->copy_to(*b); } - ); - buffer_ = std::move(b); - fields_ = that.fields_; - keepalive_ = that.keepalive_; - return *this; + Segment( + SegmentHeader&& header, + VariantBuffer &&buffer, + std::shared_ptr data, + std::shared_ptr fields, + StreamId stream_id, + size_t size) : + header_(std::move(header)), + buffer_(std::move(buffer)), + desc_(std::move(data), std::move(fields), std::move(stream_id)), + size_(size) { } Segment(Segment &&that) noexcept { using std::swap; swap(header_, that.header_); - swap(arena_, that.arena_); - swap(fields_, that.fields_); + swap(desc_, that.desc_); swap(keepalive_, that.keepalive_); - move_buffer(std::move(that)); + swap(size_, that.size_); + buffer_.move_buffer(std::move(that.buffer_)); } Segment &operator=(Segment &&that) noexcept { using std::swap; 
swap(header_, that.header_); - swap(arena_, that.arena_); - swap(fields_, that.fields_); + swap(desc_, that.desc_); swap(keepalive_, that.keepalive_); - move_buffer(std::move(that)); + swap(size_, that.size_); + buffer_.move_buffer(std::move(that.buffer_)); return *this; } ~Segment() = default; - static Segment from_buffer(std::shared_ptr&& buf); + static Segment from_buffer(const std::shared_ptr& buf); void set_buffer(VariantBuffer&& buffer) { buffer_ = std::move(buffer); @@ -157,122 +139,132 @@ class Segment { static Segment from_bytes(const std::uint8_t *src, std::size_t readable_size, bool copy_data = false); - void write_to(std::uint8_t *dst, std::size_t hdr_sz); + void write_to(std::uint8_t *dst); - std::pair try_internal_write(std::shared_ptr& tmp, size_t hdr_size); + std::tuple> serialize_header(); - void write_header(uint8_t* dst, size_t hdr_size) const; + size_t write_proto_header(uint8_t* dst); - [[nodiscard]] std::size_t total_segment_size() const { - return total_segment_size(segment_header_bytes_size()); + [[nodiscard]] std::size_t size() const { + util::check(size_.has_value(), "Segment size has not been set"); + return *size_; } - [[nodiscard]] std::size_t total_segment_size(std::size_t hdr_size) const { - auto total = FIXED_HEADER_SIZE + hdr_size + buffer_bytes(); - ARCTICDB_TRACE(log::storage(), "Total segment size {} + {} + {} = {}", FIXED_HEADER_SIZE, hdr_size, buffer_bytes(), total); - return total; - } + [[nodiscard]] std::size_t calculate_size() { + if(!size_.has_value()) + size_ = FIXED_HEADER_SIZE + segment_header_bytes_size() + buffer_bytes(); - [[nodiscard]] std::size_t segment_header_bytes_size() const { - return header_->ByteSizeLong(); + return *size_; } - [[nodiscard]] std::size_t buffer_bytes() const { - std::size_t s = 0; - util::variant_match(buffer_, - [] (const std::monostate&) { /* Uninitialized buffer */}, - [&s](const BufferView& b) { s = b.bytes(); }, - [&s](const std::shared_ptr& b) { s = b->bytes(); }); + const arcticdb::proto::encoding::SegmentHeader& generate_header_proto(); - return s; - } + [[nodiscard]] size_t proto_size() { + util::check(static_cast(proto_), "Proto has not been generated"); - arcticdb::proto::encoding::SegmentHeader &header() { - return *header_; + return proto_->ByteSizeLong(); } - [[nodiscard]] const arcticdb::proto::encoding::SegmentHeader &header() const { - return *header_; + [[nodiscard]] std::size_t segment_header_bytes_size() { + if(header_.encoding_version() == EncodingVersion::V1) { + generate_header_proto(); + return proto_size(); + } + else + return header_.bytes(); } - [[nodiscard]] BufferView buffer() const { - if (std::holds_alternative>(buffer_)) { - return std::get>(buffer_)->view(); - } else { - return std::get(buffer_); - } + [[nodiscard]] std::size_t buffer_bytes() const { + return buffer_.bytes(); } - [[nodiscard]] bool is_uninitialized() const { - return std::holds_alternative(buffer_); + SegmentHeader &header() { + return header_; } - [[nodiscard]] bool is_empty() const { - return is_uninitialized() || (buffer().bytes() == 0 && header_->ByteSizeLong() == 0); + [[nodiscard]] const SegmentHeader &header() const { + return header_; } - [[nodiscard]] bool is_owning_buffer() const { - return std::holds_alternative>(buffer_); + [[nodiscard]] BufferView buffer() const { + return buffer_.view(); } - [[nodiscard]] std::shared_ptr fields_ptr() const { - return fields_; + [[nodiscard]] bool is_empty() const { + return buffer_.is_uninitialized() || (buffer().bytes() == 0 && header_.empty()); } - [[nodiscard]] 
size_t fields_size() const { - return fields_->size(); + [[nodiscard]] std::shared_ptr fields_ptr() const; + + [[nodiscard]] size_t fields_size() const; + + [[nodiscard]] const Field& fields(size_t pos) const; + + void force_own_buffer() { + buffer_.force_own_buffer(); + keepalive_.reset(); } // For external language tools, not efficient [[nodiscard]] std::vector fields_vector() const { std::vector fields; - for(const auto& field : *fields_) + for(const auto& field : desc_.fields()) fields.push_back(field.name()); return fields; } - void force_own_buffer() { - if (!is_owning_buffer()) { - auto b = std::make_shared(); - std::get(buffer_).copy_to(*b); - buffer_ = std::move(b); - } - keepalive_.reset(); - } - void set_keepalive(std::any&& keepalive) { keepalive_ = std::move(keepalive); } - const std::any& keepalive() const { + [[nodiscard]] const std::any& keepalive() const { return keepalive_; } + [[nodiscard]] const StreamDescriptor& descriptor() const { + return desc_; + } + + Segment clone() const { + return Segment{header_.clone(), buffer_.clone(), desc_.clone(), size_}; + } + + static Segment initialize(SegmentHeader&& header, std::shared_ptr&& buffer, std::shared_ptr data, std::shared_ptr fields, StreamId stream_id) { + return {std::move(header), std::move(buffer), std::move(data), std::move(fields), std::move(stream_id)}; + } + private: - void move_buffer(Segment &&that) { - if(is_uninitialized() || that.is_uninitialized()) { - std::swap(buffer_, that.buffer_); - } else if (!(is_owning_buffer() ^ that.is_owning_buffer())) { - if (is_owning_buffer()) { - swap(*std::get>(buffer_), *std::get>(that.buffer_)); - } else { - swap(std::get(buffer_), std::get(that.buffer_)); - } - } else if (is_owning_buffer()) { - log::storage().info("Copying segment"); - // data of segment being moved is not owned, moving it is dangerous, copying instead - that.buffer().copy_to(*std::get>(buffer_)); - } else { - // data of this segment is a view, but the move data is moved - buffer_ = std::move(std::get>(that.buffer_)); - } + Segment( + SegmentHeader&& header, + std::shared_ptr buffer, + std::shared_ptr data, + std::shared_ptr fields, + StreamId stream_id) : + header_(std::move(header)), + buffer_(std::move(buffer)), + desc_(std::move(data), std::move(fields), std::move(stream_id)) { + } + + Segment(SegmentHeader&& header, VariantBuffer&& buffer, StreamDescriptor&& desc, const std::optional size) : + header_(std::move(header)), + buffer_(std::move(buffer)), + desc_(std::move(desc)), + size_(size) { } - std::unique_ptr arena_ = std::make_unique(); - arcticdb::proto::encoding::SegmentHeader* header_ = nullptr; + + std::tuple> serialize_v1_header_to_buffer(size_t total_hdr_size); + std::pair serialize_v1_header_in_place(size_t total_header_size); + std::tuple> serialize_header_v1(); + std::pair serialize_header_v2(size_t expected_bytes); + size_t write_binary_header(uint8_t* dst) const; + + SegmentHeader header_; VariantBuffer buffer_; - std::shared_ptr fields_; + StreamDescriptor desc_; std::any keepalive_; + std::unique_ptr proto_; + std::optional size_; }; } //namespace arcticdb diff --git a/cpp/arcticdb/codec/segment_header.cpp b/cpp/arcticdb/codec/segment_header.cpp new file mode 100644 index 0000000000..aff1a03100 --- /dev/null +++ b/cpp/arcticdb/codec/segment_header.cpp @@ -0,0 +1,108 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. 
+ * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#include +#include + +namespace arcticdb { + +size_t field_collection_encoded_field_bytes(const FieldCollection& fields) { + return calc_field_bytes(fields.num_blocks() == 0 ? 0 : fields.num_blocks() + 1); //Non-empty field collection always has shapes buffer +} + +size_t SegmentHeader::serialize_to_bytes(uint8_t* dst, std::optional expected_bytes) const { + const auto* begin = dst; + data_.field_buffer_.fields_bytes_ = static_cast(header_fields_.data_bytes()); + data_.field_buffer_.offset_bytes_ = static_cast(header_fields_.offset_bytes()); + memcpy(dst, &data_, sizeof(HeaderData)); + dst += sizeof(HeaderData); + ARCTICDB_TRACE(log::codec(), "Wrote header data in {} bytes", dst - begin); + header_fields_.write_data_to(dst); + memcpy(dst, header_fields_.offsets_buffer(), header_fields_.offset_bytes()); + dst += header_fields_.offset_bytes(); + memcpy(dst, &offset_, sizeof(offset_)); + ARCTICDB_TRACE(log::codec(), "Wrote header fields in {} bytes", dst - begin); + dst += sizeof(offset_); + ARCTICDB_TRACE(log::codec(), "Wrote offsets in {} bytes", dst - begin); + size_t bytes_written = dst - begin; + util::check(!expected_bytes || bytes_written == *expected_bytes, "Mismatch between actual and expected bytes: {} != {}", dst - begin, *expected_bytes); + ARCTICDB_TRACE(log::codec(), "Wrote V2 header with {} bytes ({} expected)", bytes_written, expected_bytes.value_or(0)); + return bytes_written; +} + +size_t calc_required_header_fields_bytes(const SegmentInMemory& in_mem_seg) { + size_t required = 0UL; + if(in_mem_seg.has_index_descriptor()) { + const auto index_descriptor_size = field_collection_encoded_field_bytes(in_mem_seg.index_descriptor().fields()) + sizeof(uint64_t); + required += index_descriptor_size; + ARCTICDB_TRACE(log::codec(), "Index descriptor size {}", index_descriptor_size); + } + + if(in_mem_seg.has_string_pool()) { + const auto string_pool_size = calc_field_bytes(in_mem_seg.const_string_pool().num_blocks() + 1) + sizeof(uint64_t); //String pool has a custom shapes buffer + required += string_pool_size; + ARCTICDB_TRACE(log::codec(), "String pool size {}", string_pool_size); + } + + if(!in_mem_seg.descriptor().empty()) { + const auto descriptor_size = field_collection_encoded_field_bytes(in_mem_seg.descriptor().fields()) + sizeof(uint64_t); + required += descriptor_size; + ARCTICDB_TRACE(log::codec(), "Descriptor size {}", descriptor_size); + } + + // Metadata and column fields are allocated in one contiguous buffer with dimension 1 + if(in_mem_seg.metadata()) { + const auto metadata_size = calc_field_bytes(2) + sizeof(uint64_t); + required += metadata_size; + ARCTICDB_TRACE(log::codec(), "Metadata size {}", metadata_size); + } + + if(in_mem_seg.row_count() > 0) { + const auto column_fields_size = calc_field_bytes(1) + sizeof(uint64_t); + ARCTICDB_TRACE(log::codec(), "Column fields size {}", column_fields_size); + required += column_fields_size; + } + + ARCTICDB_TRACE(log::codec(), "Required header bytes: {}", required); + return required; +} + +void SegmentHeader::deserialize_from_bytes(const uint8_t* data, bool copy_data) { + memcpy(&data_, data, sizeof(HeaderData)); + data += sizeof(HeaderData); + ChunkedBuffer fields_buffer; + const auto fields_bytes = data_.field_buffer_.fields_bytes_; + + if(copy_data) { + fields_buffer.ensure(fields_bytes); + memcpy(fields_buffer.data(), data, 
fields_bytes); + } else { + fields_buffer.add_external_block(data, fields_bytes, 0UL); + } + + data += data_.field_buffer_.fields_bytes_; + Buffer offsets_buffer{data_.field_buffer_.offset_bytes_}; + memcpy(offsets_buffer.data(), data, data_.field_buffer_.offset_bytes_); + data += offsets_buffer.bytes(); + header_fields_ = EncodedFieldCollection{std::move(fields_buffer), std::move(offsets_buffer)}; + auto* offsets = reinterpret_cast(data); + for(auto i = 0UL; i < offset_.size(); ++i) + offset_[i] = *offsets++; +} + +size_t SegmentHeader::required_bytes(const SegmentInMemory& in_mem_seg) { + size_t required = 0UL; + required += FIXED_HEADER_SIZE; + required += sizeof(HeaderData); + required += sizeof(offset_); + ARCTICDB_TRACE(log::codec(), "Overhead size {} + {} + {} = {}", FIXED_HEADER_SIZE, sizeof(HeaderData), sizeof(offset_), required); + + required += calc_required_header_fields_bytes(in_mem_seg); + ARCTICDB_TRACE(log::codec(), "Total calculated header size: {}", required); + return required; +} + +} //namespace arcticdb diff --git a/cpp/arcticdb/codec/segment_header.hpp b/cpp/arcticdb/codec/segment_header.hpp new file mode 100644 index 0000000000..09a8d9d92f --- /dev/null +++ b/cpp/arcticdb/codec/segment_header.hpp @@ -0,0 +1,286 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#pragma once + +#include +#include +#include + +namespace arcticdb { + +class SegmentInMemory; + +static constexpr std::array offset_names_ = { + "METADATA", + "STRING_POOL", + "DESCRIPTOR", + "INDEX", + "COLUMN" +}; + +inline void write_fixed_header(std::uint8_t *dst, const FixedHeader& hdr) { + ARCTICDB_DEBUG(log::codec(), "Writing header with size {}", hdr.header_bytes); + auto h = reinterpret_cast(dst); + *h = hdr; +} + +class SegmentHeader { + HeaderData data_; + EncodedFieldCollection header_fields_; + EncodedFieldCollection body_fields_; + std::array offset_ = {}; + size_t field_count_ = 0U; + +public: + explicit SegmentHeader(EncodingVersion encoding_version) { + data_.encoding_version_ = encoding_version; + } + + ARCTICDB_MOVE_ONLY_DEFAULT(SegmentHeader) + + SegmentHeader() = default; + + SegmentHeader clone() const { + SegmentHeader output(data_.encoding_version_); + output.data_ = data_; + output.header_fields_ = header_fields_.clone(); + output.body_fields_ = body_fields_.clone(); + output.offset_ = offset_; + return output; + } + + [[nodiscard]] bool empty() const { + return header_fields_.empty(); + } + + static constexpr uint8_t flag_mask(HeaderFlag flag) { + return 1 << static_cast(flag); + } + + void set_offset(FieldOffset field, uint32_t offset) { + util::check(offset < 5, "Out of bounds offset {}", offset); + offset_[as_pos(field)] = offset; + } + + template + void set_flag(bool value) { + constexpr auto mask = flag_mask(flag); + if(value) + data_.flags_ |= mask; + else + data_.flags_ &= ~mask; + } + + template + [[nodiscard]] bool get_flag() const { + return data_.flags_ & flag_mask(flag); + } + + [[nodiscard]] bool compacted() const { + return get_flag(); + } + + void set_compacted(bool value) { + set_flag(value); + } + + [[nodiscard]] size_t bytes() const { + const auto total_bytes = sizeof(HeaderData) + header_fields_.data_bytes() + header_fields_.offset_bytes() + sizeof(offset_); + 
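        // Layout sketch (inferred from serialize_to_bytes()/deserialize_from_bytes()
        // in segment_header.cpp) that this byte count has to stay in step with:
        //
        //   [ HeaderData ][ header_fields_ data ][ header_fields_ offsets ][ offset_ array ]
        //
        // If the serialization order above changes, this calculation must change with it,
        // otherwise the space reserved for the header and the bytes actually written diverge.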
ARCTICDB_DEBUG(log::codec(), "Header bytes HeaderData {} + offset {} + Header field bytes {} + header field offset {} = {}", sizeof(HeaderData), sizeof(offset_), header_fields_.data_bytes(), header_fields_.offset_bytes(), total_bytes); + return total_bytes; + } + + [[nodiscard]] static constexpr size_t as_pos(FieldOffset field_offset) { + return static_cast(field_offset); + } + + int32_t get_pos(FieldOffset field_offset) const { + return header_fields_.get_offset(get_offset(field_offset)); + } + + [[nodiscard]] int32_t get_offset(FieldOffset field_offset) const { + return offset_[as_pos(field_offset)]; + } + + [[nodiscard]] static constexpr std::string_view offset_name(FieldOffset field_offset) { + return offset_names_[as_pos(field_offset)]; + } + + [[nodiscard]] bool has_metadata_field() const { + return has_field(FieldOffset::METADATA); + } + + [[nodiscard]] bool has_string_pool_field() const { + return has_field(FieldOffset::STRING_POOL); + } + + [[nodiscard]] bool has_descriptor_field() const { + return has_field(FieldOffset::DESCRIPTOR); + } + + [[nodiscard]] bool has_index_descriptor_field() const { + return has_field(FieldOffset::INDEX); + } + + [[nodiscard]] bool has_column_fields() const { + return has_field(FieldOffset::COLUMN); + } + + template + [[nodiscard]] const EncodedFieldImpl& get_field() const { + util::check(has_field(field_offset), "Field {} has not been set", offset_name(field_offset)); + return header_fields_.at(get_offset(field_offset)); + } + + [[nodiscard]] const EncodedFieldImpl& metadata_field() const { + return get_field(); + } + + [[nodiscard]] const EncodedFieldImpl& string_pool_field() const { + return get_field(); + } + [[nodiscard]] const EncodedFieldImpl& descriptor_field() const { + return get_field(); + } + + [[nodiscard]] const EncodedFieldImpl& index_descriptor_field() const { + return get_field(); + } + + [[nodiscard]] const EncodedFieldImpl& column_fields() const { + return get_field(); + } + + void validate() const { + for(auto i = 0U; i < static_cast(FieldOffset::COUNT); ++i) { + auto offset = FieldOffset(i); + if(has_field(offset)) + header_fields_.at(get_offset(offset)).validate(); + } + } + + template + EncodedFieldImpl& create_field(size_t num_blocks) { + ARCTICDB_TRACE(log::codec(), "Header adding field {} with {} blocks ({} bytes)", offset_names_[as_pos(field_offset)], num_blocks, calc_field_bytes(num_blocks)); + auto new_field = header_fields_.add_field(num_blocks); + set_offset(field_offset, field_count_++); + set_field(field_offset); + + ARCTICDB_TRACE(log::codec(), "Header size {} ({} offsets)", header_fields_.data_bytes(), header_fields_.offset_bytes()); + return *new_field; + } + + template + [[nodiscard]] EncodedFieldImpl& get_mutable_field(size_t num_blocks) { + if(has_field(field_offset)) { + return header_fields_.at(get_offset(field_offset)); + } else { + return create_field(num_blocks); + } + } + + [[nodiscard]] EncodedFieldImpl& mutable_metadata_field(size_t num_blocks) { + return get_mutable_field(num_blocks); + } + + [[nodiscard]] EncodedFieldImpl& mutable_string_pool_field(size_t num_blocks) { + return get_mutable_field(num_blocks); + } + + [[nodiscard]] EncodedFieldImpl& mutable_descriptor_field(size_t num_blocks) { + return get_mutable_field(num_blocks); + } + + [[nodiscard]] EncodedFieldImpl& mutable_index_descriptor_field(size_t num_blocks) { + return get_mutable_field(num_blocks); + } + + [[nodiscard]] EncodedFieldImpl& mutable_column_fields(size_t num_blocks) { + return get_mutable_field(num_blocks); + } + + size_t 
required_bytes(const SegmentInMemory& in_mem_seg); + + [[nodiscard]] EncodingVersion encoding_version() const { + return data_.encoding_version_; + } + + void set_encoding_version(EncodingVersion encoding_version) { + data_.encoding_version_ = encoding_version; + } + + void set_footer_offset(uint64_t offset) { + ARCTICDB_TRACE(log::codec(), "Setting footer offset at {}", offset); + data_.footer_offset_ = offset; + } + + [[nodiscard]] uint64_t footer_offset() const { + return data_.footer_offset_; + } + + size_t serialize_to_bytes(uint8_t* dst, std::optional expected_bytes = std::nullopt) const; + + static constexpr uint16_t field_mask(FieldOffset field_offset) { + return 1U << static_cast(field_offset); + } + + void set_field(FieldOffset field_offset) { + data_.fields_ |= field_mask(field_offset); + } + + [[nodiscard]] bool has_field(FieldOffset field_offset) const { + return data_.fields_ & field_mask(field_offset); + } + + void deserialize_from_bytes(const uint8_t* data, bool copy_data); + + [[nodiscard]] const EncodedFieldCollection& body_fields() const { + return body_fields_; + } + + [[nodiscard]] const EncodedFieldCollection& header_fields() const { + return header_fields_; + } + + void set_body_fields(EncodedFieldCollection&& body_fields) { + body_fields_ = std::move(body_fields); + body_fields_.regenerate_offsets(); + } +}; + +} //namespace arcticdb + +namespace fmt { +template<> +struct formatter { + template + constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } + + template + auto format(const arcticdb::SegmentHeader &header, FormatContext &ctx) const { + fmt::format_to(ctx.out(), fmt::runtime("Segment header: encoding {}: {} bytes \n"), header.encoding_version(), header.bytes()); + using namespace arcticdb; + + if(header.has_metadata_field()) + fmt::format_to(ctx.out(), "{}: Metadata: {}\n", header.get_pos(FieldOffset::METADATA), header.metadata_field()); + + if(header.has_descriptor_field()) + fmt::format_to(ctx.out(), "{}: Descriptor: {}\n", header.get_pos(FieldOffset::DESCRIPTOR), header.descriptor_field()); + + if(header.has_index_descriptor_field()) + fmt::format_to(ctx.out(), "{}: Index: {}\n", header.get_pos(FieldOffset::INDEX), header.index_descriptor_field()); + + if(header.has_string_pool_field()) + fmt::format_to(ctx.out(), "{}: String pool: {}\n", header.get_pos(FieldOffset::STRING_POOL), header.string_pool_field()); + + if(header.has_column_fields()) + fmt::format_to(ctx.out(), "{}: Columns: {}\n", header.get_pos(FieldOffset::COLUMN), header.column_fields()); + + return fmt::format_to(ctx.out(), "{} bytes \n", header.header_fields().data_bytes()); + } +}; +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/segment_identifier.hpp b/cpp/arcticdb/codec/segment_identifier.hpp new file mode 100644 index 0000000000..bcacffee84 --- /dev/null +++ b/cpp/arcticdb/codec/segment_identifier.hpp @@ -0,0 +1,73 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ +#pragma once + +#include "arcticdb/storage/memory_layout.hpp" +#include + +namespace arcticdb { + +static constexpr size_t SegmentIdentifierSize = sizeof(SegmentIdentifierHeader); + +struct SegmentIdentifier { + SegmentIdentifierHeader header_; + std::array data_; +}; + +[[nodiscard]] inline size_t identifier_bytes(const StreamId& stream_id) { + return util::variant_match(stream_id, + [] (const NumericId&) { return SegmentIdentifierSize; }, + [] (const StringId& str_id) { return SegmentIdentifierSize + str_id.size(); }); +} + +inline void write_identifier(Buffer& buffer, std::ptrdiff_t& pos, const StreamId& stream_id) { + auto data = new (buffer.data() + pos) SegmentDescriptorImpl{}; + util::variant_match(stream_id, + [data, &pos] (const NumericId& num_id) { + SegmentIdentifierHeader header{IdentifierType::NUMERIC, static_cast(num_id)}; + *reinterpret_cast(data) = header; + pos += SegmentIdentifierSize; + }, + [data, &pos] (const StringId& str_id) { + auto* identifier_impl = reinterpret_cast(data); + identifier_impl->header_.type_ = IdentifierType::STRING; + identifier_impl->header_.size_ = static_cast(str_id.size()); + memcpy(&identifier_impl->data_[0], str_id.data(), str_id.size()); + pos += SegmentIdentifierSize + str_id.size(); + }); +} + +inline StreamId read_identifier(const uint8_t*& data) { + auto* identifier = reinterpret_cast(data); + + switch(identifier->header_.type_) { + case IdentifierType::STRING: + data += SegmentIdentifierSize + identifier->header_.size_; + return StringId(&identifier->data_[0], identifier->header_.size_); + case IdentifierType::NUMERIC: + data += SegmentIdentifierSize; + return NumericId(identifier->header_.size_); + default: + util::raise_rte("Unknown identifier type in read_identifier"); + } +} + +inline void skip_identifier(const uint8_t*& data) { + auto* identifier = reinterpret_cast(data); + switch(identifier->header_.type_) { + case IdentifierType::STRING: + data += SegmentIdentifierSize + identifier->header_.size_; + break; + case IdentifierType::NUMERIC: + data += SegmentIdentifierSize; + break; + default: + util::raise_rte("Unknown identifier type in skip_identifier"); + } +} + +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_codec.cpp b/cpp/arcticdb/codec/test/test_codec.cpp index 5cd20bddd2..51b39a25be 100644 --- a/cpp/arcticdb/codec/test/test_codec.cpp +++ b/cpp/arcticdb/codec/test/test_codec.cpp @@ -26,7 +26,7 @@ namespace arcticdb { static void encode( const arcticdb::proto::encoding::VariantCodec &codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& variant_field, Buffer& out, std::ptrdiff_t& pos); }; @@ -36,7 +36,7 @@ namespace arcticdb { static void encode( const arcticdb::proto::encoding::VariantCodec &codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& variant_field, Buffer& out, std::ptrdiff_t& pos); static std::pair max_compressed_size( @@ -45,14 +45,14 @@ namespace arcticdb { private: static void encode_shapes( const ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& variant_field, Buffer& out, std::ptrdiff_t& pos_in_buffer); static void encode_blocks( const arcticdb::proto::encoding::VariantCodec &codec_opts, ColumnData& column_data, - std::variant variant_field, + EncodedFieldImpl& variant_field, Buffer& out, std::ptrdiff_t& pos); }; @@ -62,7 +62,7 @@ namespace arcticdb { using namespace arcticdb; -using EncoginVersions = ::testing::Types< +using EncodingVersions = ::testing::Types< 
std::integral_constant, std::integral_constant>; @@ -80,7 +80,7 @@ class FieldEncoderTestDim0Base : public testing::Test { template class FieldEncoderTestDim0 : public FieldEncoderTestDim0Base{}; -using EncodedFieldsType = ::testing::Types; +using EncodedFieldsType = ::testing::Types; TYPED_TEST_SUITE(FieldEncoderTestDim0, EncodedFieldsType); TYPED_TEST(FieldEncoderTestDim0, Passthrough_v1) { @@ -133,10 +133,8 @@ class FieldEncoderTestFromColumnDim0 : public FieldEncoderTestDim0Base{}; /// @brief Cartesian product between the type of the encoded field and the encoding version. /// (EncodedField, arcticdb::proto::encoding::EncodedField) x (EncodingVersion::V1, EncodingVersion::V2) using FieldVersionT = ::testing::Types< - std::pair, - std::pair, - std::pair, - std::pair>; + std::pair, + std::pair>; TYPED_TEST_SUITE(FieldEncoderTestFromColumnDim0, FieldVersionT); TYPED_TEST(FieldEncoderTestFromColumnDim0, Passthrough) { @@ -156,7 +154,7 @@ TYPED_TEST(FieldEncoderTestFromColumnDim0, Passthrough) { column_data); Buffer out(max_compressed_size); column_data.reset(); - ColumnEncoder::encode(TestFixture::passthorugh_encoding_options, column_data, &field, out, pos); + ColumnEncoder::encode(TestFixture::passthorugh_encoding_options, column_data, field, out, pos); auto& nd = field.ndarray(); ASSERT_EQ(nd.items_count(), TestFixture::values.size()); ASSERT_EQ(nd.shapes_size(), 0); @@ -180,34 +178,6 @@ class FieldEncoderTestDim1 : public testing::Test { arcticdb::proto::encoding::VariantCodec passthorugh_encoding_options; }; -TEST_F(FieldEncoderTestDim1, PassthroughV1ProtoField) { - using Encoder = TypedBlockEncoderImpl; - const TypedBlockData block( - values.data(), - shapes.data(), - values_byte_size, - shapes.size(), - nullptr); - arcticdb::proto::encoding::EncodedField field; - Buffer out(Encoder::max_compressed_size(passthorugh_encoding_options, block)); - std::ptrdiff_t pos = 0; - Encoder::encode(passthorugh_encoding_options, block, field, out, pos); - - const auto& nd = field.ndarray(); - ASSERT_EQ(nd.items_count(), shapes.size()); - - const auto& shapes = nd.shapes(); - ASSERT_EQ(shapes[0].in_bytes(), shapes_byte_size); - ASSERT_EQ(shapes[0].out_bytes(), shapes_byte_size); - ASSERT_NE(0, shapes[0].hash()); - - const auto& vals = nd.values(); - ASSERT_EQ(vals[0].in_bytes(), values_expected_bytes); - ASSERT_EQ(vals[0].out_bytes(), values_expected_bytes); - ASSERT_NE(0, vals[0].hash()); - ASSERT_EQ(pos, values_expected_bytes + shapes_byte_size); -} - TEST_F(FieldEncoderTestDim1, PassthroughV1NativeField) { using Encoder = TypedBlockEncoderImpl; const TypedBlockData block( @@ -216,10 +186,11 @@ TEST_F(FieldEncoderTestDim1, PassthroughV1NativeField) { values_byte_size, shapes.size(), nullptr); + // one block for shapes and one for values - constexpr size_t encoded_field_size = EncodedField::Size + 2 * sizeof(EncodedBlock); + constexpr size_t encoded_field_size = EncodedFieldImpl::Size + 2 * sizeof(EncodedBlock); std::array encoded_field_memory; - EncodedField* field = new(encoded_field_memory.data()) EncodedField; + EncodedFieldImpl* field = new(encoded_field_memory.data()) EncodedFieldImpl; Buffer out(Encoder::max_compressed_size(passthorugh_encoding_options, block)); std::ptrdiff_t pos = 0; @@ -240,47 +211,6 @@ TEST_F(FieldEncoderTestDim1, PassthroughV1NativeField) { ASSERT_EQ(pos, values_expected_bytes + shapes_byte_size); } -TEST_F(FieldEncoderTestDim1, PassthroughV2ProtoField) { - using Encoder = TypedBlockEncoderImpl; - using ShapesEncoder = TypedBlockEncoderImpl; - const TypedBlockData 
values_block( - values.data(), - shapes.data(), - values_byte_size, - shapes.size(), - nullptr); - const TypedBlockData shapes_block( - shapes.data(), - nullptr, - shapes_byte_size, - 0, - nullptr); - const size_t values_max_compressed_size = Encoder::max_compressed_size(passthorugh_encoding_options, - values_block); - const size_t shapes_max_compressed_size = ShapesEncoder::max_compressed_size(passthorugh_encoding_options, - shapes_block); - const size_t total_max_compressed_size = values_max_compressed_size + shapes_max_compressed_size; - arcticdb::proto::encoding::EncodedField field; - Buffer out(total_max_compressed_size); - std::ptrdiff_t pos = 0; - ShapesEncoder::encode_shapes(passthorugh_encoding_options, shapes_block, field, out, pos); - Encoder::encode_values(passthorugh_encoding_options, values_block, field, out, pos); - - const auto& nd = field.ndarray(); - ASSERT_EQ(nd.items_count(), shapes.size()); - - const auto& shapes = nd.shapes(); - ASSERT_EQ(shapes[0].in_bytes(), shapes_byte_size); - ASSERT_EQ(shapes[0].out_bytes(), shapes_byte_size); - ASSERT_NE(0, shapes[0].hash()); - - const auto& vals = nd.values(); - ASSERT_EQ(vals[0].in_bytes(), values_expected_bytes); - ASSERT_EQ(vals[0].out_bytes(), values_expected_bytes); - ASSERT_NE(0, vals[0].hash()); - ASSERT_EQ(pos, values_expected_bytes + shapes_byte_size); -} - TEST_F(FieldEncoderTestDim1, PassthroughV2NativeField) { using Encoder = TypedBlockEncoderImpl; using ShapesEncoder = TypedBlockEncoderImpl; @@ -290,21 +220,21 @@ TEST_F(FieldEncoderTestDim1, PassthroughV2NativeField) { values_byte_size, shapes.size(), nullptr); + const TypedBlockData shapes_block( shapes.data(), nullptr, shapes_byte_size, 0, nullptr); - const size_t values_max_compressed_size = Encoder::max_compressed_size(passthorugh_encoding_options, - values_block); - const size_t shapes_max_compressed_size = ShapesEncoder::max_compressed_size(passthorugh_encoding_options, - shapes_block); + + const size_t values_max_compressed_size = Encoder::max_compressed_size(passthorugh_encoding_options, values_block); + const size_t shapes_max_compressed_size = ShapesEncoder::max_compressed_size(passthorugh_encoding_options, shapes_block); const size_t total_max_compressed_size = values_max_compressed_size + shapes_max_compressed_size; // one block for shapes and one for values - constexpr size_t encoded_field_size = EncodedField::Size + 2 * sizeof(EncodedBlock); + constexpr size_t encoded_field_size = EncodedFieldImpl::Size + 2 * sizeof(EncodedBlock); std::array encoded_field_memory; - EncodedField* field = new(encoded_field_memory.data()) EncodedField; + EncodedFieldImpl* field = new(encoded_field_memory.data()) EncodedFieldImpl; Buffer out(total_max_compressed_size); std::ptrdiff_t pos = 0; ShapesEncoder::encode_shapes(passthorugh_encoding_options, shapes_block, *field, out, pos); @@ -341,14 +271,13 @@ class TestMultiblockData_Dim1 : public testing::Test { 0); memcpy(shapes_buffer.data(), shapes_data.data(), shapes_data_byte_size); } + using ValuesTypeDescriptorTag = TypeDescriptorTag, DimensionTag>; static constexpr TypeDescriptor type_descriptor = static_cast(ValuesTypeDescriptorTag()); static constexpr std::array first_block_data = {1, 2, 3, 4, 5, 6, 7, 8}; - static constexpr size_t first_block_data_byte_size = - sizeof(decltype(first_block_data)::value_type) * first_block_data.size(); + static constexpr size_t first_block_data_byte_size = sizeof(decltype(first_block_data)::value_type) * first_block_data.size(); static constexpr std::array second_block_data = {9, 
10}; - static constexpr size_t second_block_data_byte_size = - sizeof(decltype(second_block_data)::value_type) * second_block_data.size(); + static constexpr size_t second_block_data_byte_size = sizeof(decltype(second_block_data)::value_type) * second_block_data.size(); static constexpr std::array shapes_data = {first_block_data.size(), second_block_data.size()}; static constexpr size_t shapes_data_byte_size = sizeof(decltype(shapes_data)::value_type) * shapes_data.size(); arcticdb::proto::encoding::VariantCodec passthorugh_encoding_options; @@ -356,31 +285,17 @@ class TestMultiblockData_Dim1 : public testing::Test { Buffer shapes_buffer; }; -TEST_F(TestMultiblockData_Dim1, EncodingVersion_1) { - arcticdb::proto::encoding::EncodedField encoded_field; - ColumnData column_data(&data_buffer, &shapes_buffer, type_descriptor, nullptr); - const auto [_, max_compressed_size] = ColumnEncoderV1::max_compressed_size(passthorugh_encoding_options, column_data); - Buffer out(max_compressed_size); - ptrdiff_t out_pos = 0; - column_data.reset(); - ColumnEncoderV1::encode(passthorugh_encoding_options, column_data, &encoded_field, out, out_pos); - const auto ndarray = encoded_field.ndarray(); - ASSERT_EQ(ndarray.shapes_size(), 2); - ASSERT_EQ(ndarray.values_size(), 2); - ASSERT_EQ(ndarray.items_count(), shapes_data.size()); -} - TEST_F(TestMultiblockData_Dim1, EncodingVersion_2) { - constexpr size_t encoded_field_size = EncodedField::Size + 3 * sizeof(EncodedBlock); + constexpr size_t encoded_field_size = EncodedFieldImpl::Size + 3 * sizeof(EncodedBlock); std::array encoded_field_owner; - EncodedField* encoded_field = new(encoded_field_owner.data()) EncodedField; + EncodedFieldImpl* encoded_field = new(encoded_field_owner.data()) EncodedFieldImpl; ColumnData column_data(&data_buffer, &shapes_buffer, type_descriptor, nullptr); const auto [_, max_compressed_size] = ColumnEncoderV2::max_compressed_size(passthorugh_encoding_options, column_data); Buffer out(max_compressed_size); ptrdiff_t out_pos = 0; column_data.reset(); - ColumnEncoderV2::encode(passthorugh_encoding_options, column_data, encoded_field, out, out_pos); - const auto ndarray = encoded_field->ndarray(); + ColumnEncoderV2::encode(passthorugh_encoding_options, column_data, *encoded_field, out, out_pos); + const auto& ndarray = encoded_field->ndarray(); ASSERT_EQ(ndarray.shapes_size(), 1); ASSERT_EQ(ndarray.values_size(), 2); ASSERT_EQ(ndarray.items_count(), shapes_data.size()); @@ -389,7 +304,7 @@ TEST_F(TestMultiblockData_Dim1, EncodingVersion_2) { template class SegmentStringEncodingTest : public testing::Test{}; -TYPED_TEST_SUITE(SegmentStringEncodingTest, EncoginVersions); +TYPED_TEST_SUITE(SegmentStringEncodingTest, EncodingVersions); TYPED_TEST(SegmentStringEncodingTest, EncodeSingleString) { const auto tsd = create_tsd, Dimension::Dim0>("thing", 1); @@ -485,15 +400,114 @@ bool TransactionalThing::destroyed = false; TEST(Segment, KeepAlive) { { + auto buf = std::make_shared(); Segment segment; + segment.set_buffer(std::move(buf)); segment.set_keepalive(std::any(TransactionalThing{})); auto seg1 = std::move(segment); Segment seg2{std::move(seg1)}; - auto seg3 = seg2; - Segment seg4{seg3}; + auto seg3 = seg2.clone(); + Segment seg4{seg3.clone()}; - std::any_cast(seg4.keepalive()).magic_.check(); + std::any_cast(seg2.keepalive()).magic_.check(); } ASSERT_EQ(TransactionalThing::destroyed, true); } + +TEST(Segment, RoundtripTimeseriesDescriptorV1) { + const auto stream_desc = stream_descriptor(StreamId{"thing"}, RowCountIndex{}, 
{scalar_field(DataType::UINT8, "ints")}); + SegmentInMemory in_mem_seg{stream_desc.clone()}; + in_mem_seg.set_scalar(0, 23); + in_mem_seg.end_row(); + TimeseriesDescriptor tsd; + tsd.set_total_rows(23); + tsd.set_stream_descriptor(stream_desc); + in_mem_seg.set_timeseries_descriptor(tsd); + auto copy = in_mem_seg.clone(); + auto seg = encode_v1(std::move(in_mem_seg), codec::default_lz4_codec()); + SegmentInMemory decoded{stream_desc.clone()}; + decode_v1(seg, seg.header(), decoded, seg.descriptor()); + ASSERT_EQ(decoded.index_descriptor().total_rows(), 23); + ASSERT_EQ(decoded, copy); +} + +TEST(Segment, RoundtripTimeseriesDescriptorWriteToBufferV1) { + const auto stream_desc = stream_descriptor(StreamId{"thing"}, RowCountIndex{}, {scalar_field(DataType::UINT8, "ints")}); + SegmentInMemory in_mem_seg{stream_desc.clone()}; + in_mem_seg.set_scalar(0, 23); + in_mem_seg.end_row(); + TimeseriesDescriptor tsd; + tsd.set_total_rows(23); + tsd.set_stream_descriptor(stream_desc); + in_mem_seg.set_timeseries_descriptor(tsd); + auto copy = in_mem_seg.clone(); + auto seg = encode_v1(std::move(in_mem_seg), codec::default_lz4_codec()); + std::vector vec; + const auto bytes = seg.calculate_size(); + vec.resize(bytes); + seg.write_to(vec.data()); + auto unserialized = Segment::from_bytes(vec.data(), bytes); + SegmentInMemory decoded{stream_desc.clone()}; + decode_v1(unserialized, unserialized.header(), decoded, unserialized.descriptor()); + ASSERT_EQ(decoded.index_descriptor().total_rows(), 23); + ASSERT_EQ(decoded, copy); +} + +TEST(Segment, RoundtripStringsWriteToBufferV1) { + const auto stream_desc = stream_descriptor(StreamId{"thing"}, RowCountIndex{}, {scalar_field(DataType::UTF_DYNAMIC64, "ints")}); + SegmentInMemory in_mem_seg{stream_desc.clone()}; + in_mem_seg.set_string(0, "kismet"); + in_mem_seg.end_row(); + auto copy = in_mem_seg.clone(); + auto seg = encode_v1(std::move(in_mem_seg), codec::default_lz4_codec()); + std::vector vec; + const auto bytes = seg.calculate_size(); + vec.resize(bytes); + seg.write_to(vec.data()); + auto unserialized = Segment::from_bytes(vec.data(), bytes); + SegmentInMemory decoded{stream_desc.clone()}; + decode_v1(unserialized, unserialized.header(), decoded, unserialized.descriptor()); + ASSERT_EQ(decoded.string_at(0, 0).value(), "kismet"); + ASSERT_EQ(decoded, copy); +} + +TEST(Segment, RoundtripTimeseriesDescriptorV2) { + const auto stream_desc = stream_descriptor(StreamId{"thing"}, RowCountIndex{}, {scalar_field(DataType::UINT8, "ints")}); + SegmentInMemory in_mem_seg{stream_desc.clone()}; + in_mem_seg.set_scalar(0, 23); + in_mem_seg.end_row(); + TimeseriesDescriptor tsd; + tsd.set_total_rows(23); + tsd.set_stream_descriptor(stream_desc); + in_mem_seg.set_timeseries_descriptor(tsd); + auto copy = in_mem_seg.clone(); + auto seg = encode_v2(std::move(in_mem_seg), codec::default_lz4_codec()); + SegmentInMemory decoded{stream_desc.clone()}; + decode_v2(seg, seg.header(), decoded, seg.descriptor()); + ASSERT_EQ(decoded.index_descriptor().total_rows(), 23); + ASSERT_EQ(decoded, copy); +} + +TEST(Segment, RoundtripTimeseriesDescriptorWriteToBufferV2) { + const auto stream_desc = stream_descriptor(StreamId{"thing"}, RowCountIndex{}, {scalar_field(DataType::UINT8, "ints")}); + SegmentInMemory in_mem_seg{stream_desc.clone()}; + in_mem_seg.set_scalar(0, 23); + in_mem_seg.end_row(); + TimeseriesDescriptor tsd; + tsd.set_total_rows(23); + tsd.set_stream_descriptor(stream_desc); + in_mem_seg.set_timeseries_descriptor(tsd); + auto copy = in_mem_seg.clone(); + auto seg = 
encode_v2(std::move(in_mem_seg), codec::default_lz4_codec()); + std::vector vec; + const auto bytes = seg.calculate_size(); + ARCTICDB_DEBUG(log::codec(), "## Resizing buffer to {} bytes", bytes); + vec.resize(bytes); + seg.write_to(vec.data()); + auto unserialized = Segment::from_bytes(vec.data(), bytes); + SegmentInMemory decoded{stream_desc.clone()}; + decode_v2(unserialized, unserialized.header(), decoded, unserialized.descriptor()); + ASSERT_EQ(decoded.index_descriptor().total_rows(), 23); + ASSERT_EQ(decoded, copy); +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_encode_field_collection.cpp b/cpp/arcticdb/codec/test/test_encode_field_collection.cpp new file mode 100644 index 0000000000..8ba74c5348 --- /dev/null +++ b/cpp/arcticdb/codec/test/test_encode_field_collection.cpp @@ -0,0 +1,39 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#include +#include + +TEST(EncodedFieldCollection, AddField) { + using namespace arcticdb; + EncodedFieldCollection fields; + auto* field1 = fields.add_field(3); + auto* block1 = field1->mutable_ndarray()->add_shapes(); + block1->set_in_bytes(1); + auto* block2 = field1->mutable_ndarray()->add_values(EncodingVersion::V2); + block2->set_in_bytes(2); + auto* block3 = field1->mutable_ndarray()->add_values(EncodingVersion::V2); + block3->set_in_bytes(3); + auto* field2 = fields.add_field(2); + auto* block4 = field2->mutable_ndarray()->add_values(EncodingVersion::V2); + block4->set_in_bytes(4); + auto* block5 = field2->mutable_ndarray()->add_values(EncodingVersion::V2); + block5->set_in_bytes(5); + + const auto& read1 = fields.at(0); + auto b1 = read1.shapes(0); + ASSERT_EQ(b1.in_bytes(), 1); + auto b2 = read1.values(0); + ASSERT_EQ(b2.in_bytes(), 2); + auto b3 = read1.values(1); + ASSERT_EQ(b3.in_bytes(), 3); + + const auto& read2 = fields.at(1); + auto b4 = read2.values(0); + ASSERT_EQ(b4.in_bytes(), 4); + auto b5 = read2.values(1); + ASSERT_EQ(b5.in_bytes(), 5); +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_encoded_field.cpp b/cpp/arcticdb/codec/test/test_encoded_field.cpp new file mode 100644 index 0000000000..f39584600f --- /dev/null +++ b/cpp/arcticdb/codec/test/test_encoded_field.cpp @@ -0,0 +1,242 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ +#include +#include +#include +#include + +TEST(EncodedField, ScalarBlocks) { + using namespace arcticdb; + EncodedFieldCollection coll; + coll.reserve(calc_field_bytes(4), 1); + auto* field_ptr = coll.add_field(4); + auto& field = *field_ptr; + auto* v1 = field.add_values(EncodingVersion::V1); + v1->mutable_codec()->mutable_lz4()->acceleration_ = 1; + auto* v2 = field.add_values(EncodingVersion::V1); + v2->mutable_codec()->mutable_lz4()->acceleration_ = 2; + auto* v3 = field.add_values(EncodingVersion::V1); + v3->mutable_codec()->mutable_lz4()->acceleration_ = 3; + auto* v4 = field.add_values(EncodingVersion::V1); + v4 ->mutable_codec()->mutable_lz4()->acceleration_ = 4; + + ASSERT_EQ(field.values_size(), 4); + ASSERT_EQ(field.shapes_size(), 0); + + auto expected = 1; + for(const auto& value : field.values()) { + ASSERT_EQ(value.codec().lz4().acceleration_, expected); + ++expected; + } + field.validate(); +} + +TEST(EncodedField, OldStyleShapes) { + using namespace arcticdb; + EncodedFieldCollection coll; + coll.reserve(calc_field_bytes(8), 1); + auto* field_ptr = coll.add_field(8); + auto& field = *field_ptr; + auto* s1 = field.add_shapes(); + s1->mutable_codec()->mutable_lz4()->acceleration_ = 1; + auto* v1 = field.add_values(EncodingVersion::V1); + v1->mutable_codec()->mutable_lz4()->acceleration_ = 2; + auto* s2 = field.add_shapes(); + s2->mutable_codec()->mutable_lz4()->acceleration_ = 3; + auto* v2 = field.add_values(EncodingVersion::V1); + v2->mutable_codec()->mutable_lz4()->acceleration_ = 4; + auto* s3 = field.add_shapes(); + s3->mutable_codec()->mutable_lz4()->acceleration_ = 5; + auto* v3 = field.add_values(EncodingVersion::V1); + v3->mutable_codec()->mutable_lz4()->acceleration_ = 6; + auto* s4 = field.add_shapes(); + s4->mutable_codec()->mutable_lz4()->acceleration_ = 7; + auto* v4 = field.add_values(EncodingVersion::V1); + v4->mutable_codec()->mutable_lz4()->acceleration_ = 8; + + ASSERT_EQ(field.values_size(), 4); + ASSERT_EQ(field.shapes_size(), 4); + + auto expected = 2; + for(const auto& value : field.values()) { + ASSERT_EQ(value.codec().lz4().acceleration_, expected); + expected += 2; + } + + expected = 1; + for(const auto& shape : field.shapes()) { + ASSERT_EQ(shape.codec().lz4().acceleration_, expected); + expected += 2; + } + field.validate(); +} + +TEST(EncodedField, OldStyleShapesEnterShapesFirst) { + using namespace arcticdb; + EncodedFieldCollection coll; + coll.reserve(calc_field_bytes(8), 1); + auto* field_ptr = coll.add_field(8); + auto& field = *field_ptr; + auto* s1 = field.add_shapes(); + s1->mutable_codec()->mutable_lz4()->acceleration_ = 1; + auto* s2 = field.add_shapes(); + s2->mutable_codec()->mutable_lz4()->acceleration_ = 3; + auto* s3 = field.add_shapes(); + s3->mutable_codec()->mutable_lz4()->acceleration_ = 5; + auto* s4 = field.add_shapes(); + s4->mutable_codec()->mutable_lz4()->acceleration_ = 7; + + + auto* v1 = field.add_values(EncodingVersion::V1); + v1->mutable_codec()->mutable_lz4()->acceleration_ = 2; + auto* v2 = field.add_values(EncodingVersion::V1); + v2->mutable_codec()->mutable_lz4()->acceleration_ = 4; + auto* v3 = field.add_values(EncodingVersion::V1); + v3->mutable_codec()->mutable_lz4()->acceleration_ = 6; + auto* v4 = field.add_values(EncodingVersion::V1); + v4->mutable_codec()->mutable_lz4()->acceleration_ = 8; + + ASSERT_EQ(field.values_size(), 4); + ASSERT_EQ(field.shapes_size(), 4); + + auto expected = 2; + for(const auto& value : field.values()) { + ASSERT_EQ(value.codec().lz4().acceleration_, expected); + expected += 
2; + } + + expected = 1; + for(const auto& shape : field.shapes()) { + ASSERT_EQ(shape.codec().lz4().acceleration_, expected); + expected += 2; + } + field.validate(); +} + +TEST(EncodedField, NewStyleShapes) { + using namespace arcticdb; + EncodedFieldCollection coll; + coll.reserve(calc_field_bytes(5), 1); + auto* field_ptr = coll.add_field(5); + auto& field = *field_ptr; + auto* s1 = field.add_shapes(); + s1->mutable_codec()->mutable_lz4()->acceleration_ = 1; + auto* v1 = field.add_values(EncodingVersion::V2); + v1->mutable_codec()->mutable_lz4()->acceleration_ = 2; + auto* v2 = field.add_values(EncodingVersion::V2); + v2->mutable_codec()->mutable_lz4()->acceleration_ = 3; + auto* v3 = field.add_values(EncodingVersion::V2); + v3->mutable_codec()->mutable_lz4()->acceleration_ = 4; + auto* v4 = field.add_values(EncodingVersion::V2); + v4->mutable_codec()->mutable_lz4()->acceleration_ = 5; + + ASSERT_EQ(field.values_size(), 4); + ASSERT_EQ(field.shapes_size(), 1); + + auto expected = 2; + for(const auto& value : field.values()) { + ASSERT_EQ(value.codec().lz4().acceleration_, expected); + ++expected; + } + + for (const auto& shape: field.shapes()) { + ASSERT_EQ(shape.codec().lz4().acceleration_, 1); + } + field.validate(); +} + +TEST(EncodedFieldTest, Roundtrip) { + using namespace arcticdb; + arcticdb::proto::encoding::EncodedField original_proto; + original_proto.set_offset(123); + original_proto.set_num_elements(456); + + auto* ndarray = original_proto.mutable_ndarray(); + ndarray->set_items_count(789); + + auto* shape_block = ndarray->add_shapes(); + shape_block->set_in_bytes(100); + shape_block->set_out_bytes(50); + shape_block->set_hash(1234567890); + shape_block->set_encoder_version(1); + + auto* shape_block_codec = shape_block->mutable_codec(); + shape_block_codec->mutable_zstd()->set_level(5); + shape_block_codec->mutable_zstd()->set_is_streaming(true); + + auto* value_block = ndarray->add_values(); + value_block->set_in_bytes(200); + value_block->set_out_bytes(100); + value_block->set_hash(987654321); + value_block->set_encoder_version(2); + + auto* value_block_codec = value_block->mutable_codec(); + value_block_codec->mutable_lz4()->set_acceleration(10); + + ndarray->set_sparse_map_bytes(1024); + EncodedFieldCollection collection; + auto encoded_field = collection.add_field(2); + encoded_field_from_proto(original_proto, *encoded_field); + + arcticdb::proto::encoding::EncodedField roundtrip_proto; + copy_encoded_field_to_proto(*encoded_field, roundtrip_proto); + + ASSERT_TRUE(original_proto.has_ndarray()); + ASSERT_TRUE(roundtrip_proto.has_ndarray()); + + const auto& original_ndarray = original_proto.ndarray(); + const auto& roundtrip_ndarray = roundtrip_proto.ndarray(); + + ASSERT_EQ(original_ndarray.items_count(), roundtrip_ndarray.items_count()); + ASSERT_EQ(original_ndarray.sparse_map_bytes(), roundtrip_ndarray.sparse_map_bytes()); + + ASSERT_EQ(original_ndarray.shapes_size(), roundtrip_ndarray.shapes_size()); + ASSERT_EQ(original_ndarray.values_size(), roundtrip_ndarray.values_size()); + + for (int i = 0; i < original_ndarray.shapes_size(); ++i) { + const auto& original_shape = original_ndarray.shapes(i); + const auto& roundtrip_shape = roundtrip_ndarray.shapes(i); + + ASSERT_EQ(original_shape.in_bytes(), roundtrip_shape.in_bytes()); + ASSERT_EQ(original_shape.out_bytes(), roundtrip_shape.out_bytes()); + ASSERT_EQ(original_shape.hash(), roundtrip_shape.hash()); + ASSERT_EQ(original_shape.encoder_version(), roundtrip_shape.encoder_version()); + + 
ASSERT_TRUE(original_shape.has_codec()); + ASSERT_TRUE(roundtrip_shape.has_codec()); + + const auto& original_shape_codec = original_shape.codec(); + const auto& roundtrip_shape_codec = roundtrip_shape.codec(); + + ASSERT_TRUE(original_shape_codec.has_zstd()); + ASSERT_TRUE(roundtrip_shape_codec.has_zstd()); + + ASSERT_EQ(original_shape_codec.zstd().level(), roundtrip_shape_codec.zstd().level()); + ASSERT_EQ(original_shape_codec.zstd().is_streaming(), roundtrip_shape_codec.zstd().is_streaming()); + } + + for (int i = 0; i < original_ndarray.values_size(); ++i) { + const auto& original_value = original_ndarray.values(i); + const auto& roundtrip_value = roundtrip_ndarray.values(i); + + ASSERT_EQ(original_value.in_bytes(), roundtrip_value.in_bytes()); + ASSERT_EQ(original_value.out_bytes(), roundtrip_value.out_bytes()); + ASSERT_EQ(original_value.hash(), roundtrip_value.hash()); + ASSERT_EQ(original_value.encoder_version(), roundtrip_value.encoder_version()); + + ASSERT_TRUE(original_value.has_codec()); + ASSERT_TRUE(roundtrip_value.has_codec()); + + const auto& original_value_codec = original_value.codec(); + const auto& roundtrip_value_codec = roundtrip_value.codec(); + + ASSERT_TRUE(original_value_codec.has_lz4()); + ASSERT_TRUE(roundtrip_value_codec.has_lz4()); + + ASSERT_EQ(original_value_codec.lz4().acceleration(), roundtrip_value_codec.lz4().acceleration()); + } +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/test/test_segment_header.cpp b/cpp/arcticdb/codec/test/test_segment_header.cpp new file mode 100644 index 0000000000..4e7da7f1d1 --- /dev/null +++ b/cpp/arcticdb/codec/test/test_segment_header.cpp @@ -0,0 +1,89 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */ +#include +#include +#include +#include + +TEST(SegmentHeader, WriteAndReadFields) { + using namespace arcticdb; + SegmentHeader header{EncodingVersion::V1}; + auto& string_pool_field = header.mutable_string_pool_field(2); + auto* values = string_pool_field.mutable_ndarray()->add_values(EncodingVersion::V2); + values->set_in_bytes(23); + values = string_pool_field.mutable_ndarray()->add_values(EncodingVersion::V2); + values->set_in_bytes(47); + + const auto& read_string_pool = header.string_pool_field(); + auto& read_values1 = read_string_pool.values(0); + ASSERT_EQ(read_values1.in_bytes(), 23); + auto&read_values2 = read_string_pool.values(1); + ASSERT_EQ(read_values2.in_bytes(), 47); +} + +TEST(SegmentHeader, HasFields) { + using namespace arcticdb; + SegmentHeader header{EncodingVersion::V1}; + ASSERT_EQ(header.has_index_descriptor_field(), false); + ASSERT_EQ(header.has_metadata_field(), false); + ASSERT_EQ(header.has_column_fields(), false); + ASSERT_EQ(header.has_descriptor_field(), false); + ASSERT_EQ(header.has_string_pool_field(), false); + + (void)header.mutable_string_pool_field(2); + ASSERT_EQ(header.has_index_descriptor_field(), false); + ASSERT_EQ(header.has_metadata_field(), false); + ASSERT_EQ(header.has_column_fields(), false); + ASSERT_EQ(header.has_descriptor_field(), false); + ASSERT_EQ(header.has_string_pool_field(), true); + + (void)header.mutable_descriptor_field(2); + ASSERT_EQ(header.has_index_descriptor_field(), false); + ASSERT_EQ(header.has_metadata_field(), false); + ASSERT_EQ(header.has_column_fields(), false); + ASSERT_EQ(header.has_descriptor_field(), true); + ASSERT_EQ(header.has_string_pool_field(), true); + + (void)header.mutable_column_fields(2); + ASSERT_EQ(header.has_index_descriptor_field(), false); + ASSERT_EQ(header.has_metadata_field(), false); + ASSERT_EQ(header.has_column_fields(), true); + ASSERT_EQ(header.has_descriptor_field(), true); + ASSERT_EQ(header.has_string_pool_field(), true); +} + +TEST(SegmentHeader, SerializeUnserializeV1) { + using namespace arcticdb; + SegmentHeader header{EncodingVersion::V1}; + auto& string_pool_field = header.mutable_string_pool_field(10); + for(auto i = 0U; i < 5; ++i) { + auto *shapes = string_pool_field.mutable_ndarray()->add_shapes(); + shapes->set_in_bytes(i + 1); + shapes->mutable_codec()->mutable_lz4()->acceleration_ = 1; + auto *values = string_pool_field.mutable_ndarray()->add_values(EncodingVersion::V1); + values->set_in_bytes(i + 1); + values->mutable_codec()->mutable_lz4()->acceleration_ = 1; + } + + auto desc = stream_descriptor(StreamId{"thing"}, stream::RowCountIndex{}, {scalar_field(DataType::UINT8, "ints")}); + + auto proto = generate_v1_header(header, desc); + const auto header_size = proto.ByteSizeLong(); + std::vector vec(header_size); + auto read_header = decode_protobuf_header(vec.data(), header_size); + + const auto& string_pool = read_header.proto().string_pool_field(); + auto expected = 1U; + for(const auto& value : string_pool.ndarray().values()) { + ASSERT_EQ(value.in_bytes(), expected++); + } + + expected = 1U; + for(const auto& shape : string_pool.ndarray().shapes()) { + ASSERT_EQ(shape.in_bytes(), expected++); + } +} \ No newline at end of file diff --git a/cpp/arcticdb/codec/typed_block_encoder_impl.hpp b/cpp/arcticdb/codec/typed_block_encoder_impl.hpp index 34ea13b2fb..eed46444d0 100644 --- a/cpp/arcticdb/codec/typed_block_encoder_impl.hpp +++ b/cpp/arcticdb/codec/typed_block_encoder_impl.hpp @@ -8,6 +8,7 @@ #include namespace arcticdb { + /// @todo Split this class. 
This class does too much. Supports encoding via V1 and V2 but in a clunky way. The /// interface for encoding is different (V2 requires EncodedBlock to be passed, thus encode_values and /// encode_shapes were added). @@ -16,9 +17,8 @@ namespace arcticdb { using ShapesBlockTDT = TypeDescriptorTag, DimensionTag>; static size_t max_compressed_size( - const arcticdb::proto::encoding::VariantCodec& codec_opts, - const TypedBlock& typed_block - ) { + const arcticdb::proto::encoding::VariantCodec& codec_opts, + const TypedBlock& typed_block) { return visit_encoder(codec_opts, [&](auto encoder_tag) { return decltype(encoder_tag)::Encoder::max_compressed_size(typed_block); }); @@ -26,7 +26,7 @@ namespace arcticdb { /** * Perform encoding of in memory field for storage * @param[in] codec_opts Option used to dispatch to the appropriate encoder and configure it - * @param[in] typed_block The block which will be encoded + * @param[in] typed_block The block to be encoded * @param[in, out] field description of the encoding operation * @param[out] out output buffer to write the encoded values to. Must be resized if pos becomes > size * @param[in, out] pos position in bytes in the buffer where to start writing. @@ -34,14 +34,12 @@ namespace arcticdb { */ template static void encode( - const arcticdb::proto::encoding::VariantCodec& codec_opts, - const TypedBlock& typed_block, - EncodedFieldType& field, - Buffer& out, - std::ptrdiff_t& pos - ) { - static_assert(encoder_version == EncodingVersion::V1, - "Encoding of both shapes and values at the same time is allowed only in V1 encoding"); + const arcticdb::proto::encoding::VariantCodec& codec_opts, + const TypedBlock& typed_block, + EncodedFieldType& field, + Buffer& out, + std::ptrdiff_t& pos) { + static_assert(encoder_version == EncodingVersion::V1, "Encoding of both shapes and values at the same time is allowed only in V1 encoding"); visit_encoder(codec_opts, [&](auto encoder_tag) { decltype(encoder_tag)::Encoder::encode(get_opts(codec_opts, encoder_tag), typed_block, @@ -51,6 +49,35 @@ namespace arcticdb { }); } + template + static void encode_to_values( + const arcticdb::proto::encoding::VariantCodec& codec_opts, + const TypedBlockType& typed_block, + Buffer& out, + std::ptrdiff_t& pos, + NDArrayType& ndarray + ) { + if constexpr (encoder_version == EncodingVersion::V2) { + auto *values_encoded_block = ndarray->add_values(encoder_version); + visit_encoder(codec_opts, [&](auto encoder_tag) { + decltype(encoder_tag)::Encoder::encode(get_opts(codec_opts, encoder_tag), + typed_block, + out, + pos, + values_encoded_block); + }); + } else { + auto* values_encoded_block = ndarray->add_values(); + visit_encoder(codec_opts, [&](auto encoder_tag) { + decltype(encoder_tag)::Encoder::encode(get_opts(codec_opts, encoder_tag), + typed_block, + out, + pos, + values_encoded_block); + }); + } + } + template static void encode_values( const arcticdb::proto::encoding::VariantCodec& codec_opts, @@ -59,39 +86,32 @@ namespace arcticdb { Buffer& out, std::ptrdiff_t& pos ) { - static_assert(encoder_version == EncodingVersion::V2, - "Encoding values separately from the shapes is allowed only in V2 encoding"); + static_assert(encoder_version == EncodingVersion::V2, "Encoding values separately from the shapes is allowed only in V2 encoding"); auto* ndarray = field.mutable_ndarray(); if(typed_block.nbytes() == 0) { ARCTICDB_TRACE(log::codec(), "Encoder got values of size 0. 
Noting to encode."); return; } - auto* values_encoded_block = ndarray->add_values(); - visit_encoder(codec_opts, [&](auto encoder_tag) { - decltype(encoder_tag)::Encoder::encode(get_opts(codec_opts, encoder_tag), - typed_block, - out, - pos, - values_encoded_block); - }); + + encode_to_values, decltype(ndarray)>(codec_opts, typed_block, out, pos, ndarray); const auto existing_items_count = ndarray->items_count(); ndarray->set_items_count(existing_items_count + typed_block.row_count()); } template static void encode_shapes( - const arcticdb::proto::encoding::VariantCodec& codec_opts, - const TypedBlockData& typed_block, - EncodedFieldType& field, - Buffer& out, - std::ptrdiff_t& pos - ) { - static_assert(encoder_version == EncodingVersion::V2, - "Encoding shapes separately from the values is allowed only in V2 encoding"); + const arcticdb::proto::encoding::VariantCodec& codec_opts, + const TypedBlockData& typed_block, + EncodedFieldType& field, + Buffer& out, + std::ptrdiff_t& pos) { + static_assert(encoder_version == EncodingVersion::V2, "Encoding shapes separately from the values is allowed only in V2 encoding"); + if(typed_block.nbytes() == 0) { ARCTICDB_TRACE(log::codec(), "Encoder got shapes of size 0. Noting to encode."); return; } + auto* ndarray = field.mutable_ndarray(); auto* shapes_encoded_block = ndarray->add_shapes(); visit_encoder(codec_opts, [&](auto encoder_tag) { diff --git a/cpp/arcticdb/codec/variant_encoded_field_collection.cpp b/cpp/arcticdb/codec/variant_encoded_field_collection.cpp deleted file mode 100644 index df3da27cd2..0000000000 --- a/cpp/arcticdb/codec/variant_encoded_field_collection.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* Copyright 2023 Man Group Operations Limited - * - * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. - */ - -#include -#include -#include - -namespace arcticdb { - -VariantEncodedFieldCollection::VariantEncodedFieldCollection(const Segment& segment) { - if(EncodingVersion(segment.header().encoding_version()) == EncodingVersion::V2) { - const auto& hdr = segment.header(); - auto [begin, encoded_fields_ptr] = get_segment_begin_end(segment, segment.header()); - util::check_magic(encoded_fields_ptr); - auto encoded_fields_buffer = decode_encoded_fields(hdr, encoded_fields_ptr, begin); - fields_ = EncodedFieldCollection{std::move(encoded_fields_buffer)}; - } else { - is_proto_ = true; - header_ = &segment.header(); - } -} - -VariantField VariantEncodedFieldCollection::at(size_t pos) const { - if (is_proto_) - return &header_->fields(static_cast(pos)); - else - return &fields_.at(pos); -} - -size_t VariantEncodedFieldCollection::size() const { - if (is_proto_) - return header_->fields_size(); - else - return fields_.size(); -} - -} //namespace arcticdb diff --git a/cpp/arcticdb/codec/variant_encoded_field_collection.hpp b/cpp/arcticdb/codec/variant_encoded_field_collection.hpp deleted file mode 100644 index 41288cab85..0000000000 --- a/cpp/arcticdb/codec/variant_encoded_field_collection.hpp +++ /dev/null @@ -1,25 +0,0 @@ -/* Copyright 2023 Man Group Operations Limited - * - * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. 
- * - * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. - */ - -#pragma once - -#include - -namespace arcticdb { - -using VariantField = std::variant; - -struct VariantEncodedFieldCollection { - EncodedFieldCollection fields_; - const arcticdb::proto::encoding::SegmentHeader *header_ = nullptr; - bool is_proto_ = false; - explicit VariantEncodedFieldCollection(const Segment &segment); - [[nodiscard]] VariantField at(size_t pos) const; - [[nodiscard]] size_t size() const; -}; - -} // namespace arcticdb diff --git a/cpp/arcticdb/codec/zstd.hpp b/cpp/arcticdb/codec/zstd.hpp index 4f69ff07d2..34b08113f4 100644 --- a/cpp/arcticdb/codec/zstd.hpp +++ b/cpp/arcticdb/codec/zstd.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -35,7 +36,7 @@ struct ZstdBlockEncoder { static std::size_t encode_block( const Opts &opts, const T* in, - BlockProtobufHelper &block_utils, + BlockDataHelper &block_utils, HashAccum& hasher, T *out, std::size_t out_capacity, @@ -44,7 +45,8 @@ struct ZstdBlockEncoder { std::size_t compressed_bytes = ZSTD_compress(out, out_capacity, in, block_utils.bytes_, opts.level()); hasher(in, block_utils.count_); pos += compressed_bytes; - out_codec.mutable_zstd()->MergeFrom(opts); + copy_codec(*out_codec.mutable_zstd(), opts); + return compressed_bytes; } }; diff --git a/cpp/arcticdb/column_store/chunked_buffer.hpp b/cpp/arcticdb/column_store/chunked_buffer.hpp index 7bc4453815..7c25b2fc64 100644 --- a/cpp/arcticdb/column_store/chunked_buffer.hpp +++ b/cpp/arcticdb/column_store/chunked_buffer.hpp @@ -91,6 +91,10 @@ class ChunkedBufferImpl { ChunkedBufferImpl() = default; explicit ChunkedBufferImpl(size_t size) { + reserve(size); + } + + void reserve(size_t size) { if(size > 0) { if (size > DefaultBlockSize) { handle_transition_to_irregular(); @@ -306,7 +310,7 @@ class ChunkedBufferImpl { void check_bytes(size_t pos_bytes, size_t required_bytes) const { if (pos_bytes + required_bytes > bytes()) { std::string err = fmt::format("Cursor overflow in chunked_buffer ptr_cast, cannot read {} bytes from a buffer of size {} with cursor " - "at {}, as it would required {} bytes. ", + "at {}, as it would require {} bytes. 
", required_bytes, bytes(), pos_bytes, @@ -351,7 +355,7 @@ class ChunkedBufferImpl { bytes_ += size; } - bool empty() const { return bytes_ == 0; } + [[nodiscard]] bool empty() const { return bytes_ == 0; } void clear() { bytes_ = 0; diff --git a/cpp/arcticdb/column_store/column.cpp b/cpp/arcticdb/column_store/column.cpp index 2b08c07a9e..a1d2d40188 100644 --- a/cpp/arcticdb/column_store/column.cpp +++ b/cpp/arcticdb/column_store/column.cpp @@ -131,6 +131,10 @@ Column Column::clone() const { return output; } +bool Column::empty() const { + return row_count() == 0; +} + bool Column::is_sparse() const { if(last_logical_row_ != last_physical_row_) { util::check(static_cast(sparse_map_), "Expected sparse map in column with logical row {} and physical row {}", last_logical_row_, last_physical_row_); @@ -746,10 +750,6 @@ void Column::set_sparse_bit_for_row(size_t sparse_location) { sparse_map()[bv_size(sparse_location)] = true; } -bool Column::empty() const { - return row_count() == 0; -} - void Column::regenerate_offsets() const { if (ARCTICDB_LIKELY(is_scalar() || !offsets_.empty())) return; diff --git a/cpp/arcticdb/column_store/column.hpp b/cpp/arcticdb/column_store/column.hpp index c800122663..8117b66ab2 100644 --- a/cpp/arcticdb/column_store/column.hpp +++ b/cpp/arcticdb/column_store/column.hpp @@ -86,9 +86,7 @@ void initialise_output_column(const Column& left_input_column, const Column& rig void initialise_output_bitset(const util::BitSet& input_bitset, bool sparse_missing_value_output, util::BitSet& output_bitset); class Column { - public: - template class TypedColumnIterator : public boost::iterator_facade, ValueType, boost::random_access_traversal_tag> { using RawType = std::decay_t; @@ -221,6 +219,13 @@ class Column { allow_sparse_(allow_sparse) { } + Column(TypeDescriptor type, bool allow_sparse, ChunkedBuffer&& buffer, Buffer&& shapes) : + data_(std::move(buffer)), + shapes_(std::move(shapes)), + type_(type), + allow_sparse_(allow_sparse) { + } + Column( TypeDescriptor type, size_t expected_rows, @@ -243,6 +248,8 @@ class Column { Column clone() const; + bool empty() const; + bool is_sparse() const; bool sparse_permitted() const; @@ -437,10 +444,10 @@ class Column { // The following two methods inflate (reduplicate) numpy string arrays that are potentially multi-dimensional, // i.e where the value is not a string but an array of strings - void inflate_string_array(const TensorType &string_refs, - CursoredBuffer &data, + void inflate_string_array(const TensorType& string_refs, + CursoredBuffer& data, CursoredBuffer &shapes, - boost::container::small_vector &offsets, + boost::container::small_vector& offsets, const StringPool &string_pool); void inflate_string_arrays(const StringPool &string_pool); @@ -596,9 +603,8 @@ class Column { return data_.buffer(); } - //TODO this will need to be more efficient - index each block? template - std::optional index_of(T val) const { + std::optional search_unsorted(T val) const { util::check_arg(is_scalar(), "Cannot index on multidimensional values"); for (position_t i = 0; i < row_count(); ++i) { if (val == *ptr_cast(i, sizeof(T))) @@ -899,7 +905,6 @@ class Column { } private: - position_t last_offset() const; void update_offsets(size_t nbytes); bool is_scalar() const; @@ -910,7 +915,6 @@ class Column { size_t inflated_row_count() const; size_t num_shapes() const; void set_sparse_bit_for_row(size_t sparse_location); - bool empty() const; void regenerate_offsets() const; // Permutes the physical column storage based on the given sorted_pos. 
diff --git a/cpp/arcticdb/column_store/column_data.hpp b/cpp/arcticdb/column_store/column_data.hpp index 67d96f5273..a8dc70ea02 100644 --- a/cpp/arcticdb/column_store/column_data.hpp +++ b/cpp/arcticdb/column_store/column_data.hpp @@ -24,7 +24,7 @@ struct TypedBlockData { template class TypedColumnBlockIterator : public boost::iterator_facade, ValueType, boost::random_access_traversal_tag> { public: - TypedColumnBlockIterator(ValueType* ptr) + explicit TypedColumnBlockIterator(ValueType* ptr) : ptr_(ptr) { } TypedColumnBlockIterator(const TypedColumnBlockIterator& other) @@ -38,7 +38,9 @@ struct TypedBlockData { : ptr_(nullptr) { } TypedColumnBlockIterator& operator=(const TypedColumnBlockIterator& other) { - ptr_ = other.ptr_; + if(&other != this) + ptr_ = other.ptr_; + return *this; } @@ -92,12 +94,29 @@ struct TypedBlockData { block_(nullptr) {} - std::size_t nbytes() const { return nbytes_; } - std::size_t row_count() const { return row_count_; } - TypeDescriptor type() const { return static_cast(TDT()); } - const shape_t *shapes() const { return shapes_; } - const raw_type *data() const { return data_; } - const MemBlock *mem_block() const { return block_; } + [[nodiscard]] std::size_t nbytes() const { + return nbytes_; + } + + [[nodiscard]] std::size_t row_count() const { + return row_count_; + } + + [[nodiscard]] TypeDescriptor type() const { + return static_cast(TDT()); + } + + [[nodiscard]] const shape_t *shapes() const { + return shapes_; + } + + [[nodiscard]] const raw_type *data() const { + return data_; + } + + [[nodiscard]] const MemBlock *mem_block() const { + return block_; + } raw_type operator[](size_t pos) const { return reinterpret_cast(block_->data())[pos]; @@ -111,7 +130,7 @@ struct TypedBlockData { return TypedColumnBlockIterator(data_ + row_count_); } - size_t offset() const { + [[nodiscard]] size_t offset() const { return block_->offset_; } @@ -142,13 +161,12 @@ struct ColumnData { * ColumnData is just a thin wrapper that helps in iteration over all the blocks in the column */ public: - template struct Enumeration { ssize_t idx_{0}; RawType* ptr_{nullptr}; - inline ssize_t idx() const { + [[nodiscard]] inline ssize_t idx() const { return idx_; } @@ -321,15 +339,15 @@ struct ColumnData { return ColumnDataIterator(this, end_ptr); } - TypeDescriptor type() const { + [[nodiscard]] TypeDescriptor type() const { return type_; } - const ChunkedBuffer &buffer() const { + [[nodiscard]] const ChunkedBuffer &buffer() const { return *data_; } - const util::BitMagic* bit_vector() const { + [[nodiscard]] const util::BitMagic* bit_vector() const { return bit_vector_; } @@ -343,7 +361,7 @@ struct ColumnData { return shape; } - size_t num_blocks() const { + [[nodiscard]] size_t num_blocks() const { return data_->blocks().size(); } diff --git a/cpp/arcticdb/column_store/memory_segment.hpp b/cpp/arcticdb/column_store/memory_segment.hpp index 4fda35d5ab..90e2085522 100644 --- a/cpp/arcticdb/column_store/memory_segment.hpp +++ b/cpp/arcticdb/column_store/memory_segment.hpp @@ -99,28 +99,20 @@ class SegmentInMemory { return impl_->column_index(name); } - std::shared_ptr index_fields() const { - return impl_->index_fields(); - } - - bool has_index_fields() const { - return impl_->has_index_fields(); - } - - TimeseriesDescriptor index_descriptor() { + const TimeseriesDescriptor& index_descriptor() const { return impl_->index_descriptor(); } - FieldCollection&& detach_index_fields() { - return impl_->detach_index_fields(); + TimeseriesDescriptor& mutable_index_descriptor() { + return 
impl_->mutable_index_descriptor(); } - std::shared_ptr timeseries_proto() { - return impl_->timeseries_proto(); + bool has_index_descriptor() const { + return impl_->has_index_descriptor(); } - void set_index_fields(std::shared_ptr fields) { - impl_->set_index_fields(std::move(fields)); + TimeseriesDescriptor&& detach_index_descriptor() { + return impl_->detach_index_descriptor(); } void init_column_map() const { @@ -240,9 +232,13 @@ class SegmentInMemory { return impl_->string_array_at(row, col); } - void set_timeseries_descriptor(TimeseriesDescriptor&& tsd) { + void set_timeseries_descriptor(const TimeseriesDescriptor& tsd) { util::check(!tsd.proto_is_null(), "Got null timeseries descriptor in set_timeseries_descriptor"); - impl_->set_timeseries_descriptor(std::move(tsd)); + impl_->set_timeseries_descriptor(tsd); + } + + void reset_timeseries_descriptor() { + impl_->reset_timeseries_descriptor(); } size_t num_columns() const { return impl_->num_columns(); } @@ -255,6 +251,14 @@ class SegmentInMemory { impl_->unsparsify(); } + bool has_user_metadata() const { + return impl_->has_user_metadata(); + } + + const arcticdb::proto::descriptors::UserDefinedMetadata& user_metadata() const { + return impl_->user_metadata(); + } + void sparsify() { impl_->sparsify(); } @@ -337,6 +341,10 @@ class SegmentInMemory { return impl_->string_pool_ptr(); } + void reset_metadata() { + impl_->reset_metadata(); + } + void set_metadata(google::protobuf::Any &&meta) { impl_->set_metadata(std::move(meta)); } @@ -345,10 +353,6 @@ class SegmentInMemory { return impl_->has_metadata(); } - void override_metadata(google::protobuf::Any &&meta) { - impl_->override_metadata(std::move(meta)); - } - ssize_t get_row_id() { return impl_->get_row_id(); } @@ -473,10 +477,6 @@ class SegmentInMemory { return output; } - StreamId get_index_col_name() const{ - return impl_->get_index_col_name(); - } - private: explicit SegmentInMemory(std::shared_ptr impl) : impl_(std::move(impl)) {} @@ -484,4 +484,4 @@ class SegmentInMemory { std::shared_ptr impl_; }; -} +} //namespace arcticdb diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index bd24ca4633..73a19db3dc 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -154,6 +154,9 @@ SegmentInMemoryImpl SegmentInMemoryImpl::clone() const { } output.allow_sparse_ = allow_sparse_; output.compacted_ = compacted_; + if(tsd_) + output.set_timeseries_descriptor(tsd_->clone()); + return output; } @@ -326,14 +329,6 @@ std::shared_ptr SegmentInMemoryImpl::filter(util::BitSet&& return output; } -std::shared_ptr SegmentInMemoryImpl::timeseries_proto() { - if(!tsd_) { - tsd_ = std::make_shared(); - metadata_->UnpackTo(tsd_.get()); - } - return tsd_; -} - std::shared_ptr SegmentInMemoryImpl::get_output_segment(size_t num_values, bool pre_allocate) const { std::shared_ptr output; if (is_sparse()) { @@ -652,22 +647,20 @@ void SegmentInMemoryImpl::sort(position_t idx) { } } -void SegmentInMemoryImpl::set_timeseries_descriptor(TimeseriesDescriptor&& tsd) { - index_fields_ = tsd.fields_ptr(); - tsd_ = tsd.proto_ptr(); - util::check(!tsd_->has_stream_descriptor() || tsd_->stream_descriptor().has_index(), "Stream descriptor without index in set_timeseries_descriptor"); - google::protobuf::Any any; - any.PackFrom(tsd.proto()); - set_metadata(std::move(any)); +void SegmentInMemoryImpl::set_timeseries_descriptor(const TimeseriesDescriptor& tsd) { + tsd_ = tsd; } -void 
SegmentInMemoryImpl::set_metadata(google::protobuf::Any&& meta) { - util::check_arg(!metadata_, "Cannot override previously set metadata"); - if (meta.ByteSizeLong()) - metadata_ = std::make_unique(std::move(meta)); +void SegmentInMemoryImpl::reset_timeseries_descriptor() { + tsd_.reset(); +} + +void SegmentInMemoryImpl::reset_metadata() { + metadata_.reset(); } -void SegmentInMemoryImpl::override_metadata(google::protobuf::Any&& meta) { +void SegmentInMemoryImpl::set_metadata(google::protobuf::Any&& meta) { + util::check_arg(!metadata_, "Cannot override previously set metadata"); if (meta.ByteSizeLong()) metadata_ = std::make_unique(std::move(meta)); } diff --git a/cpp/arcticdb/column_store/memory_segment_impl.hpp b/cpp/arcticdb/column_store/memory_segment_impl.hpp index e0ccc77432..6dbefd3e72 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.hpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.hpp @@ -32,7 +32,7 @@ namespace arcticdb { class SegmentInMemoryImpl; namespace { -inline std::shared_ptr allocate_sparse_segment(const StreamId& id, const IndexDescriptor& index); +inline std::shared_ptr allocate_sparse_segment(const StreamId& id, const IndexDescriptorImpl& index); inline std::shared_ptr allocate_dense_segment(const StreamDescriptor& descriptor, size_t row_count); @@ -51,7 +51,7 @@ inline void check_output_bitset(const arcticdb::util::BitSet& output, "Mismatch in output bitset in filter_segment"); } } -} // namespace anon +} // namespace class SegmentInMemoryImpl { @@ -417,12 +417,14 @@ class SegmentInMemoryImpl { row_id_++; } - std::shared_ptr index_fields() const { - return index_fields_; + const TimeseriesDescriptor& index_descriptor() const { + util::check(tsd_.has_value(), "Index descriptor requested but not set"); + return *tsd_; } - void set_index_fields(std::shared_ptr index_fields) { - index_fields_ = std::move(index_fields); + TimeseriesDescriptor& mutable_index_descriptor() { + util::check(tsd_.has_value(), "Index descriptor requested but not set"); + return *tsd_; } void end_block_write(ssize_t size) { @@ -717,8 +719,10 @@ class SegmentInMemoryImpl { StringPool &string_pool() { return *string_pool_; } //TODO protected + void reset_metadata(); + void set_metadata(google::protobuf::Any&& meta); - void override_metadata(google::protobuf::Any&& meta); + bool has_metadata() const; const google::protobuf::Any* metadata() const; @@ -762,21 +766,26 @@ class SegmentInMemoryImpl { bool filter_down_stringpool=false, bool validate=false) const; - std::shared_ptr timeseries_proto(); - - TimeseriesDescriptor index_descriptor() { - return {timeseries_proto(), index_fields_}; + bool has_index_descriptor() const { + return tsd_.has_value(); } - bool has_index_fields() const { - return static_cast(index_fields_); + TimeseriesDescriptor&& detach_index_descriptor() { + util::check(tsd_.has_value(), "No index descriptor on segment"); + return std::move(*tsd_); } - FieldCollection&& detach_index_fields() { - return std::move(*index_fields_); + void set_timeseries_descriptor(const TimeseriesDescriptor& tsd); + + void reset_timeseries_descriptor(); + + bool has_user_metadata() { + return tsd_.has_value() && !tsd_->proto_is_null() && tsd_->proto().has_user_meta(); } - void set_timeseries_descriptor(TimeseriesDescriptor&& tsd); + const arcticdb::proto::descriptors::UserDefinedMetadata& user_metadata() const { + return tsd_->user_metadata(); + } /// @brief Construct a copy of the segment containing only rows in [start_row; end_row) /// @param start_row Start of the row range 
(inclusive) @@ -800,10 +809,6 @@ class SegmentInMemoryImpl { std::vector> split(size_t rows) const; - StreamId get_index_col_name() const{ - return descriptor().id(); - } - private: ssize_t row_id_ = -1; std::shared_ptr descriptor_ = std::make_shared(); @@ -816,12 +821,11 @@ class SegmentInMemoryImpl { bool allow_sparse_ = false; bool compacted_ = false; util::MagicNum<'M', 'S', 'e', 'g'> magic_; - std::shared_ptr index_fields_; - std::shared_ptr tsd_; + std::optional tsd_; }; namespace { -inline std::shared_ptr allocate_sparse_segment(const StreamId& id, const IndexDescriptor& index) { +inline std::shared_ptr allocate_sparse_segment(const StreamId& id, const IndexDescriptorImpl& index) { return std::make_shared(StreamDescriptor{id, index}, 0, false, true); } diff --git a/cpp/arcticdb/column_store/string_pool.cpp b/cpp/arcticdb/column_store/string_pool.cpp index c16d3bc61b..c96c436e4f 100644 --- a/cpp/arcticdb/column_store/string_pool.cpp +++ b/cpp/arcticdb/column_store/string_pool.cpp @@ -135,6 +135,10 @@ void StringPool::set_allow_sparse(bool) { // Not used } +size_t StringPool::num_blocks() const { + return block_.num_blocks(); +} + OffsetString StringPool::get(std::string_view s, bool deduplicate) { if(deduplicate) { if (auto it = map_.find(s); it != map_.end()) diff --git a/cpp/arcticdb/column_store/string_pool.hpp b/cpp/arcticdb/column_store/string_pool.hpp index 2f4ef6b8a8..2146aea3d0 100644 --- a/cpp/arcticdb/column_store/string_pool.hpp +++ b/cpp/arcticdb/column_store/string_pool.hpp @@ -101,6 +101,10 @@ class StringBlock { uint8_t * pos_data(size_t required_size); + [[nodiscard]] size_t num_blocks() { + return data_.buffer().num_blocks(); + } + StringHead* head_at(position_t pos) { auto data = data_.buffer().ptr_cast(pos, sizeof(StringHead)); return reinterpret_cast(data); @@ -167,6 +171,8 @@ class StringPool { size_t size() const; + [[nodiscard]] size_t num_blocks() const; + py::buffer_info as_buffer_info() const; std::optional get_offset_for_column(std::string_view str, const Column& column); diff --git a/cpp/arcticdb/column_store/test/benchmark_memory_segment.cpp b/cpp/arcticdb/column_store/test/benchmark_memory_segment.cpp index 69613fc011..1a9aa4242a 100644 --- a/cpp/arcticdb/column_store/test/benchmark_memory_segment.cpp +++ b/cpp/arcticdb/column_store/test/benchmark_memory_segment.cpp @@ -34,12 +34,19 @@ std::vector get_random_permutation(size_t num_rows, std::mt19937 g){ SegmentInMemory get_shuffled_segment(const StreamId& id, size_t num_rows, size_t num_columns, std::optional sparsity_percentage = std::nullopt){ // We use a seed to get the same shuffled segment for given arguments. 
std::mt19937 g(0); - auto fields = std::vector(num_columns); + std::vector fields; for (auto i=0u; i field_refs; + field_refs.reserve(fields.size()); + for(const auto& wrapper : fields) { + field_refs.emplace_back(FieldRef{wrapper.type(), wrapper.name()}); + } + auto segment = SegmentInMemory{ - get_test_descriptor(id, fields), + get_test_descriptor(id, field_refs), num_rows, false, sparsity_percentage.has_value() diff --git a/cpp/arcticdb/column_store/test/ingestion_stress_test.cpp b/cpp/arcticdb/column_store/test/ingestion_stress_test.cpp index 5204eafdf2..bb3f646537 100644 --- a/cpp/arcticdb/column_store/test/ingestion_stress_test.cpp +++ b/cpp/arcticdb/column_store/test/ingestion_stress_test.cpp @@ -113,7 +113,7 @@ TEST_F(IngestionStressStore, ScalarIntAppend) { } GTEST_COUT << " 2 done"; - agg.commit(); + agg.finalize(); for(auto &seg : sink.segments_) arcticdb::append_incomplete_segment(test_store_->_test_get_store(), symbol, std::move(seg)); @@ -172,7 +172,6 @@ TEST_F(IngestionStressStore, ScalarIntDynamicSchema) { auto new_descriptor = index.create_stream_descriptor(symbol, columns_second.clone()); // Now write again. - for (timestamp i = 0; i < NumRows; ++i) { agg.start_row(timestamp{i + NumRows})([&](auto &rb) { for (uint64_t j = 1u; j < NumColumnsSecondWrite; ++j) @@ -199,12 +198,9 @@ TEST_F(IngestionStressStore, ScalarIntDynamicSchema) { }); } - - agg.commit(); - - + agg.finalize(); for(auto &seg : sink.segments_) { - log::version().info("Writing to symbol: {}", symbol); + ARCTICDB_DEBUG(log::version(), "Writing to symbol: {}", symbol); arcticdb::append_incomplete_segment(test_store_->_test_get_store(), symbol, std::move(seg)); } @@ -252,10 +248,10 @@ TEST_F(IngestionStressStore, DynamicSchemaWithStrings) { timer.stop_timer(timer_name); GTEST_COUT << " 1 done"; - agg.commit(); + agg.finalize(); for(auto &seg : sink.segments_) { - log::version().info("Writing to symbol: {}", symbol); + ARCTICDB_DEBUG(log::version(), "Writing to symbol: {}", symbol); arcticdb::append_incomplete_segment(test_store_->_test_get_store(), symbol, std::move(seg)); } @@ -268,5 +264,5 @@ TEST_F(IngestionStressStore, DynamicSchemaWithStrings) { ReadQuery read_query; read_query.row_filter = universal_range(); auto read_result = test_store_->read_dataframe_version(symbol, VersionQuery{}, read_query, read_options); - log::version().info("result columns: {}", read_result.frame_data.names()); + ARCTICDB_DEBUG(log::version(), "result columns: {}", read_result.frame_data.names()); } diff --git a/cpp/arcticdb/column_store/test/rapidcheck_chunked_buffer.cpp b/cpp/arcticdb/column_store/test/rapidcheck_chunked_buffer.cpp index 0567f9102f..9767b3b7a2 100644 --- a/cpp/arcticdb/column_store/test/rapidcheck_chunked_buffer.cpp +++ b/cpp/arcticdb/column_store/test/rapidcheck_chunked_buffer.cpp @@ -67,7 +67,7 @@ RC_GTEST_PROP(ChunkedBuffer, SplitBuffer, (const std::vector &input, ui auto right = input[i]; auto& buf_obj = *buf; if(buf_obj.cast(where) != input[i]) - log::version().info("Mismatch at {} ({}), {} != {}", i, where, left, right); + ARCTICDB_DEBUG(log::version(), "Mismatch at {} ({}), {} != {}", i, where, left, right); RC_ASSERT(left == right); if(((i + 1) % split_size) == 0) ++buf; diff --git a/cpp/arcticdb/column_store/test/test_column.cpp b/cpp/arcticdb/column_store/test/test_column.cpp index bd652428a5..ccb27b5c78 100644 --- a/cpp/arcticdb/column_store/test/test_column.cpp +++ b/cpp/arcticdb/column_store/test/test_column.cpp @@ -53,7 +53,7 @@ void test_column_type(size_t num_values = 20, size_t num_tests = 50) 
{ ASSERT_EQ(*column.ptr_cast(index, sizeof(raw_type)), start); if constexpr (dimensions == Dimension::Dim0) { - ASSERT_EQ(column.index_of(start).value(), index); + ASSERT_EQ(column.search_unsorted(start).value(), index); auto s = column.scalar_at(j); ASSERT_FALSE(s == std::nullopt); ASSERT_EQ(s.value(), start); diff --git a/cpp/arcticdb/column_store/test/test_index_filtering.cpp b/cpp/arcticdb/column_store/test/test_index_filtering.cpp index a2469d23e7..4dc1e5bf16 100644 --- a/cpp/arcticdb/column_store/test/test_index_filtering.cpp +++ b/cpp/arcticdb/column_store/test/test_index_filtering.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace arcticdb { using namespace arcticdb::pipelines; @@ -20,7 +21,7 @@ using namespace arcticdb::pipelines; std::pair> get_sample_slice_and_key(StreamId stream_id, VersionId version_id, size_t col_slices = 1, size_t row_slices = 10) { StreamDescriptor stream_desc{ stream_id, - IndexDescriptor{1, IndexDescriptor::TIMESTAMP} + IndexDescriptorImpl{1, IndexDescriptorImpl::Type::TIMESTAMP} }; stream_desc.add_field(scalar_field(DataType::NANOSECONDS_UTC64, "time")); @@ -77,11 +78,11 @@ TEST(IndexFilter, Static) { const auto stream_id = StreamId{"thing"}; const auto version_id = VersionId{0}; - auto [metadata, slice_and_keys] = get_sample_slice_and_key(stream_id, version_id); + auto [tsd, slice_and_keys] = get_sample_slice_and_key(stream_id, version_id); const IndexPartialKey& partial_key{stream_id, version_id}; auto mock_store = std::make_shared(); - index::IndexWriter writer(mock_store, partial_key, std::move(metadata)); + index::IndexWriter writer(mock_store, partial_key, std::move(tsd)); for (auto &slice_and_key : slice_and_keys) { writer.add(slice_and_key.key(), slice_and_key.slice()); @@ -89,6 +90,7 @@ TEST(IndexFilter, Static) { auto key_fut = writer.commit(); auto key = std::move(key_fut).get(); auto seg = mock_store->read(key, storage::ReadKeyOpts{}).get(); + pipelines::index::IndexSegmentReader isr{std::move(seg.second)}; auto pipeline_context = std::make_shared(StreamDescriptor{isr.tsd().as_stream_descriptor()}); diff --git a/cpp/arcticdb/entity/descriptor_item.hpp b/cpp/arcticdb/entity/descriptor_item.hpp index 38aa4b2a5d..6defea51f3 100644 --- a/cpp/arcticdb/entity/descriptor_item.hpp +++ b/cpp/arcticdb/entity/descriptor_item.hpp @@ -15,10 +15,11 @@ namespace arcticdb { struct DescriptorItem { DescriptorItem( - entity::AtomKey &&key, + entity::AtomKey &&key, std::optional start_index, std::optional end_index, - std::optional&& timeseries_descriptor) : + std::optional timeseries_descriptor) : + key_(std::move(key)), start_index_(start_index), end_index_(end_index), @@ -30,13 +31,13 @@ struct DescriptorItem { entity::AtomKey key_; std::optional start_index_; std::optional end_index_; - std::optional timeseries_descriptor_; - + std::optional timeseries_descriptor_; + std::string symbol() const { return fmt::format("{}", key_.id()); } uint64_t version() const { return key_.version_id(); } timestamp creation_ts() const { return key_.creation_ts(); } std::optional start_index() const { return start_index_; } std::optional end_index() const { return end_index_; } - std::optional timeseries_descriptor() const { return timeseries_descriptor_; } + std::optional timeseries_descriptor() const { return timeseries_descriptor_; } }; } \ No newline at end of file diff --git a/cpp/arcticdb/entity/field_collection.hpp b/cpp/arcticdb/entity/field_collection.hpp index 4249f5de56..0fbf367fd6 100644 --- a/cpp/arcticdb/entity/field_collection.hpp +++ 
b/cpp/arcticdb/entity/field_collection.hpp @@ -179,6 +179,10 @@ class FieldCollection { return {&buffer_.buffer(), &shapes_.buffer(), type_, nullptr}; } + size_t num_blocks() const { + return buffer_.buffer().num_blocks(); + } + const Field& operator[](size_t pos) const { return at(pos); } @@ -202,7 +206,6 @@ FieldCollection fields_from_range(const RangeType& fields) { return output; } - } //namespace arcticdb diff --git a/cpp/arcticdb/entity/field_collection_proto.cpp b/cpp/arcticdb/entity/field_collection_proto.cpp index 1e4f8d494a..2cf066b47c 100644 --- a/cpp/arcticdb/entity/field_collection_proto.cpp +++ b/cpp/arcticdb/entity/field_collection_proto.cpp @@ -6,10 +6,10 @@ */ #include +#include namespace arcticdb { - FieldCollection fields_from_proto(const arcticdb::proto::descriptors::StreamDescriptor& desc) { FieldCollection output; for (const auto& field : desc.fields()) @@ -18,7 +18,6 @@ FieldCollection fields_from_proto(const arcticdb::proto::descriptors::StreamDesc return output; } - void proto_from_fields(const FieldCollection& fields, arcticdb::proto::descriptors::StreamDescriptor& desc) { for (const auto& field : fields) { auto new_field = desc.add_fields(); diff --git a/cpp/arcticdb/entity/field_collection_proto.hpp b/cpp/arcticdb/entity/field_collection_proto.hpp index 9f3554669f..fc71055437 100644 --- a/cpp/arcticdb/entity/field_collection_proto.hpp +++ b/cpp/arcticdb/entity/field_collection_proto.hpp @@ -8,12 +8,12 @@ #pragma once #include -#include +#include namespace arcticdb { FieldCollection fields_from_proto(const arcticdb::proto::descriptors::StreamDescriptor& desc); -void proto_from_fields(const FieldCollection& fields, arcticdb::proto::descriptors::StreamDescriptor& desc); +void proto_from_fields(const FieldCollection& fields, arcticdb::proto::descriptors::StreamDescriptor& desc); } //namespace arcticdb diff --git a/cpp/arcticdb/entity/merge_descriptors.cpp b/cpp/arcticdb/entity/merge_descriptors.cpp index a41a8cfe18..a8e706b4a7 100644 --- a/cpp/arcticdb/entity/merge_descriptors.cpp +++ b/cpp/arcticdb/entity/merge_descriptors.cpp @@ -15,7 +15,7 @@ StreamDescriptor merge_descriptors( const StreamDescriptor &original, const std::vector> &entries, const std::unordered_set &filtered_set, - const std::optional& default_index) { + const std::optional& default_index) { using namespace arcticdb::stream; std::vector merged_fields; std::unordered_map merged_fields_map; @@ -61,7 +61,7 @@ StreamDescriptor merge_descriptors( if(auto existing = merged_fields_map.find(field.name()); existing != merged_fields_map.end()) { auto existing_type_desc = existing->second; if(existing_type_desc != type_desc) { - log::version().info( + ARCTICDB_DEBUG(log::version(), "Merging different type descriptors for column: {}\n" "Existing type descriptor : {}\n" "New type descriptor : {}", @@ -92,7 +92,7 @@ StreamDescriptor merge_descriptors( const StreamDescriptor &original, const std::vector> &entries, const std::vector &filtered_columns, - const std::optional& default_index) { + const std::optional& default_index) { std::unordered_set filtered_set(filtered_columns.begin(), filtered_columns.end()); return merge_descriptors(original, entries, filtered_set, default_index); } @@ -101,7 +101,7 @@ StreamDescriptor merge_descriptors( const StreamDescriptor &original, const std::vector &entries, const std::vector &filtered_columns, - const std::optional& default_index) { + const std::optional& default_index) { std::vector> fields; for (const auto &entry : entries) { 
fields.push_back(std::make_shared(entry.slice_.desc()->fields().clone())); @@ -114,7 +114,7 @@ StreamDescriptor merge_descriptors( const StreamDescriptor &original, const std::vector &entries, const std::unordered_set &filtered_set, - const std::optional& default_index) { + const std::optional& default_index) { std::vector> fields; for (const auto &entry : entries) { fields.push_back(std::make_shared(entry.segment(store).descriptor().fields().clone())); diff --git a/cpp/arcticdb/entity/merge_descriptors.hpp b/cpp/arcticdb/entity/merge_descriptors.hpp index be491c8f50..4058748499 100644 --- a/cpp/arcticdb/entity/merge_descriptors.hpp +++ b/cpp/arcticdb/entity/merge_descriptors.hpp @@ -13,24 +13,24 @@ StreamDescriptor merge_descriptors( const StreamDescriptor &original, const std::vector> &entries, const std::unordered_set &filtered_set, - const std::optional& default_index); + const std::optional& default_index); entity::StreamDescriptor merge_descriptors( const entity::StreamDescriptor &original, const std::vector> &entries, const std::vector &filtered_columns, - const std::optional& default_index = std::nullopt); + const std::optional& default_index = std::nullopt); entity::StreamDescriptor merge_descriptors( const entity::StreamDescriptor &original, const std::vector &entries, const std::vector &filtered_columns, - const std::optional& default_index = std::nullopt); + const std::optional& default_index = std::nullopt); entity::StreamDescriptor merge_descriptors( const std::shared_ptr& store, const entity::StreamDescriptor &original, const std::vector &entries, const std::unordered_set &filtered_set, - const std::optional& default_index = std::nullopt); + const std::optional& default_index = std::nullopt); } \ No newline at end of file diff --git a/cpp/arcticdb/entity/metrics.cpp b/cpp/arcticdb/entity/metrics.cpp index 0930a49fe0..d165c1c8f9 100644 --- a/cpp/arcticdb/entity/metrics.cpp +++ b/cpp/arcticdb/entity/metrics.cpp @@ -7,8 +7,6 @@ #include #include -#include -#include #include #ifdef _WIN32 @@ -140,94 +138,82 @@ namespace arcticdb { } } - // update new cardinal counter - void PrometheusInstance::incrementCounter(const std::string& name, const std::map& labels) { - if (registry_.use_count() == 0) - return; - - if (map_counter_.count(name) != 0) { - // Add returns Counter& - map_counter_[name]->Add(labels).Increment(); - } else { - arcticdb::log::version().warn("Unregistered counter metric {}", name); - } - } - void PrometheusInstance::incrementCounter(const std::string& name, double value, const std::map& labels) { - if (registry_.use_count() == 0) - return; +void PrometheusInstance::incrementCounter(const std::string& name, double value, const std::map& labels) { + if (registry_.use_count() == 0) + return; - if (map_counter_.count(name) != 0) { - // Add returns Counter& - map_counter_[name]->Add(labels).Increment(value); - } else { - arcticdb::log::version().warn("Unregistered counter metric {}", name); - } + if (map_counter_.count(name) != 0) { + // Add returns Counter& + map_counter_[name]->Add(labels).Increment(value); + } else { + arcticdb::log::version().warn("Unregistered counter metric {}", name); } - void PrometheusInstance::setGauge(const std::string& name, double value, const std::map& labels) { - if (registry_.use_count() == 0) - return; - - if (map_gauge_.count(name) != 0) { - map_gauge_[name]->Add(labels).Set(value); - } else { - arcticdb::log::version().warn("Unregistered gauge metric {}", name); - } +} +void PrometheusInstance::setGauge(const std::string& name, 
double value, const std::map& labels) { + if (registry_.use_count() == 0) + return; + + if (map_gauge_.count(name) != 0) { + map_gauge_[name]->Add(labels).Set(value); + } else { + arcticdb::log::version().warn("Unregistered gauge metric {}", name); } - void PrometheusInstance::setGaugeCurrentTime(const std::string& name, const std::map& labels) { - if (registry_.use_count() == 0) - return; - - if (map_gauge_.count(name) != 0) { - map_gauge_[name]->Add(labels).SetToCurrentTime(); - } else { - arcticdb::log::version().warn("Unregistered gauge metric {}", name); - } +} +void PrometheusInstance::setGaugeCurrentTime(const std::string& name, const std::map& labels) { + if (registry_.use_count() == 0) + return; + + if (map_gauge_.count(name) != 0) { + map_gauge_[name]->Add(labels).SetToCurrentTime(); + } else { + arcticdb::log::version().warn("Unregistered gauge metric {}", name); } - void PrometheusInstance::observeHistogram(const std::string& name, double value, const std::map& labels) { - if (registry_.use_count() == 0) - return; - if (auto it=map_histogram_.find(name); it != map_histogram_.end()) { - it->second.histogram->Add(labels, it->second.buckets_list).Observe(value); - } else { - arcticdb::log::version().warn("Unregistered Histogram metric {}", name); - } +} +void PrometheusInstance::observeHistogram(const std::string& name, double value, const std::map& labels) { + if (registry_.use_count() == 0) + return; + if (auto it=map_histogram_.find(name); it != map_histogram_.end()) { + it->second.histogram->Add(labels, it->second.buckets_list).Observe(value); + } else { + arcticdb::log::version().warn("Unregistered Histogram metric {}", name); } - void PrometheusInstance::DeleteHistogram(const std::string& name, const std::map& labels) { - if (registry_.use_count() == 0) - return; - - if (auto it=map_histogram_.find(name); it != map_histogram_.end()) { - it->second.histogram->Remove(&it->second.histogram->Add(labels, it->second.buckets_list)); - } else { - arcticdb::log::version().warn("Unregistered Histogram metric {}", name); - } +} +void PrometheusInstance::DeleteHistogram(const std::string& name, const std::map& labels) { + if (registry_.use_count() == 0) + return; + + if (auto it=map_histogram_.find(name); it != map_histogram_.end()) { + it->second.histogram->Remove(&it->second.histogram->Add(labels, it->second.buckets_list)); + } else { + arcticdb::log::version().warn("Unregistered Histogram metric {}", name); } - void PrometheusInstance::observeSummary(const std::string& name, double value, const std::map& labels) { - if (registry_.use_count() == 0) - return; - - if (map_summary_.count(name) != 0) { - //TODO DMK quantiles - map_summary_[name]->Add(labels,Summary::Quantiles{ {0.1, 0.05}, {0.2, 0.05}, {0.3, 0.05}, {0.4, 0.05}, {0.5, 0.05}, {0.6, 0.05}, {0.7, 0.05}, {0.8, 0.05}, {0.9, 0.05}, {0.9, 0.05}, {1.0, 0.05}}, std::chrono::seconds{SUMMARY_MAX_AGE}, SUMMARY_AGE_BUCKETS).Observe(value); - } else { - arcticdb::log::version().warn("Unregistered summary metric {}", name); - } +} +void PrometheusInstance::observeSummary(const std::string& name, double value, const std::map& labels) { + if (registry_.use_count() == 0) + return; + + if (map_summary_.count(name) != 0) { + //TODO DMK quantiles + map_summary_[name]->Add(labels,Summary::Quantiles{ {0.1, 0.05}, {0.2, 0.05}, {0.3, 0.05}, {0.4, 0.05}, {0.5, 0.05}, {0.6, 0.05}, {0.7, 0.05}, {0.8, 0.05}, {0.9, 0.05}, {0.9, 0.05}, {1.0, 0.05}}, std::chrono::seconds{SUMMARY_MAX_AGE}, SUMMARY_AGE_BUCKETS).Observe(value); + } else { + 
arcticdb::log::version().warn("Unregistered summary metric {}", name); } - std::string PrometheusInstance::getHostName() { - char hostname[1024]; - if (::gethostname(hostname, sizeof(hostname))) { - return {}; - } - return hostname; +} +std::string PrometheusInstance::getHostName() { + char hostname[1024]; + if (::gethostname(hostname, sizeof(hostname))) { + return {}; } + return hostname; +} - int PrometheusInstance::push() { - if (gateway_.use_count() > 0) { - return gateway_->PushAdd(); - } else { - return 0; - } +int PrometheusInstance::push() { + if (gateway_.use_count() > 0) { + return gateway_->PushAdd(); + } else { + return 0; } +} } // Namespace arcticdb diff --git a/cpp/arcticdb/entity/protobuf_mappings.cpp b/cpp/arcticdb/entity/protobuf_mappings.cpp new file mode 100644 index 0000000000..3b48af598f --- /dev/null +++ b/cpp/arcticdb/entity/protobuf_mappings.cpp @@ -0,0 +1,132 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#include +#include +#include +#include +#include +#include + +namespace arcticdb { + +using namespace arcticdb::entity; + + +inline arcticdb::proto::descriptors::SortedValue sorted_value_to_proto(SortedValue sorted) { + switch (sorted) { + case SortedValue::UNSORTED: + return arcticdb::proto::descriptors::SortedValue::UNSORTED; + case SortedValue::DESCENDING: + return arcticdb::proto::descriptors::SortedValue::DESCENDING; + case SortedValue::ASCENDING: + return arcticdb::proto::descriptors::SortedValue::ASCENDING; + default: + return arcticdb::proto::descriptors::SortedValue::UNKNOWN; + } +} + +inline SortedValue sorted_value_from_proto(arcticdb::proto::descriptors::SortedValue sorted_proto) { + switch (sorted_proto) { + case arcticdb::proto::descriptors::SortedValue::UNSORTED: + return SortedValue::UNSORTED; + case arcticdb::proto::descriptors::SortedValue::DESCENDING: + return SortedValue::DESCENDING; + case arcticdb::proto::descriptors::SortedValue::ASCENDING: + return SortedValue::ASCENDING; + default: + return SortedValue::UNKNOWN; + } +} + +// The type enum needs to be kept in sync with the protobuf one, which should not be changed +[[nodiscard]] arcticdb::proto::descriptors::IndexDescriptor index_descriptor_to_proto(const IndexDescriptorImpl& index_descriptor) { + arcticdb::proto::descriptors::IndexDescriptor proto; + proto.set_kind(static_cast(index_descriptor.type_)); + proto.set_field_count(index_descriptor.field_count_); + return proto; +} + +[[nodiscard]] IndexDescriptorImpl index_descriptor_from_proto(const arcticdb::proto::descriptors::IndexDescriptor& index_descriptor) { + IndexDescriptorImpl output; + output.set_type(static_cast(index_descriptor.kind())); + output.set_field_count(index_descriptor.field_count()); + return output; +} + +arcticdb::proto::descriptors::AtomKey key_to_proto(const AtomKey &key) { + arcticdb::proto::descriptors::AtomKey output; + util::variant_match(key.id(), + [&](const StringId &id) { output.set_string_id(id); }, + [&](const NumericId &id) { output.set_numeric_id(id); }); + + output.set_version_id(key.version_id()); + output.set_creation_ts(key.creation_ts()); + output.set_content_hash(key.content_hash()); + + util::check(std::holds_alternative(key.start_index()) || !std::holds_alternative(key.end_index()), + "Start and end 
index mismatch"); + + util::variant_match(key.start_index(), + [&](const StringId &id) { output.set_string_start(id); }, + [&](const NumericId &id) { output.set_numeric_start(id); }); + + util::variant_match(key.end_index(), + [&](const StringId &id) { output.set_string_end(id); }, + [&](const NumericId &id) { output.set_numeric_end(id); }); + + output.set_key_type(arcticdb::proto::descriptors::KeyType (int(key.type()))); + return output; +} + +AtomKey key_from_proto(const arcticdb::proto::descriptors::AtomKey& input) { + StreamId stream_id = input.id_case() == input.kNumericId ? StreamId(input.numeric_id()) : StreamId(input.string_id()); + IndexValue index_start = input.index_start_case() == input.kNumericStart ? IndexValue(input.numeric_start()) : IndexValue(input.string_start()); + IndexValue index_end = input.index_end_case() == input.kNumericEnd ? IndexValue(input.numeric_end() ): IndexValue(input.string_end()); + + return atom_key_builder() + .version_id(input.version_id()) + .creation_ts(timestamp(input.creation_ts())) + .content_hash(input.content_hash()) + .start_index(index_start) + .end_index(index_end) + .build(stream_id, KeyType(input.key_type())); +} + +void copy_stream_descriptor_to_proto(const StreamDescriptor& desc, arcticdb::proto::descriptors::StreamDescriptor& proto) { + proto.set_in_bytes(desc.uncompressed_bytes()); + proto.set_out_bytes(desc.compressed_bytes()); + proto.set_sorted(arcticdb::proto::descriptors::SortedValue(desc.sorted())); + // The index descriptor enum must be kept in sync with the protobuf + *proto.mutable_index() = index_descriptor_to_proto(desc.index()); + util::variant_match(desc.id(), + [&proto] (const StringId& str) { proto.set_str_id(str); }, + [&proto] (const NumericId& n) { proto.set_num_id(n); }); + + proto.mutable_fields()->Clear(); + for(const auto& field : desc.fields()) { + auto new_field = proto.mutable_fields()->Add(); + new_field->set_name(std::string(field.name())); + new_field->mutable_type_desc()->set_dimension(static_cast(field.type().dimension())); + set_data_type(field.type().data_type(), *new_field->mutable_type_desc()); + } +} + +arcticdb::proto::descriptors::TimeSeriesDescriptor copy_time_series_descriptor_to_proto(const TimeseriesDescriptor& tsd) { + arcticdb::proto::descriptors::TimeSeriesDescriptor output; + + output.set_total_rows(tsd.total_rows()); + if(tsd.column_groups()) + output.mutable_column_groups()->set_enabled(true); + + exchange_timeseries_proto(tsd.proto(), output); + + auto index_stream_descriptor = tsd.as_stream_descriptor(); + copy_stream_descriptor_to_proto(index_stream_descriptor, *output.mutable_stream_descriptor()); + return output; +} + +} //namespace arcticdb diff --git a/cpp/arcticdb/entity/protobuf_mappings.hpp b/cpp/arcticdb/entity/protobuf_mappings.hpp index f5d969889d..4f7ceee149 100644 --- a/cpp/arcticdb/entity/protobuf_mappings.hpp +++ b/cpp/arcticdb/entity/protobuf_mappings.hpp @@ -9,45 +9,43 @@ #include #include -#include - +#include namespace arcticdb { -using namespace arcticdb::entity; - -inline arcticdb::proto::descriptors::AtomKey encode_key(const AtomKey &key) { - arcticdb::proto::descriptors::AtomKey output; - util::variant_match(key.id(), - [&](const StringId &id) { output.set_string_id(id); }, - [&](const NumericId &id) { output.set_numeric_id(id); }); - output.set_version_id(key.version_id()); - output.set_creation_ts(key.creation_ts()); - output.set_content_hash(key.content_hash()); - - util::variant_match(key.start_index(), - [&](const StringId &id) { output.set_string_start(id); 
}, - [&](const NumericId &id) { output.set_numeric_start(id); }); - util::variant_match(key.end_index(), - [&](const StringId &id) { output.set_string_end(id); }, - [&](const NumericId &id) { output.set_numeric_end(id); }); - - output.set_key_type(arcticdb::proto::descriptors::KeyType (int(key.type()))); - return output; -} +namespace entity { +struct StreamDescriptor; +} //namespace arcticdb::entity + +struct TimeseriesDescriptor; + +arcticdb::proto::descriptors::AtomKey key_to_proto(const entity::AtomKey &key); + +entity::AtomKey key_from_proto(const arcticdb::proto::descriptors::AtomKey& input); + +void copy_stream_descriptor_to_proto(const entity::StreamDescriptor& desc, arcticdb::proto::descriptors::StreamDescriptor& proto); + +arcticdb::proto::descriptors::TimeSeriesDescriptor copy_time_series_descriptor_to_proto(const TimeseriesDescriptor& tsd); + +inline void set_id(arcticdb::proto::descriptors::StreamDescriptor& pb_desc, StreamId id); + +[[nodiscard]] arcticdb::proto::descriptors::IndexDescriptor index_descriptor_to_proto(const entity::IndexDescriptorImpl& index_descriptor); + +[[nodiscard]] entity::IndexDescriptorImpl index_descriptor_from_proto(const arcticdb::proto::descriptors::IndexDescriptor& index_descriptor); + +template +void exchange_timeseries_proto(const SourceType& source, DestType& destination) { + if (source.has_normalization()) + *destination.mutable_normalization() = source.normalization(); + + if (source.has_user_meta()) + *destination.mutable_user_meta() = source.user_meta(); + + if (source.has_next_key()) + *destination.mutable_next_key() = source.next_key(); -inline AtomKey decode_key(const arcticdb::proto::descriptors::AtomKey& input) { - StreamId stream_id = input.id_case() == input.kNumericId ? StreamId(input.numeric_id()) : StreamId(input.string_id()); - IndexValue index_start = input.index_start_case() == input.kNumericStart ? IndexValue(input.numeric_start()) : IndexValue(input.string_start()); - IndexValue index_end = input.index_end_case() == input.kNumericEnd ? 
IndexValue(input.numeric_end() ): IndexValue(input.string_end()); - - return atom_key_builder() - .version_id(input.version_id()) - .creation_ts(timestamp(input.creation_ts())) - .content_hash(input.content_hash()) - .start_index(index_start) - .end_index(index_end) - .build(stream_id, KeyType(input.key_type())); + if (source.has_multi_key_meta()) + *destination.mutable_multi_key_meta() = source.multi_key_meta(); } } //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/entity/protobufs.hpp b/cpp/arcticdb/entity/protobufs.hpp index 91e1159fec..60fc2ed415 100644 --- a/cpp/arcticdb/entity/protobufs.hpp +++ b/cpp/arcticdb/entity/protobufs.hpp @@ -21,6 +21,7 @@ #include namespace arcticdb::proto { + namespace encoding = arcticc::pb2::encoding_pb2; namespace storage = arcticc::pb2::storage_pb2; namespace s3_storage = arcticc::pb2::s3_storage_pb2; @@ -34,4 +35,4 @@ namespace arcticdb::proto { namespace nfs_backed_storage = arcticc::pb2::nfs_backed_storage_pb2; namespace utils = arcticc::pb2::utils_pb2; -} //namespace arcticdb +} //namespace arcticdb::proto diff --git a/cpp/arcticdb/entity/serialized_key.hpp b/cpp/arcticdb/entity/serialized_key.hpp index 43271bed6c..dca06a5ca9 100644 --- a/cpp/arcticdb/entity/serialized_key.hpp +++ b/cpp/arcticdb/entity/serialized_key.hpp @@ -46,12 +46,12 @@ inline VariantId variant_id_from_token(std::string_view strv, VariantType varian } } -inline VariantType variant_type_from_index_type(IndexDescriptor::Type index_type) { +inline VariantType variant_type_from_index_type(IndexDescriptorImpl::Type index_type) { switch (index_type) { - case IndexDescriptor::TIMESTAMP: - case IndexDescriptor::ROWCOUNT: + case IndexDescriptorImpl::Type::TIMESTAMP: + case IndexDescriptorImpl::Type::ROWCOUNT: return VariantType::NUMERIC_TYPE; - case IndexDescriptor::STRING: + case IndexDescriptorImpl::Type::STRING: return VariantType::STRING_TYPE; default: return VariantType::UNKNOWN_TYPE; @@ -79,7 +79,7 @@ inline AtomKey key_from_old_style_bytes(const uint8_t *data, size_t size, KeyTyp auto cursor = std::string_view(reinterpret_cast(data), size); auto arr = util::split_to_array(cursor, OldKeyDelimiter); auto id_variant_type = variant_type_from_key_type(key_type); - auto index_type = IndexDescriptor::Type(util::num_from_strv(arr[int(OldKeyField::index_type)])); + auto index_type = IndexDescriptorImpl::Type(util::num_from_strv(arr[int(OldKeyField::index_type)])); auto index_variant_type = variant_type_from_index_type(index_type); return atom_key_from_tokens(arr, id_variant_type, index_variant_type, key_type); } @@ -162,8 +162,8 @@ inline size_t max_id_size(const VariantId& id) { } inline size_t max_index_size(const IndexDescriptor& index) { - switch(index.type()) { - case IndexDescriptor::STRING: + switch(index.type_) { + case IndexDescriptor::Type::STRING: return max_string_size(); default: return sizeof(uint64_t); @@ -178,7 +178,7 @@ struct KeyDescriptor { format_type(format_type) { } - KeyDescriptor(const StringId& id, IndexDescriptor::Type index_type, FormatType format_type) : + KeyDescriptor(const StringId& id, IndexDescriptorImpl::Type index_type, FormatType format_type) : identifier(SerializedKeyIdentifier), id_type(variant_type_from_id(id)), index_type(to_type_char(index_type)), @@ -187,13 +187,13 @@ struct KeyDescriptor { KeyDescriptor(const RefKey &key, FormatType format_type) : identifier(SerializedKeyIdentifier), id_type(variant_type_from_id(key.id())), - index_type(to_type_char(IndexDescriptor::UNKNOWN)), + 
index_type(to_type_char(IndexDescriptorImpl::Type::UNKNOWN)), format_type(format_type) { } char identifier; VariantType id_type; - IndexDescriptor::TypeChar index_type; + IndexDescriptorImpl::TypeChar index_type; FormatType format_type; }; diff --git a/cpp/arcticdb/entity/stream_descriptor.hpp b/cpp/arcticdb/entity/stream_descriptor.hpp index 917e6751b9..980bed75d4 100644 --- a/cpp/arcticdb/entity/stream_descriptor.hpp +++ b/cpp/arcticdb/entity/stream_descriptor.hpp @@ -7,111 +7,140 @@ #pragma once - -#include -#include -#include +#include +#include "arcticdb/storage/memory_layout.hpp" #include #include +#include namespace arcticdb::entity { -struct StreamDescriptor { - using Proto = arcticdb::proto::descriptors::StreamDescriptor; +struct SegmentDescriptorImpl : public SegmentDescriptor { + SegmentDescriptorImpl() = default; + + ARCTICDB_MOVE_COPY_DEFAULT(SegmentDescriptorImpl) + + [[nodiscard]] const IndexDescriptorImpl& index() const { + return static_cast(index_); + } + + IndexDescriptorImpl& index() { + return static_cast(index_); + } + + [[nodiscard]] SegmentDescriptorImpl clone() const { + return *this; + } +}; + +inline bool operator==(const SegmentDescriptorImpl& l, const SegmentDescriptorImpl& r) { + return l.sorted_ == r.sorted_ && + l.index() == r.index() && + l.compressed_bytes_ == r.compressed_bytes_ && + l.uncompressed_bytes_ == r.uncompressed_bytes_; +} + +inline bool operator!=(const SegmentDescriptorImpl& l, const SegmentDescriptorImpl& r) { + return !(l == r); +} - std::shared_ptr data_ = std::make_shared(); +struct StreamDescriptor { + std::shared_ptr segment_desc_ = std::make_shared(); std::shared_ptr fields_ = std::make_shared(); + StreamId stream_id_; StreamDescriptor() = default; ~StreamDescriptor() = default; - [[nodiscard]] Proto copy_to_proto() const { - Proto proto; - proto.CopyFrom(*data_); - proto.mutable_fields()->Clear(); - for(const auto& field : *fields_) { - auto new_field = proto.mutable_fields()->Add(); - new_field->set_name(std::string(field.name())); - new_field->mutable_type_desc()->set_dimension(static_cast(field.type().dimension())); - set_data_type(field.type().data_type(), *new_field->mutable_type_desc()); - } - return proto; + StreamDescriptor(std::shared_ptr data, std::shared_ptr fields) : + segment_desc_(std::move(data)), + fields_(std::move(fields)) { } - void set_id(const StreamId& id) { - util::variant_match(id, - [this] (const StringId& str) { data_->set_str_id(str); }, - [this] (const NumericId& n) { data_->set_num_id(n); }); + StreamDescriptor(std::shared_ptr data, std::shared_ptr fields, StreamId stream_id) : + segment_desc_(std::move(data)), + fields_(std::move(fields)), + stream_id_(std::move(stream_id)) { + } + + [[nodiscard]] const SegmentDescriptorImpl& data() const { + return *segment_desc_; } - static StreamId id_from_proto(const Proto& proto) { - if(proto.id_case() == arcticdb::proto::descriptors::StreamDescriptor::kNumId) - return NumericId(proto.num_id()); - else - return proto.str_id(); + void set_id(const StreamId& id) { + stream_id_ = id; } [[nodiscard]] StreamId id() const { - return id_from_proto(*data_); + return stream_id_; + } + + [[nodiscard]] uint64_t uncompressed_bytes() const { + return segment_desc_->uncompressed_bytes_; } - [[nodiscard]] IndexDescriptor index() const { - return IndexDescriptor(data_->index()); + [[nodiscard]] uint64_t compressed_bytes() const { + return segment_desc_->compressed_bytes_; + } + + [[nodiscard]] SortedValue sorted() const { + return segment_desc_->sorted_; + } + + [[nodiscard]] 
IndexDescriptorImpl index() const { + return static_cast(segment_desc_->index_); } void set_sorted(SortedValue sorted) { - data_->set_sorted(sorted_value_to_proto(sorted)); + segment_desc_->sorted_ = sorted; } - SortedValue get_sorted() const { - return sorted_value_from_proto(data_->sorted()); + void set_index(const IndexDescriptorImpl& idx) { + segment_desc_->index_ = idx; } - void set_index(const IndexDescriptor& idx) { - data_->mutable_index()->CopyFrom(idx.data_); + IndexDescriptorImpl& index() { + return static_cast(segment_desc_->index_); } - void set_index_type(const IndexDescriptor::Type type) { - data_->mutable_index()->set_kind(type); + void set_index_type(const IndexDescriptorImpl::Type type) { + index().set_type(type); } void set_index_field_count(size_t size) { - data_->mutable_index()->set_field_count(size); + index().set_field_count(size); + } + + void set_row_count(size_t row_count) { + segment_desc_->row_count_ = row_count; + } + + size_t row_count() const { + return segment_desc_->row_count_; } explicit StreamDescriptor(const StreamId& id) { set_id(id); + set_index({0, IndexDescriptor::Type::ROWCOUNT}); } void add_scalar_field(DataType data_type, std::string_view name) { fields_->add_field(TypeDescriptor{data_type, Dimension::Dim0}, name); } - StreamDescriptor(const StreamId& id, const IndexDescriptor &idx, std::shared_ptr fields) { + StreamDescriptor(const StreamId& id, const IndexDescriptorImpl &idx, std::shared_ptr fields) { set_id(id); set_index(idx); util::check(static_cast(fields), "Creating field collection with null pointer"); fields_ = std::move(fields); } - StreamDescriptor(std::shared_ptr data, std::shared_ptr fields) : - data_(std::move(data)), - fields_(std::move(fields)) { - util::check(static_cast(data_), "Creating stream descriptor with null protobuf pointer"); - util::check(static_cast(fields_), "Creating stream descriptor with null fields pointer"); - } - - - StreamDescriptor(const StreamId& id, const IndexDescriptor &idx) { + StreamDescriptor(const StreamId& id, const IndexDescriptorImpl &idx) { set_id(id); set_index(idx); } - StreamDescriptor(std::shared_ptr data) : - data_(std::move(data)) { - } - StreamDescriptor(const StreamDescriptor& other) = default; StreamDescriptor& operator=(const StreamDescriptor& other) = default; @@ -121,11 +150,12 @@ struct StreamDescriptor { if(&left == &right) return; - swap(left.data_, right.data_); + swap(left.stream_id_, right.stream_id_); + swap(left.segment_desc_, right.segment_desc_); swap(left.fields_, right.fields_); } - StreamDescriptor& operator=(StreamDescriptor&& other) { + StreamDescriptor& operator=(StreamDescriptor&& other) noexcept { swap(*this, other); return *this; } @@ -136,9 +166,7 @@ struct StreamDescriptor { } [[nodiscard]] StreamDescriptor clone() const { - Proto proto; - proto.CopyFrom(*data_); - return StreamDescriptor{std::make_shared(std::move(proto)), std::make_shared(fields_->clone())}; + return StreamDescriptor{std::make_shared(segment_desc_->clone()), std::make_shared(fields_->clone()), stream_id_}; }; [[nodiscard]] const FieldCollection& fields() const { @@ -171,10 +199,14 @@ struct StreamDescriptor { return fields_->add(field); } - std::shared_ptr fields_ptr() const { + [[nodiscard]] std::shared_ptr fields_ptr() const { return fields_; } + [[nodiscard]] std::shared_ptr data_ptr() const { + return segment_desc_; + } + decltype(auto) begin() { return fields().begin(); } @@ -183,11 +215,11 @@ struct StreamDescriptor { return fields().end(); } - decltype(auto) begin() const { + [[nodiscard]] 
decltype(auto) begin() const { return fields().begin(); } - decltype(auto) end() const { + [[nodiscard]] decltype(auto) end() const { return fields().end(); } @@ -195,22 +227,23 @@ struct StreamDescriptor { return fields().size(); } - bool empty() const { + [[nodiscard]] bool empty() const { return fields().empty(); } - std::optional find_field(std::string_view view) const { + [[nodiscard]] std::optional find_field(std::string_view view) const { auto it = std::find_if(begin(), end(), [&](const auto& field) { return field.name() == view; }); - if (it == end()) return std::nullopt; + if (it == end()) + return std::nullopt; + return std::distance(begin(), it); } friend bool operator==(const StreamDescriptor& left, const StreamDescriptor& right) { - google::protobuf::util::MessageDifferencer diff; - if(!diff.Compare(*left.data_, *right.data_)) + if(*left.segment_desc_ != *right.segment_desc_) return false; return *left.fields_ == *right.fields_; @@ -236,95 +269,67 @@ struct StreamDescriptor { const Field& field(size_t pos) { return fields_->at(pos); } - - [[nodiscard]] const Proto& proto() const { - return *data_; - } - - Proto& mutable_proto() { - return *data_; - } - - void print_proto_debug_str() const { - data_->PrintDebugString(); - } }; template -inline void set_index(arcticdb::proto::descriptors::StreamDescriptor &stream_desc) { - auto& pb_desc = *stream_desc.mutable_index(); - pb_desc.set_field_count(std::uint32_t(IndexType::field_count())); - pb_desc.set_kind(static_cast( - static_cast(IndexType::type()))); +inline void set_index(StreamDescriptor &stream_desc) { + stream_desc.set_index_field_count(std::uint32_t(IndexType::field_count())); + stream_desc.set_index_type(IndexType::type()); } template -StreamDescriptor index_descriptor(const StreamId& stream_id, IndexType, const RangeType& fields) { - arcticdb::proto::descriptors::StreamDescriptor desc; - set_id(desc, stream_id); +StreamDescriptor index_descriptor_from_range(const StreamId& stream_id, IndexType, const RangeType& fields) { + StreamDescriptor desc; + desc.set_id(stream_id); set_index(desc); - auto out_fields = std::make_shared(); - for(const auto& field : fields) { + + auto out_fields = desc.fields_ptr(); + for(const auto& field : fields) out_fields->add({field.type(), field.name()}); - } - return StreamDescriptor(std::make_shared(std::move(desc)), std::move(out_fields)); + return desc; } template StreamDescriptor index_descriptor(StreamId stream_id, IndexType index_type, std::initializer_list fields) { - std::vector fields_vec; - fields_vec.reserve(fields.size()); - for(const auto& field : fields) - fields_vec.push_back(field); - - return index_descriptor(stream_id, index_type, fields_vec); + return index_descriptor_from_range(stream_id, index_type, fields); } template -StreamDescriptor stream_descriptor(const StreamId& stream_id, IndexType idx, RangeType fields) { +StreamDescriptor stream_descriptor_from_range(const StreamId& stream_id, IndexType idx, RangeType fields) { StreamDescriptor output; - output.set_id(stream_id); - set_index(*output.data_); + set_index(output); + for(auto i = 0u; i < IndexType::field_count(); ++i) { const auto& field = idx.field(i); output.add_field(FieldRef{field.type(), field.name()}); } - for(const auto& field : fields) { + for(const auto& field : fields) output.add_field(FieldRef{field.type(), field.name()}); - } return output; } template -StreamDescriptor stream_descriptor(StreamId stream_id, IndexType index_type, - std::initializer_list fields) { - std::vector vec{fields}; - return 
stream_descriptor(stream_id, index_type, folly::range(vec)); -} - -inline TypeDescriptor stream_id_descriptor(const StreamId &stream_id) { - return std::holds_alternative(stream_id) ? - TypeDescriptor(DataType::UINT64, 0) : - TypeDescriptor(DataType::ASCII_DYNAMIC64, 0); +StreamDescriptor stream_descriptor(StreamId stream_id, IndexType index_type, std::initializer_list fields) { + return stream_descriptor_from_range(stream_id, index_type, fields); } inline DataType stream_id_data_type(const StreamId &stream_id) { return std::holds_alternative(stream_id) ? DataType::UINT64 : DataType::ASCII_DYNAMIC64; } -inline FieldCollection field_collection_from_proto(google::protobuf::RepeatedPtrField&& fields) { +inline FieldCollection field_collection_from_proto(const google::protobuf::RepeatedPtrField& fields) { FieldCollection output; - for(const auto& field : fields) { + for(const auto& field : fields) output.add_field(type_desc_from_proto(field.type_desc()), field.name()); - } + return output; } -} //namespace arcticdb +} //namespace arcticdb::entity namespace fmt { template<> @@ -342,13 +347,13 @@ struct formatter { }; template<> -struct formatter { +struct formatter { template constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } template - auto format(const arcticdb::entity::StreamDescriptor::Proto &sd, FormatContext &ctx) const { - return fmt::format_to(ctx.out(), "{}", sd.DebugString()); + auto format(const arcticdb::proto::descriptors::StreamDescriptor &sd, FormatContext &ctx) const { + return format_to(ctx.out(), "{}", sd.DebugString()); } }; diff --git a/cpp/arcticdb/entity/test/test_atom_key.cpp b/cpp/arcticdb/entity/test/test_atom_key.cpp index 874a88ca60..18890fe5ef 100644 --- a/cpp/arcticdb/entity/test/test_atom_key.cpp +++ b/cpp/arcticdb/entity/test/test_atom_key.cpp @@ -15,9 +15,11 @@ #include +using namespace arcticdb; using namespace arcticdb::entity; TEST(Key, Basic) { + using namespace arcticdb; using namespace arcticdb::entity; using namespace arcticdb::storage; @@ -108,7 +110,7 @@ struct AlternativeFormat { TEST(Key, Formatting) { AtomKey k{ - StreamId{NumericId{999}}, + arcticdb::StreamId{NumericId{999}}, VersionId(123), timestamp(123000000LL), 0x789456321ULL, @@ -148,7 +150,7 @@ TEST(AtomKey, ProtobufRoundtrip) { auto key = atom_key_builder().version_id(0).content_hash(1).creation_ts(2).start_index(3) .end_index(4).build(StreamId{"Natbag"}, KeyType::TABLE_INDEX); - auto pb_key = arcticdb::encode_key(key); - auto decoded_key = arcticdb::decode_key(pb_key); + auto pb_key = arcticdb::key_to_proto(key); + auto decoded_key = arcticdb::key_from_proto(pb_key); ASSERT_EQ(key, decoded_key); } diff --git a/cpp/arcticdb/entity/test/test_ref_key.cpp b/cpp/arcticdb/entity/test/test_ref_key.cpp index 9d1ea557b2..f3b5df81f8 100644 --- a/cpp/arcticdb/entity/test/test_ref_key.cpp +++ b/cpp/arcticdb/entity/test/test_ref_key.cpp @@ -11,6 +11,6 @@ TEST(RefKey, Basic) { using namespace arcticdb::entity; RefKey rk{ "HelloWorld", KeyType::STORAGE_INFO}; - ASSERT_EQ(rk.id(), VariantId("HelloWorld")); + ASSERT_EQ(rk.id(), arcticdb::VariantId("HelloWorld")); ASSERT_EQ(rk.type(), KeyType::STORAGE_INFO); } \ No newline at end of file diff --git a/cpp/arcticdb/entity/timeseries_descriptor.hpp b/cpp/arcticdb/entity/timeseries_descriptor.hpp index cd3b080419..54f3d481cf 100644 --- a/cpp/arcticdb/entity/timeseries_descriptor.hpp +++ b/cpp/arcticdb/entity/timeseries_descriptor.hpp @@ -12,73 +12,133 @@ namespace arcticdb { +struct FrameDescriptorImpl : public FrameDescriptor { + 
FrameDescriptorImpl() = default; + + ARCTICDB_MOVE_COPY_DEFAULT(FrameDescriptorImpl) + + [[nodiscard]] FrameDescriptorImpl clone() const { + return *this; + } +}; + struct TimeseriesDescriptor { - using Proto = arcticdb::proto::descriptors::TimeSeriesDescriptor; - - std::shared_ptr proto_ = std::make_shared(); - std::shared_ptr fields_ = std::make_shared(); - TimeseriesDescriptor() = default; - - TimeseriesDescriptor(std::shared_ptr proto, std::shared_ptr fields) : - proto_(std::move(proto)), - fields_(std::move(fields)) { - } - - [[nodiscard]] std::shared_ptr fields_ptr() const { - return fields_; - } - - [[nodiscard]] std::shared_ptr proto_ptr() const { - return proto_; - } - - [[nodiscard]] bool proto_is_null() const { - return !proto_; - } - - void set_stream_descriptor(const StreamDescriptor& desc) { - fields_ = std::make_shared(desc.fields().clone()); - proto_ = std::make_shared(); - proto_->mutable_stream_descriptor()->CopyFrom(desc.proto()); - } - - [[nodiscard]] const FieldCollection& fields() const { - return *fields_; - } - - [[nodiscard]] FieldCollection& mutable_fields() { - return *fields_; - } - - [[nodiscard]] Proto& mutable_proto() { - return *proto_; - } - - [[nodiscard]] const Proto& proto() const { - return *proto_; - } - - [[nodiscard]] TimeseriesDescriptor clone() const { - auto proto = std::make_shared(); - proto->CopyFrom(*proto_); - return {std::move(proto), std::make_shared(fields_->clone())}; - } - - [[nodiscard]] StreamDescriptor as_stream_descriptor() const { - auto stream_descriptor = std::make_shared(); - stream_descriptor->CopyFrom(proto_->stream_descriptor()); - return StreamDescriptor(stream_descriptor, fields_); - } - - void copy_to_self_proto() { - proto_->mutable_stream_descriptor()->mutable_fields()->Clear(); - for(const auto& field : *fields_) { - auto new_field = proto_->mutable_stream_descriptor()->mutable_fields()->Add(); - new_field->set_name(std::string(field.name())); - new_field->mutable_type_desc()->set_dimension(static_cast(field.type().dimension())); - set_data_type(field.type().data_type(), *new_field->mutable_type_desc()); - } - } + using Proto = arcticdb::proto::descriptors::FrameMetadata; + + std::shared_ptr frame_data_ = std::make_shared(); + std::shared_ptr segment_desc_ = std::make_shared(); + std::shared_ptr proto_ = std::make_shared(); + std::shared_ptr fields_ = std::make_shared(); + StreamId stream_id_; + + TimeseriesDescriptor() = default; + + TimeseriesDescriptor( + std::shared_ptr frame_desc, + std::shared_ptr segment_desc, + std::shared_ptr proto, + std::shared_ptr fields, + StreamId stream_id) : + frame_data_(std::move(frame_desc)), + segment_desc_(segment_desc), + proto_(std::move(proto)), + fields_(std::move(fields)), + stream_id_(stream_id) { + } + + [[nodiscard]] const FrameDescriptorImpl &frame_descriptor() const { + return *frame_data_; + } + + [[nodiscard]] IndexDescriptorImpl index() const { + return segment_desc_->index_; + } + + void set_stream_descriptor(const StreamDescriptor &desc) { + segment_desc_ = desc.data_ptr(); + fields_ = desc.fields_ptr(); + stream_id_ = desc.stream_id_; + } + + void set_total_rows(uint64_t rows) { + frame_data_->total_rows_ = rows; + } + + [[nodiscard]] uint64_t total_rows() const { + return frame_data_->total_rows_; + } + + [[nodiscard]] SortedValue sorted() const { + return segment_desc_->sorted_; + } + + void set_sorted(SortedValue sorted) { + segment_desc_->sorted_ = sorted; + } + + const arcticdb::proto::descriptors::UserDefinedMetadata& user_metadata() const { + return 
proto_->user_meta(); + } + + const arcticdb::proto::descriptors::NormalizationMetadata normalization() const { + return proto_->normalization(); + } + + void set_user_metadata(arcticdb::proto::descriptors::UserDefinedMetadata &&user_meta) { + *proto_->mutable_user_meta() = std::move(user_meta); + } + + void set_normalization_metadata(arcticdb::proto::descriptors::NormalizationMetadata &&norm_meta) { + *proto_->mutable_normalization() = std::move(norm_meta); + } + + void set_multi_key_metadata(arcticdb::proto::descriptors::UserDefinedMetadata &&multi_key_meta) { + *proto_->mutable_multi_key_meta() = std::move(multi_key_meta); + } + + [[nodiscard]] std::shared_ptr fields_ptr() const { + return fields_; + } + + [[nodiscard]] std::shared_ptr proto_ptr() const { + return proto_; + } + + [[nodiscard]] bool proto_is_null() const { + return !proto_; + } + + [[nodiscard]] const FieldCollection &fields() const { + return *fields_; + } + + [[nodiscard]] FieldCollection &mutable_fields() { + return *fields_; + } + + [[nodiscard]] Proto &mutable_proto() { + return *proto_; + } + + [[nodiscard]] const Proto &proto() const { + return *proto_; + } + + [[nodiscard]] TimeseriesDescriptor clone() const { + auto proto = std::make_shared(); + proto->CopyFrom(*proto_); + auto frame_desc = std::make_shared(frame_data_->clone()); + auto segment_desc = std::make_shared(segment_desc_->clone()); + return {std::move(frame_desc), std::move(segment_desc), std::move(proto), std::make_shared(fields_->clone()), stream_id_}; + } + + [[nodiscard]] bool column_groups() const { + return frame_data_->column_groups_; + } + + [[nodiscard]] StreamDescriptor as_stream_descriptor() const { + return {segment_desc_, fields_, stream_id_}; + } }; } //namespace arcticdb diff --git a/cpp/arcticdb/entity/type_utils.cpp b/cpp/arcticdb/entity/type_utils.cpp index 83df12de1d..23ca1a1a8f 100644 --- a/cpp/arcticdb/entity/type_utils.cpp +++ b/cpp/arcticdb/entity/type_utils.cpp @@ -8,7 +8,6 @@ #include #include -#include namespace arcticdb { bool trivially_compatible_types(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right) { @@ -131,13 +130,6 @@ namespace arcticdb { return target; } - std::optional has_valid_type_promotion( - const proto::descriptors::TypeDescriptor& source, - const proto::descriptors::TypeDescriptor& target - ) { - return has_valid_type_promotion(entity::type_desc_from_proto(source), entity::type_desc_from_proto(target)); - } - std::optional has_valid_common_type( const entity::TypeDescriptor& left, const entity::TypeDescriptor& right @@ -178,12 +170,4 @@ namespace arcticdb { } return maybe_common_type; } - - std::optional has_valid_common_type( - const proto::descriptors::TypeDescriptor& left, - const proto::descriptors::TypeDescriptor& right - ) { - return has_valid_common_type(entity::type_desc_from_proto(left), entity::type_desc_from_proto(right)); - } - } \ No newline at end of file diff --git a/cpp/arcticdb/entity/types-inl.hpp b/cpp/arcticdb/entity/types-inl.hpp index 8c939a0aed..9b19d1685e 100644 --- a/cpp/arcticdb/entity/types-inl.hpp +++ b/cpp/arcticdb/entity/types-inl.hpp @@ -126,12 +126,12 @@ struct formatter { }; template<> -struct formatter { +struct formatter { template constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } template - constexpr auto format(const arcticdb::entity::StreamId &tsid, FormatContext &ctx) const { + constexpr auto format(const arcticdb::StreamId &tsid, FormatContext &ctx) const { return std::visit([&ctx](auto &&val) { return fmt::format_to(ctx.out(), "{}", 
val); }, tsid); diff --git a/cpp/arcticdb/entity/types.hpp b/cpp/arcticdb/entity/types.hpp index 7488848724..01920f3177 100644 --- a/cpp/arcticdb/entity/types.hpp +++ b/cpp/arcticdb/entity/types.hpp @@ -9,6 +9,10 @@ #include #include +#include +#include +#include +#include "arcticdb/storage/memory_layout.hpp" #include #include @@ -16,7 +20,6 @@ #include #include - #ifdef _WIN32 // `ssize_t` is defined in `sys/types.h` but it is not ISO C (it simply is POSIX), hence its is not defined natively by MSVC. // See: https://learn.microsoft.com/en-us/windows/win32/winprog/windows-data-types @@ -24,20 +27,22 @@ using ssize_t = SSIZE_T; #endif -namespace arcticdb::entity { +#include -enum class SortedValue : uint8_t { - UNKNOWN = 0, - UNSORTED = 1, - ASCENDING = 2, - DESCENDING = 3, -}; +namespace arcticdb::proto { + namespace descriptors = arcticc::pb2::descriptors_pb2; +} + +namespace arcticdb { using NumericId = int64_t; using UnsignedId = uint64_t; using StringId = std::string; using VariantId = std::variant; using StreamId = VariantId; + +namespace entity { + using SnapshotId = VariantId; using VersionId = uint64_t; using SignedVersionId = int64_t; @@ -50,7 +55,8 @@ using position_t = int64_t; /** The VariantId holds int64 (NumericId) but is also used to store sizes up to uint64, so needs safe conversion */ inline NumericId safe_convert_to_numeric_id(uint64_t input) { - util::check(input <= static_cast(std::numeric_limits::max()), "Numeric symbol greater than 2^63 is not supported."); + util::check(input <= static_cast(std::numeric_limits::max()), + "Numeric symbol greater than 2^63 is not supported."); return static_cast(input); } @@ -99,34 +105,34 @@ enum class ValueType : uint8_t { }; // Sequence types are composed of more than one element -constexpr bool is_sequence_type(ValueType v){ +constexpr bool is_sequence_type(ValueType v) { return uint8_t(v) >= uint8_t(ValueType::ASCII_FIXED) && uint8_t(v) <= uint8_t(ValueType::ASCII_DYNAMIC); } -constexpr bool is_numeric_type(ValueType v){ +constexpr bool is_numeric_type(ValueType v) { return v == ValueType::NANOSECONDS_UTC || (uint8_t(v) >= uint8_t(ValueType::UINT) && uint8_t(v) <= uint8_t(ValueType::FLOAT)); } -constexpr bool is_floating_point_type(ValueType v){ +constexpr bool is_floating_point_type(ValueType v) { return uint8_t(v) == uint8_t(ValueType::FLOAT); } -constexpr bool is_time_type(ValueType v){ +constexpr bool is_time_type(ValueType v) { return uint8_t(v) == uint8_t(ValueType::NANOSECONDS_UTC); } -constexpr bool is_integer_type(ValueType v){ +constexpr bool is_integer_type(ValueType v) { return uint8_t(v) == uint8_t(ValueType::INT) || uint8_t(v) == uint8_t(ValueType::UINT); } -constexpr bool is_fixed_string_type(ValueType v){ +constexpr bool is_fixed_string_type(ValueType v) { return v == ValueType::ASCII_FIXED || v == ValueType::UTF8_FIXED; } -constexpr bool is_dynamic_string_type(ValueType v){ +constexpr bool is_dynamic_string_type(ValueType v) { return is_sequence_type(v) && !is_fixed_string_type(v); } constexpr bool is_utf_type(ValueType v) { @@ -156,7 +162,7 @@ constexpr SizeBits get_size_bits(uint8_t size) { } [[nodiscard]] constexpr int get_byte_count(SizeBits size_bits) { - switch(size_bits) { + switch (size_bits) { case SizeBits::S8: return 1; case SizeBits::S16: return 2; case SizeBits::S32: return 4; @@ -165,7 +171,7 @@ constexpr SizeBits get_size_bits(uint8_t size) { } } -namespace detail{ +namespace detail { constexpr uint8_t combine_val_bits(ValueType v, SizeBits b = SizeBits::UNKNOWN_SIZE_BITS) { return (static_cast(v) 
<< 3u) | static_cast(b); @@ -203,13 +209,13 @@ constexpr DataType combine_data_type(ValueType v, SizeBits b = SizeBits::UNKNOWN } // Constructs the corresponding DataType from a given primitive arithmetic type (u/int8_t, float, or double) -template +template constexpr DataType data_type_from_raw_type() { static_assert(std::is_arithmetic_v); if constexpr (std::is_floating_point_v) { return combine_data_type(ValueType::FLOAT, get_size_bits(sizeof(T))); } - if constexpr(std::is_signed_v) { + if constexpr (std::is_signed_v) { return combine_data_type(ValueType::INT, get_size_bits(sizeof(T))); } return combine_data_type(ValueType::UINT, get_size_bits(sizeof(T))); @@ -234,11 +240,11 @@ constexpr size_t get_type_size(DataType dt) noexcept { return size_t(1) << (size_t(s) - 1); } -constexpr bool is_sequence_type(DataType v){ +constexpr bool is_sequence_type(DataType v) { return is_sequence_type(slice_value_type(v)); } -constexpr bool is_numeric_type(DataType v){ +constexpr bool is_numeric_type(DataType v) { return is_numeric_type(slice_value_type(v)); } @@ -258,31 +264,31 @@ constexpr bool is_signed_type(DataType dt) { return slice_value_type(dt) == ValueType::INT; } -constexpr bool is_floating_point_type(DataType v){ +constexpr bool is_floating_point_type(DataType v) { return is_floating_point_type(slice_value_type(v)); } -constexpr bool is_time_type(DataType v){ +constexpr bool is_time_type(DataType v) { return is_time_type(slice_value_type(v)); } -constexpr bool is_integer_type(DataType v){ +constexpr bool is_integer_type(DataType v) { return is_integer_type(slice_value_type(v)); } -constexpr bool is_fixed_string_type(DataType v){ +constexpr bool is_fixed_string_type(DataType v) { return is_fixed_string_type(slice_value_type(v)); } -constexpr bool is_dynamic_string_type(DataType v){ +constexpr bool is_dynamic_string_type(DataType v) { return is_dynamic_string_type(slice_value_type(v)); } -constexpr bool is_utf_type(DataType v){ +constexpr bool is_utf_type(DataType v) { return is_utf_type(slice_value_type(v)); } -constexpr bool is_empty_type(DataType v){ +constexpr bool is_empty_type(DataType v) { return is_empty_type(slice_value_type(v)); } @@ -290,8 +296,8 @@ static_assert(slice_value_type(DataType::UINT16) == ValueType(1)); static_assert(get_type_size(DataType::UINT32) == 4); static_assert(get_type_size(DataType::UINT64) == 8); -constexpr ValueType get_value_type(char specifier) noexcept { - switch(specifier){ +constexpr ValueType get_value_type(char specifier) noexcept { + switch (specifier) { case 'u': return ValueType::UINT; // unsigned integer case 'i': return ValueType::INT; // signed integer case 'f': return ValueType::FLOAT; // floating-point @@ -312,10 +318,10 @@ constexpr ValueType get_value_type(char specifier) noexcept { } } -constexpr char get_dtype_specifier(ValueType vt){ - switch(vt){ +constexpr char get_dtype_specifier(ValueType vt) { + switch (vt) { case ValueType::UINT: return 'u'; - case ValueType::INT: return 'i'; + case ValueType::INT: return 'i'; case ValueType::FLOAT: return 'f'; case ValueType::BOOL: return 'b'; // NOTE: this is safe as of Pandas < 2.0 because `datetime64` _always_ has been using nanosecond resolution, @@ -330,12 +336,11 @@ constexpr char get_dtype_specifier(ValueType vt){ case ValueType::ASCII_FIXED: return 'S'; case ValueType::BYTES: return 'O'; case ValueType::EMPTY: return 'O'; - default: - return 'x'; + default:return 'x'; } } -constexpr char get_dtype_specifier(DataType dt){ +constexpr char get_dtype_specifier(DataType dt) { return 
get_dtype_specifier(slice_value_type(dt)); } @@ -357,7 +362,6 @@ struct DataTypeTag : public DataTypeTagBase { \ }; \ using TAG_##__DT__ = DataTypeTag; - DATA_TYPE_TAG(UINT8, std::uint8_t) DATA_TYPE_TAG(UINT16, std::uint16_t) DATA_TYPE_TAG(UINT32, std::uint32_t) @@ -404,7 +408,7 @@ Dimension as_dim_checked(uint8_t d); struct TypeDescriptor; -inline void set_data_type(DataType data_type, TypeDescriptor& type_desc); +inline void set_data_type(DataType data_type, TypeDescriptor &type_desc); struct TypeDescriptor { DataType data_type_; @@ -434,6 +438,10 @@ struct TypeDescriptor { return data_type_; } + [[nodiscard]] constexpr ValueType value_type() const { + return slice_value_type(data_type_); + } + [[nodiscard]] constexpr Dimension dimension() const { return dimension_; } @@ -451,7 +459,6 @@ struct TypeDescriptor { } }; - /// @brief Check if the type must contain data /// Some types are allowed not to have any data, e.g. empty arrays or the empty type (which by design denotes the /// lack of data). @@ -468,11 +475,11 @@ constexpr bool is_numpy_array(TypeDescriptor td) { } constexpr bool is_pyobject_type(TypeDescriptor td) { - return is_dynamic_string_type(slice_value_type(td.data_type())) || is_bool_object_type(td.data_type()) || - is_numpy_array(td); + return is_dynamic_string_type(slice_value_type(td.data_type())) || is_bool_object_type(td.data_type()) || + is_numpy_array(td); } -inline void set_data_type(DataType data_type, TypeDescriptor& type_desc) { +inline void set_data_type(DataType data_type, TypeDescriptor &type_desc) { type_desc.data_type_ = data_type; } @@ -503,7 +510,7 @@ struct TypeDescriptorTag { } }; -template +template using ScalarTagType = TypeDescriptorTag>; template @@ -513,6 +520,66 @@ struct ScalarTypeInfo { using RawType = typename TDT::DataTypeTag::raw_type; }; +struct IndexDescriptorImpl : public IndexDescriptor { + using TypeChar = char; + + IndexDescriptorImpl() = default; + + IndexDescriptorImpl(uint32_t field_count, Type type) : + IndexDescriptor(type, field_count) { + } + + IndexDescriptorImpl(const IndexDescriptor& idx) : + IndexDescriptor(idx) { + } + + [[nodiscard]] bool uninitialized() const { + return field_count() == 0 && type_ == Type::UNKNOWN; + } + + [[nodiscard]] uint32_t field_count() const { + return field_count_; + } + + [[nodiscard]] Type type() const { + return type_; + } + + void set_type(Type type) { + type_ = type; + } + + void set_field_count(uint32_t field_count) { + field_count_ = field_count; + } + + ARCTICDB_MOVE_COPY_DEFAULT(IndexDescriptorImpl) + + friend bool operator==(const IndexDescriptorImpl &left, const IndexDescriptorImpl &right) { + return left.type() == right.type() && left.field_count_ == right.field_count_; + } +}; + +constexpr IndexDescriptorImpl::TypeChar to_type_char(IndexDescriptorImpl::Type type) { + switch (type) { + case IndexDescriptorImpl::Type::TIMESTAMP:return 'T'; + case IndexDescriptorImpl::Type::ROWCOUNT:return 'R'; + case IndexDescriptorImpl::Type::STRING:return 'S'; + case IndexDescriptorImpl::Type::UNKNOWN:return 'U'; + default:util::raise_rte("Unknown index type: {}", int(type)); + } +} + +constexpr IndexDescriptorImpl::Type from_type_char(IndexDescriptorImpl::TypeChar type) { + switch (type) { + case 'T': return IndexDescriptorImpl::Type::TIMESTAMP; + case 'R': return IndexDescriptorImpl::Type::ROWCOUNT; + case 'S': return IndexDescriptorImpl::Type::STRING; + case 'U': return IndexDescriptorImpl::Type::UNKNOWN; + default:util::raise_rte("Unknown index type: {}", int(type)); + } +} + struct FieldRef { 
TypeDescriptor type_; std::string_view name_; @@ -525,12 +592,11 @@ struct FieldRef { return name_; } - friend bool operator==(const FieldRef& left, const FieldRef& right) { + friend bool operator==(const FieldRef &left, const FieldRef &right) { return left.type_ == right.type_ && left.name_ == right.name_; } }; - struct Field { uint32_t size_ = 0; TypeDescriptor type_; @@ -539,10 +605,8 @@ struct Field { ARCTICDB_NO_MOVE_OR_COPY(Field) - - private: - explicit Field(const FieldRef& ref) { + explicit Field(const FieldRef &ref) { set(ref.type_, ref.name_); } @@ -550,8 +614,8 @@ struct Field { set(type, name); } public: - static void emplace(TypeDescriptor type, std::string_view name, void* ptr) { - new (ptr) Field(type, name); + static void emplace(TypeDescriptor type, std::string_view name, void *ptr) { + new(ptr) Field(type, name); } static size_t calc_size(std::string_view name) { @@ -562,15 +626,15 @@ struct Field { return {name_, size_}; } - [[nodiscard]] const TypeDescriptor& type() const { + [[nodiscard]] const TypeDescriptor &type() const { return type_; } - [[nodiscard]] TypeDescriptor* mutable_type_desc() { + [[nodiscard]] TypeDescriptor *mutable_type_desc() { return &type_; } - TypeDescriptor& mutable_type() { + TypeDescriptor &mutable_type() { return type_; } @@ -604,13 +668,21 @@ struct FieldWrapper { mutable_field().set(type, name); } - const Field& field() const { - return *reinterpret_cast(data_.data()); + const Field &field() const { + return *reinterpret_cast(data_.data()); + } + + const TypeDescriptor& type() const { + return field().type(); + } + + const std::string_view name() const { + return field().name(); } private: - Field& mutable_field() { - return *reinterpret_cast(data_.data()); + Field &mutable_field() { + return *reinterpret_cast(data_.data()); } }; @@ -618,29 +690,32 @@ inline FieldRef scalar_field(DataType type, std::string_view name) { return {TypeDescriptor{type, Dimension::Dim0}, name}; } -template -auto visit_field(const Field& field, Callable&& c) { +template +auto visit_field(const Field &field, Callable &&c) { return field.type().visit_tag(std::forward(c)); } -inline bool operator==(const Field& l, const Field& r) { +inline bool operator==(const Field &l, const Field &r) { return l.type() == r.type() && l.name() == r.name(); } -inline bool operator!=(const Field& l, const Field& r) { +inline bool operator!=(const Field &l, const Field &r) { return !(l == r); } -std::size_t sizeof_datatype(const TypeDescriptor& td); -} // namespace arcticdb::entity +std::size_t sizeof_datatype(const TypeDescriptor &td); + +} // namespace entity + +} // namespace arcticdb // StreamId ordering - numbers before strings namespace std { -template<> -struct less { - bool operator()(const arcticdb::entity::StreamId &left, const arcticdb::entity::StreamId &right) const { - using namespace arcticdb::entity; +template<> +struct less { + bool operator()(const arcticdb::StreamId &left, const arcticdb::StreamId &right) const { + using namespace arcticdb; if (std::holds_alternative(left)) { if (std::holds_alternative(right)) return left < right; @@ -654,7 +729,8 @@ struct less { } } }; -} + +} // namespace std namespace fmt { @@ -672,7 +748,19 @@ struct formatter { } }; -} +template<> +struct formatter { + + template + constexpr auto parse(ParseContext &ctx) { return ctx.begin(); } + + template + auto format(const FieldWrapper& f, FormatContext &ctx) const { + return fmt::format_to(ctx.out(), "{}: {}", f.type(), f.name()); + } +}; + +} //namespace fmt #define ARCTICDB_TYPES_H_ 
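Reviewer note: the std::less<StreamId> specialisation introduced above keeps the existing ordering rule that numeric ids sort before string ids. A minimal standalone sketch of that rule, using a local variant alias rather than the real arcticdb::StreamId (all names below are illustrative only):

#include <cstdint>
#include <iostream>
#include <set>
#include <string>
#include <variant>

// Stand-in for arcticdb::StreamId (a variant of numeric and string ids).
using Id = std::variant<std::int64_t, std::string>;

// Mirrors the intent of the std::less<StreamId> specialisation:
// every numeric id compares less than every string id, otherwise compare values.
struct IdLess {
    bool operator()(const Id& left, const Id& right) const {
        if (std::holds_alternative<std::int64_t>(left))
            return std::holds_alternative<std::int64_t>(right) ? left < right : true;
        return std::holds_alternative<std::string>(right) ? left < right : false;
    }
};

int main() {
    std::set<Id, IdLess> ids{Id{std::string{"AAPL"}}, Id{std::int64_t{42}}, Id{std::int64_t{7}}};
    for (const auto& id : ids)
        std::visit([](const auto& value) { std::cout << value << '\n'; }, id);  // prints 7, 42, AAPL
}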
#include "types-inl.hpp" \ No newline at end of file diff --git a/cpp/arcticdb/entity/types_proto.cpp b/cpp/arcticdb/entity/types_proto.cpp index 5d53a3092f..db34fe33bc 100644 --- a/cpp/arcticdb/entity/types_proto.cpp +++ b/cpp/arcticdb/entity/types_proto.cpp @@ -4,188 +4,128 @@ * * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. */ - +#include #include #include namespace arcticdb::entity { - - bool operator==(const FieldProto& left, const FieldProto& right) { - google::protobuf::util::MessageDifferencer diff; - return diff.Compare(left, right); - } - - bool operator<(const FieldProto& left, const FieldProto& right) { - return left.name() < right.name(); - } - - - - arcticdb::proto::descriptors::SortedValue sorted_value_to_proto(SortedValue sorted) { - switch (sorted) { - case SortedValue::UNSORTED: - return arcticdb::proto::descriptors::SortedValue::UNSORTED; - case SortedValue::DESCENDING: - return arcticdb::proto::descriptors::SortedValue::DESCENDING; - case SortedValue::ASCENDING: - return arcticdb::proto::descriptors::SortedValue::ASCENDING; - default: - return arcticdb::proto::descriptors::SortedValue::UNKNOWN; - } - } - - SortedValue sorted_value_from_proto(arcticdb::proto::descriptors::SortedValue sorted_proto) { - switch (sorted_proto) { - case arcticdb::proto::descriptors::SortedValue::UNSORTED: - return SortedValue::UNSORTED; - case arcticdb::proto::descriptors::SortedValue::DESCENDING: - return SortedValue::DESCENDING; - case arcticdb::proto::descriptors::SortedValue::ASCENDING: - return SortedValue::ASCENDING; - default: - return SortedValue::UNKNOWN; - } - } - - - void set_data_type(DataType data_type, arcticdb::proto::descriptors::TypeDescriptor& type_desc) { - type_desc.set_size_bits( - static_cast( - static_cast(slice_bit_size(data_type)))); - type_desc.set_value_type( - static_cast( - static_cast(slice_value_type(data_type)))); - } - - - [[nodiscard]] - auto to_proto(const TypeDescriptor& desc) - -> arcticdb::proto::descriptors::TypeDescriptor - { - arcticdb::proto::descriptors::TypeDescriptor output; - output.set_dimension(static_cast(desc.dimension_)); - set_data_type(desc.data_type_, output); - - return output; - } - - - - DataType get_data_type(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { - return combine_data_type( +bool operator==(const FieldProto& left, const FieldProto& right) { + google::protobuf::util::MessageDifferencer diff; + return diff.Compare(left, right); +} + +bool operator<(const FieldProto& left, const FieldProto& right) { + return left.name() < right.name(); +} + +arcticdb::proto::descriptors::SortedValue sorted_value_to_proto(SortedValue sorted) { + switch (sorted) { + case SortedValue::UNSORTED: + return arcticdb::proto::descriptors::SortedValue::UNSORTED; + case SortedValue::DESCENDING: + return arcticdb::proto::descriptors::SortedValue::DESCENDING; + case SortedValue::ASCENDING: + return arcticdb::proto::descriptors::SortedValue::ASCENDING; + default: + return arcticdb::proto::descriptors::SortedValue::UNKNOWN; + } +} + +SortedValue sorted_value_from_proto(arcticdb::proto::descriptors::SortedValue sorted_proto) { + switch (sorted_proto) { + case arcticdb::proto::descriptors::SortedValue::UNSORTED: + return SortedValue::UNSORTED; + case arcticdb::proto::descriptors::SortedValue::DESCENDING: + return SortedValue::DESCENDING; + case arcticdb::proto::descriptors::SortedValue::ASCENDING: + return 
SortedValue::ASCENDING; + default: + return SortedValue::UNKNOWN; + } +} + +void set_data_type(DataType data_type, arcticdb::proto::descriptors::TypeDescriptor& type_desc) { + type_desc.set_size_bits( + static_cast( + static_cast(slice_bit_size(data_type)))); + + type_desc.set_value_type( + static_cast( + static_cast(slice_value_type(data_type)))); +} + +[[nodiscard]] arcticdb::proto::descriptors::TypeDescriptor to_proto(const TypeDescriptor& desc) { + arcticdb::proto::descriptors::TypeDescriptor output; + output.set_dimension(static_cast(desc.dimension_)); + set_data_type(desc.data_type_, output); + return output; +} + +DataType get_data_type(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { + return combine_data_type( + static_cast(static_cast(type_desc.value_type())), + static_cast(static_cast(type_desc.size_bits())) + ); +} + +TypeDescriptor type_desc_from_proto(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { + return { + combine_data_type( static_cast(static_cast(type_desc.value_type())), static_cast(static_cast(type_desc.size_bits())) - ); - } - - TypeDescriptor type_desc_from_proto(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { - return { - combine_data_type( - static_cast(static_cast(type_desc.value_type())), - static_cast(static_cast(type_desc.size_bits())) - ), - static_cast(static_cast(type_desc.dimension())) - }; - } - - DataType data_type_from_proto(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { - return type_desc_from_proto(type_desc).data_type(); - } - - - arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor field_proto(DataType dt, Dimension dim, std::string_view name) { - arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor output; - if (!name.empty()) - output.set_name(name.data(), name.size()); - - auto output_desc = output.mutable_type_desc(); - output_desc->set_dimension(static_cast(dim)); - output_desc->set_size_bits(static_cast( - static_cast(slice_bit_size(dt)))); - - output_desc->set_value_type( - static_cast( - static_cast(slice_value_type(dt)))); - - return output; - } - - void set_id(arcticdb::proto::descriptors::StreamDescriptor& pb_desc, StreamId id) { - std::visit([&pb_desc](auto&& arg) { - using IdType = std::decay_t; - if constexpr (std::is_same_v) - pb_desc.set_num_id(arg); - else if constexpr (std::is_same_v) - pb_desc.set_str_id(arg); - else - util::raise_rte("Encoding unknown descriptor type"); - }, id); - } - - IndexDescriptor::IndexDescriptor(size_t field_count, Type type) { - data_.set_kind(type); - data_.set_field_count(static_cast(field_count)); - } - - IndexDescriptor::IndexDescriptor(arcticdb::proto::descriptors::IndexDescriptor data) - : data_(std::move(data)) { - } - - bool IndexDescriptor::uninitialized() const { - return data_.field_count() == 0 && data_.kind() == Type::IndexDescriptor_Type_UNKNOWN; - } - - const IndexDescriptor::Proto& IndexDescriptor::proto() const { - return data_; - } - - size_t IndexDescriptor::field_count() const { - return static_cast(data_.field_count()); - } - - IndexDescriptor::Type IndexDescriptor::type() const { - return data_.kind(); - } - - void IndexDescriptor::set_type(Type type) { - data_.set_kind(type); - } - - bool operator==(const IndexDescriptor& left, const IndexDescriptor& right) { - return left.type() == right.type(); - } - - IndexDescriptor::TypeChar to_type_char(IndexDescriptor::Type type) { - switch (type) { - case IndexDescriptor::EMPTY: return 'E'; - case IndexDescriptor::TIMESTAMP: return 'T'; - case 
IndexDescriptor::ROWCOUNT: return 'R'; - case IndexDescriptor::STRING: return 'S'; - case IndexDescriptor::UNKNOWN: return 'U'; - default: util::raise_rte("Unknown index type: {}", int(type)); - } - } - - IndexDescriptor::Type from_type_char(IndexDescriptor::TypeChar type) { - switch (type) { - case 'E': return IndexDescriptor::EMPTY; - case 'T': return IndexDescriptor::TIMESTAMP; - case 'R': return IndexDescriptor::ROWCOUNT; - case 'S': return IndexDescriptor::STRING; - case 'U': return IndexDescriptor::UNKNOWN; - default: util::raise_rte("Unknown index type: {}", int(type)); - } - } - - const char* index_type_to_str(IndexDescriptor::Type type) { - switch (type) { - case IndexDescriptor::EMPTY: return "Empty"; - case IndexDescriptor::TIMESTAMP: return "Timestamp"; - case IndexDescriptor::ROWCOUNT: return "Row count"; - case IndexDescriptor::STRING: return "String"; - case IndexDescriptor::UNKNOWN: return "Unknown"; - default: util::raise_rte("Unknown index type: {}", int(type)); - } - } + ), + static_cast(static_cast(type_desc.dimension())) + }; +} + +DataType data_type_from_proto(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { + return type_desc_from_proto(type_desc).data_type(); +} + +arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor field_proto(DataType dt, Dimension dim, std::string_view name) { + arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor output; + if (!name.empty()) + output.set_name(name.data(), name.size()); + + auto output_desc = output.mutable_type_desc(); + output_desc->set_dimension(static_cast(dim)); + output_desc->set_size_bits(static_cast( + static_cast(slice_bit_size(dt)))); + + output_desc->set_value_type( + static_cast( + static_cast(slice_value_type(dt)))); + + return output; +} + +void set_id(arcticdb::proto::descriptors::StreamDescriptor& pb_desc, StreamId id) { + std::visit([&pb_desc](auto&& arg) { + using IdType = std::decay_t; + if constexpr (std::is_same_v) + pb_desc.set_num_id(arg); + else if constexpr (std::is_same_v) + pb_desc.set_str_id(arg); + else + util::raise_rte("Encoding unknown descriptor type"); + }, id); +} + +const char* index_type_to_str(IndexDescriptor::Type type) { + switch (type) { + case IndexDescriptor::Type::EMPTY: + return "Empty"; + case IndexDescriptor::Type::TIMESTAMP: + return "Timestamp"; + case IndexDescriptor::Type::ROWCOUNT: + return "Row count"; + case IndexDescriptor::Type::STRING: + return "String"; + case IndexDescriptor::Type::UNKNOWN: + return "Unknown"; + default: + util::raise_rte("Unknown index type: {}", int(type)); + } +} } // namespace arcticdb diff --git a/cpp/arcticdb/entity/types_proto.hpp b/cpp/arcticdb/entity/types_proto.hpp index be95972fa0..689fdd720e 100644 --- a/cpp/arcticdb/entity/types_proto.hpp +++ b/cpp/arcticdb/entity/types_proto.hpp @@ -5,72 +5,44 @@ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
*/ -#pragma once - -#include - #include +#include "arcticdb/storage/memory_layout.hpp" +#include namespace arcticdb::proto { - namespace descriptors = arcticc::pb2::descriptors_pb2; -} +namespace descriptors = arcticc::pb2::descriptors_pb2; +} //namespace arcticdb::proto namespace arcticdb::entity { - using FieldProto = arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor; +using FieldProto = arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor; - bool operator==(const FieldProto& left, const FieldProto& right); - bool operator<(const FieldProto& left, const FieldProto& right); +bool operator==(const FieldProto &left, const FieldProto &right); +bool operator<(const FieldProto &left, const FieldProto &right); +arcticdb::proto::descriptors::SortedValue sorted_value_to_proto(SortedValue sorted); +SortedValue sorted_value_from_proto(arcticdb::proto::descriptors::SortedValue sorted_proto); - arcticdb::proto::descriptors::SortedValue sorted_value_to_proto(SortedValue sorted); +void set_data_type(DataType data_type, arcticdb::proto::descriptors::TypeDescriptor &type_desc); - SortedValue sorted_value_from_proto(arcticdb::proto::descriptors::SortedValue sorted_proto); +DataType get_data_type(const arcticdb::proto::descriptors::TypeDescriptor &type_desc); +TypeDescriptor type_desc_from_proto(const arcticdb::proto::descriptors::TypeDescriptor &type_desc); - void set_data_type(DataType data_type, arcticdb::proto::descriptors::TypeDescriptor& type_desc); +DataType data_type_from_proto(const arcticdb::proto::descriptors::TypeDescriptor &type_desc); +arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor field_proto( + DataType dt, + Dimension dim, + std::string_view name); DataType get_data_type(const arcticdb::proto::descriptors::TypeDescriptor& type_desc); - TypeDescriptor type_desc_from_proto(const arcticdb::proto::descriptors::TypeDescriptor& type_desc); - DataType data_type_from_proto(const arcticdb::proto::descriptors::TypeDescriptor& type_desc); - arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor field_proto(DataType dt, Dimension dim, std::string_view name); - - - struct IndexDescriptor { - using Proto = arcticdb::proto::descriptors::IndexDescriptor; - - Proto data_; - using Type = arcticdb::proto::descriptors::IndexDescriptor::Type; - - static constexpr Type UNKNOWN = arcticdb::proto::descriptors::IndexDescriptor_Type_UNKNOWN; - static constexpr Type EMPTY = arcticdb::proto::descriptors::IndexDescriptor_Type_EMPTY; - static constexpr Type ROWCOUNT = arcticdb::proto::descriptors::IndexDescriptor_Type_ROWCOUNT; - static constexpr Type STRING = arcticdb::proto::descriptors::IndexDescriptor_Type_STRING; - static constexpr Type TIMESTAMP = arcticdb::proto::descriptors::IndexDescriptor_Type_TIMESTAMP; - - using TypeChar = char; - - IndexDescriptor() = default; - ARCTICDB_MOVE_COPY_DEFAULT(IndexDescriptor) - IndexDescriptor(size_t field_count, Type type); - explicit IndexDescriptor(arcticdb::proto::descriptors::IndexDescriptor data); - bool uninitialized() const; - const Proto& proto() const; - size_t field_count() const; - Type type() const; - void set_type(Type type); - friend bool operator==(const IndexDescriptor& left, const IndexDescriptor& right); - }; - - IndexDescriptor::TypeChar to_type_char(IndexDescriptor::Type type); - IndexDescriptor::Type from_type_char(IndexDescriptor::TypeChar type); const char* index_type_to_str(IndexDescriptor::Type type); void set_id(arcticdb::proto::descriptors::StreamDescriptor& pb_desc, StreamId id); @@ -80,51 +52,52 @@ 
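Reviewer note: the conversion helpers declared in this header (set_data_type / get_data_type / type_desc_from_proto) depend on DataType packing a ValueType into the high bits and SizeBits into the low three bits, as done by detail::combine_val_bits in types.hpp. A self-contained sketch of that pack/unpack roundtrip; the enums and their numeric values below are local stand-ins, not the library's actual enumerators:

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for arcticdb::entity::ValueType / SizeBits / DataType.
enum class ValueType : std::uint8_t { UINT = 1, INT = 2, FLOAT = 3 };
enum class SizeBits : std::uint8_t { S8 = 1, S16 = 2, S32 = 3, S64 = 4 };
enum class DataType : std::uint8_t {};

// Mirrors combine_val_bits: value type in the high bits, size bits in the low 3 bits.
constexpr DataType combine(ValueType v, SizeBits s) {
    return DataType((std::uint8_t(v) << 3u) | std::uint8_t(s));
}
constexpr ValueType slice_value_type(DataType d) { return ValueType(std::uint8_t(d) >> 3u); }
constexpr SizeBits slice_bit_size(DataType d) { return SizeBits(std::uint8_t(d) & 0x7u); }

int main() {
    // set_data_type / get_data_type perform this split and recombine when writing
    // the two fields into the protobuf TypeDescriptor and reading them back.
    constexpr DataType dt = combine(ValueType::FLOAT, SizeBits::S64);
    static_assert(slice_value_type(dt) == ValueType::FLOAT);
    static_assert(slice_bit_size(dt) == SizeBits::S64);
    assert(combine(slice_value_type(dt), slice_bit_size(dt)) == dt);
}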
namespace arcticdb::entity { namespace fmt { - template<> - struct formatter { - template - constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } - - template - auto format(const arcticdb::proto::descriptors::TypeDescriptor& type_desc, FormatContext& ctx) const { - auto td = arcticdb::entity::type_desc_from_proto(type_desc); - return fmt::format_to(ctx.out(), "{}", td); - } - }; - - template<> - struct formatter { - template - constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } - - template - auto format(const arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor& field_desc, FormatContext& ctx) const { - return fmt::format_to(ctx.out(), "{}: {}", field_desc.name(), field_desc.type_desc()); - } - }; - - template<> - struct formatter { - template - constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } - - template - auto format(const arcticdb::entity::IndexDescriptor& idx, FormatContext& ctx) const { - return fmt::format_to(ctx.out(), "IDX", idx.field_count(), static_cast(idx.type())); - } - }; - - template<> - struct formatter { - template - constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } - - template - auto format(const arcticdb::entity::Field& fd, FormatContext& ctx) const { - if (!fd.name().empty()) - return fmt::format_to(ctx.out(), "FD", fd.name(), fd.type()); - else - return fmt::format_to(ctx.out(), "FD", fd.type()); - } - }; -} +template<> +struct formatter { + template + constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } + + template + auto format(const arcticdb::proto::descriptors::TypeDescriptor& type_desc, FormatContext& ctx) const { + auto td = arcticdb::entity::type_desc_from_proto(type_desc); + return fmt::format_to(ctx.out(), "{}", td); + } +}; + +template<> +struct formatter { + template + constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } + + template + auto format(const arcticdb::proto::descriptors::StreamDescriptor_FieldDescriptor& field_desc, FormatContext& ctx) const { + return fmt::format_to(ctx.out(), "{}: {}", field_desc.name(), field_desc.type_desc()); + } +}; + +template<> +struct formatter { + template + constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } + + template + auto format(const IndexDescriptorImpl& idx, FormatContext& ctx) const { + return fmt::format_to(ctx.out(), "IDX", idx.field_count(), static_cast(idx.type())); + } +}; + +template<> +struct formatter { + template + constexpr auto parse(ParseContext& ctx) { return ctx.begin(); } + + template + auto format(const arcticdb::entity::Field& fd, FormatContext& ctx) const { + if (!fd.name().empty()) + return fmt::format_to(ctx.out(), "FD", fd.name(), fd.type()); + else + return fmt::format_to(ctx.out(), "FD", fd.type()); + } +}; +} //namespace fmt + diff --git a/cpp/arcticdb/log/log.cpp b/cpp/arcticdb/log/log.cpp index b4352b181f..32e8160148 100644 --- a/cpp/arcticdb/log/log.cpp +++ b/cpp/arcticdb/log/log.cpp @@ -277,7 +277,7 @@ bool Loggers::configure(const arcticdb::proto::logger::LoggersConfig &conf, bool util::as_opt(sink_conf.daily_file().utc_rotation_minute()).value_or(0) )); break; - default:util::raise_error_msg("Unsupported sink_conf {}", sink_conf); + default:util::raise_rte("Unsupported sink_conf {}", sink_conf.DebugString()); } } diff --git a/cpp/arcticdb/log/log.hpp b/cpp/arcticdb/log/log.hpp index bb7cc3cbad..1714ee1e91 100644 --- a/cpp/arcticdb/log/log.hpp +++ b/cpp/arcticdb/log/log.hpp @@ -20,6 +20,7 @@ #define ARCTICDB_TRACE(logger, ...) (void)0 #endif +#define ARCTICDB_INFO(logger, ...) 
logger.info(__VA_ARGS__) #define ARCTICDB_RUNTIME_DEBUG(logger, ...) logger.debug(__VA_ARGS__) namespace arcticc::pb2::logger_pb2 { diff --git a/cpp/arcticdb/pipeline/column_stats.cpp b/cpp/arcticdb/pipeline/column_stats.cpp index f3b3f9ae73..cbeaeeb90c 100644 --- a/cpp/arcticdb/pipeline/column_stats.cpp +++ b/cpp/arcticdb/pipeline/column_stats.cpp @@ -14,6 +14,7 @@ namespace arcticdb { SegmentInMemory merge_column_stats_segments(const std::vector& segments) { SegmentInMemory merged; merged.init_column_map(); + merged.descriptor().set_index(IndexDescriptorImpl{0, IndexDescriptor::Type::ROWCOUNT}); // Maintain the order of the columns in the input segments ankerl::unordered_dense::map field_name_to_index; @@ -22,6 +23,7 @@ SegmentInMemory merge_column_stats_segments(const std::vector& for (auto &segment : segments) { for (const auto &field: segment.descriptor().fields()) { auto new_type = field.type(); + if (auto it = field_name_to_index.find(std::string{field.name()}); it != field_name_to_index.end()) { auto &merged_type = type_descriptors.at(field_name_to_index.at(std::string{field.name()})); auto opt_common_type = has_valid_common_type(merged_type, new_type); diff --git a/cpp/arcticdb/pipeline/frame_utils.cpp b/cpp/arcticdb/pipeline/frame_utils.cpp index 7573d4740b..01d66e02f4 100644 --- a/cpp/arcticdb/pipeline/frame_utils.cpp +++ b/cpp/arcticdb/pipeline/frame_utils.cpp @@ -14,76 +14,57 @@ namespace arcticdb { TimeseriesDescriptor make_timeseries_descriptor( // TODO: It would be more explicit to use uint64_t instead of size_t. Not doing now as it involves a lot of type changes and needs to be done carefully. size_t total_rows, - StreamDescriptor&& desc, + const StreamDescriptor& desc, arcticdb::proto::descriptors::NormalizationMetadata&& norm_meta, std::optional&& um, std::optional&& prev_key, std::optional&& next_key, - bool bucketize_dynamic - ) { - arcticdb::proto::descriptors::TimeSeriesDescriptor time_series_descriptor; - time_series_descriptor.set_total_rows(total_rows); - *time_series_descriptor.mutable_stream_descriptor() = std::move(desc.proto()); - time_series_descriptor.mutable_normalization()->CopyFrom(norm_meta); + bool bucketize_dynamic) { + auto frame_desc = std::make_shared(); + frame_desc->total_rows_ = total_rows; + frame_desc->column_groups_ = bucketize_dynamic; + + auto segment_desc = std::make_shared(); + segment_desc->index_ = desc.index(); + segment_desc->sorted_ = desc.sorted(); + + auto proto = std::make_shared(); + proto->mutable_normalization()->CopyFrom(norm_meta); auto user_meta = std::move(um); if(user_meta) - *time_series_descriptor.mutable_user_meta() = std::move(*user_meta); + *proto->mutable_user_meta() = std::move(*user_meta); if(prev_key) - *time_series_descriptor.mutable_next_key() = encode_key(prev_key.value()); + proto->mutable_next_key()->CopyFrom(key_to_proto(prev_key.value())); if(next_key) - time_series_descriptor.mutable_next_key()->CopyFrom(encode_key(next_key.value())); - - if(bucketize_dynamic) - time_series_descriptor.mutable_column_groups()->set_enabled(true); + proto->mutable_next_key()->CopyFrom(key_to_proto(next_key.value())); //TODO maybe need ensure_norm_meta? 
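Reviewer note: make_timeseries_descriptor now assembles the TimeseriesDescriptor from separately heap-allocated components (frame descriptor, segment descriptor, proto metadata, fields) instead of a single protobuf message. A toy sketch of the resulting ownership semantics, with made-up struct names rather than the library's real types: a plain copy shares the components, while an explicit clone (as in timeseries_descriptor.hpp) deep-copies them.

#include <cassert>
#include <cstdint>
#include <memory>

// Toy stand-in for a shared descriptor component such as FrameDescriptorImpl.
struct FrameData { std::uint64_t total_rows_ = 0; };

struct Descriptor {
    std::shared_ptr<FrameData> frame_data_ = std::make_shared<FrameData>();

    // Deep copy, analogous to TimeseriesDescriptor::clone().
    Descriptor clone() const {
        Descriptor out;
        out.frame_data_ = std::make_shared<FrameData>(*frame_data_);
        return out;
    }
};

int main() {
    Descriptor original;
    Descriptor shared_copy = original;        // copy shares frame_data_
    Descriptor deep_copy = original.clone();  // clone owns its own frame_data_

    original.frame_data_->total_rows_ = 42;
    assert(shared_copy.frame_data_->total_rows_ == 42);  // sees the mutation
    assert(deep_copy.frame_data_->total_rows_ == 0);     // isolated from it
}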
- return TimeseriesDescriptor{std::make_shared(std::move(time_series_descriptor)), desc.fields_ptr()}; -} - - -TimeseriesDescriptor timseries_descriptor_from_index_segment( - size_t total_rows, - pipelines::index::IndexSegmentReader&& index_segment_reader, - std::optional&& prev_key, - bool bucketize_dynamic -) { - return make_timeseries_descriptor( - total_rows, - StreamDescriptor{std::make_shared(std::move(*index_segment_reader.mutable_tsd().mutable_proto().mutable_stream_descriptor())),index_segment_reader.mutable_tsd().fields_ptr()}, - std::move(*index_segment_reader.mutable_tsd().mutable_proto().mutable_normalization()), - std::move(*index_segment_reader.mutable_tsd().mutable_proto().mutable_user_meta()), - std::move(prev_key), - std::nullopt, - bucketize_dynamic); + return TimeseriesDescriptor{std::move(frame_desc), std::move(segment_desc), std::move(proto), desc.fields_ptr(), desc.id()}; } TimeseriesDescriptor timeseries_descriptor_from_pipeline_context( - const std::shared_ptr& pipeline_context, - std::optional&& prev_key, - bool bucketize_dynamic) { + const std::shared_ptr& pipeline_context, + std::optional&& prev_key, + bool bucketize_dynamic) { return make_timeseries_descriptor( pipeline_context->total_rows_, - StreamDescriptor{std::make_shared(std::move(pipeline_context->desc_->mutable_proto())), - pipeline_context->desc_->fields_ptr()}, + pipeline_context->descriptor(), std::move(*pipeline_context->norm_meta_), pipeline_context->user_meta_ ? std::make_optional(std::move(*pipeline_context->user_meta_)) : std::nullopt, std::move(prev_key), std::nullopt, - bucketize_dynamic - ); + bucketize_dynamic); } TimeseriesDescriptor index_descriptor_from_frame( const std::shared_ptr& frame, size_t existing_rows, - std::optional&& prev_key -) { + std::optional&& prev_key) { return make_timeseries_descriptor( frame->num_rows + existing_rows, - StreamDescriptor{std::make_shared(std::move(frame->desc.mutable_proto())), - frame->desc.fields_ptr()}, + frame->desc, std::move(frame->norm_meta), std::move(frame->user_meta), std::move(prev_key), @@ -150,7 +131,7 @@ std::pair offset_and_row_count(const std::shared_ptr(frame.index) || frame.desc.get_sorted() == SortedValue::ASCENDING; + return !std::holds_alternative(frame.index) || frame.desc.sorted() == SortedValue::ASCENDING; } } diff --git a/cpp/arcticdb/pipeline/frame_utils.hpp b/cpp/arcticdb/pipeline/frame_utils.hpp index c303c67c3d..4a11b2052d 100644 --- a/cpp/arcticdb/pipeline/frame_utils.hpp +++ b/cpp/arcticdb/pipeline/frame_utils.hpp @@ -50,7 +50,7 @@ inline size_t get_max_string_size(const pipelines::PipelineContextRow& context_r TimeseriesDescriptor make_timeseries_descriptor( size_t total_rows, - StreamDescriptor&& desc, + const StreamDescriptor& desc, arcticdb::proto::descriptors::NormalizationMetadata&& norm_meta, std::optional&& um, std::optional&& prev_key, diff --git a/cpp/arcticdb/pipeline/index_segment_reader.cpp b/cpp/arcticdb/pipeline/index_segment_reader.cpp index b55b617144..8781053ed3 100644 --- a/cpp/arcticdb/pipeline/index_segment_reader.cpp +++ b/cpp/arcticdb/pipeline/index_segment_reader.cpp @@ -24,20 +24,8 @@ IndexSegmentReader get_index_reader(const AtomKey &prev_index, const std::shared return index::IndexSegmentReader{std::move(seg)}; } -IndexSegmentReader::IndexSegmentReader(SegmentInMemory&& s) : seg_(std::move(s)) { - seg_.metadata()->UnpackTo(&tsd_.mutable_proto()); - if(seg_.has_index_fields()) { - tsd_.mutable_fields() = seg_.detach_index_fields(); - tsd_.mutable_fields().regenerate_offsets(); - } else { - 
TimeseriesDescriptor::Proto tsd; - if(seg_.metadata()->UnpackTo(&tsd)) { - tsd_.mutable_fields() = fields_from_proto(tsd.stream_descriptor()); - } else { - util::raise_rte("Unable to unpack index fields"); - } - } - ARCTICDB_DEBUG(log::version(), "Decoded index segment descriptor: {}", tsd_.proto().DebugString()); +IndexSegmentReader::IndexSegmentReader(SegmentInMemory&& s) : + seg_(std::move(s)) { } const Column &IndexSegmentReader::column(Fields field) const { @@ -59,7 +47,7 @@ IndexRange get_index_segment_range( } bool IndexSegmentReader::bucketize_dynamic() const { - return tsd().proto().has_column_groups() && tsd().proto().column_groups().enabled(); + return tsd().column_groups(); } SliceAndKey IndexSegmentReader::row(std::size_t r) const { @@ -106,11 +94,11 @@ IndexSegmentIterator IndexSegmentReader::last() const { } bool IndexSegmentReader::is_pickled() const { - return tsd_.proto().normalization().input_type_case() == arcticdb::proto::descriptors::NormalizationMetadata::InputTypeCase::kMsgPackFrame; + return tsd().proto().normalization().input_type_case() == arcticdb::proto::descriptors::NormalizationMetadata::InputTypeCase::kMsgPackFrame; } bool IndexSegmentReader::has_timestamp_index() const { - return tsd_.proto().stream_descriptor().index().kind() == arcticdb::proto::descriptors::IndexDescriptor::Type::IndexDescriptor_Type_TIMESTAMP; + return tsd().index().type_ == IndexDescriptor::Type::TIMESTAMP; } void check_column_and_date_range_filterable(const pipelines::index::IndexSegmentReader& index_segment_reader, const ReadQuery& read_query) { @@ -119,8 +107,8 @@ void check_column_and_date_range_filterable(const pipelines::index::IndexSegment "The data for this symbol is pickled and does not support column stats, date_range, row_range, or column queries"); util::check(index_segment_reader.has_timestamp_index() || !std::holds_alternative(read_query.row_filter), "Cannot apply date range filter to symbol with non-timestamp index"); - sorting::check(index_segment_reader.get_sorted() == SortedValue::UNKNOWN || - index_segment_reader.get_sorted() == SortedValue::ASCENDING || + sorting::check(index_segment_reader.sorted() == SortedValue::UNKNOWN || + index_segment_reader.sorted() == SortedValue::ASCENDING || !std::holds_alternative(read_query.row_filter), "When filtering data using date_range, the symbol must be sorted in ascending order. 
ArcticDB believes it is not sorted in ascending order and cannot therefore filter the data using date_range."); } diff --git a/cpp/arcticdb/pipeline/index_segment_reader.hpp b/cpp/arcticdb/pipeline/index_segment_reader.hpp index 63496e80fd..14ad963df2 100644 --- a/cpp/arcticdb/pipeline/index_segment_reader.hpp +++ b/cpp/arcticdb/pipeline/index_segment_reader.hpp @@ -37,7 +37,6 @@ struct IndexSegmentReader { using std::swap; swap(left.seg_, right.seg_); - swap(left.tsd_, right.tsd_); } ARCTICDB_MOVE_ONLY_DEFAULT(IndexSegmentReader) @@ -64,26 +63,25 @@ struct IndexSegmentReader { bool bucketize_dynamic() const; - SortedValue get_sorted() const { - return sorted_value_from_proto(tsd().proto().stream_descriptor().sorted()); + SortedValue sorted() const { + return tsd().sorted(); } void set_sorted(SortedValue sorted) { - mutable_tsd().mutable_proto().mutable_stream_descriptor()->set_sorted(sorted_value_to_proto(sorted)); + mutable_tsd().set_sorted(sorted); } const TimeseriesDescriptor& tsd() const { - return tsd_; + return seg_.index_descriptor(); } TimeseriesDescriptor& mutable_tsd() { - return tsd_; + return seg_.mutable_index_descriptor(); } private: mutable std::unordered_map, AxisRange::Hasher> descriptor_by_col_group_; SegmentInMemory seg_; - TimeseriesDescriptor tsd_; }; struct IndexSegmentIterator { diff --git a/cpp/arcticdb/pipeline/index_utils.cpp b/cpp/arcticdb/pipeline/index_utils.cpp index f3c214be6e..d83af8016d 100644 --- a/cpp/arcticdb/pipeline/index_utils.cpp +++ b/cpp/arcticdb/pipeline/index_utils.cpp @@ -16,7 +16,7 @@ namespace arcticdb::pipelines::index { template folly::Future write_index( - TimeseriesDescriptor &&metadata, + const TimeseriesDescriptor& metadata, std::vector &&sk, const IndexPartialKey &partial_key, const std::shared_ptr &sink @@ -31,14 +31,14 @@ folly::Future write_index( folly::Future write_index( const stream::Index& index, - TimeseriesDescriptor &&metadata, + const TimeseriesDescriptor& metadata, std::vector &&sk, const IndexPartialKey &partial_key, const std::shared_ptr &sink ) { return util::variant_match(index, [&] (auto idx) { using IndexType = decltype(idx); - return write_index(std::move(metadata), std::move(sk), partial_key, sink); + return write_index(metadata, std::move(sk), partial_key, sink); }); } @@ -51,7 +51,7 @@ folly::Future write_index( auto offset = frame->offset; auto index = stream::index_type_from_descriptor(frame->desc); auto timeseries_desc = index_descriptor_from_frame(frame, offset); - return write_index(index, std::move(timeseries_desc), std::move(slice_and_keys), partial_key, sink); + return write_index(index, timeseries_desc, std::move(slice_and_keys), partial_key, sink); } folly::Future write_index( @@ -86,7 +86,7 @@ TimeseriesDescriptor get_merged_tsd( const std::shared_ptr& new_frame) { auto existing_descriptor = existing_tsd.as_stream_descriptor(); auto merged_descriptor = existing_descriptor; - if (existing_tsd.proto().total_rows() == 0){ + if (existing_tsd.total_rows() == 0){ // If the existing dataframe is empty, we use the descriptor of the new_frame merged_descriptor = new_frame->desc; } @@ -116,7 +116,7 @@ TimeseriesDescriptor get_merged_tsd( } } } - merged_descriptor.set_sorted(deduce_sorted(existing_descriptor.get_sorted(), new_frame->desc.get_sorted())); + merged_descriptor.set_sorted(deduce_sorted(existing_descriptor.sorted(), new_frame->desc.sorted())); return make_timeseries_descriptor( row_count, std::move(merged_descriptor), diff --git a/cpp/arcticdb/pipeline/index_utils.hpp 
b/cpp/arcticdb/pipeline/index_utils.hpp index bab1b4c35f..be2a633cd7 100644 --- a/cpp/arcticdb/pipeline/index_utils.hpp +++ b/cpp/arcticdb/pipeline/index_utils.hpp @@ -35,14 +35,14 @@ inline std::vector unfiltered_index(const index::IndexSegmentReader } template -std::optional index_value_from_row(const RowType &row, IndexDescriptor::Type index_type, int field_num) { +std::optional index_value_from_row(const RowType &row, IndexDescriptorImpl::Type index_type, int field_num) { std::optional index_value; switch (index_type) { - case IndexDescriptor::TIMESTAMP: - case IndexDescriptor::ROWCOUNT: + case IndexDescriptorImpl::Type::TIMESTAMP: + case IndexDescriptorImpl::Type::ROWCOUNT: index_value = row.template scalar_at(field_num); break; - case IndexDescriptor::STRING: { + case IndexDescriptorImpl::Type::STRING: { auto opt = row.string_at(field_num); index_value = opt ? std::make_optional(std::string(opt.value())) : std::nullopt; break; @@ -54,25 +54,25 @@ std::optional index_value_from_row(const RowType &row, IndexDescript } template -std::optional index_start_from_row(const RowType &row, IndexDescriptor::Type index_type) { +std::optional index_start_from_row(const RowType &row, IndexDescriptorImpl::Type index_type) { return index_value_from_row(row, index_type, 0); } template - IndexValue index_value_from_segment(const SegmentType &seg, size_t row_id, FieldType field) { +IndexValue index_value_from_segment(const SegmentType &seg, size_t row_id, FieldType field) { auto index_type = seg.template scalar_at(row_id, int(FieldType::index_type)); IndexValue index_value; - switch (index_type.value()) { - case IndexDescriptor::TIMESTAMP: - case IndexDescriptor::ROWCOUNT: - index_value = seg.template scalar_at(row_id, int(field)).value(); - break; - case IndexDescriptor::STRING: - index_value = std::string(seg.string_at(row_id, int(field)).value()); - break; - default: - util::raise_rte("Unknown index type {} for column {} and row {}", - uint32_t(index_type.value()), uint32_t(field), row_id); + auto type = IndexDescriptor::Type(index_type.value()); + switch (type) { + case IndexDescriptorImpl::Type::TIMESTAMP: + case IndexDescriptorImpl::Type::ROWCOUNT: + index_value = seg.template scalar_at(row_id, int(field)).value(); + break; + case IndexDescriptorImpl::Type::STRING: + index_value = std::string(seg.string_at(row_id, int(field)).value()); + break; + default: + util::raise_rte("Unknown index type {} for column {} and row {}", uint32_t(index_type.value()), uint32_t(field), row_id); } return index_value; } @@ -89,14 +89,14 @@ IndexValue index_end_from_segment(const SegmentType &seg, size_t row_id) { template folly::Future write_index( - TimeseriesDescriptor&& metadata, + const TimeseriesDescriptor& metadata, std::vector&& slice_and_keys, const IndexPartialKey& partial_key, const std::shared_ptr& sink); folly::Future write_index( const stream::Index& index, - TimeseriesDescriptor &&metadata, + const TimeseriesDescriptor& metadata, std::vector &&sk, const IndexPartialKey &partial_key, const std::shared_ptr &sink); @@ -105,23 +105,21 @@ folly::Future write_index( const std::shared_ptr& frame, std::vector> &&slice_and_keys, const IndexPartialKey &partial_key, - const std::shared_ptr &sink - ); + const std::shared_ptr &sink); folly::Future write_index( const std::shared_ptr& frame, std::vector &&slice_and_keys, const IndexPartialKey &partial_key, - const std::shared_ptr &sink - ); + const std::shared_ptr &sink); inline folly::Future index_and_version( - const stream::Index& index, - const std::shared_ptr& 
store, - TimeseriesDescriptor time_series, - std::vector slice_and_keys, - const StreamId& stream_id, - VersionId version_id) { + const stream::Index& index, + const std::shared_ptr& store, + TimeseriesDescriptor time_series, + std::vector slice_and_keys, + const StreamId& stream_id, + VersionId version_id) { return write_index( index, std::move(time_series), diff --git a/cpp/arcticdb/pipeline/index_writer.hpp b/cpp/arcticdb/pipeline/index_writer.hpp index 00d51b465e..c565e74fc5 100644 --- a/cpp/arcticdb/pipeline/index_writer.hpp +++ b/cpp/arcticdb/pipeline/index_writer.hpp @@ -28,10 +28,9 @@ class IndexWriter { public: ARCTICDB_MOVE_ONLY_DEFAULT(IndexWriter) - IndexWriter(std::shared_ptr sink, IndexPartialKey partial_key, TimeseriesDescriptor &&meta, const std::optional& key_type = std::nullopt) : - bucketize_columns_(meta.proto().has_column_groups() && meta.proto().column_groups().enabled()), + IndexWriter(std::shared_ptr sink, IndexPartialKey partial_key, const TimeseriesDescriptor &tsd, const std::optional& key_type = std::nullopt) : + bucketize_columns_(tsd.column_groups()), partial_key_(std::move(partial_key)), - meta_(std::move(meta)), agg_(Desc::schema(partial_key_.id, bucketize_columns_), [&](auto &&segment) { on_segment(std::forward(segment)); @@ -40,11 +39,7 @@ class IndexWriter { sink_(std::move(sink)), key_being_committed_(folly::Future::makeEmpty()), key_type_(key_type) { - static const auto encoding = ConfigsMap::instance()->get_int("VersionStore.Encoding", 1); - if(encoding == 1) { - meta_.copy_to_self_proto(); - } - agg_.segment().set_timeseries_descriptor(std::move(meta_)); //TODO very weird, why this short-lived member? + agg_.segment().set_timeseries_descriptor(tsd); } void add(const arcticdb::entity::AtomKey &key, const FrameSlice &slice) { @@ -102,7 +97,7 @@ class IndexWriter { } folly::Future commit() { - agg_.commit(); + agg_.finalize(); return std::move(key_being_committed_); } @@ -127,7 +122,6 @@ class IndexWriter { bool bucketize_columns_ = false; IndexPartialKey partial_key_; - TimeseriesDescriptor meta_; SliceAggregator agg_; std::shared_ptr sink_; folly::Future key_being_committed_; diff --git a/cpp/arcticdb/pipeline/pipeline_context.cpp b/cpp/arcticdb/pipeline/pipeline_context.cpp index 0b47842613..3defa60d49 100644 --- a/cpp/arcticdb/pipeline/pipeline_context.cpp +++ b/cpp/arcticdb/pipeline/pipeline_context.cpp @@ -28,16 +28,12 @@ PipelineContext::PipelineContext(SegmentInMemory& frame, const AtomKey& key) : map->set_from_descriptor(frame.descriptor()); auto descriptor = std::make_shared(frame.descriptor()); - segment_descriptors_[0] = (std::move(descriptor)); + segment_descriptors_[0] = std::move(descriptor); } void PipelineContext::set_selected_columns(const std::vector& columns) { util::check(static_cast(desc_), "Descriptor not set in set_selected_columns"); - selected_columns_ = requested_column_bitset_including_index(desc_->proto(), columns); -} - -bool PipelineContextRow::selected_columns(size_t n) const { - return !parent_->selected_columns_ || parent_->selected_columns_.value()[n]; + selected_columns_ = requested_column_bitset_including_index(*desc_, columns); } const std::optional& PipelineContextRow::get_selected_columns() const { @@ -90,6 +86,10 @@ void PipelineContextRow::set_descriptor(std::shared_ptr&& desc parent_->segment_descriptors_[index_] = std::move(desc); } +void PipelineContextRow::set_descriptor(const StreamDescriptor& desc) { + parent_->segment_descriptors_[index_] = std::make_shared(desc); +} + void 
PipelineContextRow::set_descriptor(const std::shared_ptr& desc) { parent_->segment_descriptors_[index_] = desc; } diff --git a/cpp/arcticdb/pipeline/pipeline_context.hpp b/cpp/arcticdb/pipeline/pipeline_context.hpp index e15f18dd76..f8b8983bdb 100644 --- a/cpp/arcticdb/pipeline/pipeline_context.hpp +++ b/cpp/arcticdb/pipeline/pipeline_context.hpp @@ -39,10 +39,10 @@ struct PipelineContextRow { [[nodiscard]] const SliceAndKey& slice_and_key() const; SliceAndKey& slice_and_key(); [[nodiscard]] const std::optional& get_selected_columns() const; - bool selected_columns(size_t n) const; bool fetch_index(); [[nodiscard]] const StreamDescriptor& descriptor() const; void set_descriptor(StreamDescriptor&& desc); + void set_descriptor(const StreamDescriptor& desc); void set_descriptor(std::shared_ptr&& desc); void set_descriptor(const std::shared_ptr& desc); void set_compacted(bool val); diff --git a/cpp/arcticdb/pipeline/query.hpp b/cpp/arcticdb/pipeline/query.hpp index 5d20744500..39b0194f10 100644 --- a/cpp/arcticdb/pipeline/query.hpp +++ b/cpp/arcticdb/pipeline/query.hpp @@ -272,20 +272,20 @@ template inline FilterQuery create_index_filter(const IndexRange &range, bool dynamic_schema, bool column_groups) { static_assert(std::is_same_v); return [rg = range, dynamic_schema, column_groups](const ContainerType &container, std::unique_ptr&& input) mutable { - auto index_type = container.seg().template scalar_at(0u, int(index::Fields::index_type)); - - switch (index_type.value()) { - case IndexDescriptor::TIMESTAMP: { + auto maybe_index_type = container.seg().template scalar_at(0u, int(index::Fields::index_type)); + const auto index_type = IndexDescriptor::Type(maybe_index_type.value()); + switch (index_type) { + case IndexDescriptorImpl::Type::TIMESTAMP: { return build_bitset_for_index(container, rg, dynamic_schema, column_groups, std::move(input)); } - case IndexDescriptor::STRING: { + case IndexDescriptorImpl::Type::STRING: { return build_bitset_for_index(container, rg, dynamic_schema, column_groups, std::move(input)); } - default:util::raise_rte("Unknown index type {} in create_index_filter", uint32_t(index_type.value())); + default:util::raise_rte("Unknown index type {} in create_index_filter", uint32_t(index_type)); } }; } @@ -342,7 +342,7 @@ inline std::vector> build_update_query_filters( // be appended to, the type of the frame being appended, and the specified range, if supplied. std::vector> queries; util::variant_match(range, - [&](const RowRange &row_range) { + [&](const RowRange &row_range) { util::check(std::holds_alternative(index), "Cannot partition by row count when a timeseries-indexed frame was supplied"); queries.emplace_back( create_row_filter(RowRange{row_range.first, row_range.second})); diff --git a/cpp/arcticdb/pipeline/read_frame.cpp b/cpp/arcticdb/pipeline/read_frame.cpp index 73eee520a8..1358c6e3a3 100644 --- a/cpp/arcticdb/pipeline/read_frame.cpp +++ b/cpp/arcticdb/pipeline/read_frame.cpp @@ -23,9 +23,10 @@ #include #include #include -#include -#include #include +#include + +#include #include #include #include @@ -58,7 +59,7 @@ StreamDescriptor get_filtered_descriptor(StreamDescriptor&& descriptor, const st auto index = stream::index_type_from_descriptor(desc); return util::variant_match(index, [&desc, &filter_columns] (const auto& idx) { const std::shared_ptr& fields = filter_columns ? 
filter_columns : desc.fields_ptr(); - return StreamDescriptor{index_descriptor(desc.id(), idx, *fields)}; + return StreamDescriptor{index_descriptor_from_range(desc.id(), idx, *fields)}; }); } @@ -94,8 +95,8 @@ size_t get_index_field_count(const SegmentInMemory& frame) { return frame.descriptor().index().field_count(); } -const uint8_t* skip_heading_fields(const arcticdb::proto::encoding::SegmentHeader & hdr, const uint8_t*& data) { - const auto has_magic_numbers = EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2; +const uint8_t* skip_heading_fields(const SegmentHeader & hdr, const uint8_t*& data) { + const auto has_magic_numbers = hdr.encoding_version() == EncodingVersion::V2; if(has_magic_numbers) util::check_magic(data); @@ -105,8 +106,12 @@ const uint8_t* skip_heading_fields(const arcticdb::proto::encoding::SegmentHeade data += metadata_size; } - if(has_magic_numbers) - util::check_magic(data); + if(has_magic_numbers) { + util::check_magic(data); + data += sizeof(SegmentDescriptor); + skip_identifier(data); + util::check_magic(data); + } if(hdr.has_descriptor_field()) { auto descriptor_field_size = encoding_sizes::ndarray_field_compressed_size(hdr.descriptor_field().ndarray()); @@ -125,7 +130,7 @@ const uint8_t* skip_heading_fields(const arcticdb::proto::encoding::SegmentHeade return data; } -void decode_string_pool(const arcticdb::proto::encoding::SegmentHeader & hdr, const uint8_t*& data, const uint8_t *begin ARCTICDB_UNUSED, const uint8_t* end, PipelineContextRow &context) { +void decode_string_pool(const SegmentHeader& hdr, const uint8_t*& data, const uint8_t *begin ARCTICDB_UNUSED, const uint8_t* end, PipelineContextRow &context) { if (hdr.has_string_pool_field()) { ARCTICDB_DEBUG(log::codec(), "Decoding string pool at position: {}", data - begin); util::check(data != end, "Reached end of input block with string pool fields to decode"); @@ -136,21 +141,21 @@ void decode_string_pool(const arcticdb::proto::encoding::SegmentHeader & hdr, co if(EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2) util::check_magic(data); - data += decode_field(string_pool_descriptor().type(), - hdr.string_pool_field(), + util::check(hdr.string_pool_field().has_ndarray(), "Expected string pool field to be ndarray"); + data += decode_ndarray(string_pool_descriptor().type(), + hdr.string_pool_field().ndarray(), data, context.string_pool(), bv, - to_encoding_version(hdr.encoding_version())); + hdr.encoding_version()); ARCTICDB_TRACE(log::codec(), "Decoded string pool to position {}", data - begin); } } -template -void decode_index_field_impl( +void decode_index_field( SegmentInMemory &frame, - const EncodedFieldType& field, + const EncodedFieldImpl& field, const uint8_t*& data, const uint8_t *begin ARCTICDB_UNUSED, const uint8_t* end ARCTICDB_UNUSED, @@ -160,7 +165,7 @@ void decode_index_field_impl( if (!context.fetch_index()) { // not selected, skip decompression auto size = encoding_sizes::ndarray_field_compressed_size(field.ndarray()); - if constexpr(std::is_same_v) + if (encoding_version == EncodingVersion::V2) size += sizeof(ColumnMagic); data += size; @@ -188,40 +193,16 @@ void decode_index_field_impl( } } -void decode_index_field( - SegmentInMemory& frame, - VariantField variant_field, - const uint8_t*& data, - const uint8_t* begin ARCTICDB_UNUSED, - const uint8_t* end ARCTICDB_UNUSED, - PipelineContextRow& context, - EncodingVersion encoding_version -) { - util::variant_match(variant_field, [&](auto field) { - decode_index_field_impl(frame, *field, data, begin, end, 
context, encoding_version); - }); -} - -template -void decode_or_expand_impl( +void decode_or_expand( const uint8_t*& data, uint8_t* dest, - const EncodedFieldType& encoded_field_info, + const EncodedFieldImpl& encoded_field_info, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encding_version, - const ColumnMapping& m -) { - if (auto handler = TypeHandlerRegistry::instance()->get_handler(m.source_type_desc_); handler) { - handler->handle_type( - data, - dest, - VariantField{&encoded_field_info}, - dest_bytes, - std::move(buffers), - encding_version, - m - ); + EncodingVersion encoding_version, + const ColumnMapping& m) { + if(auto handler = TypeHandlerRegistry::instance()->get_handler(m.source_type_desc_); handler) { + handler->handle_type(data, dest, encoded_field_info, m, dest_bytes, std::move(buffers), encoding_version); } else { std::optional bv; if (encoded_field_info.has_ndarray() && encoded_field_info.ndarray().sparse_map_bytes() > 0) { @@ -229,7 +210,7 @@ void decode_or_expand_impl( const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); ChunkedBuffer sparse{bytes}; SliceDataSink sparse_sink{sparse.data(), bytes}; - data += decode_field(m.source_type_desc_, encoded_field_info, data, sparse_sink, bv, encding_version); + data += decode_field(m.source_type_desc_, encoded_field_info, data, sparse_sink, bv, encoding_version); m.source_type_desc_.visit_tag([dest, dest_bytes, &bv, &sparse](const auto tdt) { using TagType = decltype(tdt); using RawType = typename TagType::DataTypeTag::raw_type; @@ -245,58 +226,35 @@ void decode_or_expand_impl( util::default_initialize(dest + bytes, dest_bytes - bytes); }); } - data += decode_field(m.source_type_desc_, encoded_field_info, data, sink, bv, encding_version); + data += decode_field(m.source_type_desc_, encoded_field_info, data, sink, bv, encoding_version); } } } -size_t get_field_range_compressed_size(size_t start_idx, size_t num_fields, - const arcticdb::proto::encoding::SegmentHeader& hdr, - const VariantEncodedFieldCollection& fields) { +size_t get_field_range_compressed_size( + size_t start_idx, + size_t num_fields, + const SegmentHeader& hdr, + const EncodedFieldCollection& fields) { size_t total = 0ULL; const size_t magic_num_size = EncodingVersion(hdr.encoding_version()) == EncodingVersion::V2 ? 
sizeof(ColumnMagic) : 0u; ARCTICDB_DEBUG(log::version(), "Skipping between {} and {}", start_idx, start_idx + num_fields); for(auto i = start_idx; i < start_idx + num_fields; ++i) { - util::variant_match(fields.at(i), [&total, magic_num_size] (const auto& field) { - ARCTICDB_DEBUG(log::version(), "Adding {}", encoding_sizes::ndarray_field_compressed_size(field->ndarray()) + magic_num_size); - total += encoding_sizes::ndarray_field_compressed_size(field->ndarray()) + magic_num_size; - }); + const auto& field = fields.at(i); + ARCTICDB_DEBUG(log::version(), "Adding {}", encoding_sizes::ndarray_field_compressed_size(field.ndarray()) + magic_num_size); + total += encoding_sizes::ndarray_field_compressed_size(field.ndarray()) + magic_num_size; } ARCTICDB_DEBUG(log::version(), "Fields {} to {} contain {} bytes", start_idx, start_idx + num_fields, total); return total; } -void decode_or_expand( - const uint8_t*& data, - uint8_t* dest, - const VariantField& variant_field, - size_t dest_bytes, - std::shared_ptr buffers, - EncodingVersion encoding_version, - const ColumnMapping& m -) { - util::variant_match(variant_field, [&](auto field) { - decode_or_expand_impl( - data, - dest, - *field, - dest_bytes, - buffers, - encoding_version, - m - ); - }); -} - void advance_field_size( - const VariantField& variant_field, + const EncodedFieldImpl& field, const uint8_t*& data, bool has_magic_numbers ) { - util::variant_match(variant_field, [&data, has_magic_numbers] (auto field) { const size_t magic_num_size = has_magic_numbers ? sizeof(ColumnMagic) : 0ULL; - data += encoding_sizes::ndarray_field_compressed_size(field->ndarray()) + magic_num_size; - }); + data += encoding_sizes::ndarray_field_compressed_size(field.ndarray()) + magic_num_size; } void advance_skipped_cols( @@ -305,8 +263,8 @@ void advance_skipped_cols( size_t source_col, size_t first_col_offset, size_t index_fieldcount, - const VariantEncodedFieldCollection& fields, - const arcticdb::proto::encoding::SegmentHeader& hdr) { + const EncodedFieldCollection& fields, + const SegmentHeader& hdr) { const auto next_col = prev_col_offset + 1; auto skipped_cols = source_col - next_col; if(skipped_cols) { @@ -341,17 +299,17 @@ void decode_into_frame_static( auto &hdr = seg.header(); auto index_fieldcount = get_index_field_count(frame); data = skip_heading_fields(hdr, data); - context.set_descriptor(StreamDescriptor{ std::make_shared(std::move(*hdr.mutable_stream_descriptor())), seg.fields_ptr() }); + context.set_descriptor(seg.descriptor()); context.set_compacted(hdr.compacted()); - ARCTICDB_DEBUG(log::version(), "Num fields: {}", seg.header().fields_size()); - const EncodingVersion encoding_version = EncodingVersion(hdr.encoding_version()); + ARCTICDB_DEBUG(log::version(), "Num fields: {}", seg.descriptor().field_count()); + const auto encoding_version = hdr.encoding_version(); const bool has_magic_nums = encoding_version == EncodingVersion::V2; - VariantEncodedFieldCollection fields(seg); + const auto& fields = hdr.body_fields(); // data == end in case we have empty data types (e.g. {EMPTYVAL, Dim0}, {EMPTYVAL, Dim1}) for which we store nothing // in storage as they can be reconstructed in the type handler on the read path. 
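The replacement above drops the VariantEncodedFieldCollection indirection: fields now come straight from an EncodedFieldCollection, and the bytes to skip for an unselected column range are the sum of each field's compressed ndarray size, plus a ColumnMagic marker per column under the V2 encoding. A minimal standalone sketch of that size-summing idea follows; bytes_to_skip, field_sizes and the 4-byte marker size are assumptions made for the sketch, not ArcticDB's real types.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Hypothetical stand-in for skipping a range of unselected columns: sum each
    // field's compressed payload size, plus a per-column magic marker when the
    // segment uses the V2 encoding, to get the number of bytes to advance.
    std::size_t bytes_to_skip(const std::vector<std::size_t>& field_sizes,
                              std::size_t start, std::size_t count,
                              bool has_magic_numbers) {
        const std::size_t magic = has_magic_numbers ? sizeof(std::uint32_t) : 0;  // marker size assumed
        std::size_t total = 0;
        for (std::size_t i = start; i < start + count; ++i)
            total += field_sizes.at(i) + magic;
        return total;
    }

    int main() {
        const std::vector<std::size_t> sizes{128, 256, 64, 512};
        std::cout << bytes_to_skip(sizes, 1, 2, true) << '\n';  // 256 + 64 + 2 * 4 = 328
        return 0;
    }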
- if (data != end || fields.size() > 0) { - auto index_field = fields.at(0u); + if (data != end || !fields.empty()) { + auto& index_field = fields.at(0u); decode_index_field(frame, index_field, data, begin, end, context, encoding_version); StaticColumnMappingIterator it(context, index_fieldcount); @@ -363,7 +321,7 @@ void decode_into_frame_static( if(has_magic_nums) util::check_magic_in_place(data); - auto encoded_field = fields.at(it.source_field_pos()); + auto& encoded_field = fields.at(it.source_field_pos()); util::check(it.source_field_pos() < size_t(fields.size()), "Field index out of range: {} !< {}", it.source_field_pos(), fields.size()); auto field_name = context.descriptor().fields(it.source_field_pos()).name(); auto& buffer = frame.column(static_cast(it.dest_col())).data().buffer(); @@ -432,22 +390,21 @@ void decode_into_frame_dynamic( auto &hdr = seg.header(); auto index_fieldcount = get_index_field_count(frame); data = skip_heading_fields(hdr, data); - context.set_descriptor(StreamDescriptor{std::make_shared(std::move(*hdr.mutable_stream_descriptor())), seg.fields_ptr()}); + context.set_descriptor(std::make_shared(seg.descriptor())); context.set_compacted(hdr.compacted()); - const EncodingVersion encdoing_version = EncodingVersion(hdr.encoding_version()); - const bool has_magic_numbers = encdoing_version == EncodingVersion::V2; - VariantEncodedFieldCollection fields(seg); - // data == end in case we have empty data types (e.g. {EMPTYVAL, Dim0}, {EMPTYVAL, Dim1}) for which we store nothing - // in storage as they can be reconstructed in the type handler on the read path. - if (data != end || fields.size() > 0) { - auto index_field = fields.at(0u); - decode_index_field(frame, index_field, data, begin, end, context, encdoing_version); + const auto encoding_version = hdr.encoding_version(); + const bool has_magic_numbers = encoding_version == EncodingVersion::V2; + + if (!hdr.body_fields().empty()) { + const auto& fields = hdr.body_fields(); + auto& index_field = fields.at(0u); + decode_index_field(frame, index_field, data, begin, end, context, encoding_version); auto field_count = context.slice_and_key().slice_.col_range.diff() + index_fieldcount; for (auto field_col = index_fieldcount; field_col < field_count; ++field_col) { auto field_name = context.descriptor().fields(field_col).name(); - auto encoded_field = fields.at(field_col); + auto& encoded_field = fields.at(field_col); auto frame_loc_opt = frame.column_index(field_name); if (!frame_loc_opt) { // Column is not selected in the output frame. @@ -458,6 +415,7 @@ void decode_into_frame_dynamic( auto dst_col = *frame_loc_opt; auto& buffer = frame.column(static_cast(dst_col)).data().buffer(); ColumnMapping m{frame, dst_col, field_col, context}; + util::check( static_cast(has_valid_type_promotion(m.source_type_desc_, m.dest_type_desc_)), "Can't promote type {} to type {} in field {}", @@ -484,11 +442,10 @@ void decode_into_frame_dynamic( encoded_field, m.dest_bytes_, buffers, - encdoing_version, + encoding_version, m ); - // decode_or_expand will invoke the empty type handler which will do backfilling with the default value depending on the - // destination type. 
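The comment removed above describes what decode_or_expand relies on: when a column's stored values cover fewer rows than the destination buffer, or the source is the empty type, the remaining slots are backfilled with a default appropriate to the destination type. A small standalone sketch of that backfilling step follows; the fill rules used here (NaN for floating point, zero value-initialization for integers) and the default_fill name are illustrative assumptions, not ArcticDB's exact defaults.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <limits>
    #include <type_traits>
    #include <vector>

    // Illustrative default fill: NaN for floating-point destinations, zero for
    // integral ones.
    template <typename T>
    void default_fill(T* dest, std::size_t count) {
        T fill{};
        if constexpr (std::is_floating_point_v<T>)
            fill = std::numeric_limits<T>::quiet_NaN();
        std::fill(dest, dest + count, fill);
    }

    int main() {
        std::vector<double> column(5, 0.0);
        column[0] = 1.0;  // pretend only the first two rows were decoded from storage
        column[1] = 2.0;
        default_fill(column.data() + 2, column.size() - 2);
        for (double v : column) std::cout << v << ' ';
        std::cout << '\n';
        return 0;
    }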
+ if (!trivially_compatible_types(m.source_type_desc_, m.dest_type_desc_) && !source_is_empty) { m.dest_type_desc_.visit_tag([&buffer, &m, buffers] (auto dest_desc_tag) { using DestinationType = typename decltype(dest_desc_tag)::DataTypeTag::raw_type; @@ -516,6 +473,8 @@ void decode_into_frame_dynamic( } decode_string_pool(hdr, data, begin, end, context); + } else { + ARCTICDB_DEBUG(log::version(), "Empty segment"); } } @@ -649,7 +608,6 @@ class EmptyDynamicStringReducer { protected: Column& column_; SegmentInMemory frame_; - const Field& frame_field_; size_t row_ ; ChunkedBuffer& src_buffer_; size_t column_width_; @@ -663,12 +621,11 @@ class EmptyDynamicStringReducer { EmptyDynamicStringReducer( Column& column, SegmentInMemory frame, - const Field& frame_field, + const Field&, size_t alloc_width, std::shared_ptr spinlock) : column_(column), frame_(std::move(frame)), - frame_field_(frame_field), row_(0), src_buffer_(column.data().buffer()), column_width_(alloc_width), diff --git a/cpp/arcticdb/pipeline/read_pipeline.hpp b/cpp/arcticdb/pipeline/read_pipeline.hpp index b0dc0349d4..468e5cffd6 100644 --- a/cpp/arcticdb/pipeline/read_pipeline.hpp +++ b/cpp/arcticdb/pipeline/read_pipeline.hpp @@ -84,7 +84,7 @@ std::vector filter_index(const ContainerType &container, std::optio return output; } -inline util::BitSet build_column_bitset(const StreamDescriptor::Proto &desc, const folly::F14FastSet& columns) { +inline util::BitSet build_column_bitset(const StreamDescriptor& desc, const folly::F14FastSet& columns) { util::BitSet col_bitset(static_cast(desc.fields().size())); for (std::size_t c = 0; c < static_cast(desc.fields().size()); ++c) { auto& f = desc.fields(static_cast(c)); @@ -95,24 +95,24 @@ inline util::BitSet build_column_bitset(const StreamDescriptor::Proto &desc, con return col_bitset; } -inline util::BitSet build_column_bitset(const StreamDescriptor::Proto&desc, const std::vector& columns) { +inline util::BitSet build_column_bitset(const StreamDescriptor& desc, const std::vector& columns) { folly::F14FastSet col_set{columns.begin(), columns.end()}; return build_column_bitset(desc, col_set); } -inline bool contains_index_column(const std::vector& columns, const StreamDescriptor::Proto& desc) { +inline auto add_index_column(const std::vector& columns, const StreamDescriptor& desc) { + std::vector columns_with_index{columns}; + columns_with_index.push_back(std::string{desc.fields(0).name()}); + return columns_with_index; +} + +inline bool contains_index_column(const std::vector& columns, const StreamDescriptor& desc) { return desc.index().field_count() == 0 || std::find(std::begin(columns), std::end(columns), desc.fields(0).name()) != std::end(columns); } -inline auto add_index_column(const std::vector& columns, const StreamDescriptor::Proto& desc) { - std::vector columns_with_index{columns}; - columns_with_index.push_back(desc.fields(0).name()); - return columns_with_index; -} - -inline std::optional requested_column_bitset_including_index(const StreamDescriptor::Proto& desc, const std::vector& columns) { +inline std::optional requested_column_bitset_including_index(const StreamDescriptor& desc, const std::vector& columns) { // Add the index column if it's not there if (!columns.empty()) { if(!contains_index_column(columns, desc)) { @@ -125,8 +125,9 @@ inline std::optional requested_column_bitset_including_index(const return std::nullopt; } -inline std::optional clause_column_bitset(const StreamDescriptor::Proto& desc, - const std::vector>& clauses) { +inline std::optional 
clause_column_bitset( + const StreamDescriptor& desc, + const std::vector>& clauses) { folly::F14FastSet column_set; for (const auto& clause: clauses) { auto opt_columns = clause->clause_info().input_columns_; @@ -136,18 +137,18 @@ inline std::optional clause_column_bitset(const StreamDescriptor:: } } } - if (!column_set.empty()) { + if (!column_set.empty()) return build_column_bitset(desc, column_set); - } else { + else return std::nullopt; - } } // Returns std::nullopt if all columns are required, which is the case if requested_columns is std::nullopt // Otherwise augment the requested_columns bitset with columns that are required by any of the clauses -inline std::optional overall_column_bitset(const StreamDescriptor::Proto& desc, - const std::vector>& clauses, - const std::optional& requested_columns) { +inline std::optional overall_column_bitset( + const StreamDescriptor& desc, + const std::vector>& clauses, + const std::optional& requested_columns) { // std::all_of returns true if the range is empty auto clauses_can_combine_with_column_selection = std::all_of(clauses.begin(), clauses.end(), [](const std::shared_ptr& clause){ @@ -202,7 +203,7 @@ inline std::vector> get_column_bitset_and_query_funct if(!dynamic_schema || column_groups) { pipeline_context->set_selected_columns(query.columns); - pipeline_context->overall_column_bitset_ = overall_column_bitset(pipeline_context->descriptor().proto(), + pipeline_context->overall_column_bitset_ = overall_column_bitset(pipeline_context->descriptor(), query.clauses_, pipeline_context->selected_columns_); } diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index 6b70204823..400ae29448 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ b/cpp/arcticdb/pipeline/write_frame.cpp @@ -5,11 +5,8 @@ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
*/ -#include -#include #include #include -#include #include #include #include @@ -18,14 +15,11 @@ #include #include #include -#include #include #include #include #include #include -#include -#include #include #include #include @@ -85,7 +79,7 @@ std::tuple WriteToS auto rows_to_write = slice_.row_range.second - slice_.row_range.first; if (frame_->desc.index().field_count() > 0) { - util::check(static_cast(frame_->index_tensor), "Got null index tensor in write_slices"); + util::check(static_cast(frame_->index_tensor), "Got null index tensor in WriteToSegmentTask"); auto opt_error = aggregator_set_data( frame_->desc.fields(0).type(), frame_->index_tensor.value(), @@ -109,7 +103,7 @@ std::tuple WriteToS } agg.end_block_write(rows_to_write); - agg.commit(); + agg.finalize(); return output; }); } @@ -160,20 +154,6 @@ folly::Future> write_slices( }, write_window)).via(&async::io_executor()); } -folly::Future write_multi_index( - const std::shared_ptr& frame, - std::vector&& slice_and_keys, - const IndexPartialKey& partial_key, - const std::shared_ptr& sink -) { - auto timeseries_desc = index_descriptor_from_frame(frame, frame->offset); - index::IndexWriter writer(sink, partial_key, std::move(timeseries_desc)); - for (auto &slice_and_key : slice_and_keys) { - writer.add(slice_and_key.key(), slice_and_key.slice_); - } - return writer.commit(); -} - folly::Future> slice_and_write( const std::shared_ptr &frame, const SlicingPolicy &slicing, @@ -183,6 +163,9 @@ folly::Future> slice_and_write( bool sparsify_floats) { ARCTICDB_SUBSAMPLE_DEFAULT(SliceFrame) auto slices = slice(*frame, slicing); + if(slices.empty()) + return folly::makeFuture(std::vector{}); + ARCTICDB_SUBSAMPLE_DEFAULT(SliceAndWrite) return write_slices(frame, std::move(slices), slicing, std::move(key), sink, de_dup_map, sparsify_floats); } @@ -199,7 +182,7 @@ write_frame( auto fut_slice_keys = slice_and_write(frame, slicing, IndexPartialKey{key}, store, de_dup_map, sparsify_floats); // Write the keys of the slices into an index segment ARCTICDB_SUBSAMPLE_DEFAULT(WriteIndex) - return std::move(fut_slice_keys).thenValue([frame=frame, key = std::move(key), &store](auto&& slice_keys) mutable { + return std::move(fut_slice_keys).thenValue([frame=frame, key=std::move(key), &store](auto&& slice_keys) mutable { return index::write_index(frame, std::forward(slice_keys), key, store); }); } @@ -221,7 +204,7 @@ folly::Future append_frame( auto& frame_index = frame->index_tensor.value(); util::check(frame_index.data_type() == DataType::NANOSECONDS_UTC64, "Expected timestamp index in append, got type {}", frame_index.data_type()); - if (index_segment_reader.tsd().proto().total_rows() != 0 && frame_index.size() != 0) { + if (index_segment_reader.tsd().total_rows() != 0 && frame_index.size() != 0) { auto first_index = NumericIndex{*frame_index.ptr_cast(0)}; auto prev = std::get(index_segment_reader.last()->key().end_index()); util::check(ignore_sort_order || prev - 1 <= first_index, @@ -241,7 +224,7 @@ folly::Future append_frame( slices_to_write.insert(std::end(slices_to_write), std::make_move_iterator(std::begin(slice_and_keys_to_append)), std::make_move_iterator(std::end(slice_and_keys_to_append))); std::sort(std::begin(slices_to_write), std::end(slices_to_write)); auto tsd = index::get_merged_tsd(frame->num_rows + frame->offset, dynamic_schema, index_segment_reader.tsd(), frame); - return index::write_index(stream::index_type_from_descriptor(tsd.as_stream_descriptor()), std::move(tsd), std::move(slices_to_write), key, store); + return 
index::write_index(stream::index_type_from_descriptor(tsd.as_stream_descriptor()), tsd, std::move(slices_to_write), key, store); }); } @@ -270,12 +253,11 @@ static RowRange partial_rewrite_row_range( } std::optional rewrite_partial_segment( - const SliceAndKey& existing, - IndexRange index_range, - VersionId version_id, - AffectedSegmentPart affected_part, - const std::shared_ptr& store -) { + const SliceAndKey& existing, + const IndexRange& index_range, + VersionId version_id, + AffectedSegmentPart affected_part, + const std::shared_ptr& store) { const auto& key = existing.key(); auto kv = store->read(key).get(); const SegmentInMemory& segment = kv.second; @@ -294,6 +276,7 @@ std::optional rewrite_partial_segment( RowRange{0, num_rows}, existing.slice_.hash_bucket(), existing.slice_.num_buckets()}; + auto fut_key = store->write( key.type(), version_id, @@ -312,16 +295,19 @@ std::vector flatten_and_fix_rows(const std::array& group : groups) { if (group.empty()) continue; + auto group_start = group.begin()->slice_.row_range.first; auto group_end = std::accumulate(std::begin(group), std::end(group), 0ULL, [](size_t a, const SliceAndKey& sk) { return std::max(a, sk.slice_.row_range.second); }); + std::transform(std::begin(group), std::end(group), std::back_inserter(output), [&](SliceAndKey sk) { auto range_start = global_count + (sk.slice_.row_range.first - group_start); auto new_range = RowRange{range_start, range_start + (sk.slice_.row_range.diff())}; sk.slice_.row_range = new_range; return sk; }); + global_count += (group_end - group_start); } return output; diff --git a/cpp/arcticdb/pipeline/write_frame.hpp b/cpp/arcticdb/pipeline/write_frame.hpp index ea3cb4259f..f3fb10a077 100644 --- a/cpp/arcticdb/pipeline/write_frame.hpp +++ b/cpp/arcticdb/pipeline/write_frame.hpp @@ -90,7 +90,7 @@ enum class AffectedSegmentPart { std::optional rewrite_partial_segment( const SliceAndKey& existing, - IndexRange index_range, + const IndexRange& index_range, VersionId version_id, AffectedSegmentPart affected_part, const std::shared_ptr& store); diff --git a/cpp/arcticdb/processing/clause.cpp b/cpp/arcticdb/processing/clause.cpp index 02ce794b0b..790789504f 100644 --- a/cpp/arcticdb/processing/clause.cpp +++ b/cpp/arcticdb/processing/clause.cpp @@ -26,12 +26,14 @@ namespace arcticdb { using namespace pipelines; -std::vector> structure_by_row_slice(std::vector& ranges_and_keys, - size_t start_from) { +std::vector> structure_by_row_slice( + std::vector& ranges_and_keys, + size_t start_from) { std::sort(std::begin(ranges_and_keys), std::end(ranges_and_keys), [] (const RangesAndKey& left, const RangesAndKey& right) { return std::tie(left.row_range_.first, left.col_range_.first) < std::tie(right.row_range_.first, right.col_range_.first); }); ranges_and_keys.erase(ranges_and_keys.begin(), ranges_and_keys.begin() + start_from); + std::vector> res; RowRange previous_row_range; for (const auto& [idx, ranges_and_key]: folly::enumerate(ranges_and_keys)) { @@ -70,7 +72,6 @@ std::vector> structure_all_together(std::vector AggregationClause::process(Composite&& entity_id } SegmentInMemory seg; auto index_col = std::make_shared(make_scalar_type(grouping_data_type), grouping_map.size(), true, false); + seg.add_column(scalar_field(grouping_data_type, grouping_column_), index_col); - seg.descriptor().set_index(IndexDescriptor(0, IndexDescriptor::ROWCOUNT)); + seg.descriptor().set_index(IndexDescriptorImpl(0, IndexDescriptorImpl::Type::ROWCOUNT)); details::visit_type(grouping_data_type, [&grouping_map, &index_col](auto 
data_type_tag) { using col_type_info = ScalarTypeInfo; @@ -694,7 +696,7 @@ Composite ResampleClause::process(Compositeat(0)->start(), row_slices.front().row_ranges_->at(0)->start() + output_index_column->row_count()); seg.add_column(scalar_field(DataType::NANOSECONDS_UTC64, index_column_name), output_index_column); - seg.descriptor().set_index(IndexDescriptor(1, IndexDescriptor::TIMESTAMP)); + seg.descriptor().set_index(IndexDescriptorImpl(1, IndexDescriptor::Type::TIMESTAMP)); auto& string_pool = seg.string_pool(); for (const auto& aggregator: aggregators_) { std::vector> input_agg_columns; @@ -903,7 +905,7 @@ void merge_impl( FieldCollection new_fields{}; (void)new_fields.add(fields[0].ref()); - auto index_desc = index_descriptor(stream_id, index, new_fields); + auto index_desc = index_descriptor_from_range(stream_id, index, new_fields); auto desc = StreamDescriptor{index_desc}; AggregatorType agg{ @@ -934,8 +936,8 @@ std::optional>> MergeClause::repartition( auto compare = [](const std::unique_ptr &left, const std::unique_ptr &right) { - const auto left_index = index::index_value_from_row(left->row(), IndexDescriptor::TIMESTAMP, 0); - const auto right_index = index::index_value_from_row(right->row(), IndexDescriptor::TIMESTAMP, 0); + const auto left_index = index::index_value_from_row(left->row(), IndexDescriptorImpl::Type::TIMESTAMP, 0); + const auto right_index = index::index_value_from_row(right->row(), IndexDescriptorImpl::Type::TIMESTAMP, 0); return left_index > right_index; }; @@ -1037,7 +1039,7 @@ Composite ColumnStatsGenerationClause::process(Composite&& end_index_col->set_row_data(0); SegmentInMemory seg; - seg.descriptor().set_index(IndexDescriptor(0, IndexDescriptor::ROWCOUNT)); + seg.descriptor().set_index(IndexDescriptorImpl(0, IndexDescriptorImpl::Type::ROWCOUNT)); seg.add_column(scalar_field(DataType::NANOSECONDS_UTC64, start_index_column_name), start_index_col); seg.add_column(scalar_field(DataType::NANOSECONDS_UTC64, end_index_column_name), end_index_col); for (const auto& agg_data: folly::enumerate(aggregators_data)) { diff --git a/cpp/arcticdb/processing/clause.hpp b/cpp/arcticdb/processing/clause.hpp index 866e4e49d0..503ae1fec2 100644 --- a/cpp/arcticdb/processing/clause.hpp +++ b/cpp/arcticdb/processing/clause.hpp @@ -346,12 +346,6 @@ struct PartitionClause { } }; -inline StreamDescriptor empty_descriptor(arcticdb::proto::descriptors::IndexDescriptor::Type type = arcticdb::proto::descriptors::IndexDescriptor::ROWCOUNT, const StreamId &id = "merged") { - const auto index = stream::variant_index_from_type(type); - const auto field_count = util::variant_match(index, [] (const auto& idx) { return idx.field_count(); }); - return StreamDescriptor{StreamId{id}, IndexDescriptor{field_count, type}, std::make_shared()}; -} - struct NamedAggregator { NamedAggregator(const std::string& aggregation_operator, const std::string& input_column_name, diff --git a/cpp/arcticdb/processing/operation_dispatch_unary.hpp b/cpp/arcticdb/processing/operation_dispatch_unary.hpp index 8e4fd37542..7d59d05aee 100644 --- a/cpp/arcticdb/processing/operation_dispatch_unary.hpp +++ b/cpp/arcticdb/processing/operation_dispatch_unary.hpp @@ -127,6 +127,7 @@ VariantData unary_comparator(const ColumnWithStrings& col, Func&& func) { constexpr auto sparse_missing_value_output = std::is_same_v, IsNullOperator>; details::visit_type(col.column_->type().data_type(), [&](auto col_tag) { using type_info = ScalarTypeInfo; + // Non-explicit lambda capture due to a bug in LLVM: 
https://github.com/llvm/llvm-project/issues/34798 Column::transform(*(col.column_), output_bitset, sparse_missing_value_output, [&](auto input_value) -> bool { if constexpr (is_floating_point_type(type_info::data_type)) { return func.apply(input_value); diff --git a/cpp/arcticdb/processing/unsorted_aggregation.cpp b/cpp/arcticdb/processing/unsorted_aggregation.cpp index 4a64118c99..7b4e68f450 100644 --- a/cpp/arcticdb/processing/unsorted_aggregation.cpp +++ b/cpp/arcticdb/processing/unsorted_aggregation.cpp @@ -9,8 +9,7 @@ #include -namespace arcticdb -{ +namespace arcticdb { void MinMaxAggregatorData::aggregate(const ColumnWithStrings& input_column) { details::visit_type(input_column.column_->type().data_type(), [&] (auto col_tag) { @@ -57,63 +56,64 @@ SegmentInMemory MinMaxAggregatorData::finalize(const std::vector& ou } namespace { - template - struct OutputType; - - template - struct OutputType > { - using type = ScalarTagType>; - }; - - template - struct OutputType > { - using type = ScalarTagType>; - }; - - template - struct OutputType> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; +template +struct OutputType; + +template +struct OutputType > { + using type = ScalarTagType>; +}; + +template +struct OutputType > { + using type = ScalarTagType>; +}; + +template +struct OutputType> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; + +template<> +struct OutputType, void> { + using type = ScalarTagType>; +}; } /********************** diff --git a/cpp/arcticdb/python/normalization_checks.cpp b/cpp/arcticdb/python/normalization_checks.cpp index 17584bc941..68e67957e9 100644 --- a/cpp/arcticdb/python/normalization_checks.cpp +++ b/cpp/arcticdb/python/normalization_checks.cpp @@ -107,6 +107,8 @@ void update_rowcount_normalization_data( if (old_index) { constexpr auto error_suffix = " the existing version. 
Please convert both to use Int64Index if you need this to work."; + + util::check(new_index != nullptr, "New index is null in normalization checks"); normalization::check( old_index->is_physically_stored() == new_index->is_physically_stored(), "The argument uses a {} index which is incompatible with {}", @@ -162,14 +164,6 @@ bool check_pandas_like( return false; } -size_t product(const google::protobuf::RepeatedField &shape) { - // FUTURE: use std::reduce when our libc++ implements it - size_t out = 1; - for (auto i : shape) - out *= i; - return out; -} - template bool check_ndarray_append(const NormalizationMetadata &old_norm, NormalizationMetadata &new_norm) { if (old_norm.has_np() || new_norm.has_np()) { @@ -194,10 +188,10 @@ void fix_normalization_or_throw( auto &old_norm = existing_isr.tsd().proto().normalization(); auto &new_norm = new_frame.norm_meta; if (check_pandas_like(old_norm, new_norm)) { - const IndexDescriptor::Type old_index_type = existing_isr.tsd().proto().stream_descriptor().index().kind(); + const IndexDescriptor::Type old_index_type = existing_isr.tsd().index().type(); const IndexDescriptor::Type new_index_type = new_frame.desc.index().type(); - if (old_index_type == new_index_type && old_index_type == IndexDescriptor::ROWCOUNT) { - update_rowcount_normalization_data(old_norm, new_norm, existing_isr.tsd().proto().total_rows()); + if (old_index_type == new_index_type && old_index_type == IndexDescriptor::Type::ROWCOUNT) { + update_rowcount_normalization_data(old_norm, new_norm, existing_isr.tsd().total_rows()); } return; } @@ -206,8 +200,7 @@ void fix_normalization_or_throw( return; } else { // ndarray normalizes to a ROWCOUNT frame and we don't support update on those - normalization::check(!old_norm.has_np() && !new_norm.has_np(), - "current normalization scheme doesn't allow update of ndarray"); + normalization::check(!old_norm.has_np() && !new_norm.has_np(), "current normalization scheme doesn't allow update of ndarray"); } } diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index 4e9aca7f33..59bbbd0577 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -9,226 +9,230 @@ #include #include #include +#include +#include namespace arcticdb { - /// @brief Generate numpy.dtype object from ArcticDB type descriptor - /// The dtype is used as type specifier for numpy arrays stored as column elements - /// @note There is special handling for ArcticDB's empty type - /// When numpy creates an empty array its type is float64. We want to mimic this because: - /// i) There is no equivalent to empty value - /// ii) We want input dataframes to be exact match of the output and that includes the type - [[nodiscard]] static inline py::dtype generate_python_dtype(const TypeDescriptor& td, stride_t type_byte_size) { - if(is_empty_type(td.data_type())) { - return py::dtype{"f8"}; - } - return py::dtype{fmt::format("{}{:d}", get_dtype_specifier(td.data_type()), type_byte_size)}; +/// @brief Generate numpy.dtype object from ArcticDB type descriptor +/// The dtype is used as type specifier for numpy arrays stored as column elements +/// @note There is special handling for ArcticDB's empty type +/// When numpy creates an empty array its type is float64. 
We want to mimic this because: +/// i) There is no equivalent to empty value +/// ii) We want input dataframes to be exact match of the output and that includes the type +[[nodiscard]] static inline py::dtype generate_python_dtype(const TypeDescriptor &td, stride_t type_byte_size) { + if (is_empty_type(td.data_type())) { + return py::dtype{"f8"}; } + return py::dtype{fmt::format("{}{:d}", get_dtype_specifier(td.data_type()), type_byte_size)}; +} - /// @important This calls pybind's initialize array function which is NOT thread safe. Moreover, numpy arrays can - /// be created only by the thread holding the GIL. In practice we can get away with allocating arrays only from - /// a single thread (even if it's not the one holding the GIL). This, however, is not guaranteed to work. - /// @todo Allocate numpy arrays only from the thread holding the GIL - [[nodiscard]] static inline PyObject* initialize_array( - const pybind11::dtype& descr, - const shape_t shapes, - const stride_t strides, - const void* source_ptr, - std::shared_ptr owner, - std::mutex& creation_mutex - ) { - std::lock_guard creation_guard{creation_mutex}; - // TODO: Py capsule can take only void ptr as input. We need a better way to handle destruction - // Allocating shared ptr on the heap is sad. - auto* object = new std::shared_ptr(std::move(owner)); - auto arr = py::array(descr, {shapes}, {strides}, source_ptr, py::capsule(object, [](void* obj){ - delete reinterpret_cast*>(obj); - })); - return arr.release().ptr(); - } +/// @important This calls pybind's initialize array function which is NOT thread safe. Moreover, numpy arrays can +/// be created only by the thread holding the GIL. In practice we can get away with allocating arrays only from +/// a single thread (even if it's not the one holding the GIL). This, however, is not guaranteed to work. +/// @todo Allocate numpy arrays only from the thread holding the GIL +[[nodiscard]] static inline PyObject* initialize_array( + const pybind11::dtype &descr, + const shape_t shapes, + const stride_t strides, + const void *source_ptr, + std::shared_ptr owner, + std::mutex &creation_mutex +) { + std::lock_guard creation_guard{creation_mutex}; + // TODO: Py capsule can take only void ptr as input. We need a better way to handle destruction + // Allocating shared ptr on the heap is sad. 
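The TODO above concerns the ownership trick used by initialize_array: a copy of the owning shared_ptr is placed on the heap and handed to a py::capsule whose destructor deletes it, so the resulting numpy array keeps the source buffer alive without copying the data. A standalone sketch of the same pattern using pybind11's embedded interpreter; wrap_buffer and the std::vector buffer are invented for the example and are not ArcticDB code.

    #include <memory>
    #include <vector>
    #include <pybind11/embed.h>
    #include <pybind11/numpy.h>

    namespace py = pybind11;

    // Wrap an existing buffer in a numpy array without copying. The capsule owns a
    // heap-allocated shared_ptr, so the buffer stays alive for as long as any numpy
    // view of it exists; the capsule's destructor releases that reference.
    py::array wrap_buffer(std::shared_ptr<std::vector<double>> owner) {
        auto* keep_alive = new std::shared_ptr<std::vector<double>>(owner);
        py::capsule base(keep_alive, [](void* p) {
            delete static_cast<std::shared_ptr<std::vector<double>>*>(p);
        });
        return py::array(py::dtype::of<double>(),
                         {static_cast<py::ssize_t>(owner->size())},   // shape
                         {static_cast<py::ssize_t>(sizeof(double))},  // strides
                         owner->data(),
                         base);
    }

    int main() {
        py::scoped_interpreter guard{};
        auto data = std::make_shared<std::vector<double>>(std::vector<double>{1.0, 2.0, 3.0});
        py::array arr = wrap_buffer(data);
        py::print(arr);  // [1. 2. 3.]
        return 0;
    }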
+ auto *object = new std::shared_ptr(std::move(owner)); + auto arr = py::array(descr, {shapes}, {strides}, source_ptr, py::capsule(object, [](void *obj) { + delete reinterpret_cast *>(obj); + })); + return arr.release().ptr(); +} - static inline const PyObject** fill_with_none(const PyObject** dest, size_t count) { - auto none = py::none(); - std::generate_n(dest, count, [&none]() { return none.inc_ref().ptr(); }); - return dest + count; +static inline const PyObject** fill_with_none(const PyObject** dest, size_t count) { + auto none = py::none(); + std::generate_n(dest, count, [&none]() { return none.inc_ref().ptr(); }); + return dest + count; +} + +void EmptyHandler::handle_type( + const uint8_t *&input, + uint8_t *dest, + const EncodedFieldImpl& field, + const ColumnMapping& mapping, + size_t dest_bytes, + const std::shared_ptr&, + EncodingVersion encoding_version +) { + ARCTICDB_SAMPLE(HandleEmpty, 0) + util::check(dest != nullptr, "Got null destination pointer"); + ARCTICDB_TRACE( + log::version(), + "Empty type handler invoked for source type: {}, destination type: {}, num rows: {}", + mapping.source_type_desc_, + mapping.dest_type_desc_, + mapping.num_rows_ + ); + static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject *)); + + if (encoding_version == EncodingVersion::V2) + util::check_magic(input); + + // const auto num_rows = dest_bytes / get_type_size(DataType::EMPTYVAL); + // auto* target = reinterpret_cast(dest); + + if (field.encoding_case() == EncodedFieldType::NDARRAY) { + const auto& ndarray_field = field.ndarray(); + const auto num_blocks = ndarray_field.values_size(); + util::check(num_blocks <= 1, "Unexpected number of empty type blocks: {}", num_blocks); + for (auto block_num = 0; block_num < num_blocks; ++block_num) { + const auto& block_info = ndarray_field.values(block_num); + input += block_info.out_bytes(); + } + } else { + util::raise_rte("Unsupported encoding {}", field); } + mapping.dest_type_desc_.visit_tag([dest, dest_bytes] (auto tdt) { + using DataType = decltype(tdt); + util::default_initialize(dest, dest_bytes); + }); +} - void EmptyHandler::handle_type( - const uint8_t*& input, - uint8_t* dest, - const VariantField& variant_field, - size_t dest_bytes, - std::shared_ptr, - EncodingVersion, - const ColumnMapping& m - ) { - ARCTICDB_SAMPLE(HandleEmpty, 0) - util::check(dest != nullptr, "Got null destination pointer"); - ARCTICDB_TRACE( - log::version(), - "Empty type handler invoked for source type: {}, destination type: {}, num rows: {}", - m.source_type_desc_, - m.dest_type_desc_, - m.num_rows_ - ); - static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject*)); - - m.dest_type_desc_.visit_tag([&](auto tag) { - util::default_initialize(dest, dest_bytes); - }); +int EmptyHandler::type_size() const { + return sizeof(PyObject *); +} - util::variant_match(variant_field, [&input](const auto& field) { - using EncodedFieldType = std::decay_t; - if constexpr (std::is_same_v) - util::check_magic(input); - - if (field->encoding_case() == EncodedFieldType::kNdarray) { - const auto& ndarray_field = field->ndarray(); - const auto num_blocks = ndarray_field.values_size(); - util::check(num_blocks <= 1, "Unexpected number of empty type blocks: {}", num_blocks); - for (auto block_num = 0; block_num < num_blocks; ++block_num) { - const auto& block_info = ndarray_field.values(block_num); - input += block_info.out_bytes(); - } - } else { - util::raise_error_msg("Unsupported encoding {}", *field); - } +void EmptyHandler::default_initialize(void *dest, 
size_t byte_size) const { + fill_with_none(reinterpret_cast(dest), byte_size / type_size()); +} + +void BoolHandler::handle_type( + const uint8_t *&data, + uint8_t *dest, + const EncodedFieldImpl &field, + const ColumnMapping& m, + size_t, + const std::shared_ptr&, + EncodingVersion encoding_version) { + ARCTICDB_SAMPLE(HandleBool, 0) + util::check(dest != nullptr, "Got null destination pointer"); + util::check(field.has_ndarray(), "Bool handler expected array"); + ARCTICDB_DEBUG(log::version(), "Bool handler got encoded field: {}", field.DebugString()); + auto ptr_dest = reinterpret_cast(dest); + const auto &ndarray = field.ndarray(); + const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); + ChunkedBuffer decoded_data = ChunkedBuffer::presized(bytes); + SliceDataSink decoded_data_sink{decoded_data.data(), bytes}; + std::optional sparse_map; + data += decode_field(m.source_type_desc_, field, data, decoded_data_sink, sparse_map, encoding_version); + const auto num_bools = sparse_map.has_value() ? sparse_map->count() : m.num_rows_; + auto ptr_src = decoded_data.template ptr_cast(0, num_bools * sizeof(uint8_t)); + if (sparse_map.has_value()) { + ARCTICDB_TRACE(log::codec(), "Bool handler using a sparse map"); + unsigned last_row = 0u; + for (auto en = sparse_map->first(); en < sparse_map->end(); ++en, last_row++) { + const auto current_pos = *en; + ptr_dest = fill_with_none(ptr_dest, current_pos - last_row); + last_row = current_pos; + *ptr_dest++ = py::bool_(static_cast(*ptr_src++)).release().ptr(); + } + fill_with_none(ptr_dest, m.num_rows_ - last_row); + } else { + ARCTICDB_TRACE(log::codec(), "Bool handler didn't find a sparse map. Assuming dense array."); + std::transform(ptr_src, ptr_src + num_bools, ptr_dest, [](uint8_t value) { + return py::bool_(static_cast(value)).release().ptr(); }); } +} - int EmptyHandler::type_size() const { - return sizeof(PyObject*); - } +int BoolHandler::type_size() const { + return sizeof(PyObject *); +} - void EmptyHandler::default_initialize(void* dest, size_t byte_size) const { - fill_with_none(reinterpret_cast(dest), byte_size / type_size()); - } +void BoolHandler::default_initialize(void *dest, size_t byte_size) const { + fill_with_none(reinterpret_cast(dest), byte_size / type_size()); +} - void BoolHandler::handle_type( - const uint8_t*& data, - uint8_t* dest, - const VariantField& encoded_field_info, - size_t, - std::shared_ptr, - EncodingVersion encding_version, - const ColumnMapping& m - ) { - std::visit([&](const auto& field){ - ARCTICDB_SAMPLE(HandleBool, 0) - util::check(dest != nullptr, "Got null destination pointer"); - util::check(field->has_ndarray(), "Bool handler expected array"); - ARCTICDB_DEBUG(log::version(), "Bool handler got encoded field: {}", field->DebugString()); - auto ptr_dest = reinterpret_cast(dest); - const auto& ndarray = field->ndarray(); - const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); - ChunkedBuffer decoded_data = ChunkedBuffer::presized(bytes); - SliceDataSink decoded_data_sink{decoded_data.data(), bytes}; - std::optional sparse_map; - data += decode_field(m.source_type_desc_, *field, data, decoded_data_sink, sparse_map, encding_version); - const auto num_bools = sparse_map.has_value() ? 
sparse_map->count() : m.num_rows_; - auto ptr_src = decoded_data.template ptr_cast(0, num_bools * sizeof(uint8_t)); - if (sparse_map.has_value()) { - ARCTICDB_TRACE(log::codec(), "Bool handler using a sparse map"); - unsigned last_row = 0u; - for (auto en = sparse_map->first(); en < sparse_map->end(); ++en, last_row++) { - const auto current_pos = *en; - ptr_dest = fill_with_none(ptr_dest, current_pos - last_row); - last_row = current_pos; - *ptr_dest++ = py::bool_(static_cast(*ptr_src++)).release().ptr(); - } - fill_with_none(ptr_dest, m.num_rows_ - last_row); - } else { - ARCTICDB_TRACE(log::codec(), "Bool handler didn't find a sparse map. Assuming dense array."); - std::transform(ptr_src, ptr_src + num_bools, ptr_dest, [](uint8_t value) { - return py::bool_(static_cast(value)).release().ptr(); - }); - } - }, encoded_field_info); +std::mutex ArrayHandler::initialize_array_mutex; + +void ArrayHandler::handle_type( + const uint8_t *&data, + uint8_t *dest, + const EncodedFieldImpl &field, + const ColumnMapping& m, + size_t, + const std::shared_ptr& buffers, + EncodingVersion encoding_version +) { + ARCTICDB_SAMPLE(HandleArray, 0) + util::check(field.has_ndarray(), "Expected ndarray in array object handler"); + + auto ptr_dest = reinterpret_cast(dest); + if (!field.ndarray().sparse_map_bytes()) { + ARCTICDB_DEBUG(log::version(), "Array handler has no values"); + fill_with_none(ptr_dest, m.num_rows_); + return; } + std::shared_ptr column = buffers->get_buffer(m.source_type_desc_, true); + column->check_magic(); + ARCTICDB_DEBUG(log::version(), "Column got buffer at {}", uintptr_t(column.get())); + auto bv = std::make_optional(util::BitSet{}); + data += decode_field(m.source_type_desc_, field, data, *column, bv, encoding_version); + + auto last_row = 0u; + ARCTICDB_SUBSAMPLE(InitArrayAcquireGIL, 0) + const auto strides = static_cast(get_type_size(m.source_type_desc_.data_type())); + const py::dtype py_dtype = generate_python_dtype(m.source_type_desc_, strides); + m.source_type_desc_.visit_tag([&](auto tdt) { + const auto &blocks = column->blocks(); + if (blocks.empty()) + return; + + auto block_it = blocks.begin(); + const auto *shapes = column->shape_ptr(); + auto block_pos = 0u; + const auto *ptr_src = (*block_it)->data(); + constexpr stride_t stride = static_cast(tdt).get_type_byte_size(); + for (auto en = bv->first(); en < bv->end(); ++en) { + const shape_t shape = shapes ? 
*shapes : 0; + const auto offset = *en; + ptr_dest = fill_with_none(ptr_dest, offset - last_row); + last_row = offset; + *ptr_dest++ = initialize_array(py_dtype, + shape, + stride, + ptr_src + block_pos, + column, + initialize_array_mutex); + block_pos += shape * stride; + if (shapes) { + ++shapes; + } + if (block_it != blocks.end() && block_pos == (*block_it)->bytes() && ++block_it != blocks.end()) { + ptr_src = (*block_it)->data(); + block_pos = 0; + } - int BoolHandler::type_size() const { - return sizeof(PyObject*); - } + ++last_row; + } + if (block_it != blocks.end() && block_pos == (*block_it)->bytes() && ++block_it != blocks.end()) { + ptr_src = (*block_it)->data(); + block_pos = 0; + } - void BoolHandler::default_initialize(void* dest, size_t byte_size) const { - fill_with_none(reinterpret_cast(dest), byte_size / type_size()); - } + ++last_row; + }); - std::mutex ArrayHandler::initialize_array_mutex; - - void ArrayHandler::handle_type( - const uint8_t*& data, - uint8_t* dest, - const VariantField& encoded_field_info, - size_t, - std::shared_ptr buffers, - EncodingVersion encoding_version, - const ColumnMapping& m - ) { - util::variant_match(encoded_field_info, [&](auto field){ - ARCTICDB_SAMPLE(HandleArray, 0) - util::check(field->has_ndarray(), "Expected ndarray in array object handler"); - - auto ptr_dest = reinterpret_cast(dest); - if(!field->ndarray().sparse_map_bytes()) { - log::version().info("Array handler has no values"); - fill_with_none(ptr_dest, m.num_rows_); - return; - } - std::shared_ptr column = buffers->get_buffer(m.source_type_desc_, true); - column->check_magic(); - log::version().info("Column got buffer at {}", uintptr_t(column.get())); - auto bv = std::make_optional(util::BitSet{}); - data += decode_field(m.source_type_desc_, *field, data, *column, bv, encoding_version); - - auto last_row = 0u; - ARCTICDB_SUBSAMPLE(InitArrayAcquireGIL, 0) - const auto strides = static_cast(get_type_size(m.source_type_desc_.data_type())); - const py::dtype py_dtype = generate_python_dtype(m.source_type_desc_, strides); - m.source_type_desc_.visit_tag([&] (auto tdt) { - const auto& blocks = column->blocks(); - if(blocks.empty()) - return; - - auto block_it = blocks.begin(); - const auto* shapes = column->shape_ptr(); - auto block_pos = 0u; - const auto* ptr_src = (*block_it)->data(); - constexpr stride_t stride = static_cast(tdt).get_type_byte_size(); - for (auto en = bv->first(); en < bv->end(); ++en) { - const shape_t shape = shapes ? 
*shapes : 0; - const auto offset = *en; - ptr_dest = fill_with_none(ptr_dest, offset - last_row); - last_row = offset; - *ptr_dest++ = initialize_array(py_dtype, - shape, - stride, - ptr_src + block_pos, - column, - initialize_array_mutex); - block_pos += shape * stride; - if(shapes) { - ++shapes; - } - if(block_it != blocks.end() && block_pos == (*block_it)->bytes() && ++block_it != blocks.end()) { - ptr_src = (*block_it)->data(); - block_pos = 0; - } - - ++last_row; - } - }); - - ARCTICDB_SUBSAMPLE(ArrayIncNones, 0) - fill_with_none(ptr_dest, m.num_rows_ - last_row); - }); - } + ARCTICDB_SUBSAMPLE(ArrayIncNones, 0) + fill_with_none(ptr_dest, m.num_rows_ - last_row); +} - int ArrayHandler::type_size() const { - return sizeof(PyObject*); - } +int ArrayHandler::type_size() const { + return sizeof(PyObject *); +} - void ArrayHandler::default_initialize(void* dest, size_t byte_size) const { - fill_with_none(reinterpret_cast(dest), byte_size / type_size()); - } +void ArrayHandler::default_initialize(void *dest, size_t byte_size) const { + fill_with_none(reinterpret_cast(dest), byte_size / type_size()); } + +} //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/python/python_handlers.hpp b/cpp/arcticdb/python/python_handlers.hpp index 186c87cbd9..b042f2bb04 100644 --- a/cpp/arcticdb/python/python_handlers.hpp +++ b/cpp/arcticdb/python/python_handlers.hpp @@ -17,42 +17,44 @@ namespace arcticdb { void handle_type( const uint8_t*& data, uint8_t* dest, - const VariantField& encoded_field, + const EncodedFieldImpl& encoded_field, + const ColumnMapping& mapping, size_t dest_bytes, - std::shared_ptr buffers, - EncodingVersion encding_version, - const ColumnMapping& columnMapping + const std::shared_ptr& buffers, + EncodingVersion encding_version ); int type_size() const; + void default_initialize(void* dest, size_t byte_size) const; }; struct BoolHandler { - /// @see arcticdb::ITypeHandler void handle_type( const uint8_t *&data, uint8_t *dest, - const VariantField &encoded_field, + const EncodedFieldImpl &encoded_field, + const ColumnMapping& mapping, size_t dest_bytes, - std::shared_ptr buffers, - EncodingVersion encding_version, - const ColumnMapping& columnMapping + const std::shared_ptr& buffers, + EncodingVersion encding_version ); + int type_size() const; + void default_initialize(void* dest, size_t byte_size) const; }; struct DecimalHandler { void handle_type( - const uint8_t*& data, - uint8_t* dest, - const VariantField& encoded_field, - size_t dest_bytes, - std::shared_ptr buffers, - EncodingVersion encding_version, - const ColumnMapping& m + const uint8_t*& data, + uint8_t* dest, + const EncodedFieldImpl& encoded_field, + const ColumnMapping& mapping, + size_t dest_bytes, + const std::shared_ptr& buffers ); + int type_size() const; }; @@ -61,14 +63,17 @@ namespace arcticdb { void handle_type( const uint8_t*& data, uint8_t* dest, - const VariantField& encoded_field, + const EncodedFieldImpl& encoded_field, + const ColumnMapping& mapping, size_t dest_bytes, - std::shared_ptr buffers, - EncodingVersion encding_version, - const ColumnMapping& columnMapping + const std::shared_ptr& buffers, + EncodingVersion encding_version ); + int type_size() const; + void default_initialize(void* dest, size_t byte_size) const; + static std::mutex initialize_array_mutex; }; } //namespace arcticdb diff --git a/cpp/arcticdb/python/python_to_tensor_frame.cpp b/cpp/arcticdb/python/python_to_tensor_frame.cpp index 86b66fc250..8ef6def2b6 100644 --- 
a/cpp/arcticdb/python/python_to_tensor_frame.cpp +++ b/cpp/arcticdb/python/python_to_tensor_frame.cpp @@ -231,14 +231,14 @@ std::shared_ptr py_ndf_to_frame( // TODO handle string indexes if (index_tensor.data_type() == DataType::NANOSECONDS_UTC64) { res->desc.set_index_field_count(1); - res->desc.set_index_type(IndexDescriptor::TIMESTAMP); + res->desc.set_index_type(IndexDescriptor::Type::TIMESTAMP); res->desc.add_scalar_field(index_tensor.dt_, index_column_name); res->index = stream::TimeseriesIndex(index_column_name); res->index_tensor = std::move(index_tensor); } else { res->index = stream::RowCountIndex(); - res->desc.set_index_type(IndexDescriptor::ROWCOUNT); + res->desc.set_index_type(IndexDescriptor::Type::ROWCOUNT); res->desc.add_scalar_field(index_tensor.dt_, index_column_name); res->field_tensors.push_back(std::move(index_tensor)); } @@ -268,12 +268,12 @@ std::shared_ptr py_ndf_to_frame( // index explicitly. Thus we handle this case after all columns are read so that we know how many rows are there. if (idx_names.empty()) { res->index = stream::RowCountIndex(); - res->desc.set_index_type(IndexDescriptor::ROWCOUNT); + res->desc.set_index_type(IndexDescriptor::Type::ROWCOUNT); } if (empty_types && res->num_rows == 0) { res->index = stream::EmptyIndex(); - res->desc.set_index_type(IndexDescriptor::EMPTY); + res->desc.set_index_type(IndexDescriptor::Type::EMPTY); } ARCTICDB_DEBUG(log::version(), "Received frame with descriptor {}", res->desc); @@ -293,7 +293,7 @@ std::shared_ptr py_none_to_frame() { // Fill index res->index = stream::RowCountIndex(); - res->desc.set_index_type(IndexDescriptor::ROWCOUNT); + res->desc.set_index_type(IndexDescriptorImpl::Type::ROWCOUNT); // Fill tensors auto col_name = "bytes"; diff --git a/cpp/arcticdb/storage/azure/azure_mock_client.cpp b/cpp/arcticdb/storage/azure/azure_mock_client.cpp index a4d1de2782..e3194eaa06 100644 --- a/cpp/arcticdb/storage/azure/azure_mock_client.cpp +++ b/cpp/arcticdb/storage/azure/azure_mock_client.cpp @@ -84,7 +84,7 @@ Segment MockAzureClient::read_blob( throw get_exception(message, error_code, Azure::Core::Http::HttpStatusCode::NotFound); } - return pos->second; + return std::move(pos->second); } void MockAzureClient::delete_blobs( diff --git a/cpp/arcticdb/storage/azure/azure_real_client.cpp b/cpp/arcticdb/storage/azure/azure_real_client.cpp index b3404f7ade..be354768ae 100644 --- a/cpp/arcticdb/storage/azure/azure_real_client.cpp +++ b/cpp/arcticdb/storage/azure/azure_real_client.cpp @@ -51,24 +51,13 @@ void RealAzureClient::write_blob( const Azure::Storage::Blobs::UploadBlockBlobFromOptions& upload_option, unsigned int request_timeout) { - std::shared_ptr tmp; - auto hdr_size = segment.segment_header_bytes_size(); - auto [dst, write_size] = segment.try_internal_write(tmp, hdr_size); - util::check(arcticdb::Segment::FIXED_HEADER_SIZE + hdr_size + segment.buffer().bytes() <= write_size, - "Size disparity, fixed header size {} + variable header size {} + buffer size {} >= total size {}", - arcticdb::Segment::FIXED_HEADER_SIZE, - hdr_size, - segment.buffer().bytes(), - write_size); + auto [dst, write_size, buffer] = segment.serialize_header(); ARCTICDB_SUBSAMPLE(AzureStorageUploadObject, 0) auto blob_client = container_client.GetBlockBlobClient(blob_name); ARCTICDB_RUNTIME_DEBUG(log::storage(), "Writing key '{}' with {} bytes of data", blob_name, - segment.total_segment_size(hdr_size)); + write_size); blob_client.UploadFrom(dst, write_size, upload_option, get_context(request_timeout)); - 
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Wrote key '{}' with {} bytes of data", - blob_name, - segment.total_segment_size(hdr_size)); } Segment RealAzureClient::read_blob( diff --git a/cpp/arcticdb/storage/azure/azure_storage.cpp b/cpp/arcticdb/storage/azure/azure_storage.cpp index ee3f33b447..3f8058ce2a 100644 --- a/cpp/arcticdb/storage/azure/azure_storage.cpp +++ b/cpp/arcticdb/storage/azure/azure_storage.cpp @@ -249,7 +249,7 @@ void do_iterate_type_impl(KeyType key_type, } KeyDescriptor key_descriptor(prefix, - is_ref_key_class(key_type) ? IndexDescriptor::UNKNOWN : IndexDescriptor::TIMESTAMP, FormatType::TOKENIZED); + is_ref_key_class(key_type) ? IndexDescriptorImpl::Type::UNKNOWN : IndexDescriptorImpl::Type::TIMESTAMP, FormatType::TOKENIZED); auto key_prefix = prefix_handler(prefix, key_type_dir, key_descriptor, key_type); try { diff --git a/cpp/arcticdb/storage/azure/azure_storage.hpp b/cpp/arcticdb/storage/azure/azure_storage.hpp index 65d3181d0c..4075717ba8 100644 --- a/cpp/arcticdb/storage/azure/azure_storage.hpp +++ b/cpp/arcticdb/storage/azure/azure_storage.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/arcticdb/storage/coalesced/multi_segment_utils.hpp b/cpp/arcticdb/storage/coalesced/multi_segment_utils.hpp index 40babf026e..b00042e531 100644 --- a/cpp/arcticdb/storage/coalesced/multi_segment_utils.hpp +++ b/cpp/arcticdb/storage/coalesced/multi_segment_utils.hpp @@ -15,14 +15,14 @@ static constexpr uint64_t NumericFlag = uint64_t(1) << 31; static_assert(NumericFlag > NumericMask); template -uint64_t get_symbol_prefix(const entity::StreamId& stream_id) { +uint64_t get_symbol_prefix(const StreamId& stream_id) { using InternalType = uint64_t; static_assert(sizeof(StorageType) <= sizeof(InternalType)); constexpr size_t end = sizeof(InternalType); constexpr size_t begin = sizeof(InternalType) - sizeof(StorageType); StorageType data{}; util::variant_match(stream_id, - [&] (const entity::StringId& string_id) { + [&] (const StringId& string_id) { auto* target = reinterpret_cast(&data); for(size_t p = begin, i = 0; p < end && i < string_id.size(); ++p, ++i) { const auto c = string_id[i]; @@ -30,8 +30,8 @@ uint64_t get_symbol_prefix(const entity::StreamId& stream_id) { target[p] = c; } }, - [&data] (const entity::NumericId& numeric_id) { - util::check(numeric_id < static_cast(NumericMask), "Numeric id too large: {}", numeric_id); + [&data] (const NumericId& numeric_id) { + util::check(numeric_id < static_cast(NumericMask), "Numeric id too large: {}", numeric_id); data &= NumericFlag; data &= numeric_id; } @@ -49,7 +49,7 @@ struct TimeSymbol { IndexDataType data_ = 0UL; - TimeSymbol(const entity::StreamId& stream_id, entity::timestamp time) { + TimeSymbol(const StreamId& stream_id, entity::timestamp time) { set_data(stream_id, time); } @@ -62,7 +62,7 @@ struct TimeSymbol { } private: - void set_data(const entity::StreamId& stream_id, entity::timestamp time) { + void set_data(const StreamId& stream_id, entity::timestamp time) { time <<= 32; auto prefix = get_symbol_prefix(stream_id); data_ = time | prefix; diff --git a/cpp/arcticdb/storage/file/mapped_file_storage.cpp b/cpp/arcticdb/storage/file/mapped_file_storage.cpp index 5037feda89..ba9c6e0db1 100644 --- a/cpp/arcticdb/storage/file/mapped_file_storage.cpp +++ b/cpp/arcticdb/storage/file/mapped_file_storage.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -45,7 +46,7 @@ void MappedFileStorage::init() { EncodingVersion{ 
static_cast(config_.encoding_version())}).max_compressed_bytes_; StreamId id = config_.has_str_id() ? StreamId{} : NumericId{}; - data_size += entity::max_key_size(id, IndexDescriptor{config_.index()}); + data_size += entity::max_key_size(id, index_descriptor_from_proto(config_.index())); file_.create_file(config_.path(), data_size); } else { ARCTICDB_DEBUG(log::storage(), "Opening existing mapped file storage at path {}", config_.path()); @@ -64,10 +65,10 @@ void MappedFileStorage::do_load_header(size_t header_offset, size_t header_size) multi_segment_header_.set_segment(std::move(header)); } -uint64_t MappedFileStorage::get_data_offset(const Segment& seg, size_t header_size) { +uint64_t MappedFileStorage::get_data_offset(const Segment& seg) { ARCTICDB_SAMPLE(MappedFileStorageGetOffset, 0) std::lock_guard lock{offset_mutex_}; - const auto segment_size = seg.total_segment_size(header_size); + const auto segment_size = seg.size(); ARCTICDB_DEBUG(log::storage(), "Mapped file storage returning offset {} and adding {} bytes", offset_, segment_size); const auto previous_offset = offset_; offset_ += segment_size; @@ -76,12 +77,11 @@ uint64_t MappedFileStorage::get_data_offset(const Segment& seg, size_t header_si uint64_t MappedFileStorage::write_segment(Segment&& seg) { auto segment = std::move(seg); - const auto header_size = segment.segment_header_bytes_size(); - auto offset = get_data_offset(segment, header_size); + auto offset = get_data_offset(segment); auto* data = file_.data() + offset; ARCTICDB_SUBSAMPLE(FileStorageMemCpy, 0) - ARCTICDB_DEBUG(log::storage(), "Mapped file storage writing segment of size {} at offset {}", segment.total_segment_size(header_size), offset); - segment.write_to(data, header_size); + segment.write_to(data); + ARCTICDB_DEBUG(log::storage(), "Mapped file storage wrote segment of size {} at offset {}", segment.size(), offset); return offset; } @@ -89,8 +89,8 @@ void MappedFileStorage::do_write(Composite&& kvs) { ARCTICDB_SAMPLE(MappedFileStorageWriteValues, 0) auto key_values = std::move(kvs); key_values.broadcast([this] (auto key_seg) { - const auto size = key_seg.segment().total_segment_size(); const auto offset = write_segment(std::move(key_seg.segment())); + const auto size = key_seg.segment().size(); multi_segment_header_.add_key_and_offset(key_seg.atom_key(), offset, size); }); } diff --git a/cpp/arcticdb/storage/file/mapped_file_storage.hpp b/cpp/arcticdb/storage/file/mapped_file_storage.hpp index e960c39714..98a60a024a 100644 --- a/cpp/arcticdb/storage/file/mapped_file_storage.hpp +++ b/cpp/arcticdb/storage/file/mapped_file_storage.hpp @@ -10,8 +10,10 @@ #include #include #include +#include #include #include +#include #include namespace fs = std::filesystem; @@ -56,7 +58,7 @@ class MappedFileStorage final : public SingleFileStorage { void do_finalize(KeyData key_data) override; - uint64_t get_data_offset(const Segment& seg, size_t header_size); + uint64_t get_data_offset(const Segment& seg); void do_load_header(size_t header_offset, size_t header_size) override; @@ -82,7 +84,7 @@ inline arcticdb::proto::storage::VariantStorage pack_config( size_t file_size, size_t items_count, const StreamId& id, - const IndexDescriptor& index_desc, + const IndexDescriptorImpl& index_desc, EncodingVersion encoding_version, const arcticdb::proto::encoding::VariantCodec& codec_opts) { arcticdb::proto::storage::VariantStorage output; @@ -93,7 +95,7 @@ inline arcticdb::proto::storage::VariantStorage pack_config( util::variant_match(id, [&cfg] (const StringId& str) { 
cfg.set_str_id(str); }, [&cfg] (const NumericId& n) { cfg.set_num_id(n); }); - cfg.mutable_index()->CopyFrom(index_desc.proto()), + cfg.mutable_index()->CopyFrom(index_descriptor_to_proto(index_desc)), cfg.set_encoding_version(static_cast(encoding_version)); cfg.mutable_codec_opts()->CopyFrom(codec_opts); util::pack_to_any(cfg, *output.mutable_config()); diff --git a/cpp/arcticdb/storage/library.hpp b/cpp/arcticdb/storage/library.hpp index 6d7b782689..dd102e2358 100644 --- a/cpp/arcticdb/storage/library.hpp +++ b/cpp/arcticdb/storage/library.hpp @@ -76,28 +76,15 @@ class Library { throw LibraryPermissionException(library_path_, open_mode(), "write"); } - [[maybe_unused]] const size_t total_size = kvs.fold( - [](size_t s, const KeySegmentPair& seg) { return s + seg.segment().total_segment_size(); }, - size_t(0) - ); - [[maybe_unused]] const auto kv_count = kvs.size(); storages_->write(std::move(kvs)); - ARCTICDB_TRACE(log::storage(), "{} kv written, {} bytes", kv_count, total_size); } void update(Composite&& kvs, storage::UpdateOpts opts) { ARCTICDB_SAMPLE(LibraryUpdate, 0) - if (open_mode() < OpenMode::WRITE) { + if (open_mode() < OpenMode::WRITE) throw LibraryPermissionException(library_path_, open_mode(), "update"); - } - [[maybe_unused]] const size_t total_size = kvs.fold( - [](size_t s, const KeySegmentPair& seg) { return s + seg.segment().total_segment_size(); }, - size_t(0) - ); - [[maybe_unused]] const auto kv_count = kvs.size(); storages_->update(std::move(kvs), opts); - ARCTICDB_TRACE(log::storage(), "{} kv updated, {} bytes", kv_count, total_size); } void read(Composite&& ks, const ReadVisitor& visitor, ReadKeyOpts opts) { diff --git a/cpp/arcticdb/storage/library_manager.cpp b/cpp/arcticdb/storage/library_manager.cpp index 098f48c300..8f4e28095f 100644 --- a/cpp/arcticdb/storage/library_manager.cpp +++ b/cpp/arcticdb/storage/library_manager.cpp @@ -85,6 +85,7 @@ LibraryManager::LibraryManager(const std::shared_ptr& library) void LibraryManager::write_library_config(const py::object& lib_cfg, const LibraryPath& path, const StorageOverride& storage_override, const bool validate) const { SegmentInMemory segment; + segment.descriptor().set_index({0UL, IndexDescriptor::Type::ROWCOUNT}); arcticdb::proto::storage::LibraryConfig lib_cfg_proto; google::protobuf::Any output = {}; diff --git a/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp b/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp index f95ff09c0e..e403bc3758 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_mock_client.cpp @@ -87,18 +87,18 @@ std::optional MockLmdbClient::read(const std::string& db_name, std::str return std::nullopt; } - return lmdb_contents_.at(key); + return std::make_optional(lmdb_contents_.at(key).clone()); } void MockLmdbClient::write(const std::string& db_name, std::string& path, arcticdb::Segment&& segment, ::lmdb::txn&, ::lmdb::dbi&, int64_t) { LmdbKey key = {db_name, path}; - raise_if_has_failure_trigger(key, StorageOperation::WRITE); + raise_if_has_failure_trigger(key, StorageOperation::WRITE); if(has_key(key)) { raise_key_exists_error(lmdb_operation_string(StorageOperation::WRITE)); } else { - lmdb_contents_.insert({key, segment}); + lmdb_contents_.try_emplace(key, std::move(segment)); } } diff --git a/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp b/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp index 180650ad82..547abcf114 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_real_client.cpp @@ -35,7 +35,7 @@ 
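The mock LMDB client above now inserts segments with try_emplace(key, std::move(segment)) and serves reads through an explicit clone(), the usual idiom for a move-only value whose copies should be deliberate rather than accidental. Below is a minimal sketch of that pattern under stated assumptions: the Blob and MockStore names are hypothetical stand-ins, not ArcticDB's Segment or client types.

#include <map>
#include <string>
#include <utility>
#include <vector>

// Hypothetical move-only payload standing in for a storage segment.
class Blob {
public:
    explicit Blob(std::vector<unsigned char> bytes) : bytes_(std::move(bytes)) {}

    Blob(Blob&&) noexcept = default;             // ownership transfers cheaply
    Blob& operator=(Blob&&) noexcept = default;
    Blob(const Blob&) = delete;                  // no implicit deep copies
    Blob& operator=(const Blob&) = delete;

    // The only way to duplicate a Blob, so every copy is visible at the call site.
    Blob clone() const { return Blob{bytes_}; }

private:
    std::vector<unsigned char> bytes_;
};

// Hypothetical in-memory store: writes take ownership, reads hand back a clone
// so the stored value remains available for subsequent reads.
class MockStore {
public:
    void write(const std::string& key, Blob&& blob) {
        contents_.try_emplace(key, std::move(blob));
    }

    Blob read(const std::string& key) const {
        return contents_.at(key).clone();
    }

private:
    std::map<std::string, Blob> contents_;
};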
std::optional RealLmdbClient::read(const std::string&, std::string& pat return std::nullopt; } - auto segment = Segment::from_bytes(reinterpret_cast(mdb_val.mv_data),mdb_val.mv_size); + auto segment = Segment::from_bytes(reinterpret_cast(mdb_val.mv_data), mdb_val.mv_size); return segment; } @@ -43,9 +43,8 @@ void RealLmdbClient::write(const std::string&, std::string& path, arcticdb::Segm ::lmdb::txn& txn, ::lmdb::dbi& dbi, int64_t overwrite_flag) { MDB_val mdb_key{path.size(), path.data()}; - std::size_t hdr_sz = seg.segment_header_bytes_size(); MDB_val mdb_val; - mdb_val.mv_size = seg.total_segment_size(hdr_sz); + mdb_val.mv_size = seg.calculate_size(); ARCTICDB_SUBSAMPLE(LmdbPut, 0) int rc = ::mdb_put(txn.handle(), dbi.handle(), &mdb_key, &mdb_val, MDB_RESERVE | overwrite_flag); @@ -55,7 +54,7 @@ void RealLmdbClient::write(const std::string&, std::string& path, arcticdb::Segm ARCTICDB_SUBSAMPLE(LmdbMemCpy, 0) // mdb_val now points to a reserved memory area we must write to - seg.write_to(reinterpret_cast(mdb_val.mv_data), hdr_sz); + seg.write_to(reinterpret_cast(mdb_val.mv_data)); } bool RealLmdbClient::remove(const std::string&, std::string& path, ::lmdb::txn& txn, ::lmdb::dbi& dbi) { diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp index 2a5ff2eb88..c1506756c5 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp @@ -71,6 +71,7 @@ void LmdbStorage::do_write_internal(Composite&& kvs, ::lmdb::txn ARCTICDB_DEBUG(log::storage(), "Lmdb storage writing segment with key {}", kv.key_view()); auto k = to_serialized_key(kv.variant_key()); auto &seg = kv.segment(); + int64_t overwrite_flag = std::holds_alternative(kv.variant_key()) ? 0 : MDB_NOOVERWRITE; try { lmdb_client_->write(db_name, k, std::move(seg), txn, dbi, overwrite_flag); @@ -139,9 +140,8 @@ void LmdbStorage::do_read(Composite&& ks, const ReadVisitor& visitor ARCTICDB_SUBSAMPLE(LmdbStorageVisitSegment, 0) std::any keepalive; segment.value().set_keepalive(std::any(std::move(txn))); + ARCTICDB_DEBUG(log::storage(), "Read key {}: {}, with {} bytes of data", variant_key_type(k), variant_key_view(k), segment.value().size()); visitor(k, std::move(segment.value())); - ARCTICDB_DEBUG(log::storage(), "Read key {}: {}, with {} bytes of data", variant_key_type(k), - variant_key_view(k), segment.value().total_segment_size()); } else { ARCTICDB_DEBUG(log::storage(), "Failed to find segment for key {}", variant_key_view(k)); failed_reads.push_back(k); diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp index 24d337bfb8..90be35aa68 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.hpp @@ -9,7 +9,7 @@ #include #include - +#include #include #include diff --git a/cpp/arcticdb/storage/memory/memory_storage.cpp b/cpp/arcticdb/storage/memory/memory_storage.cpp index 7834b1b2c7..f36ac83f95 100644 --- a/cpp/arcticdb/storage/memory/memory_storage.cpp +++ b/cpp/arcticdb/storage/memory/memory_storage.cpp @@ -12,10 +12,21 @@ #include #include #include +#include #include namespace arcticdb::storage::memory { +void add_serialization_fields(KeySegmentPair& kv) { + auto& segment = kv.segment(); + auto& hdr = segment.header(); + (void)segment.calculate_size(); + if(hdr.encoding_version() == EncodingVersion::V2) { + const auto* src = segment.buffer().data(); + set_body_fields(hdr, src); + } +} + namespace fg = folly::gen; std::string MemoryStorage::name() const { @@ 
-36,15 +47,15 @@ namespace arcticdb::storage::memory { if (auto it = key_vec.find(key); it != key_vec.end()) { key_vec.erase(it); } - - key_vec.try_emplace(key, kv.segment()); + add_serialization_fields(kv); + key_vec.try_emplace(key, std::move(kv.segment())); }, [&](const AtomKey &key) { if (key_vec.find(key) != key_vec.end()) { throw DuplicateKeyException(key); } - - key_vec.try_emplace(key, kv.segment()); + add_serialization_fields(kv); + key_vec.try_emplace(key, std::move(kv.segment())); } ); } @@ -70,7 +81,9 @@ namespace arcticdb::storage::memory { if(it != key_vec.end()) { key_vec.erase(it); } - key_vec.insert(std::make_pair(kv.variant_key(), kv.segment())); + + add_serialization_fields(kv); + key_vec.insert(std::make_pair(kv.variant_key(), kv.segment().clone())); } }); } @@ -86,8 +99,7 @@ namespace arcticdb::storage::memory { if(it != key_vec.end()) { ARCTICDB_DEBUG(log::storage(), "Read key {}: {}", variant_key_type(k), variant_key_view(k)); - auto seg = it->second; - visitor(k, std::move(seg)); + visitor(k, it->second.clone()); } else { throw KeyNotFoundException(std::move(ks)); } diff --git a/cpp/arcticdb/storage/memory/memory_storage.hpp b/cpp/arcticdb/storage/memory/memory_storage.hpp index 1876b858e1..3be5f81841 100644 --- a/cpp/arcticdb/storage/memory/memory_storage.hpp +++ b/cpp/arcticdb/storage/memory/memory_storage.hpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace arcticdb::storage::memory { diff --git a/cpp/arcticdb/storage/memory_layout.hpp b/cpp/arcticdb/storage/memory_layout.hpp new file mode 100644 index 0000000000..16d9a002e1 --- /dev/null +++ b/cpp/arcticdb/storage/memory_layout.hpp @@ -0,0 +1,333 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. + */ +#pragma once + +#include +#include +#include "arcticdb/codec/magic_words.hpp" + +namespace arcticdb { + +#pragma pack(push) +#pragma pack(1) + +constexpr size_t encoding_size = 6; +// An extendable list of codecs supported by ArcticDB +// N.B.
this list is likely to change +enum class Codec : uint16_t { + UNKNOWN = 0, + ZSTD, + PFOR, + LZ4, + PASS, +}; + +// Codecs form a discriminated union of same-sized objects +// within the BlockCodec structure +struct ZstdCodec { + static constexpr Codec type_ = Codec::ZSTD; + + int32_t level_ = 0; + bool is_streaming_ = false; + uint8_t padding_ = 0; +}; + +static_assert(sizeof(ZstdCodec) == encoding_size); + +struct Lz4Codec { + static constexpr Codec type_ = Codec::LZ4; + + int32_t acceleration_ = 1; + uint16_t padding_ = 0; +}; + +static_assert(sizeof(Lz4Codec) == encoding_size); + +struct PassthroughCodec { + static constexpr Codec type_ = Codec::PASS; + + uint32_t unused_ = 0; + uint16_t padding_ = 0; +}; + +struct PforCodec { + static constexpr Codec type_ = Codec::PFOR; + + uint32_t unused_ = 0; + uint16_t padding_ = 0; +}; + +struct BlockCodec { + Codec codec_ = Codec::UNKNOWN; + constexpr static size_t DataSize = 24; + std::array data_ = {}; +}; + +struct Block { + uint32_t in_bytes_ = 0; + uint32_t out_bytes_ = 0; + uint64_t hash_ = 0; + uint16_t encoder_version_ = 0; + bool is_shape_ = false; + uint8_t num_codecs_ = 0; + std::array codecs_; + + Block() = default; +}; + +static_assert(sizeof(Block) == 46); + +// Possible types of encoded fields, which are +// sets of blocks representing a column of data +enum class EncodedFieldType : uint8_t { + UNKNOWN, + NDARRAY, + DICTIONARY +}; + +enum class BitmapFormat : uint8_t { + UNKNOWN, + DENSE, + BITMAGIC +}; + +// Each encoded field will have zero or one shapes blocks, +// a potentially large number of values (data) blocks, and +// an optional sparse bitmap. The block array serves as a +// pointer to the first block +struct EncodedField { + EncodedFieldType type_ = EncodedFieldType::UNKNOWN; + uint16_t shapes_count_ = 0u; + uint16_t values_count_ = 0u; + uint32_t sparse_map_bytes_ = 0u; + uint32_t items_count_ = 0u; + BitmapFormat format_ = BitmapFormat::UNKNOWN; + std::array blocks_; +}; + +static_assert(sizeof(EncodedField) == 60); + +enum class EncodingVersion : uint16_t { + V1 = 0, + V2 = 1, + COUNT = 2 +}; + +constexpr static uint16_t MAGIC_NUMBER = 0xFA57; + +// FixedHeader is the first thing in every encoded segment, +// and is invariant between all encodings. The encoding version +// allows for everything after this header to potentially change, +// but is currently not used +struct FixedHeader { + std::uint16_t magic_number; + std::uint16_t encoding_version; + std::uint32_t header_bytes; +}; + +constexpr static std::size_t FIXED_HEADER_SIZE = sizeof(FixedHeader); + +// This flag indicates that a segment is a compacted set of other segments +enum class HeaderFlag : uint8_t { + COMPACTED, +}; + +// The segment is bootstrapped by a set of uncompressed EncodedFields, +// that describe the sets of fields referencing the segment data. The +// location of these initial fields is described by the following +// structure. +struct FieldBuffer { + mutable uint32_t fields_bytes_ = 0U; + mutable uint32_t offset_bytes_ = 0U; +}; + +// HeaderData describes the format of the data contained within +// the segment. At the moment there are two encoding versions, a +// legacy encoding utilizing a protobuf header, and the binary +// encoding described by the MemoryLayout structure below. 
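Since the fixed header above is invariant across encodings, it is the natural first thing for a reader of this layout to validate before interpreting the variable-length structures that follow. The following is a minimal, self-contained sketch of that check; the struct is redeclared locally and the helper name read_fixed_header is hypothetical, so it illustrates the layout rather than ArcticDB's actual decoding path.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <stdexcept>

namespace example {

#pragma pack(push, 1)
struct FixedHeader {
    std::uint16_t magic_number;
    std::uint16_t encoding_version;
    std::uint32_t header_bytes;
};
#pragma pack(pop)

constexpr std::uint16_t MAGIC_NUMBER = 0xFA57;

// Copy the packed header out of a raw byte buffer and sanity-check it before
// attempting to read the segment header that follows it.
inline FixedHeader read_fixed_header(const std::uint8_t* data, std::size_t size) {
    if (size < sizeof(FixedHeader))
        throw std::runtime_error("Buffer too small to contain a fixed header");

    FixedHeader header{};
    std::memcpy(&header, data, sizeof(FixedHeader));

    if (header.magic_number != MAGIC_NUMBER)
        throw std::runtime_error("Unexpected magic number in segment fixed header");

    return header;
}

} // namespace example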
+struct HeaderData { ; + EncodingVersion encoding_version_ = EncodingVersion::V1; + uint16_t fields_ = 0U; + uint8_t flags_ = 0U; + uint64_t footer_offset_ = 0U; + FieldBuffer field_buffer_; +}; + +// Indicates the sortedness of this segment +enum class SortedValue : uint8_t { + UNKNOWN = 0, + UNSORTED = 1, + ASCENDING = 2, + DESCENDING = 3, +}; + +// Dynamic schema frames can change their schema over time, +// adding and removing columns and changing types. A dynamic +// schema type indicates that for each row group, not all of +// the columns in the global descriptor will necessarily +// be found +enum class SchemaType : uint8_t { + STATIC, + DYNAMIC +}; + +// The type of indexing of a frame as a whole +struct IndexDescriptor { + enum class Type : int32_t { + UNKNOWN = 0, + EMPTY = 69, + ROWCOUNT = 82, + STRING = 83, + TIMESTAMP = 84 + }; + + IndexDescriptor() = default; + + IndexDescriptor(Type type, uint32_t field_count) : + type_(type), + field_count_(field_count) { + } + + Type type_ = Type::UNKNOWN; + uint32_t field_count_ = 0U; +}; + +// Implementers can store additional metadata in an opaque field +// to control the denormalization of normalized frame data into +// language-specific objects such as Pandas dataframes. It is +// assumed that this information is non-essential to the data +// objects and can be ignored when denormalizing to different +// languages and libraries +enum class FrameMetadataEncoding : uint8_t { + PROTOBUF = 0 +}; + +// A FrameDescriptor describes a dataframe as a whole; it is used on +// segments that describe and index other segments +struct FrameDescriptor { + uint64_t total_rows_ = 0UL; + bool column_groups_ = false; + FrameMetadataEncoding metadata_encoding_ = FrameMetadataEncoding::PROTOBUF; +}; + +// A SegmentDescriptor is present in every segment, and describes the +// contents of this particular segment, rather than other segments +// to which it refers +struct SegmentDescriptor { + SortedValue sorted_ = SortedValue::UNKNOWN; + uint64_t compressed_bytes_ = 0UL; + uint64_t uncompressed_bytes_ = 0UL; + uint64_t row_count_ = 0UL; + IndexDescriptor index_; +}; + +// Frame identifiers can be of either numeric or string type +enum class IdentifierType : uint8_t { + NUMERIC = 0, + STRING = 1 +}; + +struct SegmentIdentifierHeader { + IdentifierType type_ = IdentifierType::NUMERIC; + uint32_t size_ = 0; +}; + +// A segment header contains a set of optional fields that describe the contents of a given segment +enum class FieldOffset : uint8_t { + METADATA, // Opaque field for user and normalization metadata + STRING_POOL, // Deduplicated compressed field of string data + DESCRIPTOR, // Collection of field names and types for the current segment + INDEX, // Optional additional set of fields used when this segment indexes a dataframe + COLUMN, // Set of encoded fields that represent the body (user) data of the segment + COUNT +}; + +// Note. The structures below contain variable-length fields (represented by named structures) and should not be +// used to implement the data format directly. They are intended as a syntactically-correct representation +// of the storage format only. + +// Denotes a field that may or may not be present +template +struct Optional {}; + +// A list of field descriptors containing the name, type and dimensionality of a column of data +struct FieldList {}; + +// A list of encoded fields that describes the contents of other fields. 
An encoded field is a list of blocks +// with a specific set of compression types +struct EncodedFieldList {}; + +// An opaque field to be filled with user-determined content, used for things like +// language-specific normalization data +struct OpaqueField {}; + +// A data field described by an EncodedField, consists of a set of compressed blocks that may represent +// shapes and values, and an optional sparse bitmap +struct ColumnField { + ColumnMagic column_magic_; +}; + +// A compressed block of data containing some other structure. A compressed field is represented by an EncodedField +// which contains a set of Block objects describing the compression type +template +struct CompressedField { +}; + +// A set of fields that are repeated, whose number corresponds to a unary field describing this set. For example, the +// number of repeated column fields should correspond to the number of entries in the descriptor (which describes the +// user-facing information about a column's contents, and the number of EncodedFields in the body fields, which describe +// the block structure and compression +template +struct RepeatedField { +}; + +// Binary representation of a segment header. Contains positioning information about the structure of the segment, +// and the list of fields representing the segment metadata fields +struct SegmentHeaderData { + HeaderData data_; + EncodedFieldList header_fields_; // Header fields containing the fields described by FieldOffsets + std::array offset_ = {}; // Maps the entries in the FieldOffset enumeration to the header field entries +}; + +// The overall memory layout of an ArcticDB segment. Note that this class need not necessarily be used +// to implement a reader or writer of the memory layout - refer to comment above +struct MemoryLayout { + FixedHeader fixed_header_; + SegmentHeaderData variable_header_; + + MetadataMagic metadata_magic_; + Optional metadata_; + + SegmentDescriptorMagic segment_descriptor_magic_; + SegmentDescriptor segment_descriptor_; + SegmentIdentifierHeader identifier_header_; + Optional identifier_data_; + DescriptorFieldsMagic descriptor_magic_; + CompressedField descriptor_fields_; + + IndexMagic index_magic_; + // Optional fields present if this segment refers to a complete dataframe, i.e. 
if it is a primary index + Optional index_frame_descriptor_; + Optional index_segment_descriptor_magic_; + Optional index_segment_descriptor_; + Optional index_identifier_header_; + Optional index_identifier_data_; + Optional> index_descriptor_fields_; + + RepeatedField columns_; + + StringPoolMagic string_pool_magic_; + Optional string_pool_field_; + + EncodedFieldList body_fields_; // Encoded field list representing the user data fields (columns) +}; + +#pragma pack(pop) + + + +} //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/storage/mongo/mongo_client.cpp b/cpp/arcticdb/storage/mongo/mongo_client.cpp index 608dd04b8a..9d68414e5e 100644 --- a/cpp/arcticdb/storage/mongo/mongo_client.cpp +++ b/cpp/arcticdb/storage/mongo/mongo_client.cpp @@ -50,9 +50,9 @@ StreamId stream_id_from_document(DocType& doc, KeyType key_type) { template AtomKey atom_key_from_document(DocType &doc, KeyType key_type) { - auto index_type = IndexDescriptor::Type(doc["index_type"].get_int32().value); + auto index_type = IndexDescriptorImpl::Type(doc["index_type"].get_int32().value); IndexValue start_index, end_index; - if (index_type == IndexDescriptor::TIMESTAMP) { + if (index_type == IndexDescriptorImpl::Type::TIMESTAMP) { start_index = doc["start_time"].get_int64().value; end_index = doc["end_time"].get_int64().value; } else { @@ -117,7 +117,7 @@ void add_atom_key_values(bsoncxx::builder::basic::document& basic_builder, const auto index_type = arcticdb::stream::get_index_value_type(key); basic_builder.append(kvp("index_type", types::b_int32{static_cast(index_type)})); - if(index_type == IndexDescriptor::TIMESTAMP) { + if(index_type == IndexDescriptorImpl::Type::TIMESTAMP) { basic_builder.append(kvp("start_time", types::b_int64{int64_t(std::get(key.start_index()))})); basic_builder.append(kvp("end_time", types::b_int64{int64_t(std::get(key.end_index()))})); } else @@ -134,13 +134,12 @@ auto build_document(storage::KeySegmentPair &kv) { using builder::stream::document; const auto &key = kv.variant_key(); - const auto &segment = kv.segment(); - const auto hdr_size = segment.segment_header_bytes_size(); - const auto total_size = segment.total_segment_size(hdr_size); + auto &segment = kv.segment(); + const auto total_size = segment.calculate_size(); /*thread_local*/ std::vector buffer{}; buffer.resize(total_size); bsoncxx::types::b_binary data = {}; - kv.segment().write_to(buffer.data(), hdr_size); + kv.segment().write_to(buffer.data()); data.size = uint32_t(total_size); data.bytes = buffer.data(); @@ -264,6 +263,7 @@ bool MongoClientImpl::write_segment(const std::string &database_name, auto collection = database[collection_name]; ARCTICDB_SUBSAMPLE(MongoStorageWriteInsertOne, 0) + ARCTICDB_DEBUG(log::storage(), "Mongo client writing data with key {}", variant_key_view(kv.variant_key())); if(std::holds_alternative(kv.variant_key())) { mongocxx::model::replace_one replace{document{} << "key" << fmt::format("{}", kv.ref_key()) << finalize, doc.view()}; replace.upsert(true); diff --git a/cpp/arcticdb/storage/mongo/mongo_storage.cpp b/cpp/arcticdb/storage/mongo/mongo_storage.cpp index f1de98d9c3..f9d33e072e 100644 --- a/cpp/arcticdb/storage/mongo/mongo_storage.cpp +++ b/cpp/arcticdb/storage/mongo/mongo_storage.cpp @@ -125,7 +125,7 @@ void MongoStorage::do_read(Composite&& ks, const ReadVisitor& visito try { auto kv = client_->read_segment(db_, collection, k); // later we should add the key to failed_reads in this case - if (!kv.has_value() || !kv.value().has_segment()) { + if (!kv.has_value()) { 
keys_not_found.push_back(k); } else { diff --git a/cpp/arcticdb/storage/mongo/mongo_storage.hpp b/cpp/arcticdb/storage/mongo/mongo_storage.hpp index d46e491292..52d3ec4bd7 100644 --- a/cpp/arcticdb/storage/mongo/mongo_storage.hpp +++ b/cpp/arcticdb/storage/mongo/mongo_storage.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include namespace arcticdb::storage::mongo { diff --git a/cpp/arcticdb/storage/rocksdb/rocksdb_storage.cpp b/cpp/arcticdb/storage/rocksdb/rocksdb_storage.cpp index 1ad3e361d4..566c827c3e 100644 --- a/cpp/arcticdb/storage/rocksdb/rocksdb_storage.cpp +++ b/cpp/arcticdb/storage/rocksdb/rocksdb_storage.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace arcticdb::storage::rocksdb { @@ -236,11 +237,10 @@ void RocksDBStorage::do_write_internal(Composite&& kvs) { auto k_str = to_serialized_key(kv.variant_key()); auto& seg = kv.segment(); - auto hdr_sz = seg.segment_header_bytes_size(); - auto total_sz = seg.total_segment_size(hdr_sz); + auto total_sz = seg.calculate_size(); std::string seg_data; seg_data.resize(total_sz); - seg.write_to(reinterpret_cast(seg_data.data()), hdr_sz); + seg.write_to(reinterpret_cast(seg_data.data())); auto allow_override = std::holds_alternative(kv.variant_key()); if (!allow_override && do_key_exists(kv.variant_key())) { throw DuplicateKeyException(kv.variant_key()); diff --git a/cpp/arcticdb/storage/rocksdb/rocksdb_storage.hpp b/cpp/arcticdb/storage/rocksdb/rocksdb_storage.hpp index 9c2e811eaf..b5dea2953a 100644 --- a/cpp/arcticdb/storage/rocksdb/rocksdb_storage.hpp +++ b/cpp/arcticdb/storage/rocksdb/rocksdb_storage.hpp @@ -11,9 +11,10 @@ #include #include #include -#include +#include #include +#include #include namespace arcticdb::storage::rocksdb { diff --git a/cpp/arcticdb/storage/s3/detail-inl.hpp b/cpp/arcticdb/storage/s3/detail-inl.hpp index 228f886229..0fcf13379d 100644 --- a/cpp/arcticdb/storage/s3/detail-inl.hpp +++ b/cpp/arcticdb/storage/s3/detail-inl.hpp @@ -293,10 +293,10 @@ namespace s3 { // Generally we get the key descriptor from the AtomKey, but in the case of iterating version journals // where we want to have a narrower prefix, we can use the info that it's a version journal and derive // the Descriptor. - // TODO: Set the IndexDescriptor correctly + // TODO: Set the IndexDescriptorImpl correctly KeyDescriptor key_descriptor(prefix, - is_ref_key_class(key_type) ? IndexDescriptor::UNKNOWN - : IndexDescriptor::TIMESTAMP, + is_ref_key_class(key_type) ? 
IndexDescriptorImpl::Type::UNKNOWN + : IndexDescriptorImpl::Type::TIMESTAMP, FormatType::TOKENIZED); auto key_prefix = prefix_handler(prefix, key_type_dir, key_descriptor, key_type); ARCTICDB_RUNTIME_DEBUG(log::storage(), "Searching for objects in bucket {} with prefix {}", bucket_name, diff --git a/cpp/arcticdb/storage/s3/s3_mock_client.cpp b/cpp/arcticdb/storage/s3/s3_mock_client.cpp index 683b0b8aba..44d646cb42 100644 --- a/cpp/arcticdb/storage/s3/s3_mock_client.cpp +++ b/cpp/arcticdb/storage/s3/s3_mock_client.cpp @@ -77,7 +77,7 @@ S3Result MockS3Client::get_object( if (pos == s3_contents.end()){ return {not_found_error}; } - return {pos->second}; + return {pos->second.clone()}; } S3Result MockS3Client::put_object( diff --git a/cpp/arcticdb/storage/s3/s3_real_client.cpp b/cpp/arcticdb/storage/s3/s3_real_client.cpp index 37e18788e0..7a070027d9 100644 --- a/cpp/arcticdb/storage/s3/s3_real_client.cpp +++ b/cpp/arcticdb/storage/s3/s3_real_client.cpp @@ -95,7 +95,6 @@ struct S3StreamBuffer : public std::streambuf { } }; - struct S3IOStream : public std::iostream { S3StreamBuffer stream_buf_; @@ -142,16 +141,7 @@ S3Result RealS3Client::put_object( request.SetKey(s3_object_name.c_str()); ARCTICDB_RUNTIME_DEBUG(log::storage(), "Set s3 key {}", request.GetKey().c_str()); - std::shared_ptr tmp; - auto hdr_size = segment.segment_header_bytes_size(); - auto [dst, write_size] = segment.try_internal_write(tmp, hdr_size); - util::check(arcticdb::Segment::FIXED_HEADER_SIZE + hdr_size + segment.buffer().bytes() <= - write_size, - "Size disparity, fixed header size {} + variable header size {} + buffer size {} >= total size {}", - arcticdb::Segment::FIXED_HEADER_SIZE, - hdr_size, - segment.buffer().bytes(), - write_size); + auto [dst, write_size, buffer] = segment.serialize_header(); auto body = std::make_shared( reinterpret_cast(dst), write_size); util::check(body->good(), "Overflow of bufferstream with size {}", write_size); @@ -165,7 +155,7 @@ S3Result RealS3Client::put_object( } ARCTICDB_RUNTIME_DEBUG(log::storage(), "Wrote key '{}', with {} bytes of data", s3_object_name, - segment.total_segment_size(hdr_size)); + segment.size()); return {std::monostate()}; } diff --git a/cpp/arcticdb/storage/storage.hpp b/cpp/arcticdb/storage/storage.hpp index a6c6da610d..d88d511f67 100644 --- a/cpp/arcticdb/storage/storage.hpp +++ b/cpp/arcticdb/storage/storage.hpp @@ -129,7 +129,7 @@ class Storage { KeySegmentPair key_seg; const ReadVisitor& visitor = [&key_seg](const VariantKey & vk, Segment&& value) { key_seg.variant_key() = vk; - key_seg.segment() = value; + key_seg.segment() = std::move(value); }; read(std::forward(key), visitor, opts); diff --git a/cpp/arcticdb/storage/test/test_embedded.cpp b/cpp/arcticdb/storage/test/test_embedded.cpp index 60ae84f09c..afaaf0a06d 100644 --- a/cpp/arcticdb/storage/test/test_embedded.cpp +++ b/cpp/arcticdb/storage/test/test_embedded.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef ARCTICDB_INCLUDE_ROCKSDB #include @@ -96,9 +97,10 @@ TEST_P(SimpleTestSuite, Example) { std::unique_ptr storage = GetParam().new_backend(); ac::entity::AtomKey k = ac::entity::atom_key_builder().gen_id(1).build(NumericId{999}); - as::KeySegmentPair kv(k); - kv.segment().header().set_start_ts(1234); - kv.segment().set_buffer(std::make_shared()); + auto segment_in_memory = get_test_frame("symbol", {}, 10, 0).segment_; + auto codec_opts = proto::encoding::VariantCodec(); + auto segment = encode_dispatch(std::move(segment_in_memory), codec_opts, arcticdb::EncodingVersion::V2); + 
arcticdb::storage::KeySegmentPair kv(k, std::move(segment)); storage->write(std::move(kv)); @@ -110,10 +112,8 @@ TEST_P(SimpleTestSuite, Example) { res.segment() = std::move(seg); res.segment().force_own_buffer(); // necessary since the non-owning buffer won't survive the visit }, storage::ReadKeyOpts{}); - ASSERT_EQ(res.segment().header().start_ts(), 1234); res = storage->read(k, as::ReadKeyOpts{}); - ASSERT_EQ(res.segment().header().start_ts(), 1234); bool executed = false; storage->iterate_type(arcticdb::entity::KeyType::TABLE_DATA, @@ -123,9 +123,10 @@ TEST_P(SimpleTestSuite, Example) { }); ASSERT_TRUE(executed); - as::KeySegmentPair update_kv(k); - update_kv.segment().header().set_start_ts(4321); - update_kv.segment().set_buffer(std::make_shared()); + segment_in_memory = get_test_frame("symbol", {}, 10, 0).segment_; + codec_opts = proto::encoding::VariantCodec(); + segment = encode_dispatch(std::move(segment_in_memory), codec_opts, arcticdb::EncodingVersion::V2); + arcticdb::storage::KeySegmentPair update_kv(k, std::move(segment)); storage->update(std::move(update_kv), as::UpdateOpts{}); @@ -135,10 +136,8 @@ TEST_P(SimpleTestSuite, Example) { update_res.segment() = std::move(seg); update_res.segment().force_own_buffer(); // necessary since the non-owning buffer won't survive the visit }, as::ReadKeyOpts{}); - ASSERT_EQ(update_res.segment().header().start_ts(), 4321); update_res = storage->read(k, as::ReadKeyOpts{}); - ASSERT_EQ(update_res.segment().header().start_ts(), 4321); executed = false; storage->iterate_type(arcticdb::entity::KeyType::TABLE_DATA, @@ -167,8 +166,8 @@ TEST_P(SimpleTestSuite, Strings) { google::protobuf::Any any; arcticdb::TimeseriesDescriptor metadata; - metadata.mutable_proto().set_total_rows(12); - metadata.mutable_proto().mutable_stream_descriptor()->CopyFrom(s.descriptor().proto()); + metadata.set_total_rows(12); + metadata.set_stream_descriptor(s.descriptor()); any.PackFrom(metadata.proto()); s.set_metadata(std::move(any)); @@ -185,7 +184,6 @@ TEST_P(SimpleTestSuite, Strings) { ac::entity::AtomKey k = ac::entity::atom_key_builder().gen_id(1).build(NumericId{999}); auto save_k = k; as::KeySegmentPair kv(std::move(k), std::move(seg)); - kv.segment().header().set_start_ts(1234); storage->write(std::move(kv)); as::KeySegmentPair res; @@ -194,7 +192,6 @@ TEST_P(SimpleTestSuite, Strings) { res.segment() = std::move(seg); res.segment().force_own_buffer(); // necessary since the non-owning buffer won't survive the visit }, as::ReadKeyOpts{}); - ASSERT_EQ(res.segment().header().start_ts(), 1234); SegmentInMemory res_mem = decode_segment(std::move(res.segment())); ASSERT_EQ(s.string_at(0, 1), res_mem.string_at(0, 1)); diff --git a/cpp/arcticdb/storage/test/test_mongo_storage.cpp b/cpp/arcticdb/storage/test/test_mongo_storage.cpp index 87f560d180..3827fc0020 100644 --- a/cpp/arcticdb/storage/test/test_mongo_storage.cpp +++ b/cpp/arcticdb/storage/test/test_mongo_storage.cpp @@ -36,7 +36,6 @@ TEST(MongoStorage, ClientSession) { ac::entity::AtomKey k = ac::entity::atom_key_builder().gen_id(1).build("999"); as::KeySegmentPair kv(k); - kv.segment().header().set_start_ts(1234); storage.write(std::move(kv)); @@ -47,10 +46,8 @@ TEST(MongoStorage, ClientSession) { res.segment() = std::move(seg); res.segment().force_own_buffer(); // necessary since the non-owning buffer won't survive the visit }, as::ReadKeyOpts{}); - ASSERT_EQ(res.segment().header().start_ts(), 1234); res = storage.read(k, as::ReadKeyOpts{}); - ASSERT_EQ(res.segment().header().start_ts(), 1234); bool executed = 
false; storage.iterate_type(ac::entity::KeyType::TABLE_DATA, @@ -67,7 +64,6 @@ TEST(MongoStorage, ClientSession) { ASSERT_TRUE(executed); as::KeySegmentPair update_kv(k); - update_kv.segment().header().set_start_ts(4321); storage.update(std::move(update_kv), as::UpdateOpts{}); @@ -78,10 +74,8 @@ TEST(MongoStorage, ClientSession) { update_res.segment() = std::move(seg); update_res.segment().force_own_buffer(); // necessary since the non-owning buffer won't survive the visit }, as::ReadKeyOpts{}); - ASSERT_EQ(update_res.segment().header().start_ts(), 4321); update_res = storage.read(k, as::ReadKeyOpts{}); - ASSERT_EQ(update_res.segment().header().start_ts(), 4321); executed = false; storage.iterate_type(ac::entity::KeyType::TABLE_DATA, @@ -91,9 +85,8 @@ TEST(MongoStorage, ClientSession) { }); ASSERT_TRUE(executed); - ac::entity::AtomKey numeric_k = ac::entity::atom_key_builder().gen_id(1).build(ac::entity::NumericId{999}); + ac::entity::AtomKey numeric_k = ac::entity::atom_key_builder().gen_id(1).build(ac::NumericId{999}); as::KeySegmentPair numeric_kv(numeric_k); - numeric_kv.segment().header().set_start_ts(7890); storage.write(std::move(numeric_kv)); @@ -104,8 +97,6 @@ TEST(MongoStorage, ClientSession) { numeric_res.segment() = std::move(seg); numeric_res.segment().force_own_buffer(); // necessary since the non-owning buffer won't survive the visit }, as::ReadKeyOpts{}); - ASSERT_EQ(numeric_res.segment().header().start_ts(), 7890); numeric_res = storage.read(numeric_k, as::ReadKeyOpts{}); - ASSERT_EQ(numeric_res.segment().header().start_ts(), 7890); } diff --git a/cpp/arcticdb/storage/test/test_storage_exceptions.cpp b/cpp/arcticdb/storage/test/test_storage_exceptions.cpp index 173a3909b7..9a312fbcc7 100644 --- a/cpp/arcticdb/storage/test/test_storage_exceptions.cpp +++ b/cpp/arcticdb/storage/test/test_storage_exceptions.cpp @@ -251,9 +251,11 @@ TEST_F(LMDBStorageTestBase, WriteMapFullError) { auto storage = factory.create(); arcticdb::entity::AtomKey k = arcticdb::entity::atom_key_builder().gen_id(0).build("sym"); - arcticdb::storage::KeySegmentPair kv(k); - kv.segment().header().set_start_ts(1234); - kv.segment().set_buffer(std::make_shared(40000)); + + auto segment_in_memory = get_test_frame("symbol", {}, 40000, 0).segment_; + auto codec_opts = proto::encoding::VariantCodec(); + auto segment = encode_dispatch(std::move(segment_in_memory), codec_opts, arcticdb::EncodingVersion::V2); + arcticdb::storage::KeySegmentPair kv(k, std::move(segment)); ASSERT_THROW({ storage->write(std::move(kv)); diff --git a/cpp/arcticdb/stream/aggregator-inl.hpp b/cpp/arcticdb/stream/aggregator-inl.hpp index d2f9e7a04b..d22023e3cb 100644 --- a/cpp/arcticdb/stream/aggregator-inl.hpp +++ b/cpp/arcticdb/stream/aggregator-inl.hpp @@ -16,15 +16,18 @@ void Aggregator::end_row() { segment_.end_row(); stats_.update(row_builder_.nbytes()); if (segmenting_policy_(stats_)) { - commit_impl(); + commit_impl(false); } } template -inline void Aggregator::commit_impl() { +inline void Aggregator::commit_impl(bool final) { // TODO critical section here in async scenario callback_(std::move(segment_)); commits_count_++; + if(final) + return; + segment_ = SegmentInMemory(schema_policy_.default_descriptor(), segmenting_policy_.expected_row_size(), false, SparsePolicy::allow_sparse); segment_.init_column_map(); stats_.reset(); @@ -32,12 +35,19 @@ inline void Aggregator::commit_i template inline void Aggregator::commit() { - if (ARCTICDB_LIKELY(segment_.row_count() > 0 || segment_.metadata())) { // LIKELY -// 
segment_.end_sparse_columns(); - commit_impl(); + if (ARCTICDB_LIKELY(segment_.row_count() > 0 || segment_.metadata()) || segment_.has_index_descriptor()) { + commit_impl(false); + } +} + +template +inline void Aggregator::finalize() { + if (ARCTICDB_LIKELY(segment_.row_count() > 0 || segment_.metadata()) || segment_.has_index_descriptor()) { + commit_impl(true); } } + template inline void Aggregator::clear() { segment_.clear(); diff --git a/cpp/arcticdb/stream/aggregator.hpp b/cpp/arcticdb/stream/aggregator.hpp index 90d2f9753a..793dfdce78 100644 --- a/cpp/arcticdb/stream/aggregator.hpp +++ b/cpp/arcticdb/stream/aggregator.hpp @@ -194,7 +194,9 @@ class Aggregator { // TODO implement rollback } - virtual void commit(); // TODO return Future? + virtual void commit(); + + virtual void finalize(); void clear(); @@ -274,7 +276,7 @@ class Aggregator { AggregationStats& stats() { return stats_; } protected: - void commit_impl(); + void commit_impl(bool final); private: template || std::is_floating_point_v, int> = 0> diff --git a/cpp/arcticdb/stream/append_map.cpp b/cpp/arcticdb/stream/append_map.cpp index 0ed792e691..c310fe0bfc 100644 --- a/cpp/arcticdb/stream/append_map.cpp +++ b/cpp/arcticdb/stream/append_map.cpp @@ -54,28 +54,17 @@ struct AppendMapEntry { } }; - AppendMapEntry entry_from_key( const std::shared_ptr& store, const entity::AtomKey& key, bool load_data); -//std::pair, size_t> read_head( -// const std::shared_ptr& store, -// StreamId stream_id); - std::vector get_incomplete_append_slices_for_stream_id( const std::shared_ptr &store, const StreamId &stream_id, bool via_iteration, bool load_data); -inline bool has_appends_key( - const std::shared_ptr& store, - const RefKey& ref_key) { - return store->key_exists(ref_key).get(); -} - inline std::vector load_via_iteration( const std::shared_ptr& store, const StreamId& stream_id, @@ -84,7 +73,6 @@ inline std::vector load_via_iteration( auto prefix = std::holds_alternative(stream_id) ? 
std::get(stream_id) : std::string(); std::vector output; - store->iterate_type(KeyType::APPEND_DATA, [&store, load_data, &output, &stream_id] (const auto& vk) { const auto& key = to_atom(vk); if(key.id() != stream_id) @@ -137,15 +125,11 @@ void fix_slice_rowcounts(std::vector& entries, size_t complete_r } TimeseriesDescriptor pack_timeseries_descriptor( - StreamDescriptor&& descriptor, + const StreamDescriptor& descriptor, size_t total_rows, std::optional&& next_key, arcticdb::proto::descriptors::NormalizationMetadata&& norm_meta) { - util::check(descriptor.proto().has_index(), "Stream descriptor without index in pack_timeseries_descriptor"); - auto tsd = make_timeseries_descriptor(total_rows, std::move(descriptor), std::move(norm_meta), std::nullopt, std::nullopt, std::move(next_key), false); - if(ConfigsMap::instance()->get_int("VersionStore.Encoding", 1) == 1) { - tsd.copy_to_self_proto(); - } + auto tsd = make_timeseries_descriptor(total_rows, descriptor, std::move(norm_meta), std::nullopt, std::nullopt, std::move(next_key), false); return tsd; } @@ -169,21 +153,30 @@ SegmentInMemory incomplete_segment_from_frame( std::visit([&](const auto& idx) { using IdxType = std::decay_t; using SingleSegmentAggregator = Aggregator; - + auto copy_prev_key = prev_key; auto timeseries_desc = index_descriptor_from_frame(frame, existing_rows, std::move(prev_key)); util::check(!timeseries_desc.fields().empty(), "Expected fields not to be empty in incomplete segment"); auto norm_meta = timeseries_desc.proto().normalization(); - StreamDescriptor descriptor(std::make_shared(std::move(*timeseries_desc.mutable_proto().mutable_stream_descriptor())), timeseries_desc.fields_ptr()); + auto descriptor = timeseries_desc.as_stream_descriptor(); SingleSegmentAggregator agg{FixedSchema{descriptor, index}, [&](auto&& segment) { - auto tsd = pack_timeseries_descriptor(std::move(descriptor), existing_rows + num_rows, std::move(prev_key), std::move(norm_meta)); - segment.set_timeseries_descriptor(std::move(tsd)); + auto tsd = pack_timeseries_descriptor(descriptor, existing_rows + num_rows, std::move(copy_prev_key), std::move(norm_meta)); + segment.set_timeseries_descriptor(tsd); output = std::forward(segment); }}; if (has_index) { util::check(static_cast(index_tensor), "Expected index tensor for index type {}", agg.descriptor().index()); - auto opt_error = aggregator_set_data(agg.descriptor().field(0).type(), index_tensor.value(), agg, 0, num_rows, offset_in_frame, slice_num_for_column, - num_rows, allow_sparse); + auto opt_error = aggregator_set_data( + agg.descriptor().field(0).type(), + index_tensor.value(), + agg, + 0, + num_rows, + offset_in_frame, + slice_num_for_column, + num_rows, + allow_sparse); + if (opt_error.has_value()) { opt_error->raise(agg.descriptor().field(0).name()); } @@ -278,9 +271,9 @@ void write_head(const std::shared_ptr& store, const AtomKey& next_key, si ARCTICDB_DEBUG(log::version(), "Writing append map head with key {}", next_key); auto desc = stream_descriptor(next_key.id(), RowCountIndex{}, {}); SegmentInMemory segment(desc); - auto tsd = pack_timeseries_descriptor(std::move(desc), total_rows, next_key, {}); - segment.set_timeseries_descriptor(std::move(tsd)); - store->write(KeyType::APPEND_REF, next_key.id(), std::move(segment)).get(); + auto tsd = pack_timeseries_descriptor(desc, total_rows, next_key, {}); + segment.set_timeseries_descriptor(tsd); + store->write_sync(KeyType::APPEND_REF, next_key.id(), std::move(segment)); } void remove_incomplete_segments( @@ -316,18 +309,17 @@ 
std::vector load_via_list( std::pair, size_t> read_head(const std::shared_ptr& store, StreamId stream_id) { auto ref_key = RefKey{std::move(stream_id), KeyType::APPEND_REF}; auto output = std::make_pair, size_t>(std::nullopt, 0); - - if(!has_appends_key(store, ref_key)) - return output; - - auto fut = store->read(ref_key); - auto [key, seg] = std::move(fut).get(); - const auto& tsd = seg.index_descriptor(); - if(tsd.proto().has_next_key()) { - output.first = decode_key(tsd.proto().next_key()); + try { + auto [key, seg] = store->read_sync(ref_key); + const auto &tsd = seg.index_descriptor(); + if (tsd.proto().has_next_key()) + output.first = key_from_proto(tsd.proto().next_key()); + + output.second = tsd.total_rows(); + } catch (storage::KeyNotFoundException& ex) { + ARCTICDB_RUNTIME_DEBUG(log::version(), "Failed to get head of append list for {}: {}", ref_key, ex.what()); } - output.second = tsd.proto().total_rows(); return output; } @@ -338,18 +330,18 @@ std::pair> get_descriptor_a storage::ReadKeyOpts opts = storage::ReadKeyOpts{}) { if(load_data) { auto [key, seg] = store->read_sync(k, opts); - return std::make_pair(TimeseriesDescriptor{seg.timeseries_proto(), seg.index_fields()}, std::make_optional(seg)); + return std::make_pair(seg.index_descriptor(), std::make_optional(seg)); } else { auto [key, tsd] = store->read_timeseries_descriptor(k, opts).get(); return std::make_pair(std::move(tsd), std::nullopt); } } -AppendMapEntry create_entry(const arcticdb::proto::descriptors::TimeSeriesDescriptor& tsd) { +AppendMapEntry create_entry(const TimeseriesDescriptor& tsd) { AppendMapEntry entry; - if(tsd.has_next_key()) - entry.next_key_ = decode_key(tsd.next_key()); + if(tsd.proto().has_next_key()) + entry.next_key_ = key_from_proto(tsd.proto().next_key()); entry.total_rows_ = tsd.total_rows(); return entry; @@ -359,7 +351,7 @@ AppendMapEntry entry_from_key(const std::shared_ptr& store, const auto opts = storage::ReadKeyOpts{}; opts.dont_warn_about_missing_key = true; auto [tsd, seg] = get_descriptor_and_data(store, key, load_data, opts); - auto entry = create_entry(tsd.proto()); + auto entry = create_entry(tsd); auto descriptor = std::make_shared(); auto desc = std::make_shared(tsd.as_stream_descriptor()); auto index_field_count = desc->index().field_count(); @@ -408,8 +400,7 @@ void append_incomplete_segment( auto seg_row_count = seg.row_count(); auto tsd = pack_timeseries_descriptor(seg.descriptor().clone(), seg_row_count, std::move(next_key), {}); - seg.set_timeseries_descriptor(std::move(tsd)); - util::check(static_cast(seg.metadata()), "Expected metadata"); + seg.set_timeseries_descriptor(tsd); auto new_key = store->write( arcticdb::stream::KeyType::APPEND_DATA, 0, @@ -440,7 +431,7 @@ std::vector get_incomplete_append_slices_for_stream_id( if(!entries.empty()) { auto index_desc = entries[0].descriptor().index(); - if (index_desc.type() != IndexDescriptor::ROWCOUNT) { + if (index_desc.type() != IndexDescriptorImpl::Type::ROWCOUNT) { std::sort(std::begin(entries), std::end(entries)); } else { // Can't sensibly sort rowcount indexes, so you'd better have written them in the right order diff --git a/cpp/arcticdb/stream/index.cpp b/cpp/arcticdb/stream/index.cpp index 7c97fe259d..32cb1bb8e7 100644 --- a/cpp/arcticdb/stream/index.cpp +++ b/cpp/arcticdb/stream/index.cpp @@ -11,16 +11,12 @@ #include #include +#include namespace arcticdb::stream { -IndexDescriptor::Type get_index_value_type(const AtomKey& key) { - return std::holds_alternative(key.start_index()) ? 
IndexDescriptor::TIMESTAMP - : IndexDescriptor::STRING; - } - template - StreamDescriptor BaseIndex::create_stream_descriptor( +StreamDescriptor BaseIndex::create_stream_descriptor( StreamId stream_id, std::initializer_list fields ) const { @@ -28,11 +24,15 @@ template return create_stream_descriptor(stream_id, folly::range(fds)); } +[[nodiscard]] IndexDescriptor::Type get_index_value_type(const AtomKey& key) { + return std::holds_alternative(key.start_index()) ? IndexDescriptor::Type::TIMESTAMP : IndexDescriptor::Type::STRING; +} + template const Derived* BaseIndex::derived() const { return static_cast(this); } -template BaseIndex::operator IndexDescriptor() const { +template BaseIndex::operator IndexDescriptorImpl() const { return {Derived::field_count(), Derived::type()}; } @@ -172,7 +172,6 @@ RowCountIndex RowCountIndex::default_index() { return RowCountIndex{}; } - IndexValue RowCountIndex::start_value_for_segment(const SegmentInMemory& segment) { return static_cast(segment.offset()); } @@ -210,35 +209,40 @@ IndexValue EmptyIndex::end_value_for_keys_segment(const SegmentInMemory& segment } Index index_type_from_descriptor(const StreamDescriptor& desc) { - switch (desc.index().proto().kind()) { - case IndexDescriptor::EMPTY: return EmptyIndex{}; - case IndexDescriptor::TIMESTAMP: return TimeseriesIndex::make_from_descriptor(desc); - case IndexDescriptor::STRING: return TableIndex::make_from_descriptor(desc); - case IndexDescriptor::ROWCOUNT: return RowCountIndex{}; + switch (desc.index().type()) { + case IndexDescriptor::Type::EMPTY: + return EmptyIndex{}; + case IndexDescriptor::Type::TIMESTAMP: + return TimeseriesIndex::make_from_descriptor(desc); + case IndexDescriptor::Type::STRING: + return TableIndex::make_from_descriptor(desc); + case IndexDescriptor::Type::ROWCOUNT: + return RowCountIndex{}; default: util::raise_rte( "Data obtained from storage refers to an index type that this build of ArcticDB doesn't understand ({}).", - int(desc.index().proto().kind()) + int(desc.index().type()) ); } } -Index default_index_type_from_descriptor(const IndexDescriptor::Proto& desc) { - switch (desc.kind()) { - case IndexDescriptor::EMPTY: return EmptyIndex{}; - case IndexDescriptor::TIMESTAMP: return TimeseriesIndex::default_index(); - case IndexDescriptor::STRING: return TableIndex::default_index(); - case IndexDescriptor::ROWCOUNT: return RowCountIndex::default_index(); - default: util::raise_rte("Unknown index type {} trying to generate index type", int(desc.kind())); +Index default_index_type_from_descriptor(const IndexDescriptorImpl& desc) { + switch (desc.type()) { + case IndexDescriptor::Type::EMPTY: + return EmptyIndex{}; + case IndexDescriptor::Type::TIMESTAMP: + return TimeseriesIndex::default_index(); + case IndexDescriptor::Type::STRING: + return TableIndex::default_index(); + case IndexDescriptor::Type::ROWCOUNT: + return RowCountIndex::default_index(); + default: + util::raise_rte("Unknown index type {} trying to generate index type", int(desc.type())); } } -Index default_index_type_from_descriptor(const IndexDescriptor& desc) { - return default_index_type_from_descriptor(desc.proto()); -} - IndexDescriptor get_descriptor_from_index(const Index& index) { - return util::variant_match(index, [](const auto& idx) { return static_cast(idx); }); + return util::variant_match(index, [](const auto& idx) { return static_cast(idx); }); } Index empty_index() { @@ -249,4 +253,5 @@ template class BaseIndex; template class BaseIndex; template class BaseIndex; template class BaseIndex; + } \ No 
newline at end of file diff --git a/cpp/arcticdb/stream/index.hpp b/cpp/arcticdb/stream/index.hpp index 377e52e1c9..2fcdd07708 100644 --- a/cpp/arcticdb/stream/index.hpp +++ b/cpp/arcticdb/stream/index.hpp @@ -28,16 +28,15 @@ template class BaseIndex { public: template StreamDescriptor create_stream_descriptor(StreamId stream_id, RangeType&& fields) const { - return stream_descriptor(stream_id, *derived(), std::move(fields)); + return stream_descriptor_from_range(stream_id, *derived(), std::move(fields)); } [[nodiscard]] StreamDescriptor create_stream_descriptor(StreamId stream_id, std::initializer_list fields) const; [[nodiscard]] const Derived* derived() const; - explicit operator IndexDescriptor() const; + explicit operator IndexDescriptorImpl() const; [[nodiscard]] FieldRef field(size_t) const; }; -//TODO make this into just a numeric index, of which timestamp is a special case class TimeseriesIndex : public BaseIndex { public: static constexpr const char* DefaultName = "time" ; @@ -50,8 +49,8 @@ class TimeseriesIndex : public BaseIndex { return 1; } - static constexpr IndexDescriptor::Type type() { - return IndexDescriptor::TIMESTAMP; + static constexpr IndexDescriptorImpl::Type type() { + return IndexDescriptorImpl::Type::TIMESTAMP; } TimeseriesIndex(const std::string& name); static TimeseriesIndex default_index(); @@ -96,8 +95,8 @@ class TableIndex : public BaseIndex { return 1; } - static constexpr IndexDescriptor::Type type() { - return IndexDescriptor::STRING; + static constexpr IndexDescriptorImpl::Type type() { + return IndexDescriptorImpl::Type::STRING; } void check(const FieldCollection& fields) const; @@ -138,7 +137,7 @@ class RowCountIndex : public BaseIndex { static constexpr size_t field_count() { return 0; } - static constexpr IndexDescriptor::Type type() { return IndexDescriptor::ROWCOUNT; } + static constexpr IndexDescriptorImpl::Type type() { return IndexDescriptorImpl::Type::ROWCOUNT; } static IndexValue start_value_for_segment(const SegmentInMemory& segment); @@ -166,7 +165,7 @@ class EmptyIndex : public BaseIndex { } static constexpr IndexDescriptor::Type type() { - return IndexDescriptor::EMPTY; + return IndexDescriptor::Type::EMPTY; } static constexpr const char* name() { @@ -186,9 +185,10 @@ class EmptyIndex : public BaseIndex { using Index = std::variant; Index index_type_from_descriptor(const StreamDescriptor& desc); -Index default_index_type_from_descriptor(const IndexDescriptor::Proto& desc); +Index default_index_type_from_descriptor(const IndexDescriptorImpl& desc); // Only to be used for visitation to get field count etc as the name is not set + Index variant_index_from_type(IndexDescriptor::Type type); Index default_index_type_from_descriptor(const IndexDescriptor& desc); IndexDescriptor get_descriptor_from_index(const Index& index); diff --git a/cpp/arcticdb/stream/index_aggregator.hpp b/cpp/arcticdb/stream/index_aggregator.hpp index f3eabfc8d7..1af2294fcd 100644 --- a/cpp/arcticdb/stream/index_aggregator.hpp +++ b/cpp/arcticdb/stream/index_aggregator.hpp @@ -35,7 +35,7 @@ class FlatIndexingPolicy { public: template FlatIndexingPolicy(StreamId stream_id, C&& c) : - callback_(std::move(c)), + callback_(std::forward(c)), schema_(idx_schema(stream_id, DataIndexType::default_index())), segment_(schema_.default_descriptor()) {} @@ -50,8 +50,14 @@ class FlatIndexingPolicy { } } - void set_metadata(google::protobuf::Any &&meta) { - segment_.set_metadata(std::move(meta)); + void finalize() { + if (ARCTICDB_LIKELY(!segment_.empty())) { + 
callback_(std::move(segment_)); + } + } + + void set_timeseries_descriptor(const TimeseriesDescriptor& timeseries_descriptor) { + segment_.set_timeseries_descriptor(timeseries_descriptor); } private: @@ -65,7 +71,7 @@ class IndexAggregator { public: template IndexAggregator(StreamId stream_id, C &&c): - indexing_policy_(stream_id, std::move(c)) {} + indexing_policy_(stream_id, std::forward(c)) {} void add_key(const AtomKey &key) { indexing_policy_.add_key(key); @@ -75,8 +81,12 @@ class IndexAggregator { indexing_policy_.commit(); } - void set_metadata(google::protobuf::Any &&meta) { - indexing_policy_.set_metadata(std::move(meta)); + void finalize() { + indexing_policy_.finalize(); + } + + void set_timeseries_descriptor(const TimeseriesDescriptor& timeseries_descriptor) { + indexing_policy_.set_timeseries_descriptor(timeseries_descriptor); } private: diff --git a/cpp/arcticdb/stream/merge.hpp b/cpp/arcticdb/stream/merge.hpp index 32b19fd658..61a4ebeec7 100644 --- a/cpp/arcticdb/stream/merge.hpp +++ b/cpp/arcticdb/stream/merge.hpp @@ -19,7 +19,7 @@ void do_merge( while (!input_streams.empty()) { auto next = input_streams.pop_top(); - agg.start_row(pipelines::index::index_value_from_row(next->row(), IndexDescriptor::TIMESTAMP, 0).value()) ([&next, add_symbol_column](auto &rb) { + agg.start_row(pipelines::index::index_value_from_row(next->row(), IndexDescriptorImpl::Type::TIMESTAMP, 0).value()) ([&next, add_symbol_column](auto &rb) { if(add_symbol_column) rb.set_scalar_by_name("symbol", std::string_view(std::get(next->id())), DataType::UTF_DYNAMIC64); diff --git a/cpp/arcticdb/stream/merge_utils.hpp b/cpp/arcticdb/stream/merge_utils.hpp index 0e5c3ce338..d9efbbede7 100644 --- a/cpp/arcticdb/stream/merge_utils.hpp +++ b/cpp/arcticdb/stream/merge_utils.hpp @@ -78,10 +78,12 @@ inline void merge_segments( } } } - if (segment.row_count() && segment.descriptor().index().type() == IndexDescriptor::TIMESTAMP) { + + if (segment.row_count() && segment.descriptor().index().type() == IndexDescriptorImpl::Type::TIMESTAMP) { min_idx = std::min(min_idx, segment.begin()->begin()->value()); max_idx = std::max(max_idx, (segment.end() - 1)->begin()->value()); } + merge_string_columns(segment, merged.string_pool_ptr(), false); merged.append(segment); merged.set_compacted(true); diff --git a/cpp/arcticdb/stream/protobuf_mappings.cpp b/cpp/arcticdb/stream/protobuf_mappings.cpp new file mode 100644 index 0000000000..c37aab635c --- /dev/null +++ b/cpp/arcticdb/stream/protobuf_mappings.cpp @@ -0,0 +1,83 @@ +/* Copyright 2023 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
+ */
+#include
+#include
+#include
+
+#include
+#include
+
+namespace arcticdb {
+
+struct FrameDescriptorImpl;
+
+arcticdb::proto::descriptors::NormalizationMetadata make_timeseries_norm_meta(const StreamId& stream_id) {
+    using namespace arcticdb::proto::descriptors;
+    NormalizationMetadata norm_meta;
+    NormalizationMetadata_PandasDataFrame pandas;
+    auto id = std::get(stream_id);
+    pandas.mutable_common()->set_name(std::move(id));
+    NormalizationMetadata_PandasIndex pandas_index;
+    pandas_index.set_name("time");
+    pandas.mutable_common()->mutable_index()->CopyFrom(pandas_index);
+    norm_meta.mutable_df()->CopyFrom(pandas);
+    return norm_meta;
+}
+
+arcticdb::proto::descriptors::NormalizationMetadata make_rowcount_norm_meta(const StreamId& stream_id) {
+    using namespace arcticdb::proto::descriptors;
+    NormalizationMetadata norm_meta;
+    NormalizationMetadata_PandasDataFrame pandas;
+    auto id = std::get(stream_id);
+    pandas.mutable_common()->set_name(std::move(id));
+    NormalizationMetadata_PandasIndex pandas_index;
+    pandas_index.set_is_physically_stored(true);
+    pandas.mutable_common()->mutable_index()->CopyFrom(pandas_index);
+    norm_meta.mutable_df()->CopyFrom(pandas);
+    return norm_meta;
+}
+
+/**
+ * Set the minimum defaults into norm_meta. Originally created to synthesize norm_meta for incomplete compaction.
+ */
+void ensure_timeseries_norm_meta(arcticdb::proto::descriptors::NormalizationMetadata& norm_meta, const StreamId& stream_id, bool set_tz) {
+    if(norm_meta.input_type_case() == arcticdb::proto::descriptors::NormalizationMetadata::INPUT_TYPE_NOT_SET) {
+        norm_meta.CopyFrom(make_timeseries_norm_meta(stream_id));
+    }
+
+    if(set_tz && norm_meta.df().common().index().tz().empty())
+        norm_meta.mutable_df()->mutable_common()->mutable_index()->set_tz("UTC");
+}
+
+void ensure_rowcount_norm_meta(arcticdb::proto::descriptors::NormalizationMetadata& norm_meta, const StreamId& stream_id) {
+    if(norm_meta.input_type_case() == arcticdb::proto::descriptors::NormalizationMetadata::INPUT_TYPE_NOT_SET) {
+        norm_meta.CopyFrom(make_rowcount_norm_meta(stream_id));
+    }
+}
+
+FrameDescriptorImpl frame_descriptor_from_proto(arcticdb::proto::descriptors::TimeSeriesDescriptor& tsd) {
+    FrameDescriptorImpl output;
+    output.column_groups_ = tsd.has_column_groups() && tsd.column_groups().enabled();
+    output.total_rows_ = tsd.total_rows();
+    return output;
+}
+
+SegmentDescriptorImpl segment_descriptor_from_proto(const arcticdb::proto::descriptors::StreamDescriptor& desc) {
+    SegmentDescriptorImpl output;
+    output.sorted_ = SortedValue(desc.sorted());
+    output.compressed_bytes_ = desc.out_bytes();
+    output.uncompressed_bytes_ = desc.in_bytes();
+    output.row_count_ = desc.row_count();
+    output.index_ = IndexDescriptor(IndexDescriptor::Type(desc.index().kind()), desc.index().field_count());
+    return output;
+}
+
+StreamId stream_id_from_proto(const arcticdb::proto::descriptors::StreamDescriptor& desc) {
+    return desc.id_case() == desc.kNumId ?
StreamId(desc.num_id()) : StreamId(desc.str_id()); +} + +} //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/stream/protobuf_mappings.hpp b/cpp/arcticdb/stream/protobuf_mappings.hpp index a47df433c7..da5f4473dd 100644 --- a/cpp/arcticdb/stream/protobuf_mappings.hpp +++ b/cpp/arcticdb/stream/protobuf_mappings.hpp @@ -8,56 +8,34 @@ #pragma once #include -#include - -#include +#include #include namespace arcticdb { -inline arcticdb::proto::descriptors::NormalizationMetadata make_timeseries_norm_meta(const entity::StreamId& stream_id) { - using namespace arcticdb::proto::descriptors; - NormalizationMetadata norm_meta; - NormalizationMetadata_PandasDataFrame pandas; - auto id = std::get(stream_id); - pandas.mutable_common()->set_name(std::move(id)); - NormalizationMetadata_PandasIndex pandas_index; - pandas_index.set_name("time"); - pandas.mutable_common()->mutable_index()->CopyFrom(pandas_index); - norm_meta.mutable_df()->CopyFrom(pandas); - return norm_meta; -} +struct FrameDescriptorImpl; -inline arcticdb::proto::descriptors::NormalizationMetadata make_rowcount_norm_meta(const entity::StreamId& stream_id) { - using namespace arcticdb::proto::descriptors; - NormalizationMetadata norm_meta; - NormalizationMetadata_PandasDataFrame pandas; - auto id = std::get(stream_id); - pandas.mutable_common()->set_name(std::move(id)); - NormalizationMetadata_PandasIndex pandas_index; - pandas_index.set_is_physically_stored(true); - pandas.mutable_common()->mutable_index()->CopyFrom(pandas_index); - norm_meta.mutable_df()->CopyFrom(pandas); - return norm_meta; +namespace entity { +struct SegmentDescriptorImpl; } -/** - * Set the minimum defaults into norm_meta. Originally created to synthesize norm_meta for incomplete compaction. - */ -inline void ensure_timeseries_norm_meta(arcticdb::proto::descriptors::NormalizationMetadata& norm_meta, const entity::StreamId& stream_id, bool set_tz) { - if(norm_meta.input_type_case() == arcticdb::proto::descriptors::NormalizationMetadata::INPUT_TYPE_NOT_SET) { - norm_meta.CopyFrom(make_timeseries_norm_meta(stream_id)); - } +inline arcticdb::proto::descriptors::NormalizationMetadata make_rowcount_norm_meta(const StreamId& stream_id); - if(set_tz && norm_meta.df().common().index().tz().empty()) - norm_meta.mutable_df()->mutable_common()->mutable_index()->set_tz("UTC"); -} +arcticdb::proto::descriptors::NormalizationMetadata make_timeseries_norm_meta(const StreamId& stream_id); -inline void ensure_rowcount_norm_meta(arcticdb::proto::descriptors::NormalizationMetadata& norm_meta, const entity::StreamId& stream_id) { - if(norm_meta.input_type_case() == arcticdb::proto::descriptors::NormalizationMetadata::INPUT_TYPE_NOT_SET) { - norm_meta.CopyFrom(make_rowcount_norm_meta(stream_id)); - } -} +arcticdb::proto::descriptors::NormalizationMetadata make_rowcount_norm_meta(const StreamId& stream_id); + +void ensure_timeseries_norm_meta(arcticdb::proto::descriptors::NormalizationMetadata& norm_meta, const StreamId& stream_id, bool set_tz); + +void ensure_rowcount_norm_meta(arcticdb::proto::descriptors::NormalizationMetadata& norm_meta, const StreamId& stream_id); + +FrameDescriptorImpl frame_descriptor_from_proto(arcticdb::proto::descriptors::TimeSeriesDescriptor& tsd); + +entity::SegmentDescriptorImpl segment_descriptor_from_proto(const arcticdb::proto::descriptors::StreamDescriptor& desc); + +StreamId stream_id_from_proto(const arcticdb::proto::descriptors::StreamDescriptor& desc); + +size_t num_blocks(const arcticdb::proto::encoding::EncodedField& field); } 
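+// Usage sketch (hypothetical caller, variable name assumed): the free functions declared above let a
+// reader rebuild the in-memory descriptor pieces from a decoded protobuf StreamDescriptor:
+//     const SegmentDescriptorImpl seg_desc = segment_descriptor_from_proto(proto_desc);
+//     const StreamId id = stream_id_from_proto(proto_desc);
+// seg_desc carries the sorted flag, row count, compressed/uncompressed byte counts and the index
+// descriptor; id resolves the num_id/str_id oneof to the StreamId variant.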
//namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/stream/python_bindings.cpp b/cpp/arcticdb/stream/python_bindings.cpp index 02cd52cfd1..e69be7ac60 100644 --- a/cpp/arcticdb/stream/python_bindings.cpp +++ b/cpp/arcticdb/stream/python_bindings.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -24,14 +25,31 @@ namespace py = pybind11; namespace arcticdb { using namespace arcticdb::python_util; -std::vector field_collection_to_ref_vector(const FieldCollection& fields){ - auto result = std::vector(); +std::vector field_collection_to_ref_vector(const FieldCollection& fields){ + auto result = std::vector{}; result.reserve(fields.size()); - std::transform(fields.begin(), fields.end(), std::back_inserter(result), [](const Field& field){return field.ref();}); + std::transform(fields.begin(), fields.end(), std::back_inserter(result), [](const Field& field){return FieldWrapper{field.type(), field.name()};}); return result; } void register_types(py::module &m) { + + py::enum_(m, "ValueType") +#define VALUE_TYPE(__VT__) .value(#__VT__, ValueType::__VT__) + VALUE_TYPE(UINT) + VALUE_TYPE(INT) + VALUE_TYPE(FLOAT) + VALUE_TYPE(BOOL) + VALUE_TYPE(NANOSECONDS_UTC) + VALUE_TYPE(ASCII_FIXED) + VALUE_TYPE(UTF8_FIXED) + VALUE_TYPE(BYTES) + VALUE_TYPE(UTF_DYNAMIC) + VALUE_TYPE(EMPTY) + VALUE_TYPE(BOOL_OBJECT) +#undef VALUE_TYPE + ; + py::enum_(m, "DataType") #define DATA_TYPE(__DT__) .value(#__DT__, DataType::__DT__) DATA_TYPE(UINT8) @@ -48,9 +66,6 @@ void register_types(py::module &m) { DATA_TYPE(NANOSECONDS_UTC64) DATA_TYPE(ASCII_FIXED64) DATA_TYPE(ASCII_DYNAMIC64) - //DATA_TYPE(UTF8_STRING) - // DATA_TYPE(BYTES) - // DATA_TYPE(PICKLE) #undef DATA_TYPE ; @@ -62,46 +77,58 @@ void register_types(py::module &m) { m.def("as_dim_checked", &as_dim_checked, "Turns a uint8_t into a Dimension enum object"); python_util::add_repr(py::class_(m, "TypeDescriptor") - .def(py::init()) - .def("data_type", &TypeDescriptor::data_type) - .def("dimension", &TypeDescriptor::dimension) - ); - //TODO re-add this constructor + .def(py::init()) + .def(py::self == py::self) + .def(py::self != py::self) + .def("data_type", &TypeDescriptor::data_type) + .def_property_readonly("value_type", &TypeDescriptor::value_type) + .def("dimension", &TypeDescriptor::dimension)); + python_util::add_repr(py::class_(m, "FieldDescriptor") - .def(py::init()) - .def("type", &FieldRef::type) - .def("name", &FieldRef::name) - ); + .def(py::init()) + .def_property_readonly("type", &FieldRef::type) + .def_property_readonly("name", &FieldRef::name)); - py::enum_(m, "IndexKind") - .value("TIMESTAMP", IndexDescriptor::TIMESTAMP) - .value("STRING", IndexDescriptor::STRING) - .value("ROWCOUNT", IndexDescriptor::ROWCOUNT); + python_util::add_repr(py::class_(m, "FieldDescriptorWrapper") + .def_property_readonly("type", &FieldWrapper::type) + .def_property_readonly("name", &FieldWrapper::name)); + + python_util::add_repr(py::class_(m, "IndexDescriptor") + .def(py::init()) + .def("field_count", &IndexDescriptorImpl::field_count) + .def("kind", &IndexDescriptorImpl::type)); - python_util::add_repr(py::class_(m, "IndexDescriptor") - .def(py::init()) - .def("field_count", &IndexDescriptor::field_count) - .def("kind", &IndexDescriptor::type) - ); + + py::enum_(m, "IndexKind") + .value("TIMESTAMP", IndexDescriptorImpl::Type::TIMESTAMP) + .value("STRING", IndexDescriptorImpl::Type::STRING) + .value("ROWCOUNT", IndexDescriptorImpl::Type::ROWCOUNT); python_util::add_repr(py::class_(m, "StreamDescriptor") - .def(py::init([](StreamId 
stream_id, IndexDescriptor idx_desc, const std::vector& fields) { - auto index = stream::default_index_type_from_descriptor(idx_desc.proto()); - return util::variant_match(index, [&stream_id, &fields] (auto idx_type){ - return StreamDescriptor{index_descriptor(stream_id, idx_type, fields_from_range(fields))}; - }); - })) - .def("id", &StreamDescriptor::id) - .def("fields", [](const StreamDescriptor& desc){ - return field_collection_to_ref_vector(desc.fields()); - }) + .def(py::init([](StreamId stream_id, IndexDescriptorImpl idx_desc, const std::vector& fields) { + auto index = stream::default_index_type_from_descriptor(idx_desc); + return util::variant_match(index, [&stream_id, &fields] (auto idx_type){ + return StreamDescriptor{index_descriptor_from_range(stream_id, idx_type, fields_from_range(fields))}; + }); + })) + .def("id", &StreamDescriptor::id) + .def("fields", [](const StreamDescriptor& desc){ + return field_collection_to_ref_vector(desc.fields()); + }) ); - python_util::add_repr(py::class_(m, "TimeseriesDescriptor") - .def("fields", [](const TimeseriesDescriptor& desc){ - return field_collection_to_ref_vector(desc.fields()); - }) - ); + py::class_(m, "TimeseriesDescriptor") + .def_property_readonly("fields", [](const TimeseriesDescriptor& desc){ + return field_collection_to_ref_vector(desc.fields()); + }).def_property_readonly("normalization", [](const TimeseriesDescriptor& self) { + return python_util::pb_to_python(self.normalization()); + }).def_property_readonly("sorted", [](const TimeseriesDescriptor& self) { + return self.sorted(); + }).def_property_readonly("index", [](const TimeseriesDescriptor& self) { + return self.index(); + }).def_property_readonly("total_rows", [](const TimeseriesDescriptor& self) { + return self.total_rows(); + }); py::class_(m, "TimestampRange") .def(py::init()) @@ -111,8 +138,7 @@ void register_types(py::module &m) { .def_property_readonly("start_nanos_utc", &PyTimestampRange::start_nanos_utc) .def_property_readonly("end_nanos_utc", &PyTimestampRange::end_nanos_utc); - m.def("create_timestamp_index_stream_descriptor", [](StreamId tsid, - const std::vector& fields) { + m.def("create_timestamp_index_stream_descriptor", [](StreamId tsid, const std::vector& fields) { auto rg = folly::range(fields.begin(), fields.end()); const auto index = stream::TimeseriesIndex::default_index(); return index.create_stream_descriptor(tsid, fields_from_range(rg)); @@ -126,9 +152,7 @@ struct SegmentHolder { SegmentInMemory segment; }; - void register_stream_bindings(py::module &m) { - using Agg = FixedTimestampAggregator; using FixedTickRowBuilder = typename Agg::RowBuilderType; diff --git a/cpp/arcticdb/stream/row_builder.hpp b/cpp/arcticdb/stream/row_builder.hpp index a95b01e053..2babc8814a 100644 --- a/cpp/arcticdb/stream/row_builder.hpp +++ b/cpp/arcticdb/stream/row_builder.hpp @@ -21,6 +21,7 @@ #include #include +#include #include @@ -88,7 +89,7 @@ class RowBuilder { return *this; } - std::optional find_field(std::string_view field_name) const { + [[nodiscard]] std::optional find_field(std::string_view field_name) const { return descriptor().find_field(field_name); } @@ -154,7 +155,7 @@ class RowBuilder { aggregator_.set_string_list(pos, input); } - std::size_t nbytes() const { + [[nodiscard]] std::size_t nbytes() const { return std::size_t(nbytes_); } @@ -162,12 +163,11 @@ class RowBuilder { return *aggregator_; } - const arcticdb::entity::StreamDescriptor &descriptor() const { + [[nodiscard]] const arcticdb::entity::StreamDescriptor &descriptor() const { return 
aggregator_.descriptor(); } private: - void reset() { nbytes_ = 0; } diff --git a/cpp/arcticdb/stream/schema.hpp b/cpp/arcticdb/stream/schema.hpp index 41b581e7f1..0d6ba99fa8 100644 --- a/cpp/arcticdb/stream/schema.hpp +++ b/cpp/arcticdb/stream/schema.hpp @@ -28,10 +28,10 @@ class FixedSchema { index_(std::move(index)) { } - static FixedSchema default_schema(const Index &index) { - return util::variant_match(index, [](auto idx) { + static FixedSchema default_schema(const Index &index, const StreamId& stream_id) { + return util::variant_match(index, [&stream_id](auto idx) { using IndexType = std::remove_reference_t; - return FixedSchema(StreamDescriptor(), IndexType::default_index()); + return FixedSchema(StreamDescriptor(stream_id), IndexType::default_index()); }); } @@ -84,10 +84,10 @@ class DynamicSchema { index_(index) { } - static DynamicSchema default_schema(const Index &index) { - return util::variant_match(index, [](auto idx) { + static DynamicSchema default_schema(const Index &index, const StreamId& stream_id) { + return util::variant_match(index, [stream_id](auto idx) { using IndexType = std::remove_reference_t; - return DynamicSchema(StreamDescriptor(), IndexType::default_index()); + return DynamicSchema(StreamDescriptor(stream_id), IndexType::default_index()); }); } diff --git a/cpp/arcticdb/stream/segment_aggregator.hpp b/cpp/arcticdb/stream/segment_aggregator.hpp index 6cdb307171..cbf8186457 100644 --- a/cpp/arcticdb/stream/segment_aggregator.hpp +++ b/cpp/arcticdb/stream/segment_aggregator.hpp @@ -59,6 +59,7 @@ template 0) { auto slice = merge_slices(slices_, AggregatorType::segment().descriptor()); - AggregatorType::commit_impl(); + AggregatorType::commit_impl(false); slice_callback_(std::move(slice)); } segments_.clear(); diff --git a/cpp/arcticdb/stream/stream_reader.hpp b/cpp/arcticdb/stream/stream_reader.hpp index 7bdd41d0c8..7bc5bbc901 100644 --- a/cpp/arcticdb/stream/stream_reader.hpp +++ b/cpp/arcticdb/stream/stream_reader.hpp @@ -55,7 +55,7 @@ class RowsFromSegIterator : public IndexRangeFilter { // Not filtering rows where we have a rowcount index - the assumption is that it's essentially an un-indexed blob // that we need to segment somehow. - auto accept = index_type == IndexDescriptor::ROWCOUNT || accept_index(pipelines::index::index_start_from_row(res.value(), index_type).value()); + auto accept = index_type == IndexDescriptorImpl::Type::ROWCOUNT || accept_index(pipelines::index::index_start_from_row(res.value(), index_type).value()); if (++row_id == seg_->row_count()) { prev_seg_ = seg_; seg_ = std::nullopt; diff --git a/cpp/arcticdb/stream/stream_utils.hpp b/cpp/arcticdb/stream/stream_utils.hpp index 66927997e0..c87e6053b4 100644 --- a/cpp/arcticdb/stream/stream_utils.hpp +++ b/cpp/arcticdb/stream/stream_utils.hpp @@ -368,9 +368,9 @@ inline std::set filter_by_regex(const std::set& results, con return filtered_results; } -inline std::vector get_index_columns_from_descriptor(const TimeseriesDescriptor& descriptor) { - const auto& norm_info = descriptor.proto().normalization(); - const auto& stream_descriptor = descriptor.proto().stream_descriptor(); +inline std::vector get_index_columns_from_descriptor(const TimeseriesDescriptor& tsd) { + const auto& norm_info = tsd.proto().normalization(); + const auto& stream_descriptor = tsd.as_stream_descriptor(); // For explicit integer indexes, the index is actually present in the first column even though the field_count // is 0. 
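+    // Illustrative shape (hypothetical column names): a frame written with an explicit integer index
+    // can arrive with index().field_count() == 0 while fields() still begins with the index column,
+    // e.g. ["int_idx", "col_a", ...]; index_till below therefore has to be derived from norm_info
+    // rather than from the index field count alone.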
ssize_t index_till; @@ -382,7 +382,7 @@ inline std::vector get_index_columns_from_descriptor(const Timeseri std::vector index_columns; for(auto field_idx = 0; field_idx < index_till; ++field_idx) - index_columns.push_back(stream_descriptor.fields(field_idx).name()); + index_columns.push_back(std::string{stream_descriptor.fields(field_idx).name()}); return index_columns; } diff --git a/cpp/arcticdb/stream/stream_writer.hpp b/cpp/arcticdb/stream/stream_writer.hpp index 6b79f1e02b..74ae2cf2e5 100644 --- a/cpp/arcticdb/stream/stream_writer.hpp +++ b/cpp/arcticdb/stream/stream_writer.hpp @@ -35,8 +35,7 @@ folly::Future collect_and_commit( VersionId version_id, std::optional specified_range, std::shared_ptr store, - Verifier &&verifier, - google::protobuf::Any &&metadata) { + Verifier &&verifier) { // Shared ptr here is used to keep the futures alive until the collect future is ready auto commit_keys = std::make_shared>>(std::move(fut_keys)); @@ -71,7 +70,6 @@ folly::Future collect_and_commit( idx_agg.add_key(to_atom(key)); } - idx_agg.set_metadata(std::move(metadata)); idx_agg.commit(); util::check(index_key.valid(), "Empty key returned while committing index"); return index_key; @@ -133,8 +131,7 @@ class StreamWriter : boost::noncopyable { version_id_, specified_range_, store_, - std::move(verify), - google::protobuf::Any()); + std::move(verify)); } StreamId stream_id() const { diff --git a/cpp/arcticdb/stream/test/test_append_map.cpp b/cpp/arcticdb/stream/test/test_append_map.cpp index 8a93c2a3f7..e2c5bb2ffd 100644 --- a/cpp/arcticdb/stream/test/test_append_map.cpp +++ b/cpp/arcticdb/stream/test/test_append_map.cpp @@ -43,7 +43,7 @@ TEST(Append, MergeDescriptorsPromote) { using namespace arcticdb; StreamId id{"test_desc"}; - IndexDescriptor idx{1u, IndexDescriptor::TIMESTAMP}; + IndexDescriptorImpl idx{1u, IndexDescriptorImpl::Type::TIMESTAMP}; std::vector fields { scalar_field(DataType::NANOSECONDS_UTC64, "time"), @@ -85,7 +85,7 @@ TEST(Append, MergeDescriptorsNoPromote) { using namespace arcticdb; StreamId id{"test_desc"}; - IndexDescriptor idx{1u, IndexDescriptor::TIMESTAMP}; + IndexDescriptorImpl idx{1u, IndexDescriptorImpl::Type::TIMESTAMP}; std::vector fields { scalar_field(DataType::NANOSECONDS_UTC64, "time"), diff --git a/cpp/arcticdb/toolbox/library_tool.cpp b/cpp/arcticdb/toolbox/library_tool.cpp index 0ff5573a7b..14ca2ceffd 100644 --- a/cpp/arcticdb/toolbox/library_tool.cpp +++ b/cpp/arcticdb/toolbox/library_tool.cpp @@ -39,7 +39,7 @@ Segment LibraryTool::read_to_segment(const VariantKey& key) { auto kv = store_->read_compressed_sync(key, storage::ReadKeyOpts{}); util::check(kv.has_segment(), "Failed to read key: {}", key); kv.segment().force_own_buffer(); - return kv.segment(); + return std::move(kv.segment()); } std::optional LibraryTool::read_metadata(const VariantKey& key){ @@ -55,7 +55,7 @@ TimeseriesDescriptor LibraryTool::read_timeseries_descriptor(const VariantKey& k return store_->read_timeseries_descriptor(key).get().second; } -void LibraryTool::write(VariantKey key, Segment segment) { +void LibraryTool::write(VariantKey key, Segment& segment) { storage::KeySegmentPair kv{std::move(key), std::move(segment)}; store_->write_compressed_sync(std::move(kv)); } diff --git a/cpp/arcticdb/toolbox/library_tool.hpp b/cpp/arcticdb/toolbox/library_tool.hpp index abc4867b4c..ef769cf62a 100644 --- a/cpp/arcticdb/toolbox/library_tool.hpp +++ b/cpp/arcticdb/toolbox/library_tool.hpp @@ -43,7 +43,7 @@ class LibraryTool { [[nodiscard]] TimeseriesDescriptor read_timeseries_descriptor(const 
VariantKey& key); - void write(VariantKey key, Segment segment); + void write(VariantKey key, Segment& segment); void remove(VariantKey key); @@ -53,7 +53,7 @@ class LibraryTool { std::string get_key_path(const VariantKey& key); - std::vector find_keys_for_id(entity::KeyType kt, const entity::StreamId &stream_id); + std::vector find_keys_for_id(entity::KeyType kt, const StreamId &stream_id); int count_keys(entity::KeyType kt); diff --git a/cpp/arcticdb/util/buffer.hpp b/cpp/arcticdb/util/buffer.hpp index 905856c22c..6b0148c701 100644 --- a/cpp/arcticdb/util/buffer.hpp +++ b/cpp/arcticdb/util/buffer.hpp @@ -52,14 +52,14 @@ struct BufferView : public BaseBuffer { }; struct Buffer : public BaseBuffer { - void init(size_t size, const std::optional& preamble = std::nullopt) { + void reserve(size_t size, const std::optional& preamble = std::nullopt) { preamble_bytes_ = preamble.value_or(0); ensure(size); check_invariants(); } explicit Buffer(size_t size, std::optional preamble = std::nullopt) { - init(size, preamble); + reserve(size, preamble); } Buffer() = default; @@ -69,6 +69,10 @@ struct Buffer : public BaseBuffer { check_invariants(); } + static auto presized(size_t size) { + return Buffer(size); + }; + Buffer &operator=(Buffer &&b) noexcept { deallocate(); using std::swap; @@ -138,7 +142,7 @@ struct Buffer : public BaseBuffer { [[nodiscard]] Buffer clone() const { Buffer output; if(total_bytes() > 0) { - output.init(body_bytes_, preamble_bytes_); + output.reserve(body_bytes_, preamble_bytes_); util::check(data_ != nullptr && output.data_ != nullptr, "Error in buffer allocation of size {} + {}", body_bytes_, preamble_bytes_); memcpy(output.data_, data_, total_bytes()); } @@ -150,7 +154,7 @@ struct Buffer : public BaseBuffer { check_invariants(); if (bytes_offset + required_bytes > bytes()) { std::string err = fmt::format("Cursor overflow in reallocating buffer ptr_cast, cannot read {} bytes from a buffer of size {} with cursor " - "at {}, as it would required {} bytes. ", + "at {}, as it would require {} bytes. 
", required_bytes, bytes(), bytes_offset, @@ -169,8 +173,7 @@ struct Buffer : public BaseBuffer { } inline void ensure(size_t bytes) { - const size_t total_size = bytes + preamble_bytes_; - if(total_size > capacity_) { + if(const size_t total_size = bytes + preamble_bytes_; total_size > capacity_) { resize(total_size); } else { ARCTICDB_TRACE(log::version(), "Buffer {} has sufficient bytes for {}, ptr {} data {}, capacity {}", @@ -259,7 +262,106 @@ struct Buffer : public BaseBuffer { entity::timestamp ts_ = 0; }; -using VariantBuffer = std::variant, BufferView>; +class VariantBuffer { + using VariantType = std::variant, BufferView>; + + VariantType buffer_; +public: + VariantBuffer() = default; + + template + VariantBuffer(BufferType&& buf) : + buffer_(std::forward(buf)) { + } + + [[nodiscard]] VariantBuffer clone() const { + return util::variant_match(buffer_, + [] (const BufferView& bv) { auto b = std::make_shared(); bv.copy_to(*b); return VariantBuffer{std::move(b)}; }, + [] (const std::shared_ptr& buf) { return VariantBuffer{ std::make_shared(buf->clone())}; }, + [] (const std::monostate) -> VariantBuffer { util::raise_rte("Uninitialized buffer"); } + ); + } + + template + VariantBuffer& operator=(BufferType&& buf) { + buffer_ = std::forward(buf); + return *this; + } + + [[nodiscard]] const std::shared_ptr& get_owning_buffer() const { + return std::get>(buffer_); + } + + uint8_t* data() { + return util::variant_match(buffer_, + [] (BufferView& bv) { return bv.data(); }, + [] (const std::shared_ptr& buf) { return buf->data(); }, + [] (const std::monostate) ->uint8_t* { util::raise_rte("Uninitialized buffer"); } + ); + } + + [[nodiscard]] size_t preamble_bytes() const { + if (std::holds_alternative>(buffer_)) { + return std::get>(buffer_)->preamble_bytes(); + } else { + return 0U; + } + } + + [[nodiscard]] BufferView view() const { + if (std::holds_alternative>(buffer_)) { + return std::get>(buffer_)->view(); + } else { + return std::get(buffer_); + } + } + + [[nodiscard]] std::size_t bytes() const { + std::size_t s = 0; + util::variant_match(buffer_, + [] (const std::monostate&) { /* Uninitialized buffer */}, + [&s](const BufferView& b) { s = b.bytes(); }, + [&s](const std::shared_ptr& b) { s = b->bytes(); }); + + return s; + } + + [[nodiscard]] bool is_uninitialized() const { + return std::holds_alternative(buffer_); + } + + void move_buffer(VariantBuffer &&that) { + if(is_uninitialized() || that.is_uninitialized()) { + std::swap(buffer_, that.buffer_); + } else if (!(is_owning_buffer() ^ that.is_owning_buffer())) { + if (is_owning_buffer()) { + swap(*std::get>(buffer_), *std::get>(that.buffer_)); + } else { + swap(std::get(buffer_), std::get(that.buffer_)); + } + } else if (is_owning_buffer()) { + log::storage().info("Copying segment"); + // data of segment being moved is not owned, moving it is dangerous, copying instead + std::get(that.buffer_).copy_to(*std::get>(buffer_)); + } else { + // data of this segment is a view, but the move data is moved + buffer_ = std::move(std::get>(that.buffer_)); + } + } + + [[nodiscard]] bool is_owning_buffer() const { + return std::holds_alternative>(buffer_); + } + + void force_own_buffer() { + if (!is_owning_buffer()) { + auto b = std::make_shared(); + std::get(buffer_).copy_to(*b); + buffer_ = std::move(b); + } + } +}; + } // namespace arcticdb diff --git a/cpp/arcticdb/util/buffer_holder.hpp b/cpp/arcticdb/util/buffer_holder.hpp index 603500eeea..8388a0f47d 100644 --- a/cpp/arcticdb/util/buffer_holder.hpp +++ 
b/cpp/arcticdb/util/buffer_holder.hpp @@ -7,7 +7,7 @@ #pragma once -#include +#include #include namespace arcticdb { @@ -22,4 +22,4 @@ struct BufferHolder { return column; } }; -} +} //namespace arcticdb diff --git a/cpp/arcticdb/util/cursored_buffer.hpp b/cpp/arcticdb/util/cursored_buffer.hpp index 38a2f97a3f..fbe1ff0aba 100644 --- a/cpp/arcticdb/util/cursored_buffer.hpp +++ b/cpp/arcticdb/util/cursored_buffer.hpp @@ -129,7 +129,6 @@ struct CursoredBuffer { template const T *ptr_cast(position_t t_pos, size_t required_bytes) const { -// return reinterpret_cast(buffer_.template ptr_cast(t_pos * sizeof(T), required_bytes)); return reinterpret_cast(buffer_.template ptr_cast(t_pos * sizeof(T), required_bytes)); } diff --git a/cpp/arcticdb/util/lock_table.hpp b/cpp/arcticdb/util/lock_table.hpp index 699594c3ac..b56efdb2e2 100644 --- a/cpp/arcticdb/util/lock_table.hpp +++ b/cpp/arcticdb/util/lock_table.hpp @@ -40,7 +40,7 @@ struct ScopedLock { }; class LockTable { - std::unordered_map> locks_; + std::unordered_map> locks_; std::mutex mutex_; public: LockTable() = default; diff --git a/cpp/arcticdb/util/magic_num.hpp b/cpp/arcticdb/util/magic_num.hpp index 07ad429d0f..192c754edd 100644 --- a/cpp/arcticdb/util/magic_num.hpp +++ b/cpp/arcticdb/util/magic_num.hpp @@ -27,7 +27,7 @@ struct MagicNum { void check() const { std::string_view expected(reinterpret_cast(&Magic), 4); - util::check(magic_ == Magic, "Magic number failure, expected {}({}) got {}({})", Magic, expected, magic_); + util::check(magic_ == Magic, "Magic number failure, expected {}({}) got {}", Magic, expected, magic_); } private: diff --git a/cpp/arcticdb/util/pb_util.hpp b/cpp/arcticdb/util/pb_util.hpp index 307e0d00cb..37cbebbcb1 100644 --- a/cpp/arcticdb/util/pb_util.hpp +++ b/cpp/arcticdb/util/pb_util.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -19,11 +20,6 @@ namespace arcticdb::util { -template -[[noreturn]] void raise_error_msg(const char *pattern, const Msg &msg) { - // google::protobuf::TextFormat::PrintToString(msg, &s); - throw ExcType(fmt::format(fmt::runtime(pattern), msg.DebugString())); -} namespace { constexpr char TYPE_URL[] = "cxx.arctic.org"; @@ -37,7 +33,7 @@ void pack_to_any(const Msg &msg, google::protobuf::Any &any) { inline folly::StringPiece get_arcticdb_pb_type_name(const google::protobuf::Any &any) { folly::StringPiece sp{any.type_url()}; if (!sp.startsWith(TYPE_URL)) { - raise_error_msg("Not a valid arcticc proto msg", any); + util::raise_rte("Not a valid arcticc proto msg", any.DebugString()); } return sp.subpiece(sizeof(TYPE_URL), sp.size()); } diff --git a/cpp/arcticdb/util/ref_counted_map.hpp b/cpp/arcticdb/util/ref_counted_map.hpp deleted file mode 100644 index addd5c66a8..0000000000 --- a/cpp/arcticdb/util/ref_counted_map.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2023 Man Group Operations Limited - * - * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. - * - * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
- */ - -#pragma once - -#include -#include - -namespace arcticdb { - -class SegmentMap { - using ValueType = std::variant; - using ContainerType = std::unordered_map>; - std::atomic id_; - std::shared_ptr store_; - std::mutex mutex_; -public: - using const_iterator = ContainerType::const_iterator; - - SegmentMap(const std::shared_ptr& store) : - store_(store) { - } - - uint64_t insert(std::shared_ptr&& seg) { - const auto id = id_++; - std::shared_ptr value( - std::move(seg), - [this, id](ValueType* v) - map_.erase(id); - delete v; - } - ); - map_.emplace(id, value); - return id; - } - - int size() const { - return map_.size(); - } - - const_iterator begin() const { - return map_.begin(); - } - - const_iterator end() const { - return map_.end(); - } -private: - container_type map_; -}; -} \ No newline at end of file diff --git a/cpp/arcticdb/util/sparse_utils.hpp b/cpp/arcticdb/util/sparse_utils.hpp index 13bb5a78b0..dcde86cc68 100644 --- a/cpp/arcticdb/util/sparse_utils.hpp +++ b/cpp/arcticdb/util/sparse_utils.hpp @@ -135,7 +135,7 @@ inline void dump_bitvector(const util::BitMagic& bv) { vals.push_back(idx); ++en; } - log::version().info("Bit vector values {}", vals); + ARCTICDB_DEBUG(log::version(), "Bit vector values {}", vals); } } diff --git a/cpp/arcticdb/util/test/generators.hpp b/cpp/arcticdb/util/test/generators.hpp index c73c526842..27855679fb 100644 --- a/cpp/arcticdb/util/test/generators.hpp +++ b/cpp/arcticdb/util/test/generators.hpp @@ -408,7 +408,7 @@ struct SegmentToInputFrameAdapter { input_frame_->desc = segment_.descriptor(); input_frame_->num_rows = segment_.row_count(); size_t col{0}; - if (segment_.descriptor().index().type() != IndexDescriptor::ROWCOUNT) { + if (segment_.descriptor().index().type() != IndexDescriptorImpl::Type::ROWCOUNT) { for (size_t i = 0; i < segment_.descriptor().index().field_count(); ++i) { input_frame_->index_tensor = tensor_from_column(segment_.column(col)); ++col; diff --git a/cpp/arcticdb/util/test/gtest_utils.hpp b/cpp/arcticdb/util/test/gtest_utils.hpp index 0085533e34..a923929c84 100644 --- a/cpp/arcticdb/util/test/gtest_utils.hpp +++ b/cpp/arcticdb/util/test/gtest_utils.hpp @@ -18,7 +18,7 @@ template<> inline void PrintTo(const our_type&val, ::std::ostream* os) { fmt::pr MAKE_GTEST_FMT(arcticdb::entity::RefKey, "{}") MAKE_GTEST_FMT(arcticdb::entity::AtomKeyImpl, "{}") MAKE_GTEST_FMT(arcticdb::entity::VariantKey, "VariantKey({})") -MAKE_GTEST_FMT(arcticdb::entity::VariantId, "VariantId({})") +MAKE_GTEST_FMT(arcticdb::VariantId, "VariantId({})") // FUTURE (C++20): with capabilities, we can write a generic PrintTo that covers all fmt::format-able types that is // not ambiguous with the built-in diff --git a/cpp/arcticdb/util/test/rapidcheck_generators.hpp b/cpp/arcticdb/util/test/rapidcheck_generators.hpp index f7ad4afdaa..414511f29d 100644 --- a/cpp/arcticdb/util/test/rapidcheck_generators.hpp +++ b/cpp/arcticdb/util/test/rapidcheck_generators.hpp @@ -71,9 +71,9 @@ struct Arbitrary { for (const auto& field_name: field_names) { field_descriptors.add_field(arcticdb::entity::scalar_field(*gen_numeric_datatype(), field_name)); } - auto desc =stream_descriptor(arcticdb::entity::StreamId{id}, arcticdb::stream::RowCountIndex{}, arcticdb::fields_from_range(field_descriptors)); + auto desc = stream_descriptor_from_range(arcticdb::StreamId{id}, arcticdb::stream::RowCountIndex{}, std::move(field_descriptors)); return gen::build( - gen::set(&StreamDescriptor::data_, gen::just(desc.data_)), + gen::set(&StreamDescriptor::segment_desc_, 
gen::just(desc.segment_desc_)), gen::set(&StreamDescriptor::fields_, gen::just(desc.fields_)) ); } diff --git a/cpp/arcticdb/util/test/test_slab_allocator.cpp b/cpp/arcticdb/util/test/test_slab_allocator.cpp index d2ae018e17..199f67bfd0 100644 --- a/cpp/arcticdb/util/test/test_slab_allocator.cpp +++ b/cpp/arcticdb/util/test/test_slab_allocator.cpp @@ -38,15 +38,12 @@ pointer_set call_alloc(MemoryChunk& mc, std::size_t n, int64_t& exe mcps.insert(mc.allocate()); auto time_end = std::chrono::high_resolution_clock::now(); - - execution_time_ms = - std::chrono::duration_cast(time_end - time_begin).count(); + execution_time_ms = std::chrono::duration_cast(time_end - time_begin).count(); return mcps; } template -void check_sets(const pointer_set& s1, const pointer_set& s2) -{ +void check_sets(const pointer_set& s1, const pointer_set& s2) { auto end = std::cend(s2); for (auto* p : s1) if (s2.find(p) != end) @@ -54,8 +51,7 @@ void check_sets(const pointer_set& s1, const pointer_set -void run_test(MemoryChunk& mc, unsigned int K) -{ +void run_test(MemoryChunk& mc, unsigned int K) { std::vector execution_times(num_threads); int64_t avg = 0; for (size_t k = 0; k < K; ++k ) { @@ -72,7 +68,6 @@ void run_test(MemoryChunk& mc, unsigned int K) t.wait(); for (size_t i = 0; i < num_threads; ++i) { -// std::cout << "Execution time for thread " << i << ": " << execution_times[ i ] << " ms\n"; avg += execution_times[i] ; } @@ -139,7 +134,6 @@ TEST(SlabAlloc, Integer) { ASSERT_EQ(*p, i); mc128.deallocate(p); } - } TEST(SlabAlloc, Char32) { diff --git a/cpp/arcticdb/util/timer.hpp b/cpp/arcticdb/util/timer.hpp index 82aa7dd280..9a26c8b1f2 100644 --- a/cpp/arcticdb/util/timer.hpp +++ b/cpp/arcticdb/util/timer.hpp @@ -202,7 +202,7 @@ class interval_timer { /* Timer helper, use like so: * ScopedTimer timer{"read_partial", [](auto msg) { - log::version().info(msg); + ARCTICDB_DEBUG(log::version(), msg); }}; */ class ScopedTimer { @@ -261,7 +261,7 @@ class ScopedTimer { /* Timer helper, use like so: ScopedTimer timer{"read_partial", [](auto time) { - log::version().info(time); + ARCTICDB_DEBUG(log::version(), time); }}; */ diff --git a/cpp/arcticdb/util/type_handler.hpp b/cpp/arcticdb/util/type_handler.hpp index 87fcb6296f..e1b2819390 100644 --- a/cpp/arcticdb/util/type_handler.hpp +++ b/cpp/arcticdb/util/type_handler.hpp @@ -8,8 +8,8 @@ #pragma once #include -#include -#include +#include +#include #include @@ -19,6 +19,7 @@ namespace arcticdb { struct BufferHolder; +struct ColumnMapping; struct ITypeHandler { template @@ -31,21 +32,21 @@ struct ITypeHandler { void handle_type( const uint8_t*& source, uint8_t* dest, - const VariantField& encoded_field_info, + const EncodedFieldImpl& encoded_field_info, + const ColumnMapping& mapping, size_t dest_bytes, - std::shared_ptr buffers, - EncodingVersion encoding_version, - const ColumnMapping& m + const std::shared_ptr& buffers, + EncodingVersion encoding_version ) { folly::poly_call<0>( *this, source, dest, encoded_field_info, + mapping, dest_bytes, buffers, - encoding_version, - m + encoding_version ); } diff --git a/cpp/arcticdb/version/local_versioned_engine.cpp b/cpp/arcticdb/version/local_versioned_engine.cpp index 1b75b7cfe1..960c87e35b 100644 --- a/cpp/arcticdb/version/local_versioned_engine.cpp +++ b/cpp/arcticdb/version/local_versioned_engine.cpp @@ -46,6 +46,7 @@ void LocalVersionedEngine::initialize(const std::shared_ptr& l async::TaskScheduler::set_forked(false); async::TaskScheduler::reattach_instance(); } + (void)async::TaskScheduler::instance(); } template 
LocalVersionedEngine::LocalVersionedEngine(const std::shared_ptr& library, const util::SysClock&); @@ -370,10 +371,10 @@ folly::Future LocalVersionedEngine::get_descriptor( .thenValue([](auto&& key_seg_pair) -> DescriptorItem { auto key = to_atom(std::move(key_seg_pair.first)); auto seg = std::move(key_seg_pair.second); - auto tsd = std::make_optional(); - if (seg.has_metadata()) { - seg.metadata()->UnpackTo(&(*tsd)); - } + std::optional timeseries_descriptor; + if (seg.has_index_descriptor()) + timeseries_descriptor.emplace(seg.index_descriptor()); + std::optional start_index; std::optional end_index; if (seg.row_count() > 0) { @@ -394,7 +395,7 @@ folly::Future LocalVersionedEngine::get_descriptor( } }); } - return DescriptorItem{std::move(key), start_index, end_index, std::move(tsd)}; + return DescriptorItem{std::move(key), start_index, end_index, std::move(timeseries_descriptor)}; }); } @@ -611,7 +612,7 @@ VersionedItem LocalVersionedEngine::write_versioned_metadata_internal( stream_id, VersionQuery{}); if(update_info.previous_index_key_.has_value()) { - ARCTICDB_DEBUG(log::version(), "write_versioned_dataframe for stream_id: {}", stream_id); + ARCTICDB_DEBUG(log::version(), "write_versioned_metadata for stream_id: {}", stream_id); auto index_key = UpdateMetadataTask{store(), update_info, std::move(user_meta)}(); write_version_and_prune_previous(prune_previous_versions, index_key, update_info.previous_index_key_); return VersionedItem{ std::move(index_key) }; @@ -738,10 +739,10 @@ VersionedItem LocalVersionedEngine::write_individual_segment( ) { ARCTICDB_SAMPLE(WriteVersionedDataFrame, 0) - ARCTICDB_RUNTIME_DEBUG(log::version(), "Command: write_versioned_dataframe"); + ARCTICDB_RUNTIME_DEBUG(log::version(), "Command: write individual segment"); auto [maybe_prev, deleted] = ::arcticdb::get_latest_version(store(), version_map(), stream_id, VersionQuery{}); auto version_id = get_next_version_from_key(maybe_prev); - ARCTICDB_DEBUG(log::version(), "write_versioned_dataframe for stream_id: {} , version_id = {}", stream_id, version_id); + ARCTICDB_DEBUG(log::version(), "write individual segment for stream_id: {} , version_id = {}", stream_id, version_id); auto index = index_type_from_descriptor(segment.descriptor()); auto range = get_range_from_segment(index, segment); @@ -1732,9 +1733,9 @@ std::unordered_map LocalVersionedEngine::scan_object_size ++sizes_info.count; key_size_calculators.emplace_back(std::forward(k), [&sizes_info] (auto&& ks) { auto key_seg = std::move(ks); - sizes_info.compressed_size += key_seg.segment().total_segment_size(); - auto desc = key_seg.segment().header().stream_descriptor(); - sizes_info.uncompressed_size += desc.in_bytes(); + sizes_info.compressed_size += key_seg.segment().size(); + const auto& desc = key_seg.segment().descriptor(); + sizes_info.uncompressed_size += desc.uncompressed_bytes(); return key_seg.variant_key(); }); }); @@ -1765,9 +1766,9 @@ std::unordered_map> LocalVer auto key_seg = std::move(ks); auto variant_key = key_seg.variant_key(); auto stream_id = variant_key_id(variant_key); - auto compressed_size = key_seg.segment().total_segment_size(); - auto desc = key_seg.segment().header().stream_descriptor(); - auto uncompressed_size = desc.in_bytes(); + auto compressed_size = key_seg.segment().size(); + auto desc = key_seg.segment().descriptor(); + auto uncompressed_size = desc.uncompressed_bytes(); { std::lock_guard lock{mutex}; diff --git a/cpp/arcticdb/version/python_bindings.cpp b/cpp/arcticdb/version/python_bindings.cpp index 
738d289666..afcc9f88b7 100644 --- a/cpp/arcticdb/version/python_bindings.cpp +++ b/cpp/arcticdb/version/python_bindings.cpp @@ -232,11 +232,7 @@ void register_bindings(py::module &version, py::exception>(version, "FrameSlice") .def_property_readonly("col_range", &pipelines::FrameSlice::columns) @@ -765,6 +761,21 @@ void register_bindings(py::module &version, py::exception(), "Read a dataframe from the store"); + + version.def("sorted_value_name", [] (SortedValue sorted_value) { + switch(sorted_value) { + case SortedValue::UNKNOWN: + return "UNKNOWN"; + case SortedValue::ASCENDING: + return "ASCENDING"; + case SortedValue::DESCENDING: + return "DESCENDING"; + case SortedValue::UNSORTED: + return "UNSORTED"; + default: + util::raise_rte("Unknown sorted value: {}", static_cast(sorted_value)); + } + }); } } //namespace arcticdb::version_store diff --git a/cpp/arcticdb/version/schema_checks.hpp b/cpp/arcticdb/version/schema_checks.hpp index caaa495727..695f27e383 100644 --- a/cpp/arcticdb/version/schema_checks.hpp +++ b/cpp/arcticdb/version/schema_checks.hpp @@ -33,13 +33,13 @@ inline IndexDescriptor::Type get_common_index_type(const IndexDescriptor::Type& if (left == right) { return left; } - if (left == IndexDescriptor::EMPTY) { + if (left == IndexDescriptor::Type::EMPTY) { return right; } - if (right == IndexDescriptor::EMPTY) { + if (right == IndexDescriptor::Type::EMPTY) { return left; } - return IndexDescriptor::UNKNOWN; + return IndexDescriptor::Type::UNKNOWN; } inline void check_normalization_index_match( @@ -53,20 +53,20 @@ inline void check_normalization_index_match( if (operation == UPDATE) { const bool new_is_timeseries = std::holds_alternative(frame.index); util::check_rte( - (old_idx_kind == IndexDescriptor::TIMESTAMP || old_idx_kind == IndexDescriptor::EMPTY) && new_is_timeseries, + (old_idx_kind == IndexDescriptor::Type::TIMESTAMP || old_idx_kind == IndexDescriptor::Type::EMPTY) && new_is_timeseries, "Update will not work as expected with a non-timeseries index" ); } else { const IndexDescriptor::Type common_index_type = get_common_index_type(old_idx_kind, new_idx_kind); if (empty_types) { normalization::check( - common_index_type != IndexDescriptor::UNKNOWN, + common_index_type != IndexDescriptor::Type::UNKNOWN, "Cannot append {} index to {} index", index_type_to_str(new_idx_kind), index_type_to_str(old_idx_kind) ); } else { - // (old_idx_kind == IndexDescriptor::TIMESTAMP && new_idx_kind == IndexDescriptor::ROWCOUNT) is left to preserve + // (old_idx_kind == IndexDescriptor::Type::TIMESTAMP && new_idx_kind == IndexDescriptor::Type::ROWCOUNT) is left to preserve // pre-empty index behavior with pandas 2, see test_empty_writes.py::test_append_empty_series. Empty pd.Series // have Rowrange index, but due to: https://github.com/man-group/ArcticDB/blob/bd1776291fe402d8b18af9fea865324ebd7705f1/python/arcticdb/version_store/_normalization.py#L545 // it gets converted to DatetimeIndex (all empty indexes except categorical and multiindex are converted to datetime index @@ -76,8 +76,8 @@ inline void check_normalization_index_match( // after we enable the empty index. 
const bool input_frame_is_series = frame.norm_meta.has_series(); normalization::check( - common_index_type != IndexDescriptor::UNKNOWN || - (input_frame_is_series && old_idx_kind == IndexDescriptor::TIMESTAMP && new_idx_kind == IndexDescriptor::ROWCOUNT), + common_index_type != IndexDescriptor::Type::UNKNOWN || + (input_frame_is_series && old_idx_kind == IndexDescriptor::Type::TIMESTAMP && new_idx_kind == IndexDescriptor::Type::ROWCOUNT), "Cannot append {} index to {} index", index_type_to_str(new_idx_kind), index_type_to_str(old_idx_kind) @@ -91,7 +91,7 @@ inline bool columns_match( const StreamDescriptor& new_df_descriptor ) { const int index_field_size = - df_in_store_descriptor.index().type() == IndexDescriptor::EMPTY ? new_df_descriptor.index().field_count() : 0; + df_in_store_descriptor.index().type() == IndexDescriptor::Type::EMPTY ? new_df_descriptor.index().field_count() : 0; // The empty index is compatible with all other index types. Differences in the index fields in this case is // allowed. The index fields are always the first in the list. if (df_in_store_descriptor.fields().size() + index_field_size != new_df_descriptor.fields().size()) { diff --git a/cpp/arcticdb/version/snapshot.cpp b/cpp/arcticdb/version/snapshot.cpp index 027d7b6114..e702fe1317 100644 --- a/cpp/arcticdb/version/snapshot.cpp +++ b/cpp/arcticdb/version/snapshot.cpp @@ -41,14 +41,12 @@ void write_snapshot_entry( } // Serialize and store the python metadata in the journal entry for snapshot. if (!user_meta.is_none()) { - arcticdb::proto::descriptors::UserDefinedMetadata user_meta_proto; - google::protobuf::Any output = {}; - python_util::pb_from_python(user_meta, user_meta_proto); - output.PackFrom(user_meta_proto); - snapshot_agg.set_metadata(std::move(output)); + TimeseriesDescriptor timeseries_descriptor; + python_util::pb_from_python(user_meta, *timeseries_descriptor.mutable_proto().mutable_user_meta()); + snapshot_agg.set_timeseries_descriptor(timeseries_descriptor); } - snapshot_agg.commit(); + snapshot_agg.finalize(); if (log_changes) { log_create_snapshot(store, snapshot_id); } @@ -111,7 +109,7 @@ void iterate_snapshots(const std::shared_ptr& store, folly::Function; constexpr std::string_view version_string = "_v2_"; constexpr NumericIndex version_identifier = std::numeric_limits::max(); -SymbolListData::SymbolListData(std::shared_ptr version_map, entity::StreamId type_indicator, uint32_t seed) : +SymbolListData::SymbolListData(std::shared_ptr version_map, StreamId type_indicator, uint32_t seed) : type_holder_(std::move(type_indicator)), seed_(seed), version_map_(std::move(version_map)){ @@ -160,7 +160,6 @@ T scalar_at(const SegmentInMemory& seg, position_t row, position_t col){ return scalar.value(); } - StreamId stream_id_from_segment( DataType data_type, const SegmentInMemory& seg, @@ -202,7 +201,7 @@ std::vector read_old_style_list_from_storage(const SegmentInMem std::vector read_new_style_list_from_storage(const SegmentInMemory& seg) { std::vector output; - if(seg.row_count() == 0) + if(seg.empty()) return output; const auto data_type = get_symbol_data_type(seg); @@ -316,7 +315,6 @@ bool contains_unknown_reference_ids(const std::vector& updated) }); } - SymbolVectorResult cannot_validate_symbol_vector() { return {ProblematicResult{true}}; } @@ -379,8 +377,6 @@ ProblematicResult is_problematic( if(existing.reference_id_ < latest.reference_id_) return not_a_problem(); - - if(all_same_action) return not_a_problem(); @@ -682,11 +678,6 @@ SegmentInMemory write_entries_to_symbol_segment( 
SegmentInMemory create_empty_segment(const StreamId& stream_id) { SegmentInMemory output{StreamDescriptor{stream_id}}; - google::protobuf::Any any = {}; - arcticdb::proto::descriptors::SymbolListDescriptor metadata; - metadata.set_enabled(true); - any.PackFrom(metadata); - output.set_metadata(std::move(any)); return output; } @@ -706,6 +697,7 @@ VariantKey write_symbols( segment = write_entries_to_symbol_segment(stream_id, type_holder, symbols); } + ARCTICDB_RUNTIME_DEBUG(log::symbol(), "Writing symbol segment with stream id {} and {} rows", stream_id, segment.row_count()); return store->write_sync(KeyType::SYMBOL_LIST, 0, stream_id, NumericIndex{ 0 }, NumericIndex{ 0 }, std::move(segment)); } diff --git a/cpp/arcticdb/version/symbol_list.hpp b/cpp/arcticdb/version/symbol_list.hpp index a7face5802..f24181380e 100644 --- a/cpp/arcticdb/version/symbol_list.hpp +++ b/cpp/arcticdb/version/symbol_list.hpp @@ -13,26 +13,22 @@ #include #include - - #include #include - - namespace arcticdb { struct LoadResult; class Store; struct SymbolListData { - entity::StreamId type_holder_; + StreamId type_holder_; uint32_t seed_; std::shared_ptr version_map_; std::atomic warned_expected_slowdown_ = false; - explicit SymbolListData(std::shared_ptr version_map, entity::StreamId type_indicator = entity::StringId(), + explicit SymbolListData(std::shared_ptr version_map, StreamId type_indicator = StringId(), uint32_t seed = 0); }; @@ -48,7 +44,7 @@ enum class ActionType : uint8_t { DELETE }; -inline entity::StreamId action_id(ActionType action) { +inline StreamId action_id(ActionType action) { switch (action) { case ActionType::ADD: return StringId{AddSymbol}; @@ -85,10 +81,10 @@ inline bool operator==(const SymbolEntryData& l, const SymbolEntryData& r) { } struct SymbolListEntry : public SymbolEntryData { - entity::StreamId stream_id_; + StreamId stream_id_; SymbolListEntry( - entity::StreamId stream_id, + StreamId stream_id, entity::VersionId reference_id, timestamp reference_time, ActionType action @@ -149,27 +145,27 @@ ProblematicResult is_problematic(const std::vector& updated, ti class SymbolList { SymbolListData data_; public: - explicit SymbolList(std::shared_ptr version_map, entity::StreamId type_indicator = entity::StringId(), + explicit SymbolList(std::shared_ptr version_map, StreamId type_indicator = StringId(), uint32_t seed = 0) : data_(std::move(version_map), std::move(type_indicator), seed) { } - std::set load(const std::shared_ptr& version_map, const std::shared_ptr& store, bool no_compaction); + std::set load(const std::shared_ptr& version_map, const std::shared_ptr& store, bool no_compaction); - std::vector get_symbols(const std::shared_ptr& store, bool no_compaction=false) { + std::vector get_symbols(const std::shared_ptr& store, bool no_compaction=false) { auto symbols = load(data_.version_map_, store, no_compaction); return {std::make_move_iterator(symbols.begin()), std::make_move_iterator(symbols.end())}; } - std::set get_symbol_set(const std::shared_ptr& store) { + std::set get_symbol_set(const std::shared_ptr& store) { return load(data_.version_map_, store, false); } size_t compact(const std::shared_ptr& store); - static void add_symbol(const std::shared_ptr& store, const entity::StreamId& symbol, entity::VersionId reference_id); + static void add_symbol(const std::shared_ptr& store, const StreamId& symbol, entity::VersionId reference_id); - static void remove_symbol(const std::shared_ptr& store, const entity::StreamId& symbol, entity::VersionId reference_id); + static void 
remove_symbol(const std::shared_ptr& store, const StreamId& symbol, entity::VersionId reference_id); static void clear(const std::shared_ptr& store); @@ -187,13 +183,13 @@ std::vector delete_keys( struct WriteSymbolTask : async::BaseTask { const std::shared_ptr store_; std::shared_ptr symbol_list_; - const entity::StreamId stream_id_; + const StreamId stream_id_; const entity::VersionId reference_id_; WriteSymbolTask( std::shared_ptr store, std::shared_ptr symbol_list, - entity::StreamId stream_id, + StreamId stream_id, entity::VersionId reference_id) : store_(std::move(store)), symbol_list_(std::move(symbol_list)), @@ -233,4 +229,4 @@ struct formatter { } }; -} //namespace fmt \ No newline at end of file +} //namespace fmt diff --git a/cpp/arcticdb/version/test/test_snapshot.cpp b/cpp/arcticdb/version/test/test_snapshot.cpp index c0eb544e65..cd03a81788 100644 --- a/cpp/arcticdb/version/test/test_snapshot.cpp +++ b/cpp/arcticdb/version/test/test_snapshot.cpp @@ -86,7 +86,6 @@ TEST(SnapshotCreate, Basic) { auto version_key = std::move(fut.wait().value()); ::sleep(1); - log::root().info("{}", version_key); std::this_thread::sleep_for(std::chrono::milliseconds(500)); version_store->snapshot("blah"); version_store->list_snapshots(); diff --git a/cpp/arcticdb/version/test/test_version_store.cpp b/cpp/arcticdb/version/test/test_version_store.cpp index 4339777ae1..0964b062f0 100644 --- a/cpp/arcticdb/version/test/test_version_store.cpp +++ b/cpp/arcticdb/version/test/test_version_store.cpp @@ -769,7 +769,7 @@ TEST(VersionStore, TestWriteAppendMapHead) { auto key = atom_key_builder().version_id(0).creation_ts(PilotedClock::nanos_since_epoch()).content_hash(0).build(symbol, KeyType::APPEND_DATA); - auto descriptor = StreamDescriptor{symbol, IndexDescriptor{1u, IndexDescriptor::TIMESTAMP}, std::make_shared(fields_from_range(fields))}; + auto descriptor = StreamDescriptor{symbol, IndexDescriptorImpl{1u, IndexDescriptorImpl::Type::TIMESTAMP}, std::make_shared(fields_from_range(fields))}; write_head(version_store._test_get_store(), key, num_rows); auto [next_key, total_rows] = read_head(version_store._test_get_store(), symbol); ASSERT_EQ(next_key, key); diff --git a/cpp/arcticdb/version/test/version_map_model.hpp b/cpp/arcticdb/version/test/version_map_model.hpp index cf6be722d5..718caf8189 100644 --- a/cpp/arcticdb/version/test/version_map_model.hpp +++ b/cpp/arcticdb/version/test/version_map_model.hpp @@ -35,14 +35,14 @@ struct MapStorePair { } void write_version(const std::string &id) { - log::version().info("MapStorePair, write version {}", id); + ARCTICDB_DEBUG(log::version(), "MapStorePair, write version {}", id); auto prev = get_latest_version(store_, map_, id, pipelines::VersionQuery{}).first; auto version_id = prev ? prev->version_id() + 1 : 0; map_->write_version(store_, make_test_index_key(id, version_id, KeyType::TABLE_INDEX), prev); } void delete_all_versions(const std::string &id) { - log::version().info("MapStorePair, delete_all_versions {}", id); + ARCTICDB_DEBUG(log::version(), "MapStorePair, delete_all_versions {}", id); if(tombstones_) map_->delete_all_versions(store_, id); else @@ -50,7 +50,7 @@ struct MapStorePair { } void write_and_prune_previous(const std::string &id) { - log::version().info("MapStorePair, write_and_prune_previous version {}", id); + ARCTICDB_DEBUG(log::version(), "MapStorePair, write_and_prune_previous version {}", id); auto prev = get_latest_version(store_, map_, id, pipelines::VersionQuery{}).first; auto version_id = prev ? 
prev->version_id() + 1 : 0; @@ -112,14 +112,14 @@ struct VersionMapTombstonesModel { VersionMapTombstonesModel() = default; std::optional get_latest_version(const std::string &id) const { - log::version().info("VersionMapTombstonesModel, get_latest_version {}", id); + ARCTICDB_DEBUG(log::version(), "VersionMapTombstonesModel, get_latest_version {}", id); auto it = data_.find(id); return it == data_.end() || it->second.empty() ? std::nullopt : std::make_optional(*it->second.begin()); } std::optional get_latest_undeleted_version(const std::string &id) const { - log::version().info("VersionMapTombstonesModel, get_latest_undeleted_version {}", id); + ARCTICDB_DEBUG(log::version(), "VersionMapTombstonesModel, get_latest_undeleted_version {}", id); auto it = data_.find(id); if(it == data_.end()) return std::nullopt; @@ -132,7 +132,7 @@ struct VersionMapTombstonesModel { } std::vector get_all_versions(const std::string &id) const { - log::version().info("VersionMapTombstonesModel, get_all_versions", id); + ARCTICDB_DEBUG(log::version(), "VersionMapTombstonesModel, get_all_versions", id); std::vector output; auto it = data_.find(id); if (it != data_.end()) { @@ -145,14 +145,14 @@ struct VersionMapTombstonesModel { } void write_version(const std::string &id) { - log::version().info("VersionMapTombstonesModel, write version {}", id); + ARCTICDB_DEBUG(log::version(), "VersionMapTombstonesModel, write version {}", id); auto prev = get_latest_version(id); auto version_id = prev ? *prev + 1 : 0; data_[id].insert(version_id); } void delete_versions(const std::vector& versions, const std::string& id) { - log::version().info("VersionMapTombstonesModel, delete_versions {}", id); + ARCTICDB_DEBUG(log::version(), "VersionMapTombstonesModel, delete_versions {}", id); auto& tombstones = tombstones_[id]; for(auto v : versions) tombstones.insert(v); @@ -163,7 +163,7 @@ struct VersionMapTombstonesModel { } void write_and_prune_previous(const std::string &id) { - log::version().info("VersionMapTombstonesModel, write_and_prune_previous version {}", id); + ARCTICDB_DEBUG(log::version(), "VersionMapTombstonesModel, write_and_prune_previous version {}", id); auto prev = get_latest_version(id); VersionId version_id{0}; if (prev) { diff --git a/cpp/arcticdb/version/version_core-inl.hpp b/cpp/arcticdb/version/version_core-inl.hpp index 501a88263e..7fb742072f 100644 --- a/cpp/arcticdb/version/version_core-inl.hpp +++ b/cpp/arcticdb/version/version_core-inl.hpp @@ -66,8 +66,8 @@ void merge_frames_for_keys_impl( auto compare = [](const std::unique_ptr& left, const std::unique_ptr& right) { - return pipelines::index::index_value_from_row(left->row(), IndexDescriptor::TIMESTAMP, 0) > - pipelines::index::index_value_from_row(right->row(), IndexDescriptor::TIMESTAMP, 0); + return pipelines::index::index_value_from_row(left->row(), IndexDescriptor::Type::TIMESTAMP, 0) > + pipelines::index::index_value_from_row(right->row(), IndexDescriptor::Type::TIMESTAMP, 0); }; movable_priority_queue, std::vector>, decltype(compare)> input_streams{compare}; diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index 38289d6fb3..1f0497b869 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ b/cpp/arcticdb/version/version_core.cpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -109,13 +108,13 @@ folly::Future async_write_dataframe_impl( } namespace { -IndexDescriptor::Proto check_index_match(const arcticdb::stream::Index& index, const IndexDescriptor::Proto& desc) { 
+IndexDescriptorImpl check_index_match(const arcticdb::stream::Index& index, const IndexDescriptorImpl& desc) { if (std::holds_alternative(index)) util::check( - desc.kind() == IndexDescriptor::TIMESTAMP || desc.kind() == IndexDescriptor::EMPTY, + desc.type() == IndexDescriptor::Type::TIMESTAMP || desc.type() == IndexDescriptor::Type::EMPTY, "Index mismatch, cannot update a non-timeseries-indexed frame with a timeseries"); else - util::check(desc.kind() == IndexDescriptor::ROWCOUNT, + util::check(desc.type() == IndexDescriptorImpl::Type::ROWCOUNT, "Index mismatch, cannot update a timeseries with a non-timeseries-indexed frame"); return desc; @@ -128,7 +127,7 @@ void sorted_data_check_append(const InputTensorFrame& frame, index::IndexSegment } sorting::check( !std::holds_alternative(frame.index) || - index_segment_reader.mutable_tsd().mutable_proto().stream_descriptor().sorted() == arcticdb::proto::descriptors::SortedValue::ASCENDING, + index_segment_reader.tsd().sorted() == SortedValue::ASCENDING, "When calling append with validate_index enabled, the existing data must be sorted"); } @@ -145,7 +144,7 @@ folly::Future async_append_impl( ARCTICDB_DEBUG(log::version(), "append stream_id: {} , version_id: {}", stream_id, update_info.next_version_id_); auto index_segment_reader = index::get_index_reader(*(update_info.previous_index_key_), store); bool bucketize_dynamic = index_segment_reader.bucketize_dynamic(); - auto row_offset = index_segment_reader.tsd().proto().total_rows(); + auto row_offset = index_segment_reader.tsd().total_rows(); util::check_rte(!index_segment_reader.is_pickled(), "Cannot append to pickled data"); frame->set_offset(static_cast(row_offset)); fix_descriptor_mismatch_or_throw(APPEND, options.dynamic_schema, index_segment_reader, *frame, empty_types); @@ -290,11 +289,9 @@ VersionedItem delete_range_impl( auto flattened_slice_and_keys = flatten_and_fix_rows(groups, row_count); std::sort(std::begin(flattened_slice_and_keys), std::end(flattened_slice_and_keys)); - bool bucketize_dynamic = index_segment_reader.bucketize_dynamic(); - auto time_series = timseries_descriptor_from_index_segment(row_count, std::move(index_segment_reader), std::nullopt, bucketize_dynamic); - auto version_key_fut = util::variant_match(index, [&time_series, &flattened_slice_and_keys, &stream_id, &version_id, &store] (auto idx) { + auto version_key_fut = util::variant_match(index, [&index_segment_reader, &flattened_slice_and_keys, &stream_id, &version_id, &store] (auto idx) { using IndexType = decltype(idx); - return pipelines::index::write_index(std::move(time_series), std::move(flattened_slice_and_keys), IndexPartialKey{stream_id, version_id}, store); + return pipelines::index::write_index(index_segment_reader.tsd(), std::move(flattened_slice_and_keys), IndexPartialKey{stream_id, version_id}, store); }); auto version_key = std::move(version_key_fut).get(); auto versioned_item = VersionedItem(to_atom(std::move(version_key))); @@ -302,19 +299,19 @@ VersionedItem delete_range_impl( return versioned_item; } -void sorted_data_check_update(InputTensorFrame& frame, index::IndexSegmentReader& index_segment_reader){ +void sorted_data_check_update(InputTensorFrame& frame, const index::IndexSegmentReader& index_segment_reader){ bool is_time_series = std::holds_alternative(frame.index); sorting::check( is_time_series, "When calling update, the input data must be a time series."); - bool input_data_is_sorted = frame.desc.get_sorted() == SortedValue::ASCENDING || - frame.desc.get_sorted() == SortedValue::UNKNOWN; 
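The sortedness checks in this hunk (and in the append path above) now read SortedValue directly from the in-memory descriptor, via frame.desc.sorted() and index_segment_reader.sorted(), instead of going through the protobuf stream descriptor. A standalone sketch of the acceptance rule those checks encode; the SortedValue enum below is a hypothetical stand-in, not ArcticDB's definition.

// --- Illustration only, not part of the diff --------------------------------
namespace sketch {

enum class SortedValue { UNKNOWN, UNSORTED, ASCENDING, DESCENDING };

// Update/append require ascending data, but tolerate UNKNOWN, which is what
// data without an explicit sortedness flag reports.
constexpr bool acceptable_for_update(SortedValue v) {
    return v == SortedValue::ASCENDING || v == SortedValue::UNKNOWN;
}

static_assert(acceptable_for_update(SortedValue::ASCENDING));
static_assert(acceptable_for_update(SortedValue::UNKNOWN));
static_assert(!acceptable_for_update(SortedValue::UNSORTED));
static_assert(!acceptable_for_update(SortedValue::DESCENDING));

} // namespace sketch
// --- End illustration --------------------------------------------------------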
+ bool input_data_is_sorted = frame.desc.sorted() == SortedValue::ASCENDING || + frame.desc.sorted() == SortedValue::UNKNOWN; // If changing this error message, the corresponding message in _normalization.py::restrict_data_to_date_range_only should also be updated sorting::check( input_data_is_sorted, "When calling update, the input data must be sorted."); - bool existing_data_is_sorted = index_segment_reader.get_sorted() == SortedValue::ASCENDING || - index_segment_reader.get_sorted() == SortedValue::UNKNOWN; + bool existing_data_is_sorted = index_segment_reader.sorted() == SortedValue::ASCENDING || + index_segment_reader.sorted() == SortedValue::UNKNOWN; sorting::check( existing_data_is_sorted, "When calling update, the existing data must be sorted."); @@ -333,9 +330,9 @@ VersionedItem update_impl( ARCTICDB_DEBUG(log::version(), "Update versioned dataframe for stream_id: {} , version_id = {}", stream_id, update_info.previous_index_key_->version_id()); auto index_segment_reader = index::get_index_reader(*(update_info.previous_index_key_), store); util::check_rte(!index_segment_reader.is_pickled(), "Cannot update pickled data"); - auto index_desc = check_index_match(frame->index, index_segment_reader.tsd().proto().stream_descriptor().index()); + auto index_desc = check_index_match(frame->index, index_segment_reader.tsd().index()); util::check( - index_desc.kind() == IndexDescriptor::TIMESTAMP || index_desc.kind() == IndexDescriptor::EMPTY, + index_desc.type() == IndexDescriptor::Type::TIMESTAMP || index_desc.type() == IndexDescriptor::Type::EMPTY, "Update not supported for non-timeseries indexes" ); sorted_data_check_update(*frame, index_segment_reader); @@ -412,9 +409,6 @@ VersionedItem update_impl( FrameAndDescriptor read_multi_key( const std::shared_ptr& store, const SegmentInMemory& index_key_seg) { - const auto& multi_index_seg = index_key_seg; - TimeseriesDescriptor tsd; - multi_index_seg.metadata()->UnpackTo(&tsd.mutable_proto()); std::vector keys; for (size_t idx = 0; idx < index_key_seg.row_count(); idx++) { keys.push_back(stream::read_key_row(index_key_seg, static_cast(idx))); @@ -424,7 +418,7 @@ FrameAndDescriptor read_multi_key( ReadQuery read_query; auto res = read_dataframe_impl(store, VersionedItem{std::move(dup)}, read_query, {}); - TimeseriesDescriptor multi_key_desc{tsd}; + TimeseriesDescriptor multi_key_desc{index_key_seg.index_descriptor()}; multi_key_desc.mutable_proto().mutable_normalization()->CopyFrom(res.desc_.proto().normalization()); return {res.frame_, multi_key_desc, keys, std::shared_ptr{}}; } @@ -602,11 +596,13 @@ void set_output_descriptors( std::shared_ptr> columns_to_decode(const std::shared_ptr& pipeline_context) { std::shared_ptr> res; + ARCTICDB_DEBUG(log::version(), "Creating columns list with {} bits set", pipeline_context->overall_column_bitset_->count()); if(pipeline_context->overall_column_bitset_) { res = std::make_shared>(); auto en = pipeline_context->overall_column_bitset_->first(); auto en_end = pipeline_context->overall_column_bitset_->end(); while (en < en_end) { + ARCTICDB_DEBUG(log::version(), "Adding field {}", pipeline_context->desc_->field(*en).name()); res->insert(std::string(pipeline_context->desc_->field(*en++).name())); } } @@ -700,7 +696,7 @@ void add_index_columns_to_query(const ReadQuery& read_query, const TimeseriesDes std::vector index_columns_to_add; for(const auto& index_column : index_columns) { if(std::find(std::begin(read_query.columns), std::end(read_query.columns), index_column) == std::end(read_query.columns)) - 
index_columns_to_add.push_back(index_column); + index_columns_to_add.push_back(std::string(index_column)); } read_query.columns.insert(std::begin(read_query.columns), std::begin(index_columns_to_add), std::end(index_columns_to_add)); } @@ -709,8 +705,7 @@ void add_index_columns_to_query(const ReadQuery& read_query, const TimeseriesDes FrameAndDescriptor read_segment_impl( const std::shared_ptr& store, const VariantKey& key) { - auto fut_segment = store->read(key); - auto [_, seg] = std::move(fut_segment).get(); + auto [_, seg] = store->read_sync(key); return frame_and_descriptor_from_segment(std::move(seg)); } @@ -761,7 +756,7 @@ void read_indexed_keys_to_pipeline( add_index_columns_to_query(read_query, index_segment_reader.tsd()); const auto& tsd = index_segment_reader.tsd(); - read_query.calculate_row_filter(static_cast(tsd.proto().total_rows())); + read_query.calculate_row_filter(static_cast(tsd.total_rows())); bool bucketize_dynamic = index_segment_reader.bucketize_dynamic(); pipeline_context->desc_ = tsd.as_stream_descriptor(); @@ -774,7 +769,7 @@ void read_indexed_keys_to_pipeline( pipeline_context->slice_and_keys_ = filter_index(index_segment_reader, combine_filter_functions(queries)); pipeline_context->total_rows_ = pipeline_context->calc_rows(); - pipeline_context->rows_ = index_segment_reader.tsd().proto().total_rows(); + pipeline_context->rows_ = index_segment_reader.tsd().total_rows(); pipeline_context->norm_meta_ = std::make_unique(std::move(*index_segment_reader.mutable_tsd().mutable_proto().mutable_normalization())); pipeline_context->user_meta_ = std::make_unique(std::move(*index_segment_reader.mutable_tsd().mutable_proto().mutable_user_meta())); pipeline_context->bucketize_dynamic_ = bucketize_dynamic; @@ -840,7 +835,7 @@ void check_incompletes_index_ranges_dont_overlap(const std::shared_ptrdescriptor().index().type() == IndexDescriptor::TIMESTAMP) { + if (pipeline_context->descriptor().index().type() == IndexDescriptorImpl::Type::TIMESTAMP) { std::optional last_existing_index_value; // Beginning of incomplete segments == beginning of all segments implies all segments are incompletes, so we are // writing, not appending @@ -1122,7 +1117,7 @@ FrameAndDescriptor read_column_stats_impl( try { auto segment_in_memory = store->read(column_stats_key).get().second; TimeseriesDescriptor tsd; - tsd.mutable_proto().set_total_rows(segment_in_memory.row_count()); + tsd.set_total_rows(segment_in_memory.row_count()); tsd.set_stream_descriptor(segment_in_memory.descriptor()); return {SegmentInMemory(std::move(segment_in_memory)), tsd, {}, {}}; } catch (const std::exception& e) { @@ -1221,8 +1216,8 @@ VersionedItem collate_and_write( TimeseriesDescriptor tsd; tsd.set_stream_descriptor(pipeline_context->descriptor()); + tsd.set_total_rows(pipeline_context->total_rows_); auto& tsd_proto = tsd.mutable_proto(); - tsd_proto.set_total_rows(pipeline_context->total_rows_); tsd_proto.mutable_normalization()->CopyFrom(*pipeline_context->norm_meta_); if(user_meta) tsd_proto.mutable_user_meta()->CopyFrom(*user_meta); @@ -1263,7 +1258,7 @@ VersionedItem sort_merge_impl( std::optional previous_sorted_value; if(append && update_info.previous_index_key_.has_value()) { read_indexed_keys_to_pipeline(store, pipeline_context, *(update_info.previous_index_key_), read_query, ReadOptions{}); - previous_sorted_value.emplace(pipeline_context->desc_->get_sorted()); + previous_sorted_value.emplace(pipeline_context->desc_->sorted()); } auto num_versioned_rows = pipeline_context->total_rows_; @@ -1358,7 +1353,7 @@ 
VersionedItem compact_incomplete_impl( std::optional previous_sorted_value; if(append && update_info.previous_index_key_.has_value()) { read_indexed_keys_to_pipeline(store, pipeline_context, *(update_info.previous_index_key_), read_query, read_options); - previous_sorted_value.emplace(pipeline_context->desc_->get_sorted()); + previous_sorted_value.emplace(pipeline_context->desc_->sorted()); } auto prev_size = pipeline_context->slice_and_keys_.size(); @@ -1378,11 +1373,13 @@ VersionedItem compact_incomplete_impl( std::vector> fut_vec; std::vector slices; bool dynamic_schema = write_options.dynamic_schema; - auto index = index_type_from_descriptor(first_seg.descriptor()); - auto policies = std::make_tuple(index, - dynamic_schema ? VariantSchema{DynamicSchema::default_schema(index)} : VariantSchema{FixedSchema::default_schema(index)}, - sparsify ? VariantColumnPolicy{SparseColumnPolicy{}} : VariantColumnPolicy{DenseColumnPolicy{}} - ); + const auto index = index_type_from_descriptor(first_seg.descriptor()); + auto policies = std::make_tuple( + index, + dynamic_schema ? VariantSchema{DynamicSchema::default_schema(index, stream_id)} : VariantSchema{FixedSchema::default_schema(index, stream_id)}, + sparsify ? VariantColumnPolicy{SparseColumnPolicy{}} : VariantColumnPolicy{DenseColumnPolicy{}} + ); + util::variant_match(std::move(policies), [ &fut_vec, &slices, pipeline_context=pipeline_context, &store, convert_int_to_float, &previous_sorted_value, &write_options] (auto &&idx, auto &&schema, auto &&column_policy) { using IndexType = std::remove_reference_t; @@ -1411,7 +1408,6 @@ VersionedItem compact_incomplete_impl( pipeline_context->incompletes_after(), user_meta); - store->remove_keys(delete_keys).get(); return vit; } @@ -1437,7 +1433,7 @@ PredefragmentationInfo get_pre_defragmentation_info( first_col_segment_idx.reserve(slice_and_keys.size()); std::optional compaction_start_info; size_t segment_idx = 0, num_to_segments_after_compact = 0, new_segment_row_size = 0; - for(auto it = slice_and_keys.begin(); it != slice_and_keys.end(); it++) { + for(auto it = slice_and_keys.begin(); it != slice_and_keys.end(); ++it) { auto &slice = it->slice(); if (slice.row_range.diff() < segment_size && !compaction_start_info) @@ -1481,10 +1477,11 @@ VersionedItem defragment_symbol_data_impl( // in the new index segment, we will start appending after this value std::vector> fut_vec; std::vector slices; - auto index = index_type_from_descriptor(pre_defragmentation_info.pipeline_context->descriptor()); - auto policies = std::make_tuple(index, - options.dynamic_schema ? VariantSchema{DynamicSchema::default_schema(index)} : VariantSchema{FixedSchema::default_schema(index)} - ); + const auto index = index_type_from_descriptor(pre_defragmentation_info.pipeline_context->descriptor()); + auto policies = std::make_tuple( + index, + options.dynamic_schema ? 
VariantSchema{DynamicSchema::default_schema(index, stream_id)} : VariantSchema{FixedSchema::default_schema(index, stream_id)} + ); util::variant_match(std::move(policies), [ &fut_vec, &slices, &store, &options, &pre_defragmentation_info, segment_size=segment_size] (auto &&idx, auto &&schema) { @@ -1493,24 +1490,24 @@ VersionedItem defragment_symbol_data_impl( using IndexType = std::remove_reference_t; using SchemaType = std::remove_reference_t; do_compact( - segments.begin(), - segments.end(), - pre_defragmentation_info.pipeline_context, - fut_vec, - slices, - store, - false, - segment_size); + segments.begin(), + segments.end(), + pre_defragmentation_info.pipeline_context, + fut_vec, + slices, + store, + false, + segment_size); }); auto keys = folly::collect(fut_vec).get(); auto vit = collate_and_write( - store, - pre_defragmentation_info.pipeline_context, - slices, - keys, - pre_defragmentation_info.append_after.value(), - std::nullopt); + store, + pre_defragmentation_info.pipeline_context, + slices, + keys, + pre_defragmentation_info.append_after.value(), + std::nullopt); return vit; } diff --git a/cpp/arcticdb/version/version_map.hpp b/cpp/arcticdb/version/version_map.hpp index a196ad2130..91aa228676 100644 --- a/cpp/arcticdb/version/version_map.hpp +++ b/cpp/arcticdb/version/version_map.hpp @@ -417,7 +417,7 @@ class VersionMapImpl { void compact_if_necessary_stand_alone(const std::shared_ptr& store, size_t batch_size) { auto map = get_num_version_entries(store, batch_size); size_t max_blocks = ConfigsMap::instance()->get_int("VersionMap.MaxVersionBlocks", 5); - const auto total_symbols = map.size(); + const auto total_symbols ARCTICDB_UNUSED = map.size(); size_t num_sym_compacted = 0; for(const auto& [symbol, size] : map) { if(size < max_blocks) @@ -431,10 +431,10 @@ class VersionMapImpl { log::version().warn("Error: {} in compacting {}", e.what(), symbol); } if (num_sym_compacted % 50 == 0) { - log::version().info("Compacted {} symbols", num_sym_compacted); + ARCTICDB_RUNTIME_DEBUG(log::version(), "Compacted {} symbols", num_sym_compacted); } } - log::version().info("Compacted {} out of {} total symbols", num_sym_compacted, total_symbols); + ARCTICDB_RUNTIME_DEBUG(log::version(), "Compacted {} out of {} total symbols", num_sym_compacted, total_symbols); } void compact(std::shared_ptr store, const StreamId& stream_id) { @@ -792,8 +792,9 @@ class VersionMapImpl { entry->load_type_ = load_param.load_type_; } catch (const std::runtime_error &err) { + (void)err; if (iterate_on_failure) { - log::version().info( + ARCTICDB_DEBUG(log::version(), "Loading versions from storage via ref key failed with error: {}, will load via iteration", err.what()); } else { diff --git a/cpp/arcticdb/version/version_store_api.cpp b/cpp/arcticdb/version/version_store_api.cpp index 3aced0cebb..f57de693fa 100644 --- a/cpp/arcticdb/version/version_store_api.cpp +++ b/cpp/arcticdb/version/version_store_api.cpp @@ -290,12 +290,10 @@ namespace { py::object get_metadata_from_segment( const SegmentInMemory& segment ) { - auto metadata_proto = segment.metadata(); py::object pyobj; - if (metadata_proto) { + if (segment.has_user_metadata()) { arcticdb::proto::descriptors::UserDefinedMetadata user_meta_proto; - metadata_proto->UnpackTo(&user_meta_proto); - pyobj = python_util::pb_to_python(user_meta_proto); + pyobj = python_util::pb_to_python(segment.user_metadata()); } else { pyobj = pybind11::none(); } @@ -886,7 +884,7 @@ std::vector ARCTICDB_UNUSED iterate_snapshot_tombstones ( ARCTICDB_DEBUG(log::version(), 
"Processing {}", snap_tomb_key); std::vector indexes{}; auto snap_seg = store->read_sync(snap_tomb_key).second; - auto before = candidates.size(); + auto before ARCTICDB_UNUSED = candidates.size(); for (size_t idx = 0; idx < snap_seg.row_count(); idx++) { auto key = read_key_row(snap_seg, static_cast(idx)); @@ -901,7 +899,7 @@ std::vector ARCTICDB_UNUSED iterate_snapshot_tombstones ( indexes.clear(); } - log::version().info("Processed {} keys from snapshot {}. {} are unique.", + ARCTICDB_DEBUG(log::version(), "Processed {} keys from snapshot {}. {} are unique.", snap_seg.row_count(), variant_key_id(snap_tomb_key), candidates.size() - before); snap_tomb_keys.emplace_back(std::move(snap_tomb_key)); }); @@ -1010,9 +1008,15 @@ namespace { py::object metadata_protobuf_to_pyobject(const std::optional& metadata_proto) { py::object pyobj; if (metadata_proto) { - arcticdb::proto::descriptors::TimeSeriesDescriptor tsd; - metadata_proto->UnpackTo(&tsd); - pyobj = python_util::pb_to_python(tsd.user_meta()); + if(metadata_proto->Is()) { + arcticdb::proto::descriptors::TimeSeriesDescriptor tsd; + metadata_proto->UnpackTo(&tsd); + pyobj = python_util::pb_to_python(tsd.user_meta()); + } else { + arcticdb::proto::descriptors::FrameMetadata meta; + metadata_proto->UnpackTo(&meta); + pyobj = python_util::pb_to_python(meta.user_meta()); + } } else { pyobj = pybind11::none(); diff --git a/cpp/arcticdb/version/version_tasks.hpp b/cpp/arcticdb/version/version_tasks.hpp index b44c2dfed9..f21e01adb6 100644 --- a/cpp/arcticdb/version/version_tasks.hpp +++ b/cpp/arcticdb/version/version_tasks.hpp @@ -28,7 +28,6 @@ struct UpdateMetadataTask : async::BaseTask { store_(std::move(store)), update_info_(std::move(update_info)), user_meta_(std::move(user_meta)) { - } AtomKey operator()() const { @@ -37,14 +36,14 @@ struct UpdateMetadataTask : async::BaseTask { auto index_key = *(update_info_.previous_index_key_); auto segment = store_->read_sync(index_key).second; - auto tsd = segment.index_descriptor(); - google::protobuf::Any output = {}; - tsd.mutable_proto().mutable_user_meta()->CopyFrom(user_meta_); - output.PackFrom(tsd.proto()); - - segment.override_metadata(std::move(output)); - return to_atom(store_->write_sync(index_key.type(), update_info_.next_version_id_, index_key.id(), index_key.start_index(), - index_key.end_index(), std::move(segment))); + segment.mutable_index_descriptor().mutable_proto().mutable_user_meta()->CopyFrom(user_meta_); + return to_atom(store_->write_sync( + index_key.type(), + update_info_.next_version_id_, + index_key.id(), + index_key.start_index(), + index_key.end_index(), + std::move(segment))); } }; @@ -59,12 +58,12 @@ struct AsyncRestoreVersionTask : async::BaseTask { std::shared_ptr store, std::shared_ptr version_map, StreamId stream_id, - const entity::AtomKey& index_key, + entity::AtomKey index_key, std::optional maybe_prev) : store_(std::move(store)), version_map_(std::move(version_map)), stream_id_(std::move(stream_id)), - index_key_(index_key), + index_key_(std::move(index_key)), maybe_prev_(std::move(maybe_prev)) { } @@ -172,5 +171,4 @@ struct WriteAndPrunePreviousTask : async::BaseTask { } }; - } //namespace arcticdb diff --git a/cpp/arcticdb/version/version_utils.cpp b/cpp/arcticdb/version/version_utils.cpp index ba5b993210..a8f9067337 100644 --- a/cpp/arcticdb/version/version_utils.cpp +++ b/cpp/arcticdb/version/version_utils.cpp @@ -20,8 +20,8 @@ using namespace arcticdb::entity; using namespace arcticdb::stream; -std::unordered_map get_num_version_entries(const 
std::shared_ptr& store, size_t batch_size) { - std::unordered_map output; +std::unordered_map get_num_version_entries(const std::shared_ptr& store, size_t batch_size) { + std::unordered_map output; size_t max_blocks = ConfigsMap::instance()->get_int("VersionMap.MaxVersionBlocks", 5); store->iterate_type(entity::KeyType::VERSION, [&output, batch_size, max_blocks] (const VariantKey& key) { ++output[variant_key_id(key)]; @@ -48,10 +48,10 @@ std::unordered_map get_num_version_entries(const std:: FrameAndDescriptor frame_and_descriptor_from_segment(SegmentInMemory&& seg) { TimeseriesDescriptor tsd; auto& tsd_proto = tsd.mutable_proto(); - tsd_proto.set_total_rows(seg.row_count()); + tsd.set_total_rows(seg.row_count()); const auto& seg_descriptor = seg.descriptor(); - tsd_proto.mutable_stream_descriptor()->CopyFrom(seg_descriptor.proto()); - if (seg.descriptor().index().type() == IndexDescriptor::ROWCOUNT) + tsd.set_stream_descriptor(seg_descriptor); + if (seg_descriptor.index().type() == IndexDescriptor::Type::ROWCOUNT) ensure_rowcount_norm_meta(*tsd_proto.mutable_normalization(), seg_descriptor.id()); else ensure_timeseries_norm_meta(*tsd.mutable_proto().mutable_normalization(), seg_descriptor.id(), false); diff --git a/cpp/arcticdb/version/version_utils.hpp b/cpp/arcticdb/version/version_utils.hpp index e21229280a..d5d7ac3112 100644 --- a/cpp/arcticdb/version/version_utils.hpp +++ b/cpp/arcticdb/version/version_utils.hpp @@ -46,22 +46,19 @@ inline entity::VariantKey write_multi_index_entry( for (auto &key : keys) { multi_index_agg.add_key(to_atom(key)); } - google::protobuf::Any any = {}; - TimeseriesDescriptor metadata; + TimeseriesDescriptor timeseries_descriptor; if (!metastruct.is_none()) { arcticdb::proto::descriptors::UserDefinedMetadata multi_key_proto; python_util::pb_from_python(metastruct, multi_key_proto); - metadata.mutable_proto().mutable_multi_key_meta()->CopyFrom(multi_key_proto); + timeseries_descriptor.set_multi_key_metadata(std::move(multi_key_proto)); } if (!user_meta.is_none()) { arcticdb::proto::descriptors::UserDefinedMetadata user_meta_proto; python_util::pb_from_python(user_meta, user_meta_proto); - metadata.mutable_proto().mutable_user_meta()->CopyFrom(user_meta_proto); + timeseries_descriptor.set_user_metadata(std::move(user_meta_proto)); } - any.PackFrom(metadata.proto()); - multi_index_agg.set_metadata(std::move(any)); - + multi_index_agg.set_timeseries_descriptor(timeseries_descriptor); multi_index_agg.commit(); return multi_key_fut.wait().value(); } diff --git a/cpp/proto/arcticc/pb2/descriptors.proto b/cpp/proto/arcticc/pb2/descriptors.proto index 420e040734..520095bc31 100644 --- a/cpp/proto/arcticc/pb2/descriptors.proto +++ b/cpp/proto/arcticc/pb2/descriptors.proto @@ -95,6 +95,7 @@ message StreamDescriptor { SortedValue sorted = 7; uint64 in_bytes = 8; uint64 out_bytes = 9; + uint64 row_count = 10; } message MsgPackSerialization { @@ -311,6 +312,14 @@ message TimeSeriesDescriptor UserDefinedMetadata multi_key_meta = 7; } +message FrameMetadata +{ + NormalizationMetadata normalization = 1; + UserDefinedMetadata user_meta = 2; + AtomKey next_key = 3; + UserDefinedMetadata multi_key_meta = 4; +} + message SymbolListDescriptor { bool enabled = 1; diff --git a/cpp/proto/arcticc/pb2/encoding.proto b/cpp/proto/arcticc/pb2/encoding.proto index 4e50d66998..f2b7c262c4 100644 --- a/cpp/proto/arcticc/pb2/encoding.proto +++ b/cpp/proto/arcticc/pb2/encoding.proto @@ -26,13 +26,11 @@ message SegmentHeader { ROWCOUNT = 0; XX_HASH = 1; } - HashType hashing_algo = 6; // 
defaults to none + reserved 6; // unused EncodedField metadata_field = 7; // optional metadata encoding EncodedField string_pool_field = 8; // string pool bool compacted = 9; // is the segment the result of a compaction - EncodedField descriptor_field = 10; - EncodedField index_descriptor_field = 11; - EncodedField column_fields = 12; + reserved 10 to 12; // No longer used uint32 encoding_version = 13; } diff --git a/python/arcticdb/storage_fixtures/mongo.py b/python/arcticdb/storage_fixtures/mongo.py index 85aa1bacc0..f6450507b7 100644 --- a/python/arcticdb/storage_fixtures/mongo.py +++ b/python/arcticdb/storage_fixtures/mongo.py @@ -111,6 +111,7 @@ def __init__(self, data_dir: Optional[str] = None, port=0, executable="mongod"): self._data_dir = data_dir or tempfile.mkdtemp("ManagedMongoDBServer") self._port = port or get_ephemeral_port(5) self._executable = executable + self._client = None def _safe_enter(self): from pymongo import MongoClient diff --git a/python/arcticdb/version_store/_store.py b/python/arcticdb/version_store/_store.py index 2585f46f27..42b83aeaa5 100644 --- a/python/arcticdb/version_store/_store.py +++ b/python/arcticdb/version_store/_store.py @@ -21,7 +21,8 @@ from typing import Any, Optional, Union, List, Sequence, Tuple, Dict, Set from contextlib import contextmanager -from arcticc.pb2.descriptors_pb2 import IndexDescriptor, TypeDescriptor, SortedValue +from arcticc.pb2.descriptors_pb2 import IndexDescriptor, TypeDescriptor +from arcticdb_ext.version_store import SortedValue from arcticc.pb2.storage_pb2 import LibraryConfig, EnvironmentConfigsMap from arcticdb.preconditions import check from arcticdb.supported_types import DateRangeInput, ExplicitlySupportedDates @@ -34,6 +35,7 @@ LibraryIndex as _LibraryIndex, Library as _Library, ) +from arcticdb_ext.types import IndexKind from arcticdb.version_store.read_result import ReadResult from arcticdb_ext.version_store import IndexRange as _IndexRange from arcticdb_ext.version_store import RowRange as _RowRange @@ -46,6 +48,7 @@ from arcticdb_ext.version_store import ColumnStats as _ColumnStats from arcticdb_ext.version_store import StreamDescriptorMismatch from arcticdb_ext.version_store import DataError +from arcticdb_ext.version_store import sorted_value_name from arcticdb.authorization.permissions import OpenMode from arcticdb.exceptions import ArcticDbNotYetImplemented, ArcticNativeException from arcticdb.flattener import Flattener @@ -2471,11 +2474,22 @@ def is_symbol_pickled(self, symbol: str, as_of: Optional[VersionQueryInput] = No dit = self.version_store.read_descriptor(symbol, version_query) return self.is_pickled_descriptor(dit.timeseries_descriptor) + + @staticmethod + def _does_not_have_date_range(desc, min_ts, max_ts): + if desc.index.kind() != IndexKind.TIMESTAMP: + return True + if desc.sorted == SortedValue.UNSORTED: + return True + if min_ts is None: + return True + if max_ts is None: + return True + + return False + def _get_time_range_from_ts(self, desc, min_ts, max_ts): - if desc.stream_descriptor.index.kind != IndexDescriptor.Type.TIMESTAMP or \ - desc.stream_descriptor.sorted == SortedValue.UNSORTED or \ - min_ts is None or \ - max_ts is None: + if self._does_not_have_date_range(desc, min_ts, max_ts): return datetime64("nat"), datetime64("nat") input_type = desc.normalization.WhichOneof("input_type") tz = None @@ -2556,8 +2570,8 @@ def open_mode(self): def _process_info(self, symbol: str, dit, as_of: Optional[VersionQueryInput] = None) -> Dict[str, Any]: timeseries_descriptor = 
dit.timeseries_descriptor - columns = [f.name for f in timeseries_descriptor.stream_descriptor.fields] - dtypes = [f.type_desc for f in timeseries_descriptor.stream_descriptor.fields] + columns = [f.name for f in timeseries_descriptor.fields] + dtypes = [f.type for f in timeseries_descriptor.fields] index = [] index_dtype = [] input_type = timeseries_descriptor.normalization.WhichOneof("input_type") @@ -2614,7 +2628,7 @@ def _process_info(self, symbol: str, dit, as_of: Optional[VersionQueryInput] = N "normalization_metadata": timeseries_descriptor.normalization, "type": self.get_arctic_style_type_info_for_norm(timeseries_descriptor), "date_range": date_range, - "sorted": SortedValue.Name(timeseries_descriptor.stream_descriptor.sorted), + "sorted": sorted_value_name(timeseries_descriptor.sorted), } def get_info(self, symbol: str, version: Optional[VersionQueryInput] = None) -> Dict[str, Any]: diff --git a/python/tests/integration/arcticdb/test_arctic.py b/python/tests/integration/arcticdb/test_arctic.py index c511d4650d..07fa39f9a8 100644 --- a/python/tests/integration/arcticdb/test_arctic.py +++ b/python/tests/integration/arcticdb/test_arctic.py @@ -18,6 +18,7 @@ from arcticdb_ext.exceptions import InternalException, UserInputException from arcticdb_ext.storage import NoDataFoundException +from arcticdb_ext.version_store import SortedValue from arcticdb.exceptions import ArcticDbNotYetImplemented, LibraryNotFound, MismatchingLibraryOptions from arcticdb.adapters.mongo_library_adapter import MongoLibraryAdapter from arcticdb.arctic import Arctic @@ -136,6 +137,25 @@ def test_azurite_ssl_verification(azurite_ssl_storage, monkeypatch, client_cert_ lib.write("sym", pd.DataFrame()) +def test_basic_metadata(lmdb_version_store): + lib = lmdb_version_store + df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}) + metadata = {"fluffy" : "muppets"} + lib.write("my_symbol", df, metadata=metadata) + vit = lib.read_metadata("my_symbol") + assert vit.metadata == metadata + + +def test_sorted_roundtrip(lmdb_version_store): + lib = lmdb_version_store + + symbol = "sorted_test" + df = pd.DataFrame({"column": [1, 2, 3, 4]}, index=pd.date_range(start="1/1/2018", end="1/4/2018")) + lib.write(symbol, df) + desc = lib.get_description(symbol) + assert desc.sorted == 'ASCENDING' + + def test_basic_write_read_update_and_append(arctic_library): lib = arctic_library df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]}) diff --git a/python/tests/unit/arcticdb/version_store/test_column_type_changes.py b/python/tests/unit/arcticdb/version_store/test_column_type_changes.py index 1a9da90a80..c04bba6043 100644 --- a/python/tests/unit/arcticdb/version_store/test_column_type_changes.py +++ b/python/tests/unit/arcticdb/version_store/test_column_type_changes.py @@ -95,6 +95,7 @@ def test_changing_fixed_string_width(version_store_factory, dynamic_schema, wide received_update = lib.read(sym_update).data assert_frame_equal(expected_update, received_update) + def test_type_promotion_stored_in_index_key(lmdb_version_store_dynamic_schema): lib = lmdb_version_store_dynamic_schema lib_tool = lib.library_tool() @@ -104,7 +105,7 @@ def test_type_promotion_stored_in_index_key(lmdb_version_store_dynamic_schema): def get_type_of_column(): index_key = lib_tool.find_keys_for_symbol(KeyType.TABLE_INDEX, sym)[-1] tsd = lib_tool.read_timeseries_descriptor(index_key) - type_desc = [field.type() for field in tsd.fields() if field.name() == col][0] + type_desc = [field.type for field in tsd.fields if field.name == col][0] return 
type_desc.data_type() df_write = pd.DataFrame({col: [1, 2]}, dtype="int8", index=pd.date_range("2024-01-01", periods=2)) diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 9b63c8e559..1441766468 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -77,6 +77,50 @@ def empty_index(request): yield request.param +def test_simple_empty_column(lmdb_version_store_empty_types_v1): + lib = lmdb_version_store_empty_types_v1 + df = pd.DataFrame({"col": 2 * [None]}) + lib.write("sym", df) + vit = lib.read("sym") + assert_frame_equal(vit.data, df) + + +def test_integer_simple(lmdb_version_store_empty_types_v1): + lib = lmdb_version_store_empty_types_v1 + lib.write("sym", pd.DataFrame({"col": 2 * [None]})) + int_dtype = 'int16' + df_non_empty = pd.DataFrame({"col": np.array([1,2,3], dtype=int_dtype)}) + lib.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col": np.array([0,0,1,2,3], dtype=int_dtype)}) + assert_frame_equal(lib.read("sym").data, expected_result) + assert_frame_equal( + lib.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col": np.array([0,0], dtype=int_dtype)}) + ) + assert_frame_equal( + lib.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + +def test_integer_simple_dynamic(lmdb_version_store_empty_types_dynamic_schema_v1): + lib = lmdb_version_store_empty_types_dynamic_schema_v1 + lib.write("sym", pd.DataFrame({"col": 2 * [None]})) + int_dtype = 'int16' + df_non_empty = pd.DataFrame({"col": np.array([1,2,3], dtype=int_dtype)}) + lib.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col": np.array([0,0,1,2,3], dtype=int_dtype)}) + assert_frame_equal(lib.read("sym").data, expected_result) + assert_frame_equal( + lib.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col": np.array([0,0], dtype=int_dtype)}) + ) + assert_frame_equal( + lib.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + class TestCanAppendToColumnWithNones: """ Tests that it is possible to write a column containing None values and latter append to it. Initially the type of diff --git a/python/tests/unit/arcticdb/version_store/test_parallel.py b/python/tests/unit/arcticdb/version_store/test_parallel.py index d8b20e4822..ca9b34bda2 100644 --- a/python/tests/unit/arcticdb/version_store/test_parallel.py +++ b/python/tests/unit/arcticdb/version_store/test_parallel.py @@ -401,7 +401,6 @@ def test_parallel_append_exactly_matches_existing(lmdb_version_store): assert lib.get_info(sym)["sorted"] == "ASCENDING" - def test_parallel_append_overlapping_with_existing(lmdb_version_store): lib = lmdb_version_store sym = "test_parallel_append_overlapping_with_existing"
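The Python-side changes above lean on the SortedValue enum now being exposed from the extension module and converted to a display name (the sorted_value_name import in _store.py, and test_sorted_roundtrip asserting desc.sorted == 'ASCENDING'). A minimal C++ sketch of such an enum-to-name mapping, under the assumption that it is a plain switch over the enum; the function and enum here are illustrative stand-ins rather than the module's actual binding code.

// --- Illustration only, not part of the diff --------------------------------
#include <string_view>

namespace sketch {

enum class SortedValue { UNKNOWN, UNSORTED, ASCENDING, DESCENDING };

// Maps the enum to the upper-case names the Python tests compare against.
constexpr std::string_view sorted_value_name(SortedValue v) {
    switch (v) {
    case SortedValue::ASCENDING:  return "ASCENDING";
    case SortedValue::DESCENDING: return "DESCENDING";
    case SortedValue::UNSORTED:   return "UNSORTED";
    case SortedValue::UNKNOWN:    return "UNKNOWN";
    }
    return "UNKNOWN";
}

} // namespace sketch
// --- End illustration --------------------------------------------------------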