11
11
#include " bytes/iobuf.h"
12
12
#include " datalake/schema_registry.h"
13
13
#include " model/record.h"
14
+ #include " utils/vint.h"
14
15
15
16
#include < gtest/gtest.h>
16
17
@@ -22,12 +23,15 @@ TEST(DatalakeSchemaRegistry, SchemaIdForValidRecord) {
22
23
uint8_t magic = 0 ;
23
24
int32_t schema_id = 12 ;
24
25
int32_t schema_id_encoded = htobe32 (schema_id);
26
+ std::string payload = " Hello world" ;
25
27
value.append (&magic, 1 );
26
28
value.append (reinterpret_cast <uint8_t *>(&schema_id_encoded), 4 );
29
+ value.append (payload.data (), payload.size ());
27
30
28
31
auto res = datalake::get_value_schema_id (value);
29
32
ASSERT_TRUE (res.has_value ());
30
- EXPECT_EQ (res.value ()(), schema_id);
33
+ EXPECT_EQ (res.value ().schema_id (), schema_id);
34
+ EXPECT_EQ (res.value ().shared_message_data .size_bytes (), payload.size ());
31
35
}
32
36
33
37
TEST (DatalakeSchemaRegistry, SchemaIdForShortRecord) {
@@ -57,3 +61,86 @@ TEST(DatalakeSchemaRegistry, SchemaIdForBadMagic) {
57
61
ASSERT_TRUE (res.has_error ());
58
62
EXPECT_EQ (res.error (), datalake::get_schema_error::no_schema_id);
59
63
}
64
+
65
+ TEST (DatalakeSchemaRegistry, GetProtoOffsetsOk) {
66
+ iobuf value;
67
+
68
+ uint8_t magic = 0 ; // Invalid magic
69
+ uint32_t schema_id = 12 ;
70
+ int32_t schema_id_encoded = htobe32 (schema_id);
71
+ std::string payload = " Hello world" ;
72
+ value.append (&magic, 1 );
73
+ value.append (reinterpret_cast <uint8_t *>(&schema_id_encoded), 4 );
74
+
75
+ uint8_t proto_msg_count = 9 ;
76
+ std::array<uint8_t , 16 > encoded;
77
+ size_t encoded_size = vint::serialize (proto_msg_count, encoded.data ());
78
+ value.append (encoded.data (), encoded_size);
79
+
80
+ for (uint8_t i = 0 ; i < proto_msg_count; i++) {
81
+ encoded_size = vint::serialize (i, encoded.data ());
82
+ value.append (encoded.data (), encoded_size);
83
+ }
84
+ value.append (payload.data (), payload.size ());
85
+
86
+ auto res = datalake::get_proto_offsets (value);
87
+ ASSERT_TRUE (res.has_value ());
88
+ const auto & offsets = res.value ().protobuf_offsets ;
89
+ EXPECT_EQ (offsets.size (), proto_msg_count);
90
+ for (int32_t o = 0 ; o < offsets.size (); o++) {
91
+ EXPECT_EQ (o, offsets[o]);
92
+ }
93
+ EXPECT_EQ (res.value ().schema_id (), schema_id);
94
+ EXPECT_EQ (res.value ().shared_message_data .size_bytes (), payload.size ());
95
+ }
96
+
97
+ TEST (DatalakeSchemaRegistry, GetProtoOffsetsDefaultZero) {
98
+ // This tests a special case where the offset count is 0, we should assume
99
+ // that the message is the first one defined in the schema and return {0}.
100
+ iobuf value;
101
+
102
+ uint8_t magic = 0 ; // Invalid magic
103
+ uint32_t schema_id = 12 ;
104
+ int32_t schema_id_encoded = htobe32 (schema_id);
105
+ std::string payload = " Hello world" ;
106
+ value.append (&magic, 1 );
107
+ value.append (reinterpret_cast <uint8_t *>(&schema_id_encoded), 4 );
108
+
109
+ uint8_t proto_msg_count = 0 ;
110
+ std::array<uint8_t , 16 > encoded;
111
+ size_t encoded_size = vint::serialize (proto_msg_count, encoded.data ());
112
+ value.append (encoded.data (), encoded_size);
113
+ value.append (payload.data (), payload.size ());
114
+
115
+ auto res = datalake::get_proto_offsets (value);
116
+ ASSERT_TRUE (res.has_value ());
117
+ const auto & offsets = res.value ().protobuf_offsets ;
118
+ EXPECT_EQ (offsets.size (), 1 );
119
+ EXPECT_EQ (offsets[0 ], 0 );
120
+ EXPECT_EQ (res.value ().schema_id (), schema_id);
121
+ EXPECT_EQ (res.value ().shared_message_data .size_bytes (), payload.size ());
122
+ }
123
+
124
+ TEST (DatalakeSchemaRegistry, GetProtoOffsetsNotEnoughData) {
125
+ iobuf value;
126
+
127
+ uint8_t magic = 0 ; // Invalid magic
128
+ uint32_t schema_id = 12 ;
129
+ int32_t schema_id_encoded = htobe32 (schema_id);
130
+ value.append (&magic, 1 );
131
+ value.append (reinterpret_cast <uint8_t *>(&schema_id_encoded), 4 );
132
+
133
+ uint8_t proto_msg_count = 9 ;
134
+ std::array<uint8_t , 16 > encoded;
135
+ size_t encoded_size = vint::serialize (proto_msg_count, encoded.data ());
136
+ value.append (encoded.data (), encoded_size);
137
+
138
+ for (uint8_t i = 0 ; i < proto_msg_count - 1 ; i++) {
139
+ encoded_size = vint::serialize (i, encoded.data ());
140
+ value.append (encoded.data (), encoded_size);
141
+ }
142
+
143
+ auto res = datalake::get_proto_offsets (value);
144
+ ASSERT_TRUE (res.has_error ());
145
+ EXPECT_EQ (res.error (), datalake::get_schema_error::not_enough_bytes);
146
+ }
0 commit comments