10
10
11
11
#include " bytes/iobuf.h"
12
12
#include " datalake/schema_registry.h"
13
+ #include " gmock/gmock.h"
13
14
#include " model/record.h"
15
+ #include " utils/vint.h"
14
16
15
17
#include < gtest/gtest.h>
16
18
17
19
#include < variant>
18
20
19
- TEST (DatalakeSchemaRegistry, SchemaIdForValidRecord) {
20
- iobuf value;
21
+ namespace {
22
+ void buf_append (iobuf& b, uint8_t byte) { b.append (&byte, 1 ); }
23
+ void buf_append (iobuf& b, int32_t val) {
24
+ b.append (reinterpret_cast <uint8_t *>(&val), 4 );
25
+ }
26
+ void buf_append (iobuf& b, const bytes& byte) {
27
+ b.append (byte.data (), byte.size ());
28
+ }
29
+ void buf_append (iobuf& b, const std::string& str) {
30
+ b.append (str.data (), str.size ());
31
+ }
21
32
33
+ template <typename ... Args>
34
+ iobuf buf_from (const Args&... args) {
35
+ iobuf b;
36
+ (buf_append (b, args), ...);
37
+ return b;
38
+ }
39
+ } // namespace
40
+
41
+ TEST (DatalakeSchemaRegistry, SchemaIdForValidRecord) {
22
42
uint8_t magic = 0 ;
23
43
int32_t schema_id = 12 ;
24
- int32_t schema_id_encoded = htobe32 (schema_id);
25
- value.append (&magic, 1 );
26
- value.append (reinterpret_cast <uint8_t *>(&schema_id_encoded), 4 );
44
+ int32_t schema_id_encoded = ss::cpu_to_be (schema_id);
45
+ std::string payload = " Hello world" ;
46
+
47
+ iobuf value = buf_from (magic, schema_id_encoded, payload);
27
48
28
49
auto res = datalake::get_value_schema_id (value);
29
50
ASSERT_TRUE (res.has_value ());
30
- EXPECT_EQ (res.value ()(), schema_id);
51
+ EXPECT_EQ (res.value ().schema_id (), schema_id);
52
+ EXPECT_EQ (res.value ().shared_message_data .size_bytes (), payload.size ());
31
53
}
32
54
33
55
TEST (DatalakeSchemaRegistry, SchemaIdForShortRecord) {
34
56
iobuf value;
35
57
36
58
uint8_t magic = 0 ;
37
59
int32_t schema_id = 12 ;
38
- int32_t schema_id_encoded = htobe32 (schema_id);
60
+ int32_t schema_id_encoded = ss::cpu_to_be (schema_id);
39
61
value.append (&magic, 1 );
40
62
// Only adding 3 bytes here instead of 4 to generate an invalid record.
41
63
value.append (reinterpret_cast <uint8_t *>(&schema_id_encoded), 3 );
@@ -46,14 +68,80 @@ TEST(DatalakeSchemaRegistry, SchemaIdForShortRecord) {
46
68
}
47
69
48
70
// A record whose leading magic byte is not zero must be rejected with
// get_schema_error::no_schema_id.
TEST(DatalakeSchemaRegistry, SchemaIdForBadMagic) {
    const uint8_t bad_magic = 5; // Invalid magic
    const int32_t id = 12;
    const int32_t wire_id = ss::cpu_to_be(id);

    iobuf record = buf_from(bad_magic, wire_id);

    auto parsed = datalake::get_value_schema_id(record);
    ASSERT_TRUE(parsed.has_error());
    EXPECT_EQ(parsed.error(), datalake::get_schema_error::no_schema_id);
}
81
+
82
+ TEST (DatalakeSchemaRegistry, GetProtoOffsetsOk) {
83
+ uint8_t magic = 0 ;
84
+ uint32_t schema_id = 12 ;
85
+ int32_t schema_id_encoded = ss::cpu_to_be (schema_id);
86
+ std::string payload = " Hello world" ;
87
+
88
+ uint8_t proto_msg_count = 5 ;
89
+ auto encoded = vint::to_bytes (proto_msg_count);
90
+ iobuf value = buf_from (magic, schema_id_encoded, encoded);
91
+
92
+ for (uint8_t i = 0 ; i < proto_msg_count; i++) {
93
+ encoded = vint::to_bytes (i);
94
+ value.append (encoded.data (), encoded.size ());
95
+ }
96
+ value.append (payload.data (), payload.size ());
97
+
98
+ auto res = datalake::get_proto_offsets (value);
99
+ ASSERT_TRUE (res.has_value ());
100
+ const auto & offsets = res.value ().protobuf_offsets ;
101
+ EXPECT_THAT (offsets, testing::ElementsAre (0 , 1 , 2 , 3 , 4 ));
102
+ EXPECT_EQ (res.value ().schema_id (), schema_id);
103
+ EXPECT_EQ (res.value ().shared_message_data .size_bytes (), payload.size ());
104
+ }
105
+
106
+ TEST (DatalakeSchemaRegistry, GetProtoOffsetsDefaultZero) {
107
+ // This tests a special case where the offset count is 0, we should assume
108
+ // that the message is the first one defined in the schema and return {0}.
109
+
110
+ uint8_t magic = 0 ;
111
+ uint32_t schema_id = 12 ;
112
+ int32_t schema_id_encoded = ss::cpu_to_be (schema_id);
113
+ std::string payload = " Hello world" ;
114
+
115
+ uint8_t proto_msg_count = 0 ;
116
+ auto encoded = vint::to_bytes (proto_msg_count);
117
+
118
+ iobuf value = buf_from (magic, schema_id_encoded, encoded, payload);
119
+
120
+ auto res = datalake::get_proto_offsets (value);
121
+ ASSERT_TRUE (res.has_value ());
122
+ const auto & offsets = res.value ().protobuf_offsets ;
123
+ EXPECT_EQ (offsets.size (), 1 );
124
+ EXPECT_EQ (offsets[0 ], 0 );
125
+ EXPECT_EQ (res.value ().schema_id (), schema_id);
126
+ EXPECT_EQ (res.value ().shared_message_data .size_bytes (), payload.size ());
127
+ }
128
+
129
+ TEST (DatalakeSchemaRegistry, GetProtoOffsetsNotEnoughData) {
130
+ uint8_t magic = 0 ;
131
+ uint32_t schema_id = 12 ;
132
+ int32_t schema_id_encoded = ss::cpu_to_be (schema_id);
133
+
134
+ uint8_t proto_msg_count = 9 ;
135
+ auto encoded = vint::to_bytes (proto_msg_count);
136
+
137
+ iobuf value = buf_from (magic, schema_id_encoded, encoded);
138
+
139
+ for (uint8_t i = 0 ; i < proto_msg_count - 1 ; i++) {
140
+ encoded = vint::to_bytes (i);
141
+ value.append (encoded.data (), encoded.size ());
142
+ }
143
+
144
+ auto res = datalake::get_proto_offsets (value);
145
+ ASSERT_TRUE (res.has_error ());
146
+ EXPECT_EQ (res.error (), datalake::get_schema_error::not_enough_bytes);
147
+ }
0 commit comments