from rptest.clients.types import TopicSpec
from rptest.clients.rpk import RpkTool
from rptest.clients.kafka_cli_tools import KafkaCliTools
- from rptest.util import (
-     wait_until,
- )
+ from rptest.services.kgo_verifier_services import KgoVerifierProducer
+ from rptest.util import wait_until, expect_timeout
from rptest.utils.si_utils import BucketView

from ducktape.mark import matrix
]


- class AdjacentSegmentMergingTest(RedpandaTest):
+ class AdjacentSegmentMergingTestBase(RedpandaTest):
    s3_topic_name = "panda-topic"
-     topics = (TopicSpec(name=s3_topic_name, partition_count=1, replication_factor=3),)

-     def __init__(self, test_context):
+     def __init__(self, test_context, extra_rp_conf: dict[str, str] = {}, **kwargs):
        si_settings = SISettings(
            test_context,
            cloud_storage_max_connections=10,
@@ -56,8 +54,11 @@ def __init__(self, test_context):

        self.bucket_name = si_settings.cloud_storage_bucket

-         super(AdjacentSegmentMergingTest, self).__init__(
-             test_context=test_context, extra_rp_conf=xtra_conf, si_settings=si_settings
+         super().__init__(
+             test_context=test_context,
+             extra_rp_conf={**xtra_conf, **extra_rp_conf},
+             si_settings=si_settings,
+             **kwargs,
        )

        self.kafka_tools = KafkaCliTools(self.redpanda)
@@ -66,6 +67,19 @@ def __init__(self, test_context):
    def setUp(self):
        super().setUp()  # topic is created here

+
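+ # The original test keeps its 1-partition, rf=3 topic on a 3-node cluster;
+ # the base class above now takes extra_rp_conf and **kwargs so variants can
+ # override the cluster configuration.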
+ class AdjacentSegmentMergingTest(AdjacentSegmentMergingTestBase):
+     topics = (
+         TopicSpec(
+             name=AdjacentSegmentMergingTestBase.s3_topic_name,
+             partition_count=1,
+             replication_factor=3,
+         ),
+     )
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+
    @cluster(num_nodes=3)
    @matrix(acks=[-1, 1], cloud_storage_type=get_cloud_storage_type())
    def test_reupload_of_local_segments(self, acks, cloud_storage_type):
@@ -110,3 +124,111 @@ def manifest_has_one_segment():
                return False

        wait_until(manifest_has_one_segment, 60)
+
+
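+ # Variant that starts with a compacted topic on a single broker: housekeeping
+ # is expected to leave the compacted segments alone and only merge segments
+ # produced after the topic switches to a delete cleanup policy.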
+ class AdjacentSegmentMergingToggleCompactionTest(AdjacentSegmentMergingTestBase):
+     topics = (
+         TopicSpec(
+             name=AdjacentSegmentMergingTestBase.s3_topic_name,
+             partition_count=1,
+             replication_factor=1,
+             cleanup_policy=TopicSpec.CLEANUP_COMPACT,
+             min_cleanable_dirty_ratio=0.0,
+             max_compaction_lag_ms=3000,
+         ),
+     )
+
+     def __init__(self, test_context, *args, **kwargs):
+         xtra_conf = dict(
+             cloud_storage_enable_compacted_topic_reupload=False,
+             cloud_storage_enable_segment_merging=True,
+             log_compaction_interval_ms=50,
+             log_compaction_use_sliding_window=False,
+             compacted_log_segment_size=1024 * 512,
+             max_compaction_lag_ms=3000,
+         )
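+         # Cluster overrides for this variant: segment merging stays enabled,
+         # but reupload of compacted topics is turned off and compaction runs
+         # frequently on small (512 KiB) segments.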
+         self.test_context = test_context
+         super().__init__(
+             test_context, extra_rp_conf=xtra_conf, num_brokers=1, *args, **kwargs
+         )
+
+     @cluster(num_nodes=2)
+     @matrix(
+         acks=[
+             -1,
+             1,
+         ],
+         cloud_storage_type=get_cloud_storage_type(),
+     )
+     def test_reupload_of_local_segments(self, acks, cloud_storage_type):
+         """Test adjacent segment merging using local data.
+         The test starts by uploading a large number of very small segments.
+         The total amount of data produced is smaller than the target segment
+         size. Because of that, after housekeeping we should end up with
+         only one segment in the cloud.
+         Retention is not enabled, so the reupload process can use data
+         available locally.
+         """
+
+         def produce_some():
+             for _ in range(10):
+                 KgoVerifierProducer.oneshot(
+                     context=self.test_context,
+                     redpanda=self.redpanda,
+                     topic=self.topic,
+                     msg_size=1024,
+                     msg_count=1024,
+                     key_set_cardinality=1,
+                 )
+                 # # Every 'produce' call should create at least one segment
+                 # # in the cloud which is 1MiB
+                 # self.kafka_tools.produce(self.topic, 1024, 1024, acks)
+                 time.sleep(1)
+             time.sleep(5)
+
+         produce_some()
+
+         self.rpk.alter_topic_config(
+             self.topic, TopicSpec.PROPERTY_CLEANUP_POLICY, TopicSpec.CLEANUP_DELETE
+         )
+
+         self.redpanda.set_cluster_config(
+             {"log_compaction_use_sliding_window": True}, expect_restart=True
+         )
+
+         def manifest_has_large_segment():
+             try:
+                 num_good = 0
+                 for ntp, manifest in BucketView(
+                     self.redpanda
+                 ).partition_manifests.items():
+                     target_lower_bound = 1024 * 1024 * 8
+                     for name, meta in manifest["segments"].items():
+                         self.logger.info(f"segment {name}, segment_meta: {meta}")
+                         if meta["size_bytes"] >= target_lower_bound:
+                             # we will only see large segments with size
+                             # greater than lower bound if housekeeping
+                             # is working
+                             num_good += 1
+                 return num_good > 0
+             except Exception as err:
+                 import traceback
+
+                 self.logger.info(
+                     "".join(
+                         traceback.format_exception(type(err), err, err.__traceback__)
+                     )
+                 )
+                 return False
+
+         self.logger.debug(
+             "The log is full of small compacted segments, so housekeeping shouldn't have any effect"
+         )
+         with expect_timeout():
+             wait_until(manifest_has_large_segment, 30)
+
+         self.logger.debug(
+             "Produce some more small segments with compaction off. Housekeeping should make progress now"
+         )
+         produce_some()
+         wait_until(manifest_has_large_segment, 60)