@@ -103,23 +103,6 @@ ss::future<> manager::start() {
103
103
co_await handle_on_link_change (id);
104
104
}
105
105
106
- auto controller_node_leader = _partition_leader_cache->get_leader_node (
107
- ::model::controller_ntp);
108
- if (
109
- controller_node_leader.has_value ()
110
- && controller_node_leader.value () == _self) {
111
- auto controller_shard_leader = _partition_manager->shard_owner (
112
- ::model::controller_ntp);
113
- if (
114
- controller_shard_leader.has_value ()
115
- && controller_shard_leader.value () == ss::this_shard_id ()) {
116
- vlog (
117
- cllog.info , " Cluster link manager started on controller shard" );
118
- handle_partition_state_change (
119
- ::model::controller_ntp, ntp_leader::yes);
120
- }
121
- }
122
-
123
106
_link_task_reconciler_timer.set_callback ([this ] {
124
107
ssx::spawn_with_gate (_g, [this ] { return link_task_reconciler (); });
125
108
});
@@ -223,10 +206,12 @@ void manager::on_link_change(model::id_t id) {
223
206
}
224
207
225
208
// Queues a leadership-change notification for `ntp` so it is processed via
// `_queue`.
// (This span is a diff hunk: lines prefixed with `-` are the previous version,
// lines prefixed with `+` are the new version, which adds the `term`
// parameter.)
//
// Logs the change at trace level, then submits a task that calls
// handle_on_leadership_change() with the same arguments.
// NOTE(review): `term` is presumably the term in which the leadership change
// was observed and may be empty -- confirm against callers.
void manager::handle_partition_state_change (
226
- ::model::ntp ntp, ntp_leader is_ntp_leader) {
209
+ ::model::ntp ntp,
210
+ ntp_leader is_ntp_leader,
211
+ std::optional<::model::term_id> term) {
227
212
vlog (cllog.trace , " NTP={} leadership changed to {}" , ntp, is_ntp_leader);
228
// `ntp` is moved into the submitted task (after being logged above);
// `is_ntp_leader` and, in the new version, `term` are captured by copy.
- _queue.submit ([this , ntp{std::move (ntp)}, is_ntp_leader]() mutable {
229
- return handle_on_leadership_change (std::move (ntp), is_ntp_leader);
213
+ _queue.submit ([this , ntp{std::move (ntp)}, is_ntp_leader, term ]() mutable {
214
+ return handle_on_leadership_change (std::move (ntp), is_ntp_leader, term );
230
215
});
231
216
}
232
217
@@ -243,20 +228,17 @@ ss::future<> manager::handle_on_link_change(model::id_t id) {
243
228
try {
244
229
vlog (cllog.debug , " Stopping cluster link with id={}" , id);
245
230
co_await it->second ->stop ();
246
- _links.erase (it);
247
231
} catch (const std::exception& e) {
232
+ // Not expected in practice: stop() is declared noexcept, but the
233
+ // compiler does not enforce noexcept for coroutines.
248
234
vlog (
249
235
cllog.warn ,
250
- " Failed to stop link {}: \" {}\" . Re-attempting link "
251
- " stop "
252
- " within {} seconds" ,
236
+ " Failed to stop link {}: \" {}, going ahead and removing "
237
+ " it\" ." ,
253
238
id,
254
- e,
255
- retry_delay.count ());
256
- _queue.submit_delayed (retry_delay, [this , id] {
257
- return handle_on_link_change (id);
258
- });
239
+ e);
259
240
}
241
+ _links.erase (it);
260
242
} else {
261
243
vlog (cllog.trace , " No link found for id={}" , id);
262
244
}
@@ -312,7 +294,30 @@ ss::future<> manager::handle_on_link_change(model::id_t id) {
312
294
e);
313
295
}
314
296
}
315
- co_await new_link->start ();
297
+
298
+ std::exception_ptr start_eptr = nullptr ;
299
+ try {
300
+ co_await new_link->start ();
301
+ } catch (...) {
302
+ start_eptr = std::current_exception ();
303
+ }
304
+ if (start_eptr) {
305
+ vlog (
306
+ cllog.warn ,
307
+ " Failed to start link {}: \" {}\" " ,
308
+ id,
309
+ start_eptr);
310
+ try {
311
+ co_await new_link->stop ();
312
+ } catch (...) {
313
+ vlog (
314
+ cllog.warn ,
315
+ " Failed to stop link {}: \" {}\" , ignoring.." ,
316
+ id,
317
+ std::current_exception ());
318
+ }
319
+ std::rethrow_exception (start_eptr);
320
+ }
316
321
_links.emplace (id, std::move (new_link));
317
322
_link_created_cv.broadcast ();
318
323
} catch (const ss::semaphore_aborted&) {
@@ -394,7 +399,9 @@ ss::future<> manager::link_task_reconciler() {
394
399
}
395
400
396
401
ss::future<> manager::handle_on_leadership_change (
397
- ::model::ntp ntp, ntp_leader is_ntp_leader) {
402
+ ::model::ntp ntp,
403
+ ntp_leader is_ntp_leader,
404
+ std::optional<::model::term_id> term) {
398
405
vlog (
399
406
cllog.trace ,
400
407
" Handling leadership change for NTP={}, is_ntp_leader={}" ,
@@ -415,9 +422,11 @@ ss::future<> manager::handle_on_leadership_change(
415
422
}
416
423
}
417
424
418
- co_await ss::parallel_for_each (_links, [ntp, is_ntp_leader](auto & pair) {
419
- return pair.second ->handle_on_leadership_change (ntp, is_ntp_leader);
420
- });
425
+ co_await ss::parallel_for_each (
426
+ _links, [ntp, is_ntp_leader, term](auto & pair) {
427
+ return pair.second ->handle_on_leadership_change (
428
+ ntp, is_ntp_leader, term);
429
+ });
421
430
}
422
431
423
432
ss::future<::cluster::cluster_link::errc> manager::add_mirror_topic (
0 commit comments