-
Notifications
You must be signed in to change notification settings - Fork 601
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
datalake/coordinator: initial coordinator base #23712
Changes from all commits
531f0ac
9be8c3e
4ce066f
68c7ea2
da7e4e7
4df2c37
9ca5fac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
/* | ||
* Copyright 2024 Redpanda Data, Inc. | ||
* | ||
* Licensed as a Redpanda Enterprise file under the Redpanda Community | ||
* License (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* https://github.com/redpanda-data/redpanda/blob/master/licenses/rcl.md | ||
*/ | ||
#include "datalake/coordinator/coordinator.h" | ||
|
||
#include "base/vlog.h" | ||
#include "container/fragmented_vector.h" | ||
#include "datalake/coordinator/state_update.h" | ||
#include "datalake/logger.h" | ||
#include "model/fundamental.h" | ||
#include "model/record_batch_reader.h" | ||
#include "storage/record_batch_builder.h" | ||
|
||
namespace datalake::coordinator { | ||
|
||
namespace { | ||
coordinator::errc convert_stm_errc(coordinator_stm::errc e) { | ||
switch (e) { | ||
case coordinator_stm::errc::not_leader: | ||
return coordinator::errc::not_leader; | ||
case coordinator_stm::errc::shutting_down: | ||
return coordinator::errc::shutting_down; | ||
case coordinator_stm::errc::apply_error: | ||
return coordinator::errc::stm_apply_error; | ||
case coordinator_stm::errc::raft_error: | ||
return coordinator::errc::timedout; | ||
} | ||
} | ||
} // namespace | ||
|
||
std::ostream& operator<<(std::ostream& o, coordinator::errc e) { | ||
switch (e) { | ||
case coordinator::errc::not_leader: | ||
return o << "coordinator::errc::not_leader"; | ||
case coordinator::errc::shutting_down: | ||
return o << "coordinator::errc::shutting_down"; | ||
case coordinator::errc::stm_apply_error: | ||
return o << "coordinator::errc::stm_apply_error"; | ||
case coordinator::errc::timedout: | ||
return o << "coordinator::errc::timedout"; | ||
} | ||
} | ||
|
||
ss::future<> coordinator::stop_and_wait() { | ||
as_.request_abort(); | ||
return gate_.close(); | ||
} | ||
|
||
checked<ss::gate::holder, coordinator::errc> coordinator::maybe_gate() { | ||
ss::gate::holder h; | ||
if (as_.abort_requested() || gate_.is_closed()) { | ||
return errc::shutting_down; | ||
} | ||
return gate_.hold(); | ||
} | ||
|
||
ss::future<checked<std::nullopt_t, coordinator::errc>> | ||
coordinator::sync_add_files( | ||
model::topic_partition tp, chunked_vector<translated_offset_range> entries) { | ||
if (entries.empty()) { | ||
vlog(datalake_log.debug, "Empty entry requested {}", tp); | ||
co_return std::nullopt; | ||
} | ||
auto gate = maybe_gate(); | ||
if (gate.has_error()) { | ||
co_return gate.error(); | ||
} | ||
vlog( | ||
datalake_log.debug, | ||
"Sync add files requested {}: [{}, {}], {} files", | ||
tp, | ||
entries.begin()->start_offset, | ||
entries.back().last_offset, | ||
entries.size()); | ||
auto sync_res = co_await stm_.sync(10s); | ||
if (sync_res.has_error()) { | ||
co_return convert_stm_errc(sync_res.error()); | ||
} | ||
auto added_last_offset = entries.back().last_offset; | ||
auto update_res = add_files_update::build( | ||
mmaslankaprv marked this conversation as resolved.
Show resolved
Hide resolved
|
||
stm_.state(), tp, std::move(entries)); | ||
if (update_res.has_error()) { | ||
// NOTE: rejection here is just an optimization -- the operation would | ||
// fail to be applied to the STM anyway. | ||
vlog( | ||
datalake_log.debug, | ||
"Rejecting request to add files for {}: {}", | ||
tp, | ||
update_res.error()); | ||
co_return errc::stm_apply_error; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I think this is a validation error? (since its pre replication). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the caller's perspective they should be the same thing, no? the request does not apply cleanly to the stm There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking from a debugging perspective, if the request was rejected due to a race or if was legit validation, nbd, feel free to keep it as is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Leaving it for now, i think the log is enough, unless these get serialized in the response and the only logs we have are the worker logs |
||
} | ||
storage::record_batch_builder builder( | ||
model::record_batch_type::datalake_coordinator, model::offset{0}); | ||
builder.add_raw_kv( | ||
serde::to_iobuf(add_files_update::key), | ||
serde::to_iobuf(std::move(update_res.value()))); | ||
auto repl_res = co_await stm_.replicate_and_wait( | ||
sync_res.value(), std::move(builder).build(), as_); | ||
if (repl_res.has_error()) { | ||
co_return convert_stm_errc(repl_res.error()); | ||
} | ||
|
||
// Check that the resulting state matches that expected by the caller. | ||
// NOTE: a mismatch here just means there was a race to update the STM, and | ||
// this should be handled by callers. | ||
// TODO: would be nice to encapsulate this in some update validator. | ||
auto prt_opt = stm_.state().partition_state(tp); | ||
if ( | ||
!prt_opt.has_value() || prt_opt->get().pending_entries.empty() | ||
|| prt_opt->get().pending_entries.back().last_offset | ||
!= added_last_offset) { | ||
vlog( | ||
datalake_log.debug, | ||
"Resulting last offset for {} does not match expected {}", | ||
tp, | ||
added_last_offset); | ||
co_return errc::stm_apply_error; | ||
} | ||
co_return std::nullopt; | ||
} | ||
|
||
ss::future<checked<std::optional<kafka::offset>, coordinator::errc>> | ||
coordinator::sync_get_last_added_offset(model::topic_partition tp) { | ||
auto gate = maybe_gate(); | ||
if (gate.has_error()) { | ||
co_return gate.error(); | ||
} | ||
auto sync_res = co_await stm_.sync(10s); | ||
if (sync_res.has_error()) { | ||
co_return convert_stm_errc(sync_res.error()); | ||
} | ||
auto prt_state_opt = stm_.state().partition_state(tp); | ||
if (!prt_state_opt.has_value()) { | ||
co_return std::nullopt; | ||
} | ||
const auto& prt_state = prt_state_opt->get(); | ||
if (prt_state.pending_entries.empty()) { | ||
co_return prt_state.last_committed; | ||
} | ||
co_return prt_state.pending_entries.back().last_offset; | ||
} | ||
|
||
} // namespace datalake::coordinator |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
/* | ||
* Copyright 2024 Redpanda Data, Inc. | ||
* | ||
* Licensed as a Redpanda Enterprise file under the Redpanda Community | ||
* License (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* https://github.com/redpanda-data/redpanda/blob/master/licenses/rcl.md | ||
*/ | ||
#pragma once | ||
|
||
#include "container/fragmented_vector.h" | ||
#include "datalake/coordinator/state_machine.h" | ||
#include "datalake/coordinator/state_update.h" | ||
#include "model/fundamental.h" | ||
|
||
namespace datalake::coordinator { | ||
|
||
// Public interface that provides access to the coordinator STM. Conceptually, | ||
// the STM focuses solely on persisting deterministic updates, while this: | ||
// 1. wrangles additional aspects of these updates like concurrency, and | ||
// 2. reconciles the STM state with external catalogs. | ||
class coordinator { | ||
public: | ||
enum class errc { | ||
not_leader, | ||
stm_apply_error, | ||
timedout, | ||
shutting_down, | ||
}; | ||
explicit coordinator(coordinator_stm& stm) | ||
: stm_(stm) {} | ||
|
||
ss::future<> stop_and_wait(); | ||
ss::future<checked<std::nullopt_t, errc>> sync_add_files( | ||
model::topic_partition tp, chunked_vector<translated_offset_range>); | ||
ss::future<checked<std::optional<kafka::offset>, errc>> | ||
sync_get_last_added_offset(model::topic_partition tp); | ||
|
||
private: | ||
checked<ss::gate::holder, errc> maybe_gate(); | ||
|
||
coordinator_stm& stm_; | ||
|
||
ss::gate gate_; | ||
ss::abort_source as_; | ||
}; | ||
std::ostream& operator<<(std::ostream&, coordinator::errc); | ||
|
||
} // namespace datalake::coordinator |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
h
is unused? intentional?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops! unintentional, may fix in a follow up if this PR can get merged