From a2200b70ae6778eceed7099e2ed5b5c403d6f09b Mon Sep 17 00:00:00 2001 From: Oren Leiman Date: Tue, 6 Jan 2026 23:38:41 -0800 Subject: [PATCH 1/2] cs_clients/util: Add find_multipart_boundary This is well-specified and identical between ABS and GCS, so factor it out for easier testing. Signed-off-by: Oren Leiman --- .../cloud_storage_clients/tests/util_test.cc | 135 ++++++++++++++++++ src/v/cloud_storage_clients/util.cc | 62 ++++++++ src/v/cloud_storage_clients/util.h | 5 + 3 files changed, 202 insertions(+) diff --git a/src/v/cloud_storage_clients/tests/util_test.cc b/src/v/cloud_storage_clients/tests/util_test.cc index 9e4e1261f33b8..5e482850c62e5 100644 --- a/src/v/cloud_storage_clients/tests/util_test.cc +++ b/src/v/cloud_storage_clients/tests/util_test.cc @@ -2,6 +2,7 @@ #include "bytes/iobuf_parser.h" #include "cloud_storage_clients/util.h" +#include #include TEST(Util, AllPathsToFile) { @@ -805,3 +806,137 @@ TEST(MultipartMalformed, OnlyEndBoundary) { // accomplish here. As long as multipart_response_parser returns iobuf parts // that themselves contain properly trimmed HTTP responses (header + body w/ // correct line endings), then multipart_subresponse should work as expected. + +// ============================================================================ +// find_multipart_boundary tests +// ============================================================================ + +TEST(FindMultipartBoundary, ValidBoundary) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert( + boost::beast::http::field::content_type, + "multipart/mixed; boundary=batch_boundary_123"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_TRUE(result.has_value()); + EXPECT_EQ(result.value(), "batch_boundary_123"); +} + +TEST(FindMultipartBoundary, BoundaryWithWhitespace) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert( + boost::beast::http::field::content_type, + "multipart/mixed; boundary =\t batch_boundary_123"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_TRUE(result.has_value()); + // Whitespace should be stripped + EXPECT_EQ(result.value(), "batch_boundary_123"); +} + +TEST(FindMultipartBoundary, BoundaryWithQuotes) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert( + boost::beast::http::field::content_type, + R"(multipart/mixed; boundary="batch_boundary_123")"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_TRUE(result.has_value()); + // Quotes should be stripped + EXPECT_EQ(result.value(), "batch_boundary_123"); +} + +namespace { +ss::sstring eptr_what(std::exception_ptr eptr) { + if (eptr == nullptr) { + return {}; + } + try { + std::rethrow_exception(eptr); + } catch (const std::runtime_error& e) { + return e.what(); + } + return {}; +} +} // namespace + +TEST(FindMultipartBoundary, MissingContentType) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + // No Content-Type header set + + auto result = util::find_multipart_boundary(headers); + + EXPECT_FALSE(result.has_value()) << result.value(); + EXPECT_THAT( + eptr_what(result.error()), testing::HasSubstr("Content-Type missing")); +} + +TEST(FindMultipartBoundary, NotMultipartContentType) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert(boost::beast::http::field::content_type, "application/json"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_FALSE(result.has_value()) << result.value(); + EXPECT_THAT( + eptr_what(result.error()), + testing::HasSubstr("Expected multipart Content-Type")); +} + +TEST(FindMultipartBoundary, MissingBoundaryParameter) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert(boost::beast::http::field::content_type, "multipart/mixed"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_FALSE(result.has_value()) << result.value(); + EXPECT_THAT( + eptr_what(result.error()), + testing::HasSubstr("Boundary missing from multipart")); +} + +TEST(FindMultipartBoundary, MissingBoundaryEqualSign) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert( + boost::beast::http::field::content_type, + "multipart/mixed; boundary foobar"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_FALSE(result.has_value()) << result.value(); + EXPECT_THAT( + eptr_what(result.error()), + testing::HasSubstr("Boundary missing from multipart")); +} + +TEST(FindMultipartBoundary, EmptyBoundaryParameter) { + using namespace cloud_storage_clients; + + http::client::response_header headers; + headers.insert( + boost::beast::http::field::content_type, R"(multipart/mixed; boundary=)"); + + auto result = util::find_multipart_boundary(headers); + + EXPECT_FALSE(result.has_value()) << result.value(); + EXPECT_THAT( + eptr_what(result.error()), + testing::HasSubstr("Boundary missing from multipart")); +} diff --git a/src/v/cloud_storage_clients/util.cc b/src/v/cloud_storage_clients/util.cc index 7c905fa5bd192..3a7af15a39fd2 100644 --- a/src/v/cloud_storage_clients/util.cc +++ b/src/v/cloud_storage_clients/util.cc @@ -512,4 +512,66 @@ multipart_subresponse::iobuf_to_constbufseq(const iobuf& buf) { return seq; }; +std::expected +find_multipart_boundary(const http::client::response_header& headers) { + constexpr std::string_view boundary_name{"boundary"}; + constexpr std::string_view content_type_multipart{"multipart/mixed"}; + + auto content_type_it = headers.find( + boost::beast::http::field::content_type); + std::string_view boundary; + if (content_type_it == headers.end()) { + return std::unexpected( + std::make_exception_ptr( + std::runtime_error( + "find_multipart_boundary: Content-Type missing"))); + } + std::string_view content_type{content_type_it->value()}; + if (auto pos = content_type.find(content_type_multipart); + pos == content_type.npos) { + return std::unexpected( + std::make_exception_ptr( + std::runtime_error( + fmt::format( + "find_multipart_boundary: Expected multipart Content-Type: {}", + content_type)))); + } + if (auto boundary_pos = content_type.find(boundary_name); + boundary_pos != content_type.npos) { + boundary = content_type.substr(boundary_pos + boundary_name.size()); + // remove whitespace (if present) and find exactly one equals sign + int n_eq = 0; + while (!boundary.empty()) { + auto c = boundary.front(); + bool is_eq = c == '='; + bool is_whitespace = (c == ' ' || c == '\t'); + if (!is_eq && !is_whitespace) { + break; + } + n_eq += is_eq; + boundary = boundary.substr(1); + } + if (n_eq != 1) { + boundary = {}; + } + // Remove quotes if present + if (!boundary.empty() && boundary.front() == '"') { + boundary = boundary.substr(1); + } + if (!boundary.empty() && boundary.back() == '"') { + boundary = boundary.substr(0, boundary.size() - 1); + } + } + if (boundary.empty()) { + return std::unexpected( + std::make_exception_ptr( + std::runtime_error( + fmt::format( + "find_multipart_boundary: Boundary missing from multipart " + "response Content-Type: {}", + content_type)))); + } + return boundary; +} + } // namespace cloud_storage_clients::util diff --git a/src/v/cloud_storage_clients/util.h b/src/v/cloud_storage_clients/util.h index d5321d7c61dcb..4a4f1c2574900 100644 --- a/src/v/cloud_storage_clients/util.h +++ b/src/v/cloud_storage_clients/util.h @@ -19,6 +19,8 @@ #include +#include + namespace cloud_storage_clients::util { /// \brief; Handle all client errors which are not error responses from the @@ -202,4 +204,7 @@ struct multipart_subresponse { iobuf _body; }; +std::expected +find_multipart_boundary(const http::client::response_header&); + } // namespace cloud_storage_clients::util From 84e1679b2bfede3512953dd43801ed641e98f4dc Mon Sep 17 00:00:00 2001 From: Oren Leiman Date: Wed, 7 Jan 2026 00:03:24 -0800 Subject: [PATCH 2/2] cs_clients/tests: Add some gmock flavor to util_test Signed-off-by: Oren Leiman --- .../cloud_storage_clients/tests/util_test.cc | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/v/cloud_storage_clients/tests/util_test.cc b/src/v/cloud_storage_clients/tests/util_test.cc index 5e482850c62e5..b2ef1414ea424 100644 --- a/src/v/cloud_storage_clients/tests/util_test.cc +++ b/src/v/cloud_storage_clients/tests/util_test.cc @@ -147,8 +147,8 @@ TEST(MultipartParser, SinglePart) { EXPECT_TRUE(part1.has_value()); auto content = part1.value().linearize_to_string(); - EXPECT_TRUE(content.contains("Content-Type: text/plain")); - EXPECT_TRUE(content.contains("Hello World")); + EXPECT_THAT(content, testing::HasSubstr("Content-Type: text/plain")); + EXPECT_THAT(content, testing::HasSubstr("Hello World")); // Should be no more parts auto part2 = parser.get_part(); @@ -191,9 +191,12 @@ TEST(MultipartParser, MultipleParts) { EXPECT_TRUE(part3.has_value()); // Verify content - EXPECT_TRUE(part1.value().linearize_to_string().contains("First part")); - EXPECT_TRUE(part2.value().linearize_to_string().contains("Second part")); - EXPECT_TRUE(part3.value().linearize_to_string().contains("Third part")); + EXPECT_THAT( + part1.value().linearize_to_string(), testing::HasSubstr("First part")); + EXPECT_THAT( + part2.value().linearize_to_string(), testing::HasSubstr("Second part")); + EXPECT_THAT( + part3.value().linearize_to_string(), testing::HasSubstr("Third part")); // Should be no more parts auto part4 = parser.get_part(); @@ -325,10 +328,11 @@ TEST(MultipartSubresponse, ParseErrorHeader) { // Should extract error message auto error = subresponse.error("x-ms-error-code"); EXPECT_TRUE(error.has_value()); - EXPECT_TRUE(error.value().contains("403")); - EXPECT_TRUE( - error.value().contains(fmt::format("{}", subresponse.result()))); - EXPECT_TRUE(error.value().contains("AuthenticationFailed")); + EXPECT_THAT(error.value(), testing::HasSubstr("403")); + EXPECT_THAT( + error.value(), + testing::HasSubstr(fmt::format("{}", subresponse.result()))); + EXPECT_THAT(error.value(), testing::HasSubstr("AuthenticationFailed")); } TEST(MultipartSubresponse, ParseErrorNoMessage) { @@ -350,10 +354,11 @@ TEST(MultipartSubresponse, ParseErrorNoMessage) { // Error should still be returned with "Unknown" reason auto error = subresponse.error("x-ms-error-code"); EXPECT_TRUE(error.has_value()); - EXPECT_TRUE(error.value().contains("500")); - EXPECT_TRUE( - error.value().contains(fmt::format("{}", subresponse.result()))); - EXPECT_TRUE(error.value().contains("Unknown")); + EXPECT_THAT(error.value(), testing::HasSubstr("500")); + EXPECT_THAT( + error.value(), + testing::HasSubstr(fmt::format("{}", subresponse.result()))); + EXPECT_THAT(error.value(), testing::HasSubstr("Unknown")); } TEST(MultipartSubresponse, ParseErrorInBody) { @@ -378,10 +383,11 @@ TEST(MultipartSubresponse, ParseErrorInBody) { auto error = subresponse.error( [](iobuf b) { return b.linearize_to_string(); }); EXPECT_TRUE(error.has_value()); - EXPECT_TRUE(error.value().contains("500")); - EXPECT_TRUE( - error.value().contains(fmt::format("{}", subresponse.result()))); - EXPECT_TRUE(error.value().contains("OOPS")); + EXPECT_THAT(error.value(), testing::HasSubstr("500")); + EXPECT_THAT( + error.value(), + testing::HasSubstr(fmt::format("{}", subresponse.result()))); + EXPECT_THAT(error.value(), testing::HasSubstr("OOPS")); } // ============================================================================ @@ -635,7 +641,8 @@ TEST(MimeHeaderMalformed, MultipleColons) { auto content_type = header.get(boost::beast::http::field::content_type); EXPECT_TRUE(content_type.has_value()); // Should include everything after first ": " - EXPECT_TRUE(content_type.value().contains("text/plain: with: colons")); + EXPECT_THAT( + content_type.value(), testing::HasSubstr("text/plain: with: colons")); } TEST(MimeHeaderMalformed, ParseEmpty) {