Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 160 additions & 18 deletions src/v/cloud_storage_clients/tests/util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "bytes/iobuf_parser.h"
#include "cloud_storage_clients/util.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>

TEST(Util, AllPathsToFile) {
Expand Down Expand Up @@ -146,8 +147,8 @@ TEST(MultipartParser, SinglePart) {
EXPECT_TRUE(part1.has_value());

auto content = part1.value().linearize_to_string();
EXPECT_TRUE(content.contains("Content-Type: text/plain"));
EXPECT_TRUE(content.contains("Hello World"));
EXPECT_THAT(content, testing::HasSubstr("Content-Type: text/plain"));
EXPECT_THAT(content, testing::HasSubstr("Hello World"));

// Should be no more parts
auto part2 = parser.get_part();
Expand Down Expand Up @@ -190,9 +191,12 @@ TEST(MultipartParser, MultipleParts) {
EXPECT_TRUE(part3.has_value());

// Verify content
EXPECT_TRUE(part1.value().linearize_to_string().contains("First part"));
EXPECT_TRUE(part2.value().linearize_to_string().contains("Second part"));
EXPECT_TRUE(part3.value().linearize_to_string().contains("Third part"));
EXPECT_THAT(
part1.value().linearize_to_string(), testing::HasSubstr("First part"));
EXPECT_THAT(
part2.value().linearize_to_string(), testing::HasSubstr("Second part"));
EXPECT_THAT(
part3.value().linearize_to_string(), testing::HasSubstr("Third part"));

// Should be no more parts
auto part4 = parser.get_part();
Expand Down Expand Up @@ -324,10 +328,11 @@ TEST(MultipartSubresponse, ParseErrorHeader) {
// Should extract error message
auto error = subresponse.error("x-ms-error-code");
EXPECT_TRUE(error.has_value());
EXPECT_TRUE(error.value().contains("403"));
EXPECT_TRUE(
error.value().contains(fmt::format("{}", subresponse.result())));
EXPECT_TRUE(error.value().contains("AuthenticationFailed"));
EXPECT_THAT(error.value(), testing::HasSubstr("403"));
EXPECT_THAT(
error.value(),
testing::HasSubstr(fmt::format("{}", subresponse.result())));
EXPECT_THAT(error.value(), testing::HasSubstr("AuthenticationFailed"));
}

TEST(MultipartSubresponse, ParseErrorNoMessage) {
Expand All @@ -349,10 +354,11 @@ TEST(MultipartSubresponse, ParseErrorNoMessage) {
// Error should still be returned with "Unknown" reason
auto error = subresponse.error("x-ms-error-code");
EXPECT_TRUE(error.has_value());
EXPECT_TRUE(error.value().contains("500"));
EXPECT_TRUE(
error.value().contains(fmt::format("{}", subresponse.result())));
EXPECT_TRUE(error.value().contains("Unknown"));
EXPECT_THAT(error.value(), testing::HasSubstr("500"));
EXPECT_THAT(
error.value(),
testing::HasSubstr(fmt::format("{}", subresponse.result())));
EXPECT_THAT(error.value(), testing::HasSubstr("Unknown"));
}

TEST(MultipartSubresponse, ParseErrorInBody) {
Expand All @@ -377,10 +383,11 @@ TEST(MultipartSubresponse, ParseErrorInBody) {
auto error = subresponse.error(
[](iobuf b) { return b.linearize_to_string(); });
EXPECT_TRUE(error.has_value());
EXPECT_TRUE(error.value().contains("500"));
EXPECT_TRUE(
error.value().contains(fmt::format("{}", subresponse.result())));
EXPECT_TRUE(error.value().contains("OOPS"));
EXPECT_THAT(error.value(), testing::HasSubstr("500"));
EXPECT_THAT(
error.value(),
testing::HasSubstr(fmt::format("{}", subresponse.result())));
EXPECT_THAT(error.value(), testing::HasSubstr("OOPS"));
}

// ============================================================================
Expand Down Expand Up @@ -634,7 +641,8 @@ TEST(MimeHeaderMalformed, MultipleColons) {
auto content_type = header.get(boost::beast::http::field::content_type);
EXPECT_TRUE(content_type.has_value());
// Should include everything after first ": "
EXPECT_TRUE(content_type.value().contains("text/plain: with: colons"));
EXPECT_THAT(
content_type.value(), testing::HasSubstr("text/plain: with: colons"));
}

TEST(MimeHeaderMalformed, ParseEmpty) {
Expand Down Expand Up @@ -805,3 +813,137 @@ TEST(MultipartMalformed, OnlyEndBoundary) {
// accomplish here. As long as multipart_response_parser returns iobuf parts
// that themselves contain properly trimmed HTTP responses (header + body w/
// correct line endings), then multipart_subresponse should work as expected.

// ============================================================================
// find_multipart_boundary tests
// ============================================================================

TEST(FindMultipartBoundary, ValidBoundary) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(
boost::beast::http::field::content_type,
"multipart/mixed; boundary=batch_boundary_123");

auto result = util::find_multipart_boundary(headers);

EXPECT_TRUE(result.has_value());
EXPECT_EQ(result.value(), "batch_boundary_123");
}

TEST(FindMultipartBoundary, BoundaryWithWhitespace) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(
boost::beast::http::field::content_type,
"multipart/mixed; boundary =\t batch_boundary_123");

auto result = util::find_multipart_boundary(headers);

EXPECT_TRUE(result.has_value());
// Whitespace should be stripped
EXPECT_EQ(result.value(), "batch_boundary_123");
}

TEST(FindMultipartBoundary, BoundaryWithQuotes) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(
boost::beast::http::field::content_type,
R"(multipart/mixed; boundary="batch_boundary_123")");

auto result = util::find_multipart_boundary(headers);

EXPECT_TRUE(result.has_value());
// Quotes should be stripped
EXPECT_EQ(result.value(), "batch_boundary_123");
}

namespace {
ss::sstring eptr_what(std::exception_ptr eptr) {
if (eptr == nullptr) {
return {};
}
try {
std::rethrow_exception(eptr);
} catch (const std::runtime_error& e) {
return e.what();
}
return {};
}
} // namespace

TEST(FindMultipartBoundary, MissingContentType) {
using namespace cloud_storage_clients;

http::client::response_header headers;
// No Content-Type header set

auto result = util::find_multipart_boundary(headers);

EXPECT_FALSE(result.has_value()) << result.value();
EXPECT_THAT(
eptr_what(result.error()), testing::HasSubstr("Content-Type missing"));
}

TEST(FindMultipartBoundary, NotMultipartContentType) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(boost::beast::http::field::content_type, "application/json");

auto result = util::find_multipart_boundary(headers);

EXPECT_FALSE(result.has_value()) << result.value();
EXPECT_THAT(
eptr_what(result.error()),
testing::HasSubstr("Expected multipart Content-Type"));
}

TEST(FindMultipartBoundary, MissingBoundaryParameter) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(boost::beast::http::field::content_type, "multipart/mixed");

auto result = util::find_multipart_boundary(headers);

EXPECT_FALSE(result.has_value()) << result.value();
EXPECT_THAT(
eptr_what(result.error()),
testing::HasSubstr("Boundary missing from multipart"));
}

TEST(FindMultipartBoundary, MissingBoundaryEqualSign) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(
boost::beast::http::field::content_type,
"multipart/mixed; boundary foobar");

auto result = util::find_multipart_boundary(headers);

EXPECT_FALSE(result.has_value()) << result.value();
EXPECT_THAT(
eptr_what(result.error()),
testing::HasSubstr("Boundary missing from multipart"));
}

TEST(FindMultipartBoundary, EmptyBoundaryParameter) {
using namespace cloud_storage_clients;

http::client::response_header headers;
headers.insert(
boost::beast::http::field::content_type, R"(multipart/mixed; boundary=)");

auto result = util::find_multipart_boundary(headers);

EXPECT_FALSE(result.has_value()) << result.value();
EXPECT_THAT(
eptr_what(result.error()),
testing::HasSubstr("Boundary missing from multipart"));
}
62 changes: 62 additions & 0 deletions src/v/cloud_storage_clients/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -512,4 +512,66 @@ multipart_subresponse::iobuf_to_constbufseq(const iobuf& buf) {
return seq;
};

std::expected<std::string_view, std::exception_ptr>
find_multipart_boundary(const http::client::response_header& headers) {
constexpr std::string_view boundary_name{"boundary"};
constexpr std::string_view content_type_multipart{"multipart/mixed"};

auto content_type_it = headers.find(
boost::beast::http::field::content_type);
std::string_view boundary;
if (content_type_it == headers.end()) {
return std::unexpected(
std::make_exception_ptr(
std::runtime_error(
"find_multipart_boundary: Content-Type missing")));
}
std::string_view content_type{content_type_it->value()};
if (auto pos = content_type.find(content_type_multipart);
pos == content_type.npos) {
return std::unexpected(
std::make_exception_ptr(
std::runtime_error(
fmt::format(
"find_multipart_boundary: Expected multipart Content-Type: {}",
content_type))));
}
if (auto boundary_pos = content_type.find(boundary_name);
boundary_pos != content_type.npos) {
boundary = content_type.substr(boundary_pos + boundary_name.size());
// remove whitespace (if present) and find exactly one equals sign
int n_eq = 0;
while (!boundary.empty()) {
auto c = boundary.front();
bool is_eq = c == '=';
bool is_whitespace = (c == ' ' || c == '\t');
if (!is_eq && !is_whitespace) {
break;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have "multipart/mixed; boundaryfffff", then boundary will be the substr fffff, and then c will be f and we'll break here, and return fffff as the boundary, but I would have expected that the lack of = would mean that it is malformed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

boundary will be the substring fffff... but the lack of = would mean that it is malformed

correct, but a few lines down we blow away the string view if we didn't find exactly one '=' before the first non-whitespace character. there is a unit test for this 🙂

This function could use some documentation. If the diff looks ok to you aside from that, I'll add a nice block comment in the next PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh yeh makes sense i was thinking those checks were inside the while loop

n_eq += is_eq;
boundary = boundary.substr(1);
}
if (n_eq != 1) {
boundary = {};
}
// Remove quotes if present
if (!boundary.empty() && boundary.front() == '"') {
boundary = boundary.substr(1);
}
if (!boundary.empty() && boundary.back() == '"') {
boundary = boundary.substr(0, boundary.size() - 1);
}
}
if (boundary.empty()) {
return std::unexpected(
std::make_exception_ptr(
std::runtime_error(
fmt::format(
"find_multipart_boundary: Boundary missing from multipart "
"response Content-Type: {}",
content_type))));
}
return boundary;
}

} // namespace cloud_storage_clients::util
5 changes: 5 additions & 0 deletions src/v/cloud_storage_clients/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

#include <boost/property_tree/ptree.hpp>

#include <expected>

namespace cloud_storage_clients::util {

/// \brief; Handle all client errors which are not error responses from the
Expand Down Expand Up @@ -202,4 +204,7 @@ struct multipart_subresponse {
iobuf _body;
};

std::expected<std::string_view, std::exception_ptr>
find_multipart_boundary(const http::client::response_header&);

} // namespace cloud_storage_clients::util