diff --git a/include/ylt/struct_pack.hpp b/include/ylt/struct_pack.hpp index 971874801..d240585f0 100644 --- a/include/ylt/struct_pack.hpp +++ b/include/ylt/struct_pack.hpp @@ -309,26 +309,16 @@ template 0) { - if SP_UNLIKELY (delta > consume_len) { - ret = struct_pack::errc::invalid_buffer; - if constexpr (struct_pack::seek_reader_t) - if SP_UNLIKELY (!reader.seekg(old_pos)) { - return struct_pack::errc::seek_failed; - } - } - else { - reader.ignore(consume_len - delta); - } + if SP_LIKELY (consume_len > 0) { + if SP_UNLIKELY (delta > consume_len) { + // TODO test this branch + ret = struct_pack::errc::invalid_buffer; + } + else { + reader.ignore(consume_len - delta); } } - else { - if constexpr (struct_pack::seek_reader_t) - if SP_UNLIKELY (!reader.seekg(old_pos)) { - return struct_pack::errc::seek_failed; - } - } + return ret; } #if __cpp_concepts >= 201907L diff --git a/include/ylt/struct_pack/error_code.hpp b/include/ylt/struct_pack/error_code.hpp index 81b67df79..20fe62f34 100644 --- a/include/ylt/struct_pack/error_code.hpp +++ b/include/ylt/struct_pack/error_code.hpp @@ -23,8 +23,7 @@ enum class errc { no_buffer_space, invalid_buffer, hash_conflict, - seek_failed, - too_width_size + invalid_width_of_container_length, }; namespace detail { diff --git a/include/ylt/struct_pack/reflection.hpp b/include/ylt/struct_pack/reflection.hpp index f92779654..e45719dbb 100644 --- a/include/ylt/struct_pack/reflection.hpp +++ b/include/ylt/struct_pack/reflection.hpp @@ -106,11 +106,6 @@ concept view_reader_t = reader_t && requires(T t) { { t.read_view(std::size_t{}) } -> std::convertible_to; }; -template -concept seek_reader_t = reader_t && requires(T t) { - t.seekg(std::size_t{}); -}; - #else template @@ -147,17 +142,59 @@ struct view_reader_t_impl< template constexpr bool view_reader_t = reader_t &&view_reader_t_impl::value; +#endif + +#if __cpp_concepts >= 201907L + +template +concept check_reader_t = reader_t && requires(T t) { + t.check(std::size_t{}); +}; + +template +concept can_reserve = requires(T t) { + t.reserve(std::size_t{}); +}; + +template +concept can_shrink_to_fit = requires(T t) { + t.shrink_to_fit(); +}; + +#else + +template +struct check_reader_t_impl : std::false_type {}; + +template +struct check_reader_t_impl< + T, std::void_t().check(std::size_t{}))>> + : std::true_type {}; + +template +constexpr bool check_reader_t = reader_t &&check_reader_t_impl::value; + +template +struct can_reserve_impl : std::false_type {}; + +template +struct can_reserve_impl< + T, std::void_t().reserve(std::size_t{}))>> + : std::true_type {}; + +template +constexpr bool can_reserve = can_reserve_impl::value; template -struct seek_reader_t_impl : std::false_type {}; +struct can_shrink_to_fit_impl : std::false_type {}; template -struct seek_reader_t_impl< - T, std::void_t().seekg(std::size_t{}))>> +struct can_shrink_to_fit_impl< + T, std::void_t().shrink_to_fit())>> : std::true_type {}; template -constexpr bool seek_reader_t = reader_t &&seek_reader_t_impl::value; +constexpr bool can_shrink_to_fit = can_shrink_to_fit_impl::value; #endif diff --git a/include/ylt/struct_pack/unpacker.hpp b/include/ylt/struct_pack/unpacker.hpp index 786c655c8..a2d92e5f8 100644 --- a/include/ylt/struct_pack/unpacker.hpp +++ b/include/ylt/struct_pack/unpacker.hpp @@ -19,13 +19,7 @@ #include #include #include -#include -#include #include -#include -#include -#include -#include #include #include #include @@ -36,15 +30,14 @@ #include "derived_helper.hpp" #include "endian_wrapper.hpp" #include "error_code.hpp" -#include "md5_constexpr.hpp" -#include "packer.hpp" #include "reflection.hpp" -#include "trivial_view.hpp" #include "type_calculate.hpp" #include "type_id.hpp" #include "type_trait.hpp" #include "varint.hpp" +#define STRUCT_PACK_MAX_UNCONFIRM_PREREAD_SIZE 1 * 1024 * 1024 // 1MB + namespace struct_pack { namespace detail { @@ -55,15 +48,16 @@ struct memory_reader { constexpr memory_reader(const char *beg, const char *end) noexcept : now(beg), end(end) {} bool read(char *target, size_t len) { - if SP_UNLIKELY (now + len > end) { + if SP_UNLIKELY (end - now < len) { return false; } memcpy(target, now, len); now += len; return true; } + bool check(size_t len) { return end - now >= len; } const char *read_view(size_t len) { - if SP_UNLIKELY (now + len > end) { + if SP_UNLIKELY (end - now < len) { return nullptr; } auto ret = now; @@ -71,7 +65,7 @@ struct memory_reader { return ret; } bool ignore(size_t len) { - if SP_UNLIKELY (now + len > end) { + if SP_UNLIKELY (end - now < len) { return false; } now += len; @@ -133,14 +127,14 @@ class unpacker { err_code = deserialize_many<8, UINT64_MAX, true>(t, args...); } else { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } break; #else case 3: if constexpr (sizeof(std::size_t) < 8) { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } case 2: case 1: @@ -194,13 +188,13 @@ class unpacker { err_code = deserialize_many<8, UINT64_MAX, true>(t, args...); } else { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } break; #else case 3: if constexpr (sizeof(std::size_t) < 8) { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } case 2: case 1: @@ -256,13 +250,13 @@ class unpacker { err_code = get_field_impl<8, UINT64_MAX, U, I>(field); } else { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } break; #else case 3: if constexpr (sizeof(std::size_t) < 8) { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } case 2: case 1: @@ -329,14 +323,14 @@ class unpacker { ...); } else { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } break; #else case 3: if constexpr (sizeof(std::size_t) < 8) { - return struct_pack::errc::too_width_size; + return struct_pack::errc::invalid_width_of_container_length; } case 2: case 1: @@ -407,13 +401,13 @@ class unpacker { ...); } else { - return errc::too_width_size; + return errc::invalid_width_of_container_length; } break; #else case 3: if constexpr (sizeof(std::size_t) < 8) { - return errc::too_width_size; + return errc::invalid_width_of_container_length; } case 2: case 1: @@ -509,7 +503,7 @@ class unpacker { } } else { - return {errc::too_width_size, 0}; + return {errc::invalid_width_of_container_length, 0}; } break; default: @@ -869,28 +863,27 @@ class unpacker { } } else if constexpr (container) { - std::size_t size64 = 0; + std::size_t size = 0; bool result{}; if constexpr (size_type == 1) { - if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size)) { return struct_pack::errc::no_buffer_space; } } #ifdef STRUCT_PACK_OPTIMIZE else if constexpr (size_type == 2) { - if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size)) { return struct_pack::errc::no_buffer_space; } } else if constexpr (size_type == 4) { - if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size)) { return struct_pack::errc::no_buffer_space; } } else if constexpr (size_type == 8) { if constexpr (sizeof(std::size_t) >= 8) { - if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, - size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper(reader_, size)) { return struct_pack::errc::no_buffer_space; } } @@ -905,18 +898,18 @@ class unpacker { else { switch (size_type_) { case 1: - if SP_UNLIKELY (!low_bytes_read_wrapper<2>(reader_, size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper<2>(reader_, size)) { return struct_pack::errc::no_buffer_space; } break; case 2: - if SP_UNLIKELY (!low_bytes_read_wrapper<4>(reader_, size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper<4>(reader_, size)) { return struct_pack::errc::no_buffer_space; } break; case 3: if constexpr (sizeof(std::size_t) >= 8) { - if SP_UNLIKELY (!low_bytes_read_wrapper<8>(reader_, size64)) { + if SP_UNLIKELY (!low_bytes_read_wrapper<8>(reader_, size)) { return struct_pack::errc::no_buffer_space; } } @@ -929,7 +922,7 @@ class unpacker { } } #endif - if (size64 == 0) { + if (size == 0) { return {}; } if constexpr (map_container) { @@ -937,12 +930,17 @@ class unpacker { value{}; if constexpr (is_trivial_serializable::value && !NotSkip) { - return reader_.ignore(size64 * sizeof(value)) - ? errc{} - : errc::no_buffer_space; + if constexpr (sizeof(value) > 1) { + if SP_UNLIKELY (size > SIZE_MAX / sizeof(value)) { + return errc::no_buffer_space; + } + } + return reader_.ignore(size * sizeof(value)) ? errc{} + : errc::no_buffer_space; } else { - for (uint64_t i = 0; i < size64; ++i) { + item.clear(); + for (uint64_t i = 0; i < size; ++i) { code = deserialize_one(value); if SP_UNLIKELY (code != struct_pack::errc{}) { return code; @@ -958,12 +956,17 @@ class unpacker { typename type::value_type value{}; if constexpr (is_trivial_serializable::value && !NotSkip) { - return reader_.ignore(size64 * sizeof(value)) - ? errc{} - : errc::no_buffer_space; + if constexpr (sizeof(value) > 1) { + if SP_UNLIKELY (size > SIZE_MAX / sizeof(value)) { + return errc::no_buffer_space; + } + } + return reader_.ignore(size * sizeof(value)) ? errc{} + : errc::no_buffer_space; } else { - for (uint64_t i = 0; i < size64; ++i) { + item.clear(); + for (uint64_t i = 0; i < size; ++i) { code = deserialize_one(value); if SP_UNLIKELY (code != struct_pack::errc{}) { return code; @@ -977,37 +980,75 @@ class unpacker { } else { using value_type = typename type::value_type; + constexpr std::size_t block_lim_cnt = + STRUCT_PACK_MAX_UNCONFIRM_PREREAD_SIZE / sizeof(value_type); if constexpr (trivially_copyable_container) { - uint64_t mem_sz = size64 * sizeof(value_type); + if constexpr (sizeof(value_type) > 1) { + if SP_UNLIKELY (size > SIZE_MAX / sizeof(value_type)) { + return errc::no_buffer_space; + } + } + std::size_t mem_sz = size * sizeof(value_type); if constexpr (NotSkip) { if constexpr (string_view || dynamic_span) { static_assert( view_reader_t, "The Reader isn't a view_reader, can't deserialize " "a string_view/span"); - static_assert(is_little_endian_copyable, - "zero-copy in big endian is limit."); const char *view = reader_.read_view(mem_sz); if SP_UNLIKELY (view == nullptr) { return struct_pack::errc::no_buffer_space; } - item = {(value_type *)(view), (std::size_t)size64}; - } - else if constexpr (is_little_endian_copyable) { - resize(item, size64); - // item.resize(size64); - if SP_UNLIKELY (!read_bytes_array(reader_, (char *)item.data(), - mem_sz)) { - return struct_pack::errc::no_buffer_space; - } + item = {(value_type *)(view), (std::size_t)size}; } else { - resize(item, size64); - for (auto &i : item) { - code = deserialize_one(i); - if SP_UNLIKELY (code != struct_pack::errc{}) { - return code; + if constexpr (check_reader_t) { + if SP_UNLIKELY (!reader_.check(mem_sz)) { + return struct_pack::errc::no_buffer_space; + } + resize(item, size); + if constexpr (is_little_endian_copyable) { + auto ec = + read_bytes_array(reader_, (char *)item.data(), mem_sz); + assert(ec == true); + } + else { + for (auto &i : item) { + code = deserialize_one(i); + if SP_UNLIKELY (code != struct_pack::errc{}) { + return code; + } + } + } + } + else { + for (size_t i = 0, len = block_lim_cnt; i < size; + i += block_lim_cnt) { + if (i + block_lim_cnt >= size) { + len = size - i; + } + resize(item, i + len); + if constexpr (is_little_endian_copyable) { + if SP_UNLIKELY (!read_bytes_array( + reader_, (char *)(item.data() + i), + len * sizeof(value_type))) { + item.resize(i); + if constexpr (can_shrink_to_fit) { + item.shrink_to_fit(); + } + return struct_pack::errc::no_buffer_space; + } + } + else { + for (size_t j = i; j < i + len; ++j) { + code = deserialize_one( + item[j]); + if SP_UNLIKELY (code != struct_pack::errc{}) { + return code; + } + } + } } } } @@ -1017,25 +1058,33 @@ class unpacker { } } else { - if constexpr (NotSkip) { - if constexpr (dynamic_span) { - static_assert(!dynamic_span, - "It's illegal to deserialize a span which T " - "is a non-trival-serializable type."); + if constexpr (dynamic_span) { + static_assert(!dynamic_span, + "It's illegal to deserialize a span which T " + "is a non-trival-serializable type."); + } + else if constexpr (NotSkip) { + item.clear(); + if constexpr (can_reserve) { + item.reserve((std::min)(size, block_lim_cnt)); } - else { - item.resize(size64); - for (auto &i : item) { - code = deserialize_one(i); - if SP_UNLIKELY (code != struct_pack::errc{}) { - return code; + for (size_t i = 0; i < size; ++i) { + item.emplace_back(); + code = + deserialize_one(item.back()); + if SP_UNLIKELY (code != struct_pack::errc{}) { + if constexpr (can_reserve) { + if constexpr (can_shrink_to_fit) { + item.shrink_to_fit(); // release reserve memory + } } + return code; } } } else { value_type useless; - for (size_t i = 0; i < size64; ++i) { + for (size_t i = 0; i < size; ++i) { code = deserialize_one(useless); if SP_UNLIKELY (code != struct_pack::errc{}) { return code; @@ -1393,3 +1442,5 @@ template } // namespace detail } // namespace struct_pack + +#undef STRUCT_PACK_MAX_UNCONFIRM_PREREAD_SIZE \ No newline at end of file diff --git a/src/struct_pack/benchmark/benchmark.cpp b/src/struct_pack/benchmark/benchmark.cpp index 6e32613d5..4749552ab 100644 --- a/src/struct_pack/benchmark/benchmark.cpp +++ b/src/struct_pack/benchmark/benchmark.cpp @@ -1,7 +1,6 @@ +#include #include #include -#include -#include #include "struct_pack_sample.hpp" diff --git a/src/struct_pack/tests/test_pragma_pack.cpp b/src/struct_pack/tests/test_pragma_pack.cpp index 85757fa2b..04e9206e3 100644 --- a/src/struct_pack/tests/test_pragma_pack.cpp +++ b/src/struct_pack/tests/test_pragma_pack.cpp @@ -14,7 +14,7 @@ * limitations under the License. */ #include -#include +#include #include #include diff --git a/src/struct_pack/tests/test_serialize.cpp b/src/struct_pack/tests/test_serialize.cpp index 9bf0de4c3..0edab4546 100644 --- a/src/struct_pack/tests/test_serialize.cpp +++ b/src/struct_pack/tests/test_serialize.cpp @@ -28,6 +28,8 @@ #include "ylt/struct_pack/compatible.hpp" #include "ylt/struct_pack/endian_wrapper.hpp" +#include "ylt/struct_pack/error_code.hpp" +#include "ylt/struct_pack/reflection.hpp" #define private public #include @@ -1292,7 +1294,8 @@ TEST_CASE("test width too big") { std::string>(buffer); REQUIRE(result.has_value() == false); if constexpr (sizeof(std::size_t) < 8) { - CHECK(result.error() == struct_pack::errc::too_width_size); + CHECK(result.error() == + struct_pack::errc::invalid_width_of_container_length); } else { CHECK(result.error() == struct_pack::errc::no_buffer_space); @@ -1306,7 +1309,8 @@ TEST_CASE("test width too big") { std::string>(buffer, len); REQUIRE(result.has_value() == false); if constexpr (sizeof(std::size_t) < 8) { - CHECK(result.error() == struct_pack::errc::too_width_size); + CHECK(result.error() == + struct_pack::errc::invalid_width_of_container_length); } else { CHECK(result.error() == struct_pack::errc::no_buffer_space); @@ -1320,7 +1324,8 @@ TEST_CASE("test width too big") { struct_pack::DISABLE_ALL_META_INFO>(buffer); REQUIRE(result.has_value() == false); if constexpr (sizeof(std::size_t) < 8) { - CHECK(result.error() == struct_pack::errc::too_width_size); + CHECK(result.error() == + struct_pack::errc::invalid_width_of_container_length); } else { CHECK(result.error() == struct_pack::errc::no_buffer_space); @@ -1340,10 +1345,52 @@ TEST_CASE("test width too big") { std::pair>>(buffer); REQUIRE(result.has_value() == false); if constexpr (sizeof(std::size_t) < 8) { - CHECK(result.error() == struct_pack::errc::too_width_size); + CHECK(result.error() == + struct_pack::errc::invalid_width_of_container_length); } else { CHECK(result.error() == struct_pack::errc::no_buffer_space); } } +} + +TEST_CASE("test broken length") { + auto buffer = + struct_pack::serialize( + std::string{"ABCDEFGHIJKL"}); + if (sizeof(std::size_t) == 8) { + buffer[0] = 0b11000; + std::size_t i = UINT64_MAX; + memcpy(buffer.data() + 1, &i, sizeof(i)); + } + else { + buffer[0] = 0b10000; + std::size_t i = UINT32_MAX; + memcpy(buffer.data() + 1, &i, sizeof(i)); + } + auto result = + struct_pack::deserialize( + buffer); + REQUIRE(result.has_value() == false); + CHECK(result.error() == struct_pack::errc::no_buffer_space); +} + +TEST_CASE("test broken length with overflow") { + auto buffer = + struct_pack::serialize( + std::u16string{u"ABCDEFGHIJKL"}); + if (sizeof(std::size_t) == 8) { + buffer[0] = 0b11000; + std::size_t i = UINT64_MAX; + memcpy(buffer.data() + 1, &i, sizeof(i)); + } + else { + buffer[0] = 0b10000; + std::size_t i = UINT32_MAX; + memcpy(buffer.data() + 1, &i, sizeof(i)); + } + auto result = struct_pack::deserialize(buffer); + REQUIRE(result.has_value() == false); + CHECK(result.error() == struct_pack::errc::no_buffer_space); } \ No newline at end of file diff --git a/src/struct_pack/tests/test_stream.cpp b/src/struct_pack/tests/test_stream.cpp index 5710f51b2..fa8525e39 100644 --- a/src/struct_pack/tests/test_stream.cpp +++ b/src/struct_pack/tests/test_stream.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -200,4 +201,62 @@ TEST_CASE("test compatible obj") { CHECK(*res == std::nullopt); } } +} + +TEST_CASE("testing file size no enough") { + std::vector data = {"Hello", "Hi", "Hey", "Hoo"}; + { + std::ofstream of("tmp.data", std::ofstream::binary | std::ofstream::out); + auto buffer = struct_pack::serialize(data); + for (int i = 0; i < 5; ++i) buffer.pop_back(); + of.write(buffer.data(), buffer.size()); + } + { + std::ifstream ifi("tmp.data", std::ios::in | std::ios::binary); + std::vector data2; + std::vector data3 = {"Hello", "Hi", ""}; + auto ec = struct_pack::deserialize_to(data2, ifi); + CHECK(ec == struct_pack::errc::no_buffer_space); + CHECK(data3 == data2); + CHECK(data2.capacity() == 3); + } + std::filesystem::remove("tmp.data"); +} + +TEST_CASE("testing broken container size") { + SUBCASE("hacker 1") { + std::string data(2 * 1024 * 1024, 'A'); + { + std::ofstream of("tmp.data", std::ofstream::binary | std::ofstream::out); + auto buffer = struct_pack::serialize(data); + buffer = buffer.substr(0, 16); + of.write(buffer.data(), buffer.size()); + } + { + std::ifstream ifi("tmp.data", std::ios::in | std::ios::binary); + std::string data2; + auto ec = struct_pack::deserialize_to(data2, ifi); + CHECK(ec == struct_pack::errc::no_buffer_space); + CHECK(data2.size() == 0); + CHECK(data2.capacity() < 100); // SSO + } + } + SUBCASE("hacker 2") { + std::string data(2 * 1024 * 1024, 'A'); + { + std::ofstream of("tmp.data", std::ofstream::binary | std::ofstream::out); + auto buffer = struct_pack::serialize(data); + buffer = buffer.substr(0, 2 * 1024 * 1024 - 10000); + of.write(buffer.data(), buffer.size()); + } + { + std::ifstream ifi("tmp.data", std::ios::in | std::ios::binary); + std::string data2; + auto ec = struct_pack::deserialize_to(data2, ifi); + CHECK(ec == struct_pack::errc::no_buffer_space); + CHECK(data2.size() == 1 * 1024 * 1024); + CHECK(data2.capacity() < 2 * 1024 * 1024); + } + } + std::filesystem::remove("tmp.data"); } \ No newline at end of file