Skip to content

Commit df8b8be

Browse files
author
kelbon
committed
stream decoder
1 parent e937f2e commit df8b8be

7 files changed

Lines changed: 190 additions & 25 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,11 @@ include(cmake/get_cpm.cmake)
2525
set(BOOST_INCLUDE_LIBRARIES intrusive)
2626
CPMAddPackage(
2727
NAME Boost
28-
VERSION 1.84.0
28+
VERSION 1.87.0
2929
URL https://github.com/boostorg/boost/releases/download/boost-1.84.0/boost-1.84.0.tar.xz
3030
OPTIONS "BOOST_ENABLE_CMAKE ON"
3131
)
3232
unset(BOOST_INCLUDE_LIBRARIES)
33-
find_package(Boost 1.84 COMPONENTS intrusive REQUIRED)
3433

3534
endif()
3635

include/hpack/basic_types.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@ struct protocol_error : std::exception {
1919
}
2020
};
2121

22-
} // namespace hpack
23-
24-
namespace hpack {
22+
// thrown if there is not enough data to read a header
23+
struct incomplete_data_error : hpack::protocol_error {
24+
incomplete_data_error() : hpack::protocol_error("incomplete data") {
25+
}
26+
};
2527

2628
struct sym_info_t {
2729
uint32_t bits;

include/hpack/decoder.hpp

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,16 @@
33
#include "hpack/basic_types.hpp"
44
#include "hpack/dynamic_table.hpp"
55

6+
#include <span>
67
#include <utility>
78

89
namespace hpack {
910

11+
// helper for `decode_string`. Temporary storage for decoded strings
12+
//
13+
// may store string_view (AND NOT OWN IT!), may store huffman decoded string
14+
// and in this case memory will be allocated and owned
15+
// tries to reuse memory when a new huffman string is set and memory is already allocated
1016
struct decoded_string {
1117
private:
1218
const char* data = nullptr;
@@ -15,10 +21,6 @@ struct decoded_string {
1521
// default -1 for removing ambiguity between 'not allocated' and 'allocated 1 byte' (log2(1) == 0)
1622
int8_t allocated_sz_log2 = -1;
1723

18-
friend void decode_string(In&, In, decoded_string&);
19-
20-
void set_huffman(const char* ptr, size_type len);
21-
2224
public:
2325
decoded_string() = default;
2426

@@ -43,12 +45,18 @@ struct decoded_string {
4345
return *this;
4446
}
4547

48+
void set_huffman(const char* ptr, size_type len);
49+
// Note: *this will not own the `ptr` memory, it only holds a view of it
50+
void set_not_huffman(const char* ptr, size_type len) {
51+
reset();
52+
data = ptr;
53+
sz = len;
54+
}
55+
4656
// not huffman encoded string
4757
decoded_string& operator=(std::string_view str) noexcept {
4858
assert(std::in_range<size_type>(str.size()));
49-
reset();
50-
data = str.data();
51-
sz = str.size();
59+
set_not_huffman(str.data(), str.size());
5260
return *this;
5361
}
5462

@@ -123,7 +131,6 @@ struct header_view {
123131
}
124132
};
125133

126-
// precondition: in != e
127134
void decode_string(In& in, In e, decoded_string& out);
128135

129136
struct decoder {
@@ -137,17 +144,82 @@ struct decoder {
137144

138145
decoder(decoder&&) = default;
139146
decoder& operator=(decoder&&) noexcept = default;
147+
140148
/*
141149
Note: this function ignores special 'cookie' header case
142150
https://www.rfc-editor.org/rfc/rfc7540#section-8.1.2.5
143151
and protocol error if decoded header name is not lowercase
144152
*/
145-
// precondition: in != e
146153
void decode_header(In& in, In e, header_view& out);
147154

148155
// returns status code
149156
// it's always the first header of a response, so 'in' must point to the first byte of the headers block
150157
int decode_response_status(In& in, In e);
151158
};
152159

160+
// consumes a headers block fragment by fragment, which allows HTTP/2 CONTINUATION frames to be parsed part by part
161+
struct stream_decoder {
162+
private:
163+
decoder& dec;
164+
std::vector<byte_t> incomplete;
165+
166+
// returns the position of the first unparsed byte
167+
template <typename V>
168+
In do_feed(std::span<byte_t> chunk, bool last_chunk, V&& visitor) {
169+
In in = chunk.data();
170+
In e = in + chunk.size();
171+
assert(in != e);
172+
In in_just_before_fail;
173+
try {
174+
header_view header;
175+
while (in != e) {
176+
in_just_before_fail = in;
177+
178+
dec.decode_header(in, e, header);
179+
180+
if (header) [[likely]] // dynamic size update decoded without error
181+
visitor(header.name.str(), header.value.str());
182+
}
183+
// successfully parsed all headers
184+
return e;
185+
} catch (hpack::incomplete_data_error&) {
186+
if (last_chunk)
187+
throw;
188+
}
189+
return in_just_before_fail;
190+
}
191+
192+
public:
193+
stream_decoder(decoder& d) noexcept : dec(d) {
194+
}
195+
196+
stream_decoder(stream_decoder&&) = delete;
197+
void operator=(stream_decoder&&) = delete;
198+
199+
// `visitor` should accept two string_views, name and value
200+
// optimized for the case when each `chunk` is much larger than one header
201+
template <typename V>
202+
void feed(std::span<byte_t> chunk, bool last_chunk, V&& visitor) {
203+
if (chunk.empty()) [[unlikely]]
204+
return;
205+
if (!incomplete.empty()) {
206+
incomplete.insert(incomplete.end(), chunk.begin(), chunk.end());
207+
In i = do_feed(incomplete, last_chunk, std::forward<V>(visitor));
208+
In e = incomplete.data() + incomplete.size();
209+
auto sz = e - i;
210+
// avoid UB on .assign (iterators into vector itself)
211+
memmove(incomplete.data(), i, sz);
212+
incomplete.resize(sz);
213+
} else {
214+
In i = do_feed(chunk, last_chunk, std::forward<V>(visitor));
215+
incomplete.assign(i, In(chunk.data()) + chunk.size());
216+
}
217+
}
218+
219+
// makes it possible to start from the beginning, forgetting previous `feed` calls
220+
void clear() noexcept {
221+
incomplete.clear();
222+
}
223+
};
224+
153225
} // namespace hpack

include/hpack/hpack.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ V decode_headers_block(decoder& dec, std::span<const byte_t> bytes, V visitor) {
2929
header_view header;
3030
while (in != e) {
3131
dec.decode_header(in, e, header);
32-
if (header) // dynamic size update decoded without error
32+
if (header) [[likely]] // dynamic size update decoded without error
3333
visitor(header.name.str(), header.value.str());
3434
}
3535
return visitor;

include/hpack/integers.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#pragma once
22

33
#include <concepts>
4-
4+
#include <cassert>
55
#include "hpack/basic_types.hpp"
66

77
namespace hpack {
@@ -49,17 +49,19 @@ O encode_integer(std::type_identity_t<UInt> I, uint8_t N, O _out) noexcept {
4949
return noexport::unadapt<O>(out);
5050
}
5151

52+
// precondition: N <= 8
5253
template <std::unsigned_integral UInt = size_type>
53-
[[nodiscard]] size_type decode_integer(In& in, In e, uint8_t N) {
54+
[[nodiscard]] UInt decode_integer(In& in, In e, uint8_t N) {
55+
assert(N <= 8);
5456
const UInt prefix_mask = (1 << N) - 1;
55-
// get first N bits
5657
auto pull = [&] {
5758
if (in == e)
58-
throw HPACK_PROTOCOL_ERROR(invalid integer representation);
59+
throw incomplete_data_error();
5960
auto i = *in;
6061
++in;
6162
return i;
6263
};
64+
// get first N bits
6365
UInt I = pull() & prefix_mask;
6466
if (I < prefix_mask)
6567
return I;

src/decoder.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,7 @@ static void decode_header_fully_indexed(In& in, In e, dynamic_table_t& dyntab, h
132132
out = entry;
133133
}
134134

135-
// header with incremental indexing
136-
static void decode_header_cache(In& in, In e, dynamic_table_t& dyntab, header_view& out) {
135+
static void decode_header_incremental_indexing(In& in, In e, dynamic_table_t& dyntab, header_view& out) {
137136
assert(in != e && *in & 0b0100'0000);
138137
decode_header_impl(in, e, 6, dyntab, out);
139138
dyntab.add_entry(out.name.str(), out.value.str());
@@ -159,15 +158,15 @@ static size_type decode_dynamic_table_size_update(In& in, In e) {
159158

160159
void decode_string(In& in, In e, decoded_string& out) {
161160
if (in == e)
162-
throw HPACK_PROTOCOL_ERROR(incorrectly encoded string);
161+
throw incomplete_data_error();
163162
bool is_huffman = *in & 0b1000'0000;
164163
size_type str_len = decode_integer(in, e, 7);
165164
if (str_len > std::distance(in, e))
166-
throw HPACK_PROTOCOL_ERROR(size of encoded string not equal to data length);
165+
throw incomplete_data_error();
167166
if (is_huffman)
168167
out.set_huffman((const char*)in, str_len);
169168
else
170-
out = std::string_view((const char*)in, str_len);
169+
out.set_not_huffman((const char*)in, str_len);
171170
in += str_len;
172171
}
173172

@@ -176,7 +175,7 @@ void decoder::decode_header(In& in, In e, header_view& out) {
176175
if (*in & 0b1000'0000)
177176
return decode_header_fully_indexed(in, e, dyntab, out);
178177
if (*in & 0b0100'0000)
179-
return decode_header_cache(in, e, dyntab, out);
178+
return decode_header_incremental_indexing(in, e, dyntab, out);
180179
if (*in & 0b0010'0000) {
181180
dyntab.update_size(decode_dynamic_table_size_update(in, e));
182181
out.name.reset();

tests/test_hpack.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,95 @@ TEST(tg_answer) {
479479
error_if(result != expected);
480480
}
481481

482+
TEST(tg_answer_parts) {
483+
std::vector<hpack::byte_t> bytes = {
484+
0x88, 0x76, 0x89, 0xaa, 0x63, 0x55, 0xe5, 0x80, 0xae, 0x17, 0x97, 0x7, 0x61, 0x96, 0xc3, 0x61, 0xbe,
485+
0x94, 0x3, 0x8a, 0x6e, 0x2d, 0x6a, 0x8, 0x2, 0x69, 0x40, 0x3b, 0x70, 0xf, 0x5c, 0x13, 0x4a, 0x62,
486+
0xd1, 0xbf, 0x5f, 0x8b, 0x1d, 0x75, 0xd0, 0x62, 0xd, 0x26, 0x3d, 0x4c, 0x74, 0x41, 0xea, 0x5c, 0x4,
487+
0x31, 0x39, 0x32, 0x36, 0x0, 0x91, 0x42, 0x6c, 0x31, 0x12, 0xb2, 0x6c, 0x1d, 0x48, 0xac, 0xf6, 0x25,
488+
0x64, 0x14, 0x96, 0xd8, 0x64, 0xfa, 0xa0, 0xa4, 0x7e, 0x56, 0x1c, 0xc5, 0x81, 0x90, 0xb6, 0xcb, 0x80,
489+
0x0, 0x3e, 0xd4, 0x35, 0x44, 0xa2, 0xd9, 0xb, 0xba, 0xd8, 0xef, 0x9e, 0x91, 0x9a, 0xa4, 0x7d, 0xa9,
490+
0x5d, 0x85, 0xa0, 0xe3, 0x93, 0x0, 0x93, 0x19, 0x8, 0x54, 0x21, 0x62, 0x1e, 0xa4, 0xd8, 0x7a, 0x16,
491+
0x1d, 0x14, 0x1f, 0xc2, 0xc7, 0xb0, 0xd3, 0x1a, 0xaf, 0x1, 0x2a, 0x0, 0x94, 0x19, 0x8, 0x54, 0x21,
492+
0x62, 0x1e, 0xa4, 0xd8, 0x7a, 0x16, 0x1d, 0x14, 0x1f, 0xc2, 0xd4, 0x95, 0x33, 0x9e, 0x44, 0x7f, 0x90,
493+
0xc5, 0x83, 0x7f, 0xd2, 0x9a, 0xf5, 0x6e, 0xdf, 0xf4, 0xa6, 0xad, 0x7b, 0xf2, 0x6a, 0xd3, 0xbb, 0x0,
494+
0x94, 0x19, 0x8, 0x54, 0x21, 0x62, 0x1e, 0xa4, 0xd8, 0x7a, 0x16, 0x2f, 0x9a, 0xce, 0x82, 0xad, 0x39,
495+
0x47, 0x21, 0x6c, 0x47, 0xa5, 0xbc, 0x7a, 0x92, 0x5a, 0x92, 0xb6, 0x72, 0xd5, 0x32, 0x67, 0xfa, 0xbc,
496+
0x7a, 0x92, 0x5a, 0x92, 0xb6, 0xff, 0x55, 0x97, 0xea, 0xf8, 0xd2, 0x5f, 0xad, 0xc5, 0xb3, 0xb9, 0x6c,
497+
0xfa, 0xbc, 0x7a, 0xaa, 0x29, 0x12, 0x63, 0xd5,
498+
};
499+
hpack::decoder e;
500+
headers_t expected{
501+
{":status", "200"},
502+
{"server", "nginx/1.18.0"},
503+
{"date", "Fri, 06 Sep 2024 07:08:24 GMT"},
504+
{"content-type", "application/json"},
505+
{"content-length", "1926"},
506+
{"strict-transport-security", "max-age=31536000; includeSubDomains; preload"},
507+
{"access-control-allow-origin", "*"},
508+
{"access-control-allow-methods", "GET, POST, OPTIONS"},
509+
{"access-control-expose-headers", "Content-Length,Content-Type,Date,Server,Connection"},
510+
};
511+
headers_t result;
512+
hpack::stream_decoder decoder(e);
513+
std::span<hpack::byte_t> chunks = bytes;
514+
auto vtor = [&](std::string_view name, std::string_view value) {
515+
result.push_back({std::string(name), std::string(value)});
516+
};
517+
518+
while (!chunks.empty()) {
519+
decoder.feed(chunks.subspan(0, 1), chunks.size() == 1, vtor);
520+
chunks = chunks.subspan(1);
521+
}
522+
error_if(result != expected);
523+
}
524+
525+
TEST(stream_decoder) {
526+
headers_t expected{
527+
{":status", "200"},
528+
{"server", "nginx/1.18.0"},
529+
{"longstr", std::string(15000, 'A')},
530+
{"content-type", "application/json"},
531+
{"content-length", "1926"},
532+
{"strict-transport-security", "max-age=31536000; includeSubDomains; preload"},
533+
{"access-control-allow-origin", "*"},
534+
{"access-control-allow-methods", "GET, POST, OPTIONS"},
535+
{"access-control-expose-headers", "Content-Length,Content-Type,Date,Server,Connection"},
536+
{std::string(15000, 'B'), std::string(15000, 'A')},
537+
};
538+
std::mt19937 g(146);
539+
using dist = std::uniform_int_distribution<>;
540+
bytes_t bytes;
541+
hpack::encoder enc;
542+
// encode twice for using indexes
543+
auto copy = expected;
544+
expected.insert(expected.end(), copy.begin(), copy.end());
545+
for (auto& [n, v] : expected) {
546+
enc.encode(n, v, std::back_inserter(bytes));
547+
}
548+
hpack::decoder dec;
549+
550+
headers_t result;
551+
hpack::stream_decoder decoder(dec);
552+
553+
auto vtor = [&](std::string_view name, std::string_view value) {
554+
result.push_back({std::string(name), std::string(value)});
555+
};
556+
std::span<hpack::byte_t> chunks;
557+
for (int i = 0; i < 100; ++i) {
558+
chunks = bytes;
559+
result.clear();
560+
// `decoder` must be empty after each last_chunk decoder.clear();
561+
while (!chunks.empty()) {
562+
auto sz = dist(0, chunks.size())(g);
563+
decoder.feed(chunks.subspan(0, sz), chunks.size() == sz, vtor);
564+
chunks = chunks.subspan(sz);
565+
}
566+
g.discard(10);
567+
error_if(result != expected);
568+
}
569+
}
570+
482571
TEST(decode_status) {
483572
hpack::encoder e;
484573
hpack::decoder de;
@@ -1013,6 +1102,8 @@ TEST(encode_with_cache) {
10131102
}
10141103

10151104
int main() {
1105+
test_stream_decoder();
1106+
test_tg_answer_parts();
10161107
test_dyntable_move();
10171108
test_encode_status();
10181109
test_encode_with_cache();

0 commit comments

Comments
 (0)