Skip to content

Commit

Permalink
improve skip_till[xml]
Browse files Browse the repository at this point in the history
  • Loading branch information
bbbgan committed Jul 9, 2023
1 parent fae19bf commit 155057d
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 21 deletions.
39 changes: 19 additions & 20 deletions iguana/xml_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,12 @@ IGUANA_INLINE void parse_attr(U &&value, It &&it, It &&end) {
template <plain_t U, typename It>
IGUANA_INLINE void parse_item(U &value, It &&it, It &&end,
std::string_view name) {
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
auto value_begin = it;
auto value_end = skip_pass<'<'>(it, end);
auto value_end = skip_pass_smaller(it, end);
;
parse_value(value, value_begin, value_end);
match_close_tag(it, end, name);
}
Expand All @@ -96,14 +97,14 @@ IGUANA_INLINE void parse_item(U &value, It &&it, It &&end,
--it;
return;
} else {
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
continue;
}
}
auto start = it;
skip_till<' ', '>'>(it, end);
skip_till_greater_or_space(it, end);
std::string_view key = std::string_view{
&*start, static_cast<size_t>(std::distance(start, it))};
if (key != name) [[unlikely]] {
Expand All @@ -119,20 +120,21 @@ template <optional_t U, typename It>
IGUANA_INLINE void parse_item(U &value, It &&it, It &&end,
std::string_view name) {
using value_type = typename std::remove_reference_t<U>::value_type;
skip_till<'>'>(it, end);
if (*(it - 1) == '/') {
skip_till_greater(it, end);
if (*(it - 1) == '/') [[likely]] {
++it;
return;
}

if constexpr (plain_t<value_type>) {
// The following code is for option not to be emplaced
// when parse "...> <..."
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
auto value_begin = it;
auto value_end = skip_pass<'<'>(it, end);
auto value_end = skip_pass_smaller(it, end);
;
if (value_begin == value_end) {
match_close_tag(it, end, name);
return;
Expand Down Expand Up @@ -162,7 +164,7 @@ template <refletable T, typename It>
IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
std::string_view name) {
constexpr auto cdata_idx = get_type_index<is_cdata_t, std::decay_t<T>>();
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
while (it != end) {
Expand All @@ -174,7 +176,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
return; // reach the close tag
} else if (*it == '?') [[unlikely]] {
// <? ... ?>
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
continue;
Expand Down Expand Up @@ -212,22 +214,22 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end,
} else if (*it == 'D') {
// <!D
++it;
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
continue;
} else {
// <!-- -->
++it;
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
skip_sapces_and_newline(it, end);
continue;
}
}

auto start = it;
skip_till<' ', '>'>(it, end);
skip_till_greater_or_space(it, end);
std::string_view key = std::string_view{
&*start, static_cast<size_t>(std::distance(start, it))};
static constexpr auto frozen_map = get_iguana_struct_map<T>();
Expand Down Expand Up @@ -259,14 +261,14 @@ IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) {
skip_sapces_and_newline(it, end);
match<'<'>(it, end);
if (*it == '?') {
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it;
} else {
break;
}
}
auto start = it;
skip_till<' ', '>'>(it, end);
skip_till_greater_or_space(it, end);
std::string_view key =
std::string_view{&*start, static_cast<size_t>(std::distance(start, it))};
detail::parse_attr(value.attr(), it, end);
Expand All @@ -279,17 +281,14 @@ IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) {
skip_sapces_and_newline(it, end);
match<'<'>(it, end);
if (*it == '?') {
skip_till<'>'>(it, end);
skip_till_greater(it, end);
++it; // skip >
// '<?xml ' - xml declaration
// or
// Parse PI
} else {
break;
}
}
auto start = it;
skip_till<' ', '>'>(it, end);
skip_till_greater_or_space(it, end);
std::string_view key =
std::string_view{&*start, static_cast<size_t>(std::distance(start, it))};
detail::parse_item(value, it, end, key);
Expand Down
119 changes: 119 additions & 0 deletions iguana/xml_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,123 @@ IGUANA_INLINE void match_close_tag(auto &&it, auto &&end,
// ++it;
}

// Specialised replacement for skip_till<'>'>(it, end).
//
// Moves `it` forward to the first '>' in [it, end). While at least eight
// bytes remain they are examined as one 64-bit word; the remaining bytes are
// checked one at a time. Throws std::runtime_error("Expected >") when the
// range contains no '>'.
IGUANA_INLINE void skip_till_greater(auto &&it, auto &&end) {
  static_assert(std::contiguous_iterator<std::decay_t<decltype(it)>>);

  // '>' is 0x3E; the pattern repeats it in every byte of the word.
  constexpr uint64_t greater_pattern = 0x3E3E3E3E3E3E3E3E;

  // XOR zeroes every byte equal to '>'. The subtract / and-not / mask step
  // then yields a non-zero word exactly when such a byte exists.
  auto mark_greater = [](uint64_t word) -> uint64_t {
    const uint64_t diff = word ^ greater_pattern;
    return (diff - 0x0101010101010101) & ~diff & 0x8080808080808080;
  };

  if (std::distance(it, end) >= 7) [[likely]] {
    const auto block_limit = end - 7;
    while (it < block_limit) {
      const uint64_t word = *reinterpret_cast<const uint64_t *>(&*it);
      if (const uint64_t hits = mark_greater(word); hits != 0) {
        // Trailing zero bits divided by eight give the byte offset of the
        // lowest marked byte within the word.
        it += (std::countr_zero(hits) >> 3);
        return;
      }
      it += 8;
    }
  }

  // Fewer than eight bytes left: plain byte-wise scan.
  for (; it < end; ++it) {
    if (*it == '>') {
      return;
    }
  }
  throw std::runtime_error("Expected >");
}

// Moves `it` forward to the first '>' or ' ' in [it, end), whichever comes
// first. While at least eight bytes remain they are examined as one 64-bit
// word; the remaining bytes are checked one at a time. Throws
// std::runtime_error("Expected > or space") when neither character occurs.
IGUANA_INLINE void skip_till_greater_or_space(auto &&it, auto &&end) {
  static_assert(std::contiguous_iterator<std::decay_t<decltype(it)>>);

  // '>' is 0x3E and ' ' is 0x20; each pattern repeats its byte eight times.
  constexpr uint64_t greater_pattern = 0x3E3E3E3E3E3E3E3E;
  constexpr uint64_t space_pattern = 0x2020202020202020;

  // Non-zero exactly when `word` contains a zero byte.
  auto mark_zero_bytes = [](uint64_t word) -> uint64_t {
    return (word - 0x0101010101010101) & ~word & 0x8080808080808080;
  };
  // XOR with the pattern zeroes the bytes equal to the wanted character.
  auto mark_matches = [&](uint64_t word, uint64_t pattern) -> uint64_t {
    return mark_zero_bytes(word ^ pattern);
  };

  if (std::distance(it, end) >= 7) [[likely]] {
    const auto block_limit = end - 7;
    while (it < block_limit) {
      const uint64_t word = *reinterpret_cast<const uint64_t *>(&*it);
      const uint64_t hits = mark_matches(word, greater_pattern) |
                            mark_matches(word, space_pattern);
      if (hits != 0) {
        // Trailing zero bits divided by eight give the byte offset of the
        // lowest marked byte within the word.
        it += (std::countr_zero(hits) >> 3);
        return;
      }
      it += 8;
    }
  }

  // Fewer than eight bytes left: plain byte-wise scan.
  for (; it < end; ++it) {
    if (*it == '>' || *it == ' ') {
      return;
    }
  }
  throw std::runtime_error("Expected > or space");
}

// Moves `it` forward to the first '<' in [it, end).
//
// While at least eight bytes remain they are examined as one 64-bit word;
// the remaining bytes are checked one at a time. Throws
// std::runtime_error("Expected <") when the range contains no '<'.
//
// Fix: the byte-wise tail previously tested for '>' (and reported
// "Expected >"), so a '<' located in the final bytes of the buffer was missed
// and a '>' there was wrongly accepted.
IGUANA_INLINE void skip_till_smaller(auto &&it, auto &&end) {
  static_assert(std::contiguous_iterator<std::decay_t<decltype(it)>>);

  // Non-zero exactly when `chunk` contains a zero byte.
  auto has_zero = [](uint64_t chunk) {
    return (((chunk - 0x0101010101010101) & ~chunk) & 0x8080808080808080);
  };
  // '<' is 0x3C; XOR with the repeated pattern zeroes every '<' byte.
  auto has_smaller = [&](uint64_t chunk) {
    return has_zero(chunk ^ 0x3C3C3C3C3C3C3C3C);
  };

  if (std::distance(it, end) >= 7) [[likely]] {
    const auto end_m7 = end - 7;
    for (; it < end_m7; it += 8) {
      // NOTE(review): unaligned 8-byte load through reinterpret_cast, and the
      // offset computation below presumably relies on a little-endian
      // target - confirm, or switch to a memcpy-based load.
      const auto chunk = *reinterpret_cast<const uint64_t *>(&*it);
      const uint64_t test = has_smaller(chunk);
      if (test != 0) {
        // Trailing zero bits divided by eight give the byte offset of the
        // lowest marked byte within the word.
        it += (std::countr_zero(test) >> 3);
        return;
      }
    }
  }

  // Tail end of buffer. Should be rare we even get here.
  for (; it < end; ++it) {
    if (*it == '<') {
      return;
    }
  }
  throw std::runtime_error("Expected <");
}

// Advances `it` just past the next '<' and returns the end of the text that
// preceded it, with trailing spaces trimmed.
//
// On return `it` points at the character after '<'. The returned iterator is
// one past the last non-space character found between the entry position and
// the '<'. If that span is empty or consists only of spaces, the entry
// position itself is returned, so callers comparing it with their saved
// begin iterator see an empty value.
//
// Fix: the backward trim is now bounded by the entry position. Previously it
// could step before the caller's start (and before the buffer) when nothing
// but spaces preceded the '<'.
//
// Any exception raised by skip_till_smaller when no '<' exists propagates.
IGUANA_INLINE auto skip_pass_smaller(auto &&it, auto &&end) {
  const auto first = it;  // lower bound for the trailing-space trim
  skip_till_smaller(it, end);
  auto last = it;  // currently at '<', i.e. one past the preceding text
  ++it;            // step over '<'
  while (last != first && *(last - 1) == ' ') {
    --last;
  }
  return last;
}
} // namespace iguana
2 changes: 1 addition & 1 deletion test/test_xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ TEST_CASE("test vector with attr") {
<item index="0">1</item>
<item index="1">2</item>
<item index="2">3</item>
<item index="3">4</item>
<item index="3">4 </item>
</test_arr_t>
)";
test_arr_t arr;
Expand Down

0 comments on commit 155057d

Please sign in to comment.