Skip to content

Commit 6af3218

Browse files
goldvitaly authored and copybara-github committed
Reimplement ExtensionSet::MergeFrom for the copy case for better performance.
PiperOrigin-RevId: 698707503
1 parent fc6ae67 commit 6af3218

File tree

2 files changed

+228
-113
lines changed

2 files changed

+228
-113
lines changed

src/google/protobuf/extension_set.cc

+194-110
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "absl/hash/hash.h"
2727
#include "absl/log/absl_check.h"
2828
#include "absl/log/absl_log.h"
29+
#include "absl/numeric/bits.h"
2930
#include "google/protobuf/arena.h"
3031
#include "google/protobuf/extension_set_inl.h"
3132
#include "google/protobuf/io/coded_stream.h"
@@ -184,6 +185,19 @@ ExtensionSet::~ExtensionSet() {
184185
}
185186
}
186187

188+
// Allocates storage for a flat map with `powerof2_flat_capacity` entries on
// `arena` via Arena::CreateArray.
//
// `powerof2_flat_capacity` must be a power of two (checked in debug builds).
ExtensionSet::KeyValue* ExtensionSet::AllocateFlatMap(
    Arena* arena, uint16_t powerof2_flat_capacity) {
  // The allocation is kept at a power-of-2 number of bytes so that the arena
  // can reuse the block for other ExtensionSets and RepeatedFields;
  // ReturnArrayMemory is also more efficient with power-of-2 sizes. On 64-bit
  // platforms sizeof(KeyValue) is itself a power of 2, so a power-of-2 entry
  // count is all that is needed.
  constexpr bool kKeyValueSizeIsPowerOf2 =
      absl::has_single_bit(sizeof(KeyValue));
  static_assert(kKeyValueSizeIsPowerOf2 || sizeof(void*) != 8,
                "sizeof(KeyValue) must be a power of 2");
  ABSL_DCHECK(absl::has_single_bit(powerof2_flat_capacity));

  KeyValue* const flat_map =
      Arena::CreateArray<KeyValue>(arena, powerof2_flat_capacity);
  return flat_map;
}
200+
187201
void ExtensionSet::DeleteFlatMap(const ExtensionSet::KeyValue* flat,
188202
uint16_t flat_capacity) {
189203
// Arena::CreateArray already requires a trivially destructible type, but
@@ -986,6 +1000,43 @@ size_t SizeOfUnion(ItX it_dest, ItX end_dest, ItY it_source, ItY end_source) {
9861000
void ExtensionSet::MergeFrom(const MessageLite* extendee,
9871001
const ExtensionSet& other) {
9881002
Prefetch5LinesFrom1Line(&other);
1003+
if (ABSL_PREDICT_TRUE(IsCompletelyEmpty() && !other.is_large())) {
1004+
InternalMergeFromSmallToEmpty(extendee, other);
1005+
return;
1006+
}
1007+
InternalMergeFromSlow(extendee, other);
1008+
}
1009+
1010+
void ExtensionSet::InternalMergeFromSmallToEmpty(const MessageLite* extendee,
1011+
const ExtensionSet& other) {
1012+
ABSL_DCHECK(!other.is_large());
1013+
// Compiler is complaining on potential side effects for `!other.is_large()`.
1014+
ABSL_ASSUME(static_cast<int16_t>(flat_size_) >= 0);
1015+
ABSL_DCHECK(IsCompletelyEmpty());
1016+
1017+
size_t count = other.NumExtensions();
1018+
if (count == 0) {
1019+
return;
1020+
}
1021+
1022+
InternalReserveSmallCapacityFromEmpty(count);
1023+
flat_size_ = static_cast<uint16_t>(count);
1024+
auto dst_it = map_.flat;
1025+
other.ForEach(
1026+
[extendee, this, &dst_it, &other](int number, const Extension& ext) {
1027+
if (ext.is_cleared) {
1028+
return;
1029+
}
1030+
dst_it->first = number;
1031+
this->InternalExtensionMergeFromIntoUninitializedExtension(
1032+
dst_it->second, extendee, number, ext, other.arena_);
1033+
++dst_it;
1034+
},
1035+
Prefetch{});
1036+
}
1037+
1038+
void ExtensionSet::InternalMergeFromSlow(const MessageLite* extendee,
1039+
const ExtensionSet& other) {
9891040
if (ABSL_PREDICT_TRUE(!is_large())) {
9901041
if (ABSL_PREDICT_TRUE(!other.is_large())) {
9911042
GrowCapacity(SizeOfUnion(flat_begin(), flat_end(), other.flat_begin(),
@@ -1003,35 +1054,22 @@ void ExtensionSet::MergeFrom(const MessageLite* extendee,
10031054
Prefetch{});
10041055
}
10051056

1006-
void ExtensionSet::InternalExtensionMergeFrom(const MessageLite* extendee,
1007-
int number,
1008-
const Extension& other_extension,
1009-
Arena* other_arena) {
1010-
if (other_extension.is_repeated) {
1011-
Extension* extension;
1012-
bool is_new =
1013-
MaybeNewExtension(number, other_extension.descriptor, &extension);
1014-
if (is_new) {
1015-
// Extension did not already exist in set.
1016-
extension->type = other_extension.type;
1017-
extension->is_packed = other_extension.is_packed;
1018-
extension->is_repeated = true;
1019-
extension->is_pointer = true;
1020-
} else {
1021-
ABSL_DCHECK_EQ(extension->type, other_extension.type);
1022-
ABSL_DCHECK_EQ(extension->is_packed, other_extension.is_packed);
1023-
ABSL_DCHECK(extension->is_repeated);
1024-
}
1057+
void ExtensionSet::InternalExtensionMergeFromIntoUninitializedExtension(
1058+
Extension& dst_extension, const MessageLite* extendee, int number,
1059+
const Extension& other_extension, Arena* other_arena) {
1060+
// Copy and initialize all the fields.
1061+
// We fix up incorrect pointers later.
1062+
// Primitive values are copied here.
1063+
dst_extension = other_extension;
10251064

1065+
if (other_extension.is_repeated) {
10261066
switch (cpp_type(other_extension.type)) {
1027-
#define HANDLE_TYPE(UPPERCASE, LOWERCASE, REPEATED_TYPE) \
1028-
case WireFormatLite::CPPTYPE_##UPPERCASE: \
1029-
if (is_new) { \
1030-
extension->ptr.repeated_##LOWERCASE##_value = \
1031-
Arena::Create<REPEATED_TYPE>(arena_); \
1032-
} \
1033-
extension->ptr.repeated_##LOWERCASE##_value->MergeFrom( \
1034-
*other_extension.ptr.repeated_##LOWERCASE##_value); \
1067+
#define HANDLE_TYPE(UPPERCASE, LOWERCASE, REPEATED_TYPE) \
1068+
case WireFormatLite::CPPTYPE_##UPPERCASE: \
1069+
dst_extension.ptr.repeated_##LOWERCASE##_value = \
1070+
Arena::Create<REPEATED_TYPE>(arena_); \
1071+
dst_extension.ptr.repeated_##LOWERCASE##_value->MergeFrom( \
1072+
*other_extension.ptr.repeated_##LOWERCASE##_value); \
10351073
break;
10361074

10371075
HANDLE_TYPE(INT32, int32_t, RepeatedField<int32_t>);
@@ -1046,83 +1084,129 @@ void ExtensionSet::InternalExtensionMergeFrom(const MessageLite* extendee,
10461084
HANDLE_TYPE(MESSAGE, message, RepeatedPtrField<MessageLite>);
10471085
#undef HANDLE_TYPE
10481086
}
1049-
} else {
1050-
if (!other_extension.is_cleared) {
1051-
switch (cpp_type(other_extension.type)) {
1052-
#define HANDLE_TYPE(UPPERCASE, LOWERCASE, CAMELCASE) \
1053-
case WireFormatLite::CPPTYPE_##UPPERCASE: \
1054-
Set##CAMELCASE(number, other_extension.type, \
1055-
other_extension.LOWERCASE##_value, \
1056-
other_extension.descriptor); \
1087+
return;
1088+
}
1089+
1090+
// Non-repeated extension
1091+
switch (cpp_type(other_extension.type)) {
1092+
case WireFormatLite::CPPTYPE_INT32:
1093+
case WireFormatLite::CPPTYPE_INT64:
1094+
case WireFormatLite::CPPTYPE_UINT32:
1095+
case WireFormatLite::CPPTYPE_UINT64:
1096+
case WireFormatLite::CPPTYPE_FLOAT:
1097+
case WireFormatLite::CPPTYPE_DOUBLE:
1098+
case WireFormatLite::CPPTYPE_BOOL:
1099+
case WireFormatLite::CPPTYPE_ENUM:
1100+
break; // Do nothing.
1101+
case WireFormatLite::CPPTYPE_STRING:
1102+
dst_extension.ptr.string_value =
1103+
Arena::Create<std::string>(arena_, *other_extension.ptr.string_value);
1104+
break;
1105+
case WireFormatLite::CPPTYPE_MESSAGE: {
1106+
if (other_extension.is_lazy) {
1107+
dst_extension.ptr.lazymessage_value =
1108+
other_extension.ptr.lazymessage_value->New(arena_);
1109+
dst_extension.ptr.lazymessage_value->MergeFrom(
1110+
GetPrototypeForLazyMessage(extendee, number),
1111+
*other_extension.ptr.lazymessage_value, arena_, other_arena);
1112+
} else {
1113+
dst_extension.ptr.message_value =
1114+
other_extension.ptr.message_value->New(arena_);
1115+
dst_extension.ptr.message_value->CheckTypeAndMergeFrom(
1116+
*other_extension.ptr.message_value);
1117+
}
1118+
break;
1119+
}
1120+
}
1121+
}
1122+
1123+
void ExtensionSet::InternalExtensionMergeFrom(const MessageLite* extendee,
1124+
int number,
1125+
const Extension& other_extension,
1126+
Arena* other_arena) {
1127+
Extension* dst_extension;
1128+
bool is_new =
1129+
MaybeNewExtension(number, other_extension.descriptor, &dst_extension);
1130+
if (is_new) {
1131+
InternalExtensionMergeFromIntoUninitializedExtension(
1132+
*dst_extension, extendee, number, other_extension, other_arena);
1133+
return;
1134+
}
1135+
if (other_extension.is_repeated) {
1136+
ABSL_DCHECK_EQ(dst_extension->type, other_extension.type);
1137+
ABSL_DCHECK_EQ(dst_extension->is_packed, other_extension.is_packed);
1138+
ABSL_DCHECK(dst_extension->is_repeated);
1139+
1140+
switch (cpp_type(other_extension.type)) {
1141+
#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \
1142+
case WireFormatLite::CPPTYPE_##UPPERCASE: \
1143+
dst_extension->ptr.repeated_##LOWERCASE##_value->MergeFrom( \
1144+
*other_extension.ptr.repeated_##LOWERCASE##_value); \
10571145
break;
10581146

1059-
HANDLE_TYPE(INT32, int32_t, Int32);
1060-
HANDLE_TYPE(INT64, int64_t, Int64);
1061-
HANDLE_TYPE(UINT32, uint32_t, UInt32);
1062-
HANDLE_TYPE(UINT64, uint64_t, UInt64);
1063-
HANDLE_TYPE(FLOAT, float, Float);
1064-
HANDLE_TYPE(DOUBLE, double, Double);
1065-
HANDLE_TYPE(BOOL, bool, Bool);
1066-
HANDLE_TYPE(ENUM, enum, Enum);
1147+
HANDLE_TYPE(INT32, int32_t);
1148+
HANDLE_TYPE(INT64, int64_t);
1149+
HANDLE_TYPE(UINT32, uint32_t);
1150+
HANDLE_TYPE(UINT64, uint64_t);
1151+
HANDLE_TYPE(FLOAT, float);
1152+
HANDLE_TYPE(DOUBLE, double);
1153+
HANDLE_TYPE(BOOL, bool);
1154+
HANDLE_TYPE(ENUM, enum);
1155+
HANDLE_TYPE(STRING, string);
1156+
HANDLE_TYPE(MESSAGE, message);
10671157
#undef HANDLE_TYPE
1068-
case WireFormatLite::CPPTYPE_STRING:
1069-
SetString(number, other_extension.type,
1070-
*other_extension.ptr.string_value,
1071-
other_extension.descriptor);
1072-
break;
1073-
case WireFormatLite::CPPTYPE_MESSAGE: {
1074-
Arena* const arena = arena_;
1075-
Extension* extension;
1076-
bool is_new =
1077-
MaybeNewExtension(number, other_extension.descriptor, &extension);
1078-
if (is_new) {
1079-
extension->type = other_extension.type;
1080-
extension->is_packed = other_extension.is_packed;
1081-
extension->is_repeated = false;
1082-
extension->is_pointer = true;
1083-
if (other_extension.is_lazy) {
1084-
extension->is_lazy = true;
1085-
extension->ptr.lazymessage_value =
1086-
other_extension.ptr.lazymessage_value->New(arena);
1087-
extension->ptr.lazymessage_value->MergeFrom(
1088-
GetPrototypeForLazyMessage(extendee, number),
1089-
*other_extension.ptr.lazymessage_value, arena, other_arena);
1090-
} else {
1091-
extension->is_lazy = false;
1092-
extension->ptr.message_value =
1093-
other_extension.ptr.message_value->New(arena);
1094-
extension->ptr.message_value->CheckTypeAndMergeFrom(
1095-
*other_extension.ptr.message_value);
1096-
}
1097-
} else {
1098-
ABSL_DCHECK_EQ(extension->type, other_extension.type);
1099-
ABSL_DCHECK_EQ(extension->is_packed, other_extension.is_packed);
1100-
ABSL_DCHECK(!extension->is_repeated);
1101-
if (other_extension.is_lazy) {
1102-
if (extension->is_lazy) {
1103-
extension->ptr.lazymessage_value->MergeFrom(
1104-
GetPrototypeForLazyMessage(extendee, number),
1105-
*other_extension.ptr.lazymessage_value, arena, other_arena);
1106-
} else {
1107-
extension->ptr.message_value->CheckTypeAndMergeFrom(
1108-
other_extension.ptr.lazymessage_value->GetMessage(
1109-
*extension->ptr.message_value, other_arena));
1110-
}
1111-
} else {
1112-
if (extension->is_lazy) {
1113-
extension->ptr.lazymessage_value
1114-
->MutableMessage(*other_extension.ptr.message_value, arena)
1115-
->CheckTypeAndMergeFrom(*other_extension.ptr.message_value);
1116-
} else {
1117-
extension->ptr.message_value->CheckTypeAndMergeFrom(
1118-
*other_extension.ptr.message_value);
1119-
}
1120-
}
1121-
}
1122-
extension->is_cleared = false;
1123-
break;
1158+
}
1159+
return;
1160+
}
1161+
1162+
if (other_extension.is_cleared) {
1163+
return;
1164+
}
1165+
dst_extension->is_cleared = false;
1166+
switch (cpp_type(other_extension.type)) {
1167+
#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \
1168+
case WireFormatLite::CPPTYPE_##UPPERCASE: \
1169+
dst_extension->LOWERCASE##_value = other_extension.LOWERCASE##_value; \
1170+
break;
1171+
1172+
HANDLE_TYPE(INT32, int32_t);
1173+
HANDLE_TYPE(INT64, int64_t);
1174+
HANDLE_TYPE(UINT32, uint32_t);
1175+
HANDLE_TYPE(UINT64, uint64_t);
1176+
HANDLE_TYPE(FLOAT, float);
1177+
HANDLE_TYPE(DOUBLE, double);
1178+
HANDLE_TYPE(BOOL, bool);
1179+
HANDLE_TYPE(ENUM, enum);
1180+
#undef HANDLE_TYPE
1181+
case WireFormatLite::CPPTYPE_STRING:
1182+
dst_extension->ptr.string_value->assign(
1183+
*other_extension.ptr.string_value);
1184+
break;
1185+
case WireFormatLite::CPPTYPE_MESSAGE: {
1186+
ABSL_DCHECK_EQ(dst_extension->type, other_extension.type);
1187+
ABSL_DCHECK_EQ(dst_extension->is_packed, other_extension.is_packed);
1188+
ABSL_DCHECK(!dst_extension->is_repeated);
1189+
if (other_extension.is_lazy) {
1190+
if (dst_extension->is_lazy) {
1191+
dst_extension->ptr.lazymessage_value->MergeFrom(
1192+
GetPrototypeForLazyMessage(extendee, number),
1193+
*other_extension.ptr.lazymessage_value, arena_, other_arena);
1194+
} else {
1195+
dst_extension->ptr.message_value->CheckTypeAndMergeFrom(
1196+
other_extension.ptr.lazymessage_value->GetMessage(
1197+
*dst_extension->ptr.message_value, other_arena));
1198+
}
1199+
} else {
1200+
if (dst_extension->is_lazy) {
1201+
dst_extension->ptr.lazymessage_value
1202+
->MutableMessage(*other_extension.ptr.message_value, arena_)
1203+
->CheckTypeAndMergeFrom(*other_extension.ptr.message_value);
1204+
} else {
1205+
dst_extension->ptr.message_value->CheckTypeAndMergeFrom(
1206+
*other_extension.ptr.message_value);
11241207
}
11251208
}
1209+
break;
11261210
}
11271211
}
11281212
}
@@ -1681,10 +1765,6 @@ std::pair<ExtensionSet::Extension*, bool> ExtensionSet::Insert(int key) {
16811765
return Insert(key);
16821766
}
16831767

1684-
namespace {
1685-
constexpr bool IsPowerOfTwo(size_t n) { return (n & (n - 1)) == 0; }
1686-
} // namespace
1687-
16881768
void ExtensionSet::GrowCapacity(size_t minimum_new_capacity) {
16891769
if (ABSL_PREDICT_FALSE(is_large())) {
16901770
return; // LargeMap does not have a "reserve" method.
@@ -1711,16 +1791,10 @@ void ExtensionSet::GrowCapacity(size_t minimum_new_capacity) {
17111791
flat_size_ = static_cast<uint16_t>(-1);
17121792
ABSL_DCHECK(is_large());
17131793
} else {
1714-
new_map.flat = Arena::CreateArray<KeyValue>(arena, new_flat_capacity);
1794+
new_map.flat = AllocateFlatMap(arena, new_flat_capacity);
17151795
std::copy(begin, end, new_map.flat);
17161796
}
17171797

1718-
// ReturnArrayMemory is more efficient with power-of-2 bytes, and
1719-
// sizeof(KeyValue) is a power-of-2 on 64-bit platforms. flat_capacity_ is
1720-
// always a power-of-2.
1721-
ABSL_DCHECK(IsPowerOfTwo(sizeof(KeyValue)) || sizeof(void*) != 8)
1722-
<< sizeof(KeyValue) << " " << sizeof(void*);
1723-
ABSL_DCHECK(IsPowerOfTwo(flat_capacity_));
17241798
if (flat_capacity_ > 0) {
17251799
if (arena == nullptr) {
17261800
DeleteFlatMap(begin, flat_capacity_);
@@ -1732,6 +1806,16 @@ void ExtensionSet::GrowCapacity(size_t minimum_new_capacity) {
17321806
map_ = new_map;
17331807
}
17341808

1809+
void ExtensionSet::InternalReserveSmallCapacityFromEmpty(
1810+
size_t minimum_new_capacity) {
1811+
ABSL_DCHECK(flat_capacity_ == 0);
1812+
ABSL_DCHECK(minimum_new_capacity <= kMaximumFlatCapacity);
1813+
ABSL_DCHECK(minimum_new_capacity > 0);
1814+
const size_t new_flat_capacity = absl::bit_ceil(minimum_new_capacity);
1815+
flat_capacity_ = new_flat_capacity;
1816+
map_.flat = AllocateFlatMap(arena_, new_flat_capacity);
1817+
}
1818+
17351819
#if (__cplusplus < 201703) && \
17361820
(!defined(_MSC_VER) || (_MSC_VER >= 1900 && _MSC_VER < 1912))
17371821
// static

0 commit comments

Comments
 (0)