Skip to content

Commit 51d95e0

Browse files
honglookercopybara-github
authored andcommitted
In this CL, we update generated_message_tctable_lite to support both length-prefixed and delimited when it comes to parsing submessages.
In the past, the parser would do error checking based on the schema. If a .proto file declared a submessage to be length-prefixed but an SGROUP was encountered on the wire, it would invoke the fallback. This behavior is now updated: the parser will solely look at the tag when dealing with submessages. This increases the flexibility of the parser and will make our Editions rollout smoother. PiperOrigin-RevId: 617275654
1 parent 1f6580d commit 51d95e0

File tree

3 files changed

+127
-52
lines changed

3 files changed

+127
-52
lines changed

src/google/protobuf/generated_message_tctable_lite.cc

+16-29
Original file line numberDiff line numberDiff line change
@@ -2297,44 +2297,35 @@ PROTOBUF_NOINLINE const char* TcParser::MpMessage(PROTOBUF_TC_PARAM_DECL) {
22972297
const uint16_t type_card = entry.type_card;
22982298
const uint16_t card = type_card & field_layout::kFcMask;
22992299

2300+
const uint32_t decoded_tag = data.tag();
2301+
const uint32_t decoded_wiretype = decoded_tag & 7;
2302+
23002303
// Check for repeated parsing:
23012304
if (card == field_layout::kFcRepeated) {
2302-
const uint16_t rep = type_card & field_layout::kRepMask;
2303-
switch (rep) {
2304-
case field_layout::kRepMessage:
2305+
switch (decoded_wiretype) {
2306+
case WireFormatLite::WIRETYPE_LENGTH_DELIMITED:
23052307
PROTOBUF_MUSTTAIL return MpRepeatedMessageOrGroup<is_split, false>(
23062308
PROTOBUF_TC_PARAM_PASS);
2307-
case field_layout::kRepGroup:
2309+
case WireFormatLite::WIRETYPE_START_GROUP:
23082310
PROTOBUF_MUSTTAIL return MpRepeatedMessageOrGroup<is_split, true>(
23092311
PROTOBUF_TC_PARAM_PASS);
23102312
default:
23112313
PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
23122314
}
23132315
}
2314-
2315-
const uint32_t decoded_tag = data.tag();
2316-
const uint32_t decoded_wiretype = decoded_tag & 7;
23172316
const uint16_t rep = type_card & field_layout::kRepMask;
2318-
const bool is_group = rep == field_layout::kRepGroup;
2319-
2320-
// Validate wiretype:
2321-
switch (rep) {
2322-
case field_layout::kRepMessage:
2323-
if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2324-
goto fallback;
2325-
}
2326-
break;
2327-
case field_layout::kRepGroup:
2328-
if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
2329-
goto fallback;
2330-
}
2331-
break;
2332-
default: {
2333-
fallback:
2334-
PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
2335-
}
2317+
// note that we solely rely on wiretype for parsing messages (schema ignored)
2318+
const bool is_group =
2319+
decoded_wiretype == WireFormatLite::WIRETYPE_START_GROUP;
2320+
2321+
// If we don't see a wiretype of START_GROUP or DELIM even though we're in the
2322+
// entry point for MpMessage, something is wrong. Bail out!
2323+
if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP &&
2324+
decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
2325+
PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
23362326
}
23372327

2328+
23382329
const bool is_oneof = card == field_layout::kFcOneof;
23392330
bool need_init = false;
23402331
if (card == field_layout::kFcOptional) {
@@ -2386,14 +2377,10 @@ const char* TcParser::MpRepeatedMessageOrGroup(PROTOBUF_TC_PARAM_DECL) {
23862377

23872378
// Validate wiretype:
23882379
if (!is_group) {
2389-
ABSL_DCHECK_EQ(type_card & field_layout::kRepMask,
2390-
static_cast<uint16_t>(field_layout::kRepMessage));
23912380
if (decoded_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED) {
23922381
PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
23932382
}
23942383
} else {
2395-
ABSL_DCHECK_EQ(type_card & field_layout::kRepMask,
2396-
static_cast<uint16_t>(field_layout::kRepGroup));
23972384
if (decoded_wiretype != WireFormatLite::WIRETYPE_START_GROUP) {
23982385
PROTOBUF_MUSTTAIL return table->fallback(PROTOBUF_TC_PARAM_PASS);
23992386
}

src/google/protobuf/lite_unittest.cc

+20
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,26 @@ TYPED_TEST(LiteTest, CorrectEnding) {
11361136
}
11371137
}
11381138

1139+
TYPED_TEST(LiteTest, MessageEncoding) {
1140+
protobuf_unittest::TestAllTypesLite msg;
1141+
{
1142+
// Make sure that we support length-prefixed encoding for submsgs
1143+
static const char kWireFormat[] = "\n\002\010\003"; // 1: {1: 3}
1144+
io::CodedInputStream cis(reinterpret_cast<const uint8_t*>(kWireFormat), 4);
1145+
EXPECT_TRUE(msg.MergePartialFromCodedStream(&cis));
1146+
EXPECT_TRUE(cis.ConsumedEntireMessage());
1147+
EXPECT_TRUE(cis.LastTagWas(0));
1148+
}
1149+
{
1150+
// Make sure that we support delimited encoding for submsgs
1151+
static const char kWireFormat[] = "\013\010\003\014"; // 1: !{1: 3}
1152+
io::CodedInputStream cis(reinterpret_cast<const uint8_t*>(kWireFormat), 4);
1153+
EXPECT_TRUE(msg.MergePartialFromCodedStream(&cis));
1154+
EXPECT_TRUE(cis.ConsumedEntireMessage());
1155+
EXPECT_TRUE(cis.LastTagWas(0));
1156+
}
1157+
}
1158+
11391159
TYPED_TEST(LiteTest, DebugString) {
11401160
protobuf_unittest::TestAllTypesLite message1, message2;
11411161
EXPECT_TRUE(absl::StartsWith(message1.DebugString(), "MessageLite at 0x"));

src/google/protobuf/wire_format.cc

+91-23
Original file line numberDiff line numberDiff line change
@@ -578,31 +578,59 @@ bool WireFormat::ParseAndMergeField(
578578

579579
case FieldDescriptor::TYPE_GROUP: {
580580
Message* sub_message;
581-
if (field->is_repeated()) {
582-
sub_message = message_reflection->AddMessage(
583-
message, field, input->GetExtensionFactory());
581+
if (WireFormatLite::GetTagWireType(tag) ==
582+
WireFormatLite::WIRETYPE_START_GROUP) {
583+
if (field->is_repeated()) {
584+
sub_message = message_reflection->AddMessage(
585+
message, field, input->GetExtensionFactory());
586+
} else {
587+
sub_message = message_reflection->MutableMessage(
588+
message, field, input->GetExtensionFactory());
589+
}
590+
591+
if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
592+
input, sub_message))
593+
return false;
584594
} else {
585-
sub_message = message_reflection->MutableMessage(
586-
message, field, input->GetExtensionFactory());
587-
}
595+
if (field->is_repeated()) {
596+
sub_message = message_reflection->AddMessage(
597+
message, field, input->GetExtensionFactory());
598+
} else {
599+
sub_message = message_reflection->MutableMessage(
600+
message, field, input->GetExtensionFactory());
601+
}
588602

589-
if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
590-
input, sub_message))
591-
return false;
603+
if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
604+
}
592605
break;
593606
}
594607

595608
case FieldDescriptor::TYPE_MESSAGE: {
596609
Message* sub_message;
597-
if (field->is_repeated()) {
598-
sub_message = message_reflection->AddMessage(
599-
message, field, input->GetExtensionFactory());
610+
if (WireFormatLite::GetTagWireType(tag) ==
611+
WireFormatLite::WIRETYPE_START_GROUP) {
612+
if (field->is_repeated()) {
613+
sub_message = message_reflection->AddMessage(
614+
message, field, input->GetExtensionFactory());
615+
} else {
616+
sub_message = message_reflection->MutableMessage(
617+
message, field, input->GetExtensionFactory());
618+
}
619+
620+
if (!WireFormatLite::ReadGroup(WireFormatLite::GetTagFieldNumber(tag),
621+
input, sub_message))
622+
return false;
600623
} else {
601-
sub_message = message_reflection->MutableMessage(
602-
message, field, input->GetExtensionFactory());
603-
}
624+
if (field->is_repeated()) {
625+
sub_message = message_reflection->AddMessage(
626+
message, field, input->GetExtensionFactory());
627+
} else {
628+
sub_message = message_reflection->MutableMessage(
629+
message, field, input->GetExtensionFactory());
630+
}
604631

605-
if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
632+
if (!WireFormatLite::ReadMessage(input, sub_message)) return false;
633+
}
606634
break;
607635
}
608636
}
@@ -997,19 +1025,59 @@ const char* WireFormat::_InternalParseAndMergeField(
9971025

9981026
case FieldDescriptor::TYPE_GROUP: {
9991027
Message* sub_message;
1000-
if (field->is_repeated()) {
1001-
sub_message = reflection->AddMessage(msg, field, ctx->data().factory);
1028+
1029+
if (WireFormatLite::GetTagWireType(tag) ==
1030+
WireFormatLite::WIRETYPE_START_GROUP) {
1031+
if (field->is_repeated()) {
1032+
sub_message = reflection->AddMessage(msg, field, ctx->data().factory);
1033+
} else {
1034+
sub_message =
1035+
reflection->MutableMessage(msg, field, ctx->data().factory);
1036+
}
1037+
1038+
return ctx->ParseGroup(sub_message, ptr, tag);
10021039
} else {
1003-
sub_message =
1004-
reflection->MutableMessage(msg, field, ctx->data().factory);
1005-
}
1040+
if (field->is_repeated()) {
1041+
sub_message = reflection->AddMessage(msg, field, ctx->data().factory);
1042+
} else {
1043+
sub_message =
1044+
reflection->MutableMessage(msg, field, ctx->data().factory);
1045+
}
1046+
ptr = ctx->ParseMessage(sub_message, ptr);
1047+
1048+
// For map entries, if the value is an unknown enum we have to push it
1049+
// into the unknown field set and remove it from the list.
1050+
if (ptr != nullptr && field->is_map()) {
1051+
auto* value_field = field->message_type()->map_value();
1052+
auto* enum_type = value_field->enum_type();
1053+
if (enum_type != nullptr &&
1054+
!internal::cpp::HasPreservingUnknownEnumSemantics(value_field) &&
1055+
enum_type->FindValueByNumber(
1056+
sub_message->GetReflection()->GetEnumValue(
1057+
*sub_message, value_field)) == nullptr) {
1058+
reflection->MutableUnknownFields(msg)->AddLengthDelimited(
1059+
field->number(), sub_message->SerializeAsString());
1060+
reflection->RemoveLast(msg, field);
1061+
}
1062+
}
10061063

1007-
return ctx->ParseGroup(sub_message, ptr, tag);
1064+
return ptr;
1065+
}
10081066
}
10091067

10101068
case FieldDescriptor::TYPE_MESSAGE: {
10111069
Message* sub_message;
1012-
if (field->is_repeated()) {
1070+
if (WireFormatLite::GetTagWireType(tag) ==
1071+
WireFormatLite::WIRETYPE_START_GROUP) {
1072+
if (field->is_repeated()) {
1073+
sub_message = reflection->AddMessage(msg, field, ctx->data().factory);
1074+
} else {
1075+
sub_message =
1076+
reflection->MutableMessage(msg, field, ctx->data().factory);
1077+
}
1078+
1079+
return ctx->ParseGroup(sub_message, ptr, tag);
1080+
} else if (field->is_repeated()) {
10131081
sub_message = reflection->AddMessage(msg, field, ctx->data().factory);
10141082
} else {
10151083
sub_message =

0 commit comments

Comments
 (0)