Skip to content

Commit 3b5301c

Browse files
Refactoring Java parsing (21.x) (#10665)
* Porting java cleanup * Update changelog * Fix absl usage * Extension patch * Remove extra allocations
1 parent bea6726 commit 3b5301c

40 files changed

+1690
-940
lines changed

CHANGES.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
2022-09-27 version 21.7 (C++/Java/Python/PHP/Objective-C/C#/Ruby)
2+
Java
3+
* Refactor the Java full runtime to reuse sub-message builders and prepare to
4+
migrate parsing logic from parse constructor to builder.
5+
* Move proto wireformat parsing functionality from the private "parsing
6+
constructor" to the Builder class.
7+
* Change the Lite runtime to prefer merging from the wireformat into mutable
8+
messages rather than building up a new immutable object before merging. This
9+
approach results in fewer allocations and copy operations.
10+
* Make message-type extensions merge from wire-format instead of building up instances and merging afterwards. This has much better performance.
11+
112
2022-09-13 version 21.6 (C++/Java/Python/PHP/Objective-C/C#/Ruby)
213

314
C++

java/core/src/main/java/com/google/protobuf/AbstractMessage.java

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -424,27 +424,22 @@ public BuilderType mergeFrom(
424424
throws IOException {
425425
boolean discardUnknown = input.shouldDiscardUnknownFields();
426426
final UnknownFieldSet.Builder unknownFields =
427-
discardUnknown ? null : UnknownFieldSet.newBuilder(getUnknownFields());
428-
while (true) {
429-
final int tag = input.readTag();
430-
if (tag == 0) {
431-
break;
432-
}
433-
434-
MessageReflection.BuilderAdapter builderAdapter =
435-
new MessageReflection.BuilderAdapter(this);
436-
if (!MessageReflection.mergeFieldFrom(
437-
input, unknownFields, extensionRegistry, getDescriptorForType(), builderAdapter, tag)) {
438-
// end group tag
439-
break;
440-
}
441-
}
427+
discardUnknown ? null : getUnknownFieldSetBuilder();
428+
MessageReflection.mergeMessageFrom(this, unknownFields, input, extensionRegistry);
442429
if (unknownFields != null) {
443-
setUnknownFields(unknownFields.build());
430+
setUnknownFieldSetBuilder(unknownFields);
444431
}
445432
return (BuilderType) this;
446433
}
447434

435+
protected UnknownFieldSet.Builder getUnknownFieldSetBuilder() {
436+
return UnknownFieldSet.newBuilder(getUnknownFields());
437+
}
438+
439+
protected void setUnknownFieldSetBuilder(final UnknownFieldSet.Builder builder) {
440+
setUnknownFields(builder.build());
441+
}
442+
448443
@Override
449444
public BuilderType mergeUnknownFields(final UnknownFieldSet unknownFields) {
450445
setUnknownFields(

java/core/src/main/java/com/google/protobuf/ArrayDecoders.java

Lines changed: 86 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,29 @@ static int decodeBytes(byte[] data, int position, Registers registers)
237237
@SuppressWarnings({"unchecked", "rawtypes"})
238238
static int decodeMessageField(
239239
Schema schema, byte[] data, int position, int limit, Registers registers) throws IOException {
240+
Object msg = schema.newInstance();
241+
int offset = mergeMessageField(msg, schema, data, position, limit, registers);
242+
schema.makeImmutable(msg);
243+
registers.object1 = msg;
244+
return offset;
245+
}
246+
247+
/** Decodes a group value. */
248+
@SuppressWarnings({"unchecked", "rawtypes"})
249+
static int decodeGroupField(
250+
Schema schema, byte[] data, int position, int limit, int endGroup, Registers registers)
251+
throws IOException {
252+
Object msg = schema.newInstance();
253+
int offset = mergeGroupField(msg, schema, data, position, limit, endGroup, registers);
254+
schema.makeImmutable(msg);
255+
registers.object1 = msg;
256+
return offset;
257+
}
258+
259+
@SuppressWarnings({"unchecked", "rawtypes"})
260+
static int mergeMessageField(
261+
Object msg, Schema schema, byte[] data, int position, int limit, Registers registers)
262+
throws IOException {
240263
int length = data[position++];
241264
if (length < 0) {
242265
position = decodeVarint32(length, data, position, registers);
@@ -245,27 +268,28 @@ static int decodeMessageField(
245268
if (length < 0 || length > limit - position) {
246269
throw InvalidProtocolBufferException.truncatedMessage();
247270
}
248-
Object result = schema.newInstance();
249-
schema.mergeFrom(result, data, position, position + length, registers);
250-
schema.makeImmutable(result);
251-
registers.object1 = result;
271+
schema.mergeFrom(msg, data, position, position + length, registers);
272+
registers.object1 = msg;
252273
return position + length;
253274
}
254275

255-
/** Decodes a group value. */
256276
@SuppressWarnings({"unchecked", "rawtypes"})
257-
static int decodeGroupField(
258-
Schema schema, byte[] data, int position, int limit, int endGroup, Registers registers)
277+
static int mergeGroupField(
278+
Object msg,
279+
Schema schema,
280+
byte[] data,
281+
int position,
282+
int limit,
283+
int endGroup,
284+
Registers registers)
259285
throws IOException {
260286
// A group field must have a MessageSchema (the only other subclass of Schema is MessageSetSchema
261287
// and it can't be used in group fields).
262288
final MessageSchema messageSchema = (MessageSchema) schema;
263-
Object result = messageSchema.newInstance();
264289
// It's OK to directly use parseProto2Message since proto3 doesn't have groups.
265290
final int endPosition =
266-
messageSchema.parseProto2Message(result, data, position, limit, endGroup, registers);
267-
messageSchema.makeImmutable(result);
268-
registers.object1 = result;
291+
messageSchema.parseProto2Message(msg, data, position, limit, endGroup, registers);
292+
registers.object1 = msg;
269293
return endPosition;
270294
}
271295

@@ -851,26 +875,19 @@ static int decodeExtension(
851875
break;
852876
}
853877
case ENUM:
854-
{
855-
IntArrayList list = new IntArrayList();
856-
position = decodePackedVarint32List(data, position, list, registers);
857-
UnknownFieldSetLite unknownFields = message.unknownFields;
858-
if (unknownFields == UnknownFieldSetLite.getDefaultInstance()) {
859-
unknownFields = null;
860-
}
861-
unknownFields =
862-
SchemaUtil.filterUnknownEnumList(
863-
fieldNumber,
864-
list,
865-
extension.descriptor.getEnumType(),
866-
unknownFields,
867-
unknownFieldSchema);
868-
if (unknownFields != null) {
869-
message.unknownFields = unknownFields;
878+
{
879+
IntArrayList list = new IntArrayList();
880+
position = decodePackedVarint32List(data, position, list, registers);
881+
SchemaUtil.filterUnknownEnumList(
882+
message,
883+
fieldNumber,
884+
list,
885+
extension.descriptor.getEnumType(),
886+
null,
887+
unknownFieldSchema);
888+
extensions.setField(extension.descriptor, list);
889+
break;
870890
}
871-
extensions.setField(extension.descriptor, list);
872-
break;
873-
}
874891
default:
875892
throw new IllegalStateException(
876893
"Type cannot be packed: " + extension.descriptor.getLiteType());
@@ -882,13 +899,8 @@ static int decodeExtension(
882899
position = decodeVarint32(data, position, registers);
883900
Object enumValue = extension.descriptor.getEnumType().findValueByNumber(registers.int1);
884901
if (enumValue == null) {
885-
UnknownFieldSetLite unknownFields = ((GeneratedMessageLite) message).unknownFields;
886-
if (unknownFields == UnknownFieldSetLite.getDefaultInstance()) {
887-
unknownFields = UnknownFieldSetLite.newInstance();
888-
((GeneratedMessageLite) message).unknownFields = unknownFields;
889-
}
890902
SchemaUtil.storeUnknownEnum(
891-
fieldNumber, registers.int1, unknownFields, unknownFieldSchema);
903+
message, fieldNumber, registers.int1, null, unknownFieldSchema);
892904
return position;
893905
}
894906
// Note, we store the integer value instead of the actual enum object in FieldSet.
@@ -945,38 +957,52 @@ static int decodeExtension(
945957
value = registers.object1;
946958
break;
947959
case GROUP:
948-
final int endTag = (fieldNumber << 3) | WireFormat.WIRETYPE_END_GROUP;
949-
position = decodeGroupField(
950-
Protobuf.getInstance().schemaFor(extension.getMessageDefaultInstance().getClass()),
951-
data, position, limit, endTag, registers);
952-
value = registers.object1;
953-
break;
954-
960+
{
961+
final int endTag = (fieldNumber << 3) | WireFormat.WIRETYPE_END_GROUP;
962+
final Schema fieldSchema =
963+
Protobuf.getInstance()
964+
.schemaFor(extension.getMessageDefaultInstance().getClass());
965+
if (extension.isRepeated()) {
966+
position = decodeGroupField(fieldSchema, data, position, limit, endTag, registers);
967+
extensions.addRepeatedField(extension.descriptor, registers.object1);
968+
} else {
969+
Object oldValue = extensions.getField(extension.descriptor);
970+
if (oldValue == null) {
971+
oldValue = fieldSchema.newInstance();
972+
extensions.setField(extension.descriptor, oldValue);
973+
}
974+
position =
975+
mergeGroupField(
976+
oldValue, fieldSchema, data, position, limit, endTag, registers);
977+
}
978+
return position;
979+
}
955980
case MESSAGE:
956-
position = decodeMessageField(
957-
Protobuf.getInstance().schemaFor(extension.getMessageDefaultInstance().getClass()),
958-
data, position, limit, registers);
959-
value = registers.object1;
960-
break;
961-
981+
{
982+
final Schema fieldSchema =
983+
Protobuf.getInstance()
984+
.schemaFor(extension.getMessageDefaultInstance().getClass());
985+
if (extension.isRepeated()) {
986+
position = decodeMessageField(fieldSchema, data, position, limit, registers);
987+
extensions.addRepeatedField(extension.descriptor, registers.object1);
988+
} else {
989+
Object oldValue = extensions.getField(extension.descriptor);
990+
if (oldValue == null) {
991+
oldValue = fieldSchema.newInstance();
992+
extensions.setField(extension.descriptor, oldValue);
993+
}
994+
position =
995+
mergeMessageField(oldValue, fieldSchema, data, position, limit, registers);
996+
}
997+
return position;
998+
}
962999
case ENUM:
9631000
throw new IllegalStateException("Shouldn't reach here.");
9641001
}
9651002
}
9661003
if (extension.isRepeated()) {
9671004
extensions.addRepeatedField(extension.descriptor, value);
9681005
} else {
969-
switch (extension.getLiteType()) {
970-
case MESSAGE:
971-
case GROUP:
972-
Object oldValue = extensions.getField(extension.descriptor);
973-
if (oldValue != null) {
974-
value = Internal.mergeMessage(oldValue, value);
975-
}
976-
break;
977-
default:
978-
break;
979-
}
9801006
extensions.setField(extension.descriptor, value);
9811007
}
9821008
}

java/core/src/main/java/com/google/protobuf/BinaryReader.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,15 @@ public <T> T readMessageBySchemaWithCheck(
248248

249249
private <T> T readMessage(Schema<T> schema, ExtensionRegistryLite extensionRegistry)
250250
throws IOException {
251+
T newInstance = schema.newInstance();
252+
mergeMessageField(newInstance, schema, extensionRegistry);
253+
schema.makeImmutable(newInstance);
254+
return newInstance;
255+
}
256+
257+
@Override
258+
public <T> void mergeMessageField(
259+
T target, Schema<T> schema, ExtensionRegistryLite extensionRegistry) throws IOException {
251260
int size = readVarint32();
252261
requireBytes(size);
253262

@@ -257,15 +266,10 @@ private <T> T readMessage(Schema<T> schema, ExtensionRegistryLite extensionRegis
257266
limit = newLimit;
258267

259268
try {
260-
// Allocate and read the message.
261-
T message = schema.newInstance();
262-
schema.mergeFrom(message, this, extensionRegistry);
263-
schema.makeImmutable(message);
264-
269+
schema.mergeFrom(target, this, extensionRegistry);
265270
if (pos != newLimit) {
266271
throw InvalidProtocolBufferException.parseFailure();
267272
}
268-
return message;
269273
} finally {
270274
// Restore the limit.
271275
limit = prevLimit;
@@ -290,19 +294,23 @@ public <T> T readGroupBySchemaWithCheck(
290294

291295
private <T> T readGroup(Schema<T> schema, ExtensionRegistryLite extensionRegistry)
292296
throws IOException {
297+
T newInstance = schema.newInstance();
298+
mergeGroupField(newInstance, schema, extensionRegistry);
299+
schema.makeImmutable(newInstance);
300+
return newInstance;
301+
}
302+
303+
@Override
304+
public <T> void mergeGroupField(
305+
T target, Schema<T> schema, ExtensionRegistryLite extensionRegistry) throws IOException {
293306
int prevEndGroupTag = endGroupTag;
294307
endGroupTag = WireFormat.makeTag(WireFormat.getTagFieldNumber(tag), WIRETYPE_END_GROUP);
295308

296309
try {
297-
// Allocate and read the message.
298-
T message = schema.newInstance();
299-
schema.mergeFrom(message, this, extensionRegistry);
300-
schema.makeImmutable(message);
301-
310+
schema.mergeFrom(target, this, extensionRegistry);
302311
if (tag != endGroupTag) {
303312
throw InvalidProtocolBufferException.parseFailure();
304313
}
305-
return message;
306314
} finally {
307315
// Restore the old end group tag.
308316
endGroupTag = prevEndGroupTag;

0 commit comments

Comments
 (0)