From 702498e4d1a4752f687eda2cc166cfb748acd599 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Thu, 24 Jun 2021 18:34:17 -0700 Subject: [PATCH 1/3] Use `StringConcatFactory` for string concatenation on JDK 9+ JEP 280, released in JDK 9, proposes a new way to compile string concatenation using `invokedynamic` and `StringConcatFactory`. This new approach generates less bytecode, doesn't have to incur the overhead of `StringBuilder` allocations, and allows users to pick swap the concatenation technique at runtime. This changes the codegen when the target is at least Java 9 to leverage `invokedynamic` and `StringConcatFactory`. On Java 8, the old `StringBuilder` approach is still used. Ported from scala#9556 --- .../tools/backend/jvm/BCodeBodyBuilder.scala | 97 ++++++++++++++++-- .../tools/backend/jvm/BCodeIdiomatic.scala | 42 ++++++-- tests/disabled/partest/run/StringConcat.check | Bin 0 -> 5587 bytes tests/disabled/partest/run/StringConcat.scala | 86 ++++++++++++++++ 4 files changed, 209 insertions(+), 16 deletions(-) create mode 100644 tests/disabled/partest/run/StringConcat.check create mode 100644 tests/disabled/partest/run/StringConcat.scala diff --git a/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala b/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala index 47b2d3a69a10..334c47f3511d 100644 --- a/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala +++ b/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala @@ -1063,30 +1063,109 @@ trait BCodeBodyBuilder extends BCodeSkelBuilder { } } + /* Generate string concatenation + * + * On JDK 8: create and append using `StringBuilder` + * On JDK 9+: use `invokedynamic` with `StringConcatFactory` + */ def genStringConcat(tree: Tree): BType = { lineNumber(tree) liftStringConcat(tree) match { - // Optimization for expressions of the form "" + x. We can avoid the StringBuilder. + // Optimization for expressions of the form "" + x case List(Literal(Constant("")), arg) => genLoad(arg, ObjectReference) genCallMethod(defn.String_valueOf_Object, InvokeStyle.Static) case concatenations => - bc.genStartConcat - for (elem <- concatenations) { - val loadedElem = elem match { + val concatArguments = concatenations.view + .filter { + case Literal(Constant("")) => false // empty strings are no-ops in concatenation + case _ => true + } + .map { case Apply(boxOp, value :: Nil) if Erasure.Boxing.isBox(boxOp.symbol) && boxOp.symbol.denot.owner != defn.UnitModuleClass => // Eliminate boxing of primitive values. Boxing is introduced by erasure because // there's only a single synthetic `+` method "added" to the string class. value + case other => other + } + .toList + + // `StringConcatFactory` only got added in JDK 9, so use `StringBuilder` for lower + if (classfileVersion < asm.Opcodes.V9) { + + // Estimate capacity needed for the string builder + val approxBuilderSize = concatArguments.view.map { + case Literal(Constant(s: String)) => s.length + case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length + case _ => 0 + }.sum + bc.genNewStringBuilder(approxBuilderSize) + + for (elem <- concatArguments) { + val elemType = tpeTK(elem) + genLoad(elem, elemType) + bc.genStringBuilderAppend(elemType) + } + bc.genStringBuilderEnd + } else { + + /* `StringConcatFactory#makeConcatWithConstants` accepts max 200 argument slots. If + * the string concatenation is longer (unlikely), we spill into multiple calls + */ + val MaxIndySlots = 200 + val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument + val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant + + val recipe = new StringBuilder() + val argTypes = Seq.newBuilder[asm.Type] + val constVals = Seq.newBuilder[String] + var totalArgSlots = 0 + var countConcats = 1 // ie. 1 + how many times we spilled + + for (elem <- concatArguments) { + val tpe = tpeTK(elem) + val elemSlots = tpe.size + + // Unlikely spill case + if (totalArgSlots + elemSlots >= MaxIndySlots) { + bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result()) + countConcats += 1 + totalArgSlots = 0 + recipe.setLength(0) + argTypes.clear() + constVals.clear() + } - case _ => elem + elem match { + case Literal(Constant(s: String)) => + if (s.contains(TagArg) || s.contains(TagConst)) { + totalArgSlots += elemSlots + recipe.append(TagConst) + constVals += s + } else { + recipe.append(s) + } + + case other => + totalArgSlots += elemSlots + recipe.append(TagArg) + val tpe = tpeTK(elem) + argTypes += tpe.toASMType + genLoad(elem, tpe) + } + } + bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result()) + + // If we spilled, generate one final concat + if (countConcats > 1) { + bc.genIndyStringConcat( + TagArg.toString * countConcats, + Seq.fill(countConcats)(StringRef.toASMType), + Seq.empty + ) } - val elemType = tpeTK(loadedElem) - genLoad(loadedElem, elemType) - bc.genConcat(elemType) } - bc.genEndConcat } StringRef } diff --git a/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala b/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala index 63dc4f430abc..ea257ee959ea 100644 --- a/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala +++ b/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala @@ -224,24 +224,27 @@ trait BCodeIdiomatic { } // end of method genPrimitiveShift() - /* + /* Creates a new `StringBuilder` instance with the requested capacity + * * can-multi-thread */ - final def genStartConcat: Unit = { + final def genNewStringBuilder(size: Int): Unit = { jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName) jmethod.visitInsn(Opcodes.DUP) + jmethod.visitLdcInsn(Integer.valueOf(size)) invokespecial( JavaStringBuilderClassName, INSTANCE_CONSTRUCTOR_NAME, - "()V", + "(I)V", itf = false ) } - /* + /* Issue a call to `StringBuilder#append` for the right element type + * * can-multi-thread */ - def genConcat(elemType: BType): Unit = { + final def genStringBuilderAppend(elemType: BType): Unit = { val paramType = elemType match { case ct: ClassBType if ct.isSubtypeOf(StringRef) => StringRef case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef) => jlStringBufferRef @@ -257,13 +260,38 @@ trait BCodeIdiomatic { invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor) } - /* + /* Extract the built `String` from the `StringBuilder` + * * can-multi-thread */ - final def genEndConcat: Unit = { + final def genStringBuilderEnd: Unit = { invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;") } + /* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants` + * (only works for JDK 9+) + * + * can-multi-thread + */ + final def genIndyStringConcat( + recipe: String, + argTypes: Seq[asm.Type], + constants: Seq[String] + ): Unit = { + jmethod.visitInvokeDynamicInsn( + "makeConcatWithConstants", + asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*), + new asm.Handle( + asm.Opcodes.H_INVOKESTATIC, + "java/lang/invoke/StringConcatFactory", + "makeConcatWithConstants", + "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;", + false + ), + (recipe +: constants):_* + ) + } + /* * Emits one or more conversion instructions based on the types given as arguments. * diff --git a/tests/disabled/partest/run/StringConcat.check b/tests/disabled/partest/run/StringConcat.check new file mode 100644 index 0000000000000000000000000000000000000000..10eaa9a20d1b98d974875029c1d16d893b13f4e1 GIT binary patch literal 5587 zcmeHKOHbQC5H3CXSB$J45~&!4kF{g9i(`7BnJR!WndA!XgU3;{}f2{S1QkM9yHFWGm>^ z17j{QF(XMTElWluC12qD9PU?%1i9jQx_~8RRFE{?0iV+yczwqkIi3SNhHORQ+wBxO z(@fwl;EFTc0A+d;-`ALz;R2`Ka_$9(=l7h@c z4YehIqzc33>_f!-5OA5tF%>WYMz>}r4Rz{y^CHUbn)FDH;c7+1ls@HDu{MDRvLOyj zxTA1AgM$okb+}I7?K)(=rfku%BlJs?S5BCW;f7^6jpqkpe=r!60Bo^4-^sxGflg0m21r7VmB+NY;1wE8@A!;2CIFl=TWTd z#a`|f4 Date: Thu, 24 Jun 2021 21:32:14 -0700 Subject: [PATCH 2/3] update test --- compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala b/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala index f288a8d6ff33..613e72b32e52 100644 --- a/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala +++ b/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala @@ -61,7 +61,7 @@ class StringConcatTest extends DottyBytecodeTest { } assertEquals(List( - "()V", + "(I)V", "toString()Ljava/lang/String;", "append(Ljava/lang/String;)Ljava/lang/StringBuilder;", "append(Ljava/lang/Object;)Ljava/lang/StringBuilder;", @@ -82,7 +82,7 @@ class StringConcatTest extends DottyBytecodeTest { ) assertEquals(List( - "()V", + "(I)V", "toString()Ljava/lang/String;", "append(Ljava/lang/String;)Ljava/lang/StringBuilder;", "append(Ljava/lang/String;)Ljava/lang/StringBuilder;", From 84ed337b517406b0a58e387e2a16bf0bf419c742 Mon Sep 17 00:00:00 2001 From: Alec Theriault Date: Tue, 27 Jul 2021 10:40:09 -0700 Subject: [PATCH 3/3] move tests to Dotty format --- tests/disabled/partest/run/StringConcat.check | Bin 5587 -> 0 bytes tests/disabled/partest/run/StringConcat.scala | 86 ------------------ tests/run/StringConcat.check | Bin 0 -> 1127 bytes tests/run/StringConcat.scala | 79 ++++++++++++++++ 4 files changed, 79 insertions(+), 86 deletions(-) delete mode 100644 tests/disabled/partest/run/StringConcat.check delete mode 100644 tests/disabled/partest/run/StringConcat.scala create mode 100644 tests/run/StringConcat.check create mode 100644 tests/run/StringConcat.scala diff --git a/tests/disabled/partest/run/StringConcat.check b/tests/disabled/partest/run/StringConcat.check deleted file mode 100644 index 10eaa9a20d1b98d974875029c1d16d893b13f4e1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5587 zcmeHKOHbQC5H3CXSB$J45~&!4kF{g9i(`7BnJR!WndA!XgU3;{}f2{S1QkM9yHFWGm>^ z17j{QF(XMTElWluC12qD9PU?%1i9jQx_~8RRFE{?0iV+yczwqkIi3SNhHORQ+wBxO z(@fwl;EFTc0A+d;-`ALz;R2`Ka_$9(=l7h@c z4YehIqzc33>_f!-5OA5tF%>WYMz>}r4Rz{y^CHUbn)FDH;c7+1ls@HDu{MDRvLOyj zxTA1AgM$okb+}I7?K)(=rfku%BlJs?S5BCW;f7^6jpqkpe=r!60Bo^4-^sxGflg0m21r7VmB+NY;1wE8@A!;2CIFl=TWTd z#a`|f4{?Kt-Y^t)6weEM@Hc-BX-J~z z8&G<8?atWqjj$i|Csl-|AvZSaAp@IBQ02Sp91!sN0p>H%Z*(_d12>KUQj$e zi2((3OI3&71%h46mDnq#;lR?{+L}C3+Gv~R+Q61W-O}Kz4B-2dD1sd`&hZ!?SX2%d lq6YaI*Yt6`>|-S)LMmt>6LP@_7N(kMKGSaecNgPZdjmuV6`cS8 literal 0 HcmV?d00001 diff --git a/tests/run/StringConcat.scala b/tests/run/StringConcat.scala new file mode 100644 index 000000000000..774147ba1303 --- /dev/null +++ b/tests/run/StringConcat.scala @@ -0,0 +1,79 @@ +@main def Test() = { + + // This should generally obey 15.18.1. of the JLS (String Concatenation Operator +) + def concatenatingVariousTypes(): String = { + val str: String = "some string" + val sb: StringBuffer = new StringBuffer("some stringbuffer") + val cs: CharSequence = java.nio.CharBuffer.allocate(50).append("charsequence") + val i: Int = 123456789 + val s: Short = 345 + val b: Byte = 12 + val z: Boolean = true + val f: Float = 3.14f + val j: Long = 98762147483647L + val d: Double = 3.1415d + + "String " + str + "\n" + + "StringBuffer " + sb + "\n" + + "CharSequence " + cs + "\n" + + "Int " + i + "\n" + + "Short " + s + "\n" + + "Byte " + b + "\n" + + "Boolean " + z + "\n" + + "Float " + f + "\n" + + "Long " + j + "\n" + + "Double " + d + "\n" + } + // The characters `\u0001` and `\u0002` play a special role in `StringConcatFactory` + def concatenationInvolvingSpecialCharacters(): String = { + val s1 = "Qux" + val s2 = "Quux" + + s"Foo \u0001 $s1 Bar \u0002 $s2 Baz" + } + // Concatenation involving more than 200 elements + def largeConcatenation(): String = { + val s00 = "s00" + val s01 = "s01" + val s02 = "s02" + val s03 = "s03" + val s04 = "s04" + val s05 = "s05" + val s06 = "s06" + val s07 = "s07" + val s08 = "s08" + + // 24 rows follow + ((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") + + (s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n")) + + ((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") + + (s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n")) + } + println("----------") + println(concatenatingVariousTypes()) + println("----------") + println(concatenationInvolvingSpecialCharacters()) + println("----------") + println(largeConcatenation()) + println("----------") +}