Skip to content

Backport "Add UTF8 abstraction in the TASTy format" to LTS #20766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,8 @@ object CommentPickler:

def pickleComment(addr: Addr, comment: Comment): Unit =
if addr != NoAddr then
val bytes = comment.raw.getBytes(StandardCharsets.UTF_8).nn
val length = bytes.length
buf.writeAddr(addr)
buf.writeNat(length)
buf.writeBytes(bytes, length)
buf.writeUtf8(comment.raw)
buf.writeLongInt(comment.span.coords)

def traverse(x: Any): Unit = x match
Expand Down
10 changes: 3 additions & 7 deletions compiler/src/dotty/tools/dotc/core/tasty/CommentUnpickler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,9 @@ class CommentUnpickler(reader: TastyReader) {
val comments = new HashMap[Addr, Comment]
while (!isAtEnd) {
val addr = readAddr()
val length = readNat()
if (length > 0) {
val bytes = readBytes(length)
val position = new Span(readLongInt())
val rawComment = new String(bytes, StandardCharsets.UTF_8)
comments(addr) = Comment(position, rawComment)
}
val rawComment = readUtf8()
val position = new Span(readLongInt())
comments(addr) = Comment(position, rawComment)
}
comments
}
Expand Down
15 changes: 4 additions & 11 deletions compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,8 @@ import collection.mutable
import core.Symbols.ClassSymbol
import Decorators._

object TastyPickler {

private val versionStringBytes = {
val compilerString = s"Scala ${config.Properties.simpleVersionString}"
compilerString.getBytes(java.nio.charset.StandardCharsets.UTF_8)
}

}
object TastyPickler:
private val versionString = s"Scala ${config.Properties.simpleVersionString}"

class TastyPickler(val rootCls: ClassSymbol) {

Expand Down Expand Up @@ -48,13 +42,12 @@ class TastyPickler(val rootCls: ClassSymbol) {
val uuidHi: Long = otherSectionHashes.fold(0L)(_ ^ _)

val headerBuffer = {
val buf = new TastyBuffer(header.length + TastyPickler.versionStringBytes.length + 32)
val buf = new TastyBuffer(header.length + TastyPickler.versionString.length + 32)
for (ch <- header) buf.writeByte(ch.toByte)
buf.writeNat(MajorVersion)
buf.writeNat(MinorVersion)
buf.writeNat(ExperimentalVersion)
buf.writeNat(TastyPickler.versionStringBytes.length)
buf.writeBytes(TastyPickler.versionStringBytes, TastyPickler.versionStringBytes.length)
buf.writeUtf8(TastyPickler.versionString)
buf.writeUncompressedLong(uuidLow)
buf.writeUncompressedLong(uuidHi)
buf
Expand Down
11 changes: 11 additions & 0 deletions tasty/src/dotty/tools/tasty/TastyBuffer.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package dotty.tools.tasty

import util.Util.dble
import java.nio.charset.StandardCharsets

object TastyBuffer {

Expand Down Expand Up @@ -115,6 +116,16 @@ class TastyBuffer(initialSize: Int) {
writeBytes(bytes, 8)
}

/** Write a UTF8 string encoded as `Nat UTF8-CodePoint*`,
* where the `Nat` is the length of the code-points bytes.
*/
def writeUtf8(x: String): Unit = {
val bytes = x.getBytes(StandardCharsets.UTF_8)
val length = bytes.length
writeNat(length)
writeBytes(bytes, length)
}

// -- Address handling --------------------------------------------

/** Write natural number `x` right-adjusted in a field of `width` bytes
Expand Down
7 changes: 4 additions & 3 deletions tasty/src/dotty/tools/tasty/TastyFormat.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Micro-syntax:
Nat = LongInt -- non-negative value, fits in an Int without overflow
Digit = 0 | ... | 127
StopDigit = 128 | ... | 255 -- value = digit - 128
Utf8 = Nat UTF8-CodePoint*
```

Macro-format:
Expand All @@ -24,12 +25,12 @@ Macro-format:
nameTable_Length Name* Section*
Header = 0x5CA1AB1F
UUID = Byte*16 -- random UUID
VersionString = Length UTF8-CodePoint* -- string that represents the compiler that produced the TASTy
VersionString = Utf8 -- string that represents the compiler that produced the TASTy

Section = NameRef Length Bytes
Length = Nat -- length of rest of entry in bytes

Name = UTF8 Length UTF8-CodePoint*
Name = UTF8 Utf8
QUALIFIED Length qualified_NameRef selector_NameRef -- A.B
EXPANDED Length qualified_NameRef selector_NameRef -- A$$B, semantically a NameKinds.ExpandedName
EXPANDPREFIX Length qualified_NameRef selector_NameRef -- A$B, prefix of expanded name, see NamedKinds.ExpandPrefixName
Expand Down Expand Up @@ -263,7 +264,7 @@ All elements of a position section are serialized as Ints

Standard Section: "Comments" Comment*
```none
Comment = Length Bytes LongInt // Raw comment's bytes encoded as UTF-8, followed by the comment's coordinates.
Comment = Utf8 LongInt // Raw comment's bytes encoded as UTF-8, followed by the comment's coordinates.
```
**************************************************************************************/

Expand Down
10 changes: 10 additions & 0 deletions tasty/src/dotty/tools/tasty/TastyReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dotty.tools.tasty
import collection.mutable

import TastyBuffer._
import java.nio.charset.StandardCharsets

/** A byte array buffer that can be filled with bytes or natural numbers in TASTY format,
* and that supports reading and patching addresses represented as natural numbers.
Expand Down Expand Up @@ -104,6 +105,15 @@ class TastyReader(val bytes: Array[Byte], start: Int, end: Int, val base: Int =
x
}

/** Read a UTF8 string encoded as `Nat UTF8-CodePoint*`,
* where the `Nat` is the length of the code-points bytes.
*/
def readUtf8(): String = {
val length = readNat()
if (length == 0) ""
else new String(readBytes(length), StandardCharsets.UTF_8)
}

/** Read a natural number and return as a NameRef */
def readNameRef(): NameRef = NameRef(readNat())

Expand Down