From bd41bee12e93f7ecd242c2e8af0bb93c59779a9e Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Sun, 29 Jul 2018 15:42:19 +0200 Subject: [PATCH 01/10] Assemble buffer before computing the hash Previously we where computing the hash for nameBuffer on an empty array. --- .../src/dotty/tools/dotc/core/tasty/TastyPickler.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index a15c8c861fec..321c0631ba72 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -20,10 +20,11 @@ class TastyPickler(val rootCls: ClassSymbol) { sections += ((nameBuffer.nameIndex(name.toTermName), buf)) def assembleParts(): Array[Byte] = { - def lengthWithLength(buf: TastyBuffer) = { - buf.assemble() + def lengthWithLength(buf: TastyBuffer) = buf.length + natSize(buf.length) - } + + nameBuffer.assemble() + sections.foreach(_._2.assemble()) val uuidLow: Long = pjwHash64(nameBuffer.bytes) val uuidHi: Long = sections.iterator.map(x => pjwHash64(x._2.bytes)).fold(0L)(_ ^ _) From c905ff287d03c56d742566e69d3e3bb5b1dec1c4 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Sun, 29 Jul 2018 16:09:55 +0200 Subject: [PATCH 02/10] Add missing if in pjwHash64 implementation --- compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 321c0631ba72..8c9bf7549895 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -79,12 +79,12 @@ class TastyPickler(val rootCls: ClassSymbol) { */ private def pjwHash64(data: Array[Byte]): Long = { var h = 0L - var high = 0L var i = 0 while (i < data.length) { h = (h << 4) + data(i) - high = h & 0xF0000000L - h ^= high >> 24 + val high = h & 0xF0000000L + if (high != 0) + h ^= high >> 24 h &= ~high i += 1 } From 3ddfa3c11c14ee00b9af22665ecbf2c6cadb21fd Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Sun, 29 Jul 2018 16:37:11 +0200 Subject: [PATCH 03/10] Fix input of pjwHash64 Originaly it was a C `unsigned char` --- compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 8c9bf7549895..db7a43c060cf 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -81,7 +81,8 @@ class TastyPickler(val rootCls: ClassSymbol) { var h = 0L var i = 0 while (i < data.length) { - h = (h << 4) + data(i) + val d = data(i) & 0xFFL // Interpret byte as unsigned byte + h = (h << 4) + d val high = h & 0xF0000000L if (high != 0) h ^= high >> 24 From 05ab4cfb8a2f996599b467a737584fcd20d712a7 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Sun, 29 Jul 2018 17:12:17 +0200 Subject: [PATCH 04/10] Fix pjwHash64 to generate 64-bit Longs Previously the constants where set to generate 32-bit ints. --- .../dotty/tools/dotc/core/tasty/TastyPickler.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index db7a43c060cf..b7e1008d03ce 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -75,18 +75,19 @@ class TastyPickler(val rootCls: ClassSymbol) { /** Returns a non-cryptographic 64-bit hash of the array. * - * from https://en.wikipedia.org/wiki/PJW_hash_function#Implementation + * from https://en.wikipedia.org/wiki/PJW_hash_function#Algorithm */ private def pjwHash64(data: Array[Byte]): Long = { var h = 0L var i = 0 while (i < data.length) { val d = data(i) & 0xFFL // Interpret byte as unsigned byte - h = (h << 4) + d - val high = h & 0xF0000000L - if (high != 0) - h ^= high >> 24 - h &= ~high + h = (h << 8) + d + val high = h & 0xFF00000000000000L + if (high != 0) { + h ^= high >> 48L + h &= ~high + } i += 1 } h From d6898b9fd1ff697e9f33ec3707ef9b48e588109c Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Sun, 29 Jul 2018 17:30:31 +0200 Subject: [PATCH 05/10] Improve entropy of low bits of the Tasty UUID --- .../src/dotty/tools/dotc/core/tasty/TastyPickler.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index b7e1008d03ce..3ffd43d6121b 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -26,8 +26,13 @@ class TastyPickler(val rootCls: ClassSymbol) { nameBuffer.assemble() sections.foreach(_._2.assemble()) - val uuidLow: Long = pjwHash64(nameBuffer.bytes) - val uuidHi: Long = sections.iterator.map(x => pjwHash64(x._2.bytes)).fold(0L)(_ ^ _) + val nameBufferHash = pjwHash64(nameBuffer.bytes) + val treeSectionHash +: otherSectionHashes = sections.map(x => pjwHash64(x._2.bytes)) + + // Hash of name table and tree + val uuidLow: Long = nameBufferHash ^ treeSectionHash + // Hash of positions, comments and any additional section + val uuidHi: Long = otherSectionHashes.fold(0L)(_ ^ _) val headerBuffer = { val buf = new TastyBuffer(header.length + 24) From 26679a460fd8098c037179a3138736bcd3ad67a2 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Sun, 29 Jul 2018 18:10:05 +0200 Subject: [PATCH 06/10] Remove branching in while loop --- compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 3ffd43d6121b..6d5a8bbb22c2 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -89,10 +89,8 @@ class TastyPickler(val rootCls: ClassSymbol) { val d = data(i) & 0xFFL // Interpret byte as unsigned byte h = (h << 8) + d val high = h & 0xFF00000000000000L - if (high != 0) { - h ^= high >> 48L - h &= ~high - } + h ^= high >> 48L + h &= ~high i += 1 } h From 2bf86963a8313e15a48ce88224a92155c1a327e3 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Mon, 6 Aug 2018 17:59:06 +0200 Subject: [PATCH 07/10] Add pjwHash64 regression tests --- .../tools/dotc/core/tasty/TastyHash.scala | 23 +++++++++++++++++ .../tools/dotc/core/tasty/TastyPickler.scala | 21 ++-------------- tests/run-with-compiler/tasty-hash.scala | 25 +++++++++++++++++++ 3 files changed, 50 insertions(+), 19 deletions(-) create mode 100644 compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala create mode 100644 tests/run-with-compiler/tasty-hash.scala diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala new file mode 100644 index 000000000000..482aab2edccb --- /dev/null +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala @@ -0,0 +1,23 @@ +package dotty.tools.dotc.core.tasty + +object TastyHash { + + /** Returns a non-cryptographic 64-bit hash of the array. + * + * from https://en.wikipedia.org/wiki/PJW_hash_function#Algorithm + */ + def pjwHash64(data: Array[Byte]): Long = { + var h = 0L + var i = 0 + while (i < data.length) { + val d = data(i) & 0xFFL // Interpret byte as unsigned byte + h = (h << 8) + d + val high = h & 0xFF00000000000000L + h ^= high >> 48L + h &= ~high + i += 1 + } + h + } + +} diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 6d5a8bbb22c2..d7e1ddf0d42f 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -26,8 +26,8 @@ class TastyPickler(val rootCls: ClassSymbol) { nameBuffer.assemble() sections.foreach(_._2.assemble()) - val nameBufferHash = pjwHash64(nameBuffer.bytes) - val treeSectionHash +: otherSectionHashes = sections.map(x => pjwHash64(x._2.bytes)) + val nameBufferHash = TastyHash.pjwHash64(nameBuffer.bytes) + val treeSectionHash +: otherSectionHashes = sections.map(x => TastyHash.pjwHash64(x._2.bytes)) // Hash of name table and tree val uuidLow: Long = nameBufferHash ^ treeSectionHash @@ -78,21 +78,4 @@ class TastyPickler(val rootCls: ClassSymbol) { val treePkl = new TreePickler(this) - /** Returns a non-cryptographic 64-bit hash of the array. - * - * from https://en.wikipedia.org/wiki/PJW_hash_function#Algorithm - */ - private def pjwHash64(data: Array[Byte]): Long = { - var h = 0L - var i = 0 - while (i < data.length) { - val d = data(i) & 0xFFL // Interpret byte as unsigned byte - h = (h << 8) + d - val high = h & 0xFF00000000000000L - h ^= high >> 48L - h &= ~high - i += 1 - } - h - } } diff --git a/tests/run-with-compiler/tasty-hash.scala b/tests/run-with-compiler/tasty-hash.scala new file mode 100644 index 000000000000..9a5750925004 --- /dev/null +++ b/tests/run-with-compiler/tasty-hash.scala @@ -0,0 +1,25 @@ +import dotty.tools.dotc.core.tasty.TastyHash.pjwHash64 + +object Test { + def main(args: Array[String]): Unit = { + testHash(0L, Array.empty) + testHash(0L, Array(0)) + testHash(1L, Array(1)) + testHash(0x7fL, Array(Byte.MaxValue)) + testHash(0x80L, Array(Byte.MinValue)) + testHash(0x101L, Array(1, 1)) + testHash(0X10101L, Array(1, 1, 1)) + testHash(0X1010101L, Array(1, 1, 1, 1)) + testHash(0X101010101L, Array(1, 1, 1, 1, 1)) + testHash(0X202020202L, Array(2, 2, 2, 2, 2)) + testHash(0X1010101L, Array.fill(1024)(1)) + testHash(0X55aa01fe55ab54ffL, Array.tabulate(1024)(_.toByte)) + testHash(0x34545c16020230L, "abcdefghijklmnopqrstuvwxyz1234567890".getBytes) + } + + def testHash(expected: Long, arr: Array[Byte]): Unit = { + val res = pjwHash64(arr) + assert(res == expected, s"Exprected 0x${expected.toHexString}L but got 0X${res.toHexString}L") + } + +} From 38a6cf9461211d49f49c14f1a73cd2182fc642de Mon Sep 17 00:00:00 2001 From: "Paolo G. Giarrusso" Date: Mon, 6 Aug 2018 19:24:55 +0200 Subject: [PATCH 08/10] Use an *unsigned* right shift Found by comparing testcases with the C version, and by then comparing the source code; both C versions use an unsigned right shift (or rather a right shift on unsigned integers). * Background: Right-shifting a signed number copies the sign bit into all the new "empty bits"; C uses the integer sign, while Java (and Scala) use a separate "unsigned right shift" operator. --- compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala | 2 +- tests/run-with-compiler/tasty-hash.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala index 482aab2edccb..6845f36fcf06 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyHash.scala @@ -13,7 +13,7 @@ object TastyHash { val d = data(i) & 0xFFL // Interpret byte as unsigned byte h = (h << 8) + d val high = h & 0xFF00000000000000L - h ^= high >> 48L + h ^= high >>> 48L h &= ~high i += 1 } diff --git a/tests/run-with-compiler/tasty-hash.scala b/tests/run-with-compiler/tasty-hash.scala index 9a5750925004..5e4a7dee025a 100644 --- a/tests/run-with-compiler/tasty-hash.scala +++ b/tests/run-with-compiler/tasty-hash.scala @@ -13,7 +13,7 @@ object Test { testHash(0X101010101L, Array(1, 1, 1, 1, 1)) testHash(0X202020202L, Array(2, 2, 2, 2, 2)) testHash(0X1010101L, Array.fill(1024)(1)) - testHash(0X55aa01fe55ab54ffL, Array.tabulate(1024)(_.toByte)) + testHash(0xaafefeaaab54ffL, Array.tabulate(1024)(_.toByte)) testHash(0x34545c16020230L, "abcdefghijklmnopqrstuvwxyz1234567890".getBytes) } From 88ca356a78b86e68000b7c4a64de1e7aff5c407b Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Tue, 7 Aug 2018 08:14:28 +0200 Subject: [PATCH 09/10] Make tasty hash test a JUnit test --- .../test/dotty/tools/dotc/TastyHashTest.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) rename tests/run-with-compiler/tasty-hash.scala => compiler/test/dotty/tools/dotc/TastyHashTest.scala (88%) diff --git a/tests/run-with-compiler/tasty-hash.scala b/compiler/test/dotty/tools/dotc/TastyHashTest.scala similarity index 88% rename from tests/run-with-compiler/tasty-hash.scala rename to compiler/test/dotty/tools/dotc/TastyHashTest.scala index 5e4a7dee025a..edcc983c8817 100644 --- a/tests/run-with-compiler/tasty-hash.scala +++ b/compiler/test/dotty/tools/dotc/TastyHashTest.scala @@ -1,7 +1,11 @@ +package dotty.tools.dotc + +import org.junit.Test + import dotty.tools.dotc.core.tasty.TastyHash.pjwHash64 -object Test { - def main(args: Array[String]): Unit = { +class TastyHashTest { + @Test def pjwHash64Tests(): Unit = { testHash(0L, Array.empty) testHash(0L, Array(0)) testHash(1L, Array(1)) From ac7647f96de2eaf9303b5df8bcd39dde4dbf6677 Mon Sep 17 00:00:00 2001 From: "Paolo G. Giarrusso" Date: Tue, 7 Aug 2018 13:18:27 +0200 Subject: [PATCH 10/10] Use JUnit assert --- compiler/test/dotty/tools/dotc/TastyHashTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/test/dotty/tools/dotc/TastyHashTest.scala b/compiler/test/dotty/tools/dotc/TastyHashTest.scala index edcc983c8817..b8cb5c9c0bc2 100644 --- a/compiler/test/dotty/tools/dotc/TastyHashTest.scala +++ b/compiler/test/dotty/tools/dotc/TastyHashTest.scala @@ -1,6 +1,7 @@ package dotty.tools.dotc import org.junit.Test +import org.junit.Assert.assertEquals import dotty.tools.dotc.core.tasty.TastyHash.pjwHash64 @@ -22,8 +23,7 @@ class TastyHashTest { } def testHash(expected: Long, arr: Array[Byte]): Unit = { - val res = pjwHash64(arr) - assert(res == expected, s"Exprected 0x${expected.toHexString}L but got 0X${res.toHexString}L") + assertEquals(s"0x${expected.toHexString}L", s"0x${pjwHash64(arr).toHexString}L") } }