From 5d29e7e95c085fdb03bc1f96e25fb6e20e3d9334 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Mon, 17 Apr 2017 22:50:44 +0200 Subject: [PATCH 1/8] Fix #2185: Add bytecode idempotency checks. --- .../tools/dotc/core/tasty/TastyPickler.scala | 2 +- .../dotty/tools/dotc/CompilationTests.scala | 133 ++++++++++++++---- 2 files changed, 110 insertions(+), 25 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index cc2e4dd58a24..4c18daea7094 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -14,7 +14,7 @@ import Decorators._ class TastyPickler { private val sections = new mutable.ArrayBuffer[(NameRef, TastyBuffer)] - val uuid = UUID.randomUUID() + private val uuid = UUID.fromString("3cee1b79-c03a-4125-b337-d067b5cb3a94") // TODO: use a hash of the tasty tree private val headerBuffer = { val buf = new TastyBuffer(24) diff --git a/compiler/test/dotty/tools/dotc/CompilationTests.scala b/compiler/test/dotty/tools/dotc/CompilationTests.scala index df7a1141cec9..bff15db1be84 100644 --- a/compiler/test/dotty/tools/dotc/CompilationTests.scala +++ b/compiler/test/dotty/tools/dotc/CompilationTests.scala @@ -207,30 +207,6 @@ class CompilationTests extends ParallelTesting { compileDir("../library/src", allowDeepSubtypes.and("-Ycheck-reentrant", "-strict", "-priorityclasspath", defaultOutputDir)) - def sources(paths: JStream[Path], excludedFiles: List[String] = Nil): List[String] = - paths.iterator().asScala - .filter(path => - (path.toString.endsWith(".scala") || path.toString.endsWith(".java")) - && !excludedFiles.contains(path.getFileName.toString)) - .map(_.toString).toList - - val compilerDir = Paths.get("../compiler/src") - val compilerSources = sources(Files.walk(compilerDir)) - - val backendDir = Paths.get("../scala-backend/src/compiler/scala/tools/nsc/backend") - val backendJvmDir = 
Paths.get("../scala-backend/src/compiler/scala/tools/nsc/backend/jvm") - - // NOTE: Keep these exclusions synchronized with the ones in the sbt build (Build.scala) - val backendExcluded = - List("JavaPlatform.scala", "Platform.scala", "ScalaPrimitives.scala") - val backendJvmExcluded = - List("BCodeICodeCommon.scala", "GenASM.scala", "GenBCode.scala", "ScalacBackendInterface.scala") - - val backendSources = - sources(Files.list(backendDir), excludedFiles = backendExcluded) - val backendJvmSources = - sources(Files.list(backendJvmDir), excludedFiles = backendJvmExcluded) - def dotty1 = { compileList( "dotty1", @@ -261,6 +237,115 @@ class CompilationTests extends ParallelTesting { } :: Nil }.map(_.checkCompile()).foreach(_.delete()) } + + @Test def bytecodeIdemporency: Unit = { + var failed = 0 + var total = 0 + val blacklisted = Set( + "pos/Map/scala/collection/immutable/Map", + "pos/Map/scala/collection/immutable/AbstractMap", + "pos/t1203a/NodeSeq" + ) + def checkIdempotency(): Unit = { + val groupedBytecodeFiles: List[(Path, Path, Path, Path)] = { + val bytecodeFiles = { + def bytecodeFiles(paths: JStream[Path]): List[Path] = { + def isBytecode(file: String) = file.endsWith(".class") || file.endsWith(".tasty") + paths.iterator.asScala.filter(path => isBytecode(path.toString)).toList + } + val compilerDir1 = Paths.get("../out/idempotency1") + val compilerDir2 = Paths.get("../out/idempotency2") + bytecodeFiles(Files.walk(compilerDir1)) ++ bytecodeFiles(Files.walk(compilerDir2)) + } + val groups = bytecodeFiles.groupBy(f => f.toString.substring("../out/idempotencyN/".length, f.toString.length - 6)) + groups.filterNot(x => blacklisted(x._1)).valuesIterator.flatMap { g => + def pred(f: Path, i: Int, isTasty: Boolean) = + f.toString.contains("idempotency" + i) && f.toString.endsWith(if (isTasty) ".tasty" else ".class") + val class1 = g.find(f => pred(f, 1, isTasty = false)) + val class2 = g.find(f => pred(f, 2, isTasty = false)) + val tasty1 = g.find(f => pred(f, 1, 
isTasty = true)) + val tasty2 = g.find(f => pred(f, 2, isTasty = true)) + assert(class1.isDefined, "Could not find class in idempotency1 for " + class2) + assert(class2.isDefined, "Could not find class in idempotency2 for " + class1) + if (tasty1.isEmpty || tasty2.isEmpty) Nil + else List(Tuple4(class1.get, tasty1.get, class2.get, tasty2.get)) + }.toList + } + + for ((class1, tasty1, class2, tasty2) <- groupedBytecodeFiles) { + total += 1 + val bytes1 = Files.readAllBytes(class1) + val bytes2 = Files.readAllBytes(class2) + if (!java.util.Arrays.equals(bytes1, bytes2)) { + failed += 1 + val tastyBytes1 = Files.readAllBytes(tasty1) + val tastyBytes2 = Files.readAllBytes(tasty2) + if (java.util.Arrays.equals(tastyBytes1, tastyBytes2)) + println(s"Idempotency test failed between $class1 and $class1 (same tasty)") + else + println(s"Idempotency test failed between $tasty1 and $tasty2") + /* Dump bytes to console, could be useful if issue only appears in CI. + * Create the .class locally with Files.write(path, Array[Byte](...)) with the printed array + */ + // println(bytes1.mkString("Array[Byte](", ",", ")")) + // println(bytes2.mkString("Array[Byte](", ",", ")")) + } + } + } + + val opt = defaultOptions.and("-YemitTasty") + + def idempotency1() = { + compileList("dotty1", compilerSources ++ backendSources ++ backendJvmSources, opt) + + compileFilesInDir("../tests/pos", opt) + } + def idempotency2() = { + compileList("dotty1", compilerSources ++ backendSources ++ backendJvmSources, opt) + + compileFilesInDir("../tests/pos", opt) + } + + val tests = (idempotency1() + idempotency2()).keepOutput.checkCompile() + + assert(new java.io.File("../out/idempotency1/").exists) + assert(new java.io.File("../out/idempotency2/").exists) + + val t0 = System.currentTimeMillis() + checkIdempotency() + println(s"checked bytecode idempotency (${(System.currentTimeMillis() - t0) / 1000.0} sec)") + + tests.delete() + + assert(failed == 0, s"Failed $failed idempotency checks (out of 
$total)") + } + + + private val (compilerSources, backendSources, backendJvmSources) = { + def sources(paths: JStream[Path], excludedFiles: List[String] = Nil): List[String] = + paths.iterator().asScala + .filter(path => + (path.toString.endsWith(".scala") || path.toString.endsWith(".java")) + && !excludedFiles.contains(path.getFileName.toString)) + .map(_.toString).toList + + val compilerDir = Paths.get("../compiler/src") + val compilerSources0 = sources(Files.walk(compilerDir)) + + val backendDir = Paths.get("../scala-backend/src/compiler/scala/tools/nsc/backend") + val backendJvmDir = Paths.get("../scala-backend/src/compiler/scala/tools/nsc/backend/jvm") + + // NOTE: Keep these exclusions synchronized with the ones in the sbt build (Build.scala) + val backendExcluded = + List("JavaPlatform.scala", "Platform.scala", "ScalaPrimitives.scala") + val backendJvmExcluded = + List("BCodeICodeCommon.scala", "GenASM.scala", "GenBCode.scala", "ScalacBackendInterface.scala") + + val backendSources0 = + sources(Files.list(backendDir), excludedFiles = backendExcluded) + val backendJvmSources0 = + sources(Files.list(backendJvmDir), excludedFiles = backendJvmExcluded) + + (compilerSources0, backendSources0, backendJvmSources0) + } } object CompilationTests { From 1dd46f2033758a8d14238d2219fd9e13336475c2 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Wed, 19 Apr 2017 11:57:07 +0200 Subject: [PATCH 2/8] Compute tasty UUID based on nameBuffer and sections hash. 
--- .../tools/dotc/core/tasty/TastyPickler.scala | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 4c18daea7094..e89341a7636f 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -6,7 +6,6 @@ package tasty import TastyFormat._ import collection.mutable import TastyBuffer._ -import java.util.UUID import core.Symbols.Symbol import ast.tpd import Decorators._ @@ -14,17 +13,6 @@ import Decorators._ class TastyPickler { private val sections = new mutable.ArrayBuffer[(NameRef, TastyBuffer)] - private val uuid = UUID.fromString("3cee1b79-c03a-4125-b337-d067b5cb3a94") // TODO: use a hash of the tasty tree - - private val headerBuffer = { - val buf = new TastyBuffer(24) - for (ch <- header) buf.writeByte(ch.toByte) - buf.writeNat(MajorVersion) - buf.writeNat(MinorVersion) - buf.writeUncompressedLong(uuid.getMostSignificantBits) - buf.writeUncompressedLong(uuid.getLeastSignificantBits) - buf - } val nameBuffer = new NameBuffer @@ -36,6 +24,20 @@ class TastyPickler { buf.assemble() buf.length + natSize(buf.length) } + + val uuidLow: Long = longHash(nameBuffer.bytes) + val uuidHi: Long = sections.iterator.map(x => longHash(x._2.bytes)).fold(0L)(_ ^ _) + + val headerBuffer = { + val buf = new TastyBuffer(header.length + 24) + for (ch <- header) buf.writeByte(ch.toByte) + buf.writeNat(MajorVersion) + buf.writeNat(MinorVersion) + buf.writeUncompressedLong(uuidLow) + buf.writeUncompressedLong(uuidHi) + buf + } + val totalSize = headerBuffer.length + lengthWithLength(nameBuffer) + { @@ -69,4 +71,8 @@ class TastyPickler { var addrOfSym: Symbol => Option[Addr] = (_ => None) val treePkl = new TreePickler(this) + + private def longHash(arr: Array[Byte], i: Int = 0, acc: Long = 1): Long = + if (i < arr.length) longHash(arr, i + 1, 31L * 
acc + arr(i)) else acc + } From 3caa001d24e7acedf111628a076080e72e9ccd0d Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Wed, 19 Apr 2017 17:04:01 +0200 Subject: [PATCH 3/8] Use murmur hashing for tasty uuid. --- .../tools/dotc/core/tasty/TastyPickler.scala | 9 +- .../tools/dotc/util/MurmurLongHash3.scala | 261 ++++++++++++++++++ 2 files changed, 264 insertions(+), 6 deletions(-) create mode 100644 compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index e89341a7636f..094fca964314 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -6,6 +6,7 @@ package tasty import TastyFormat._ import collection.mutable import TastyBuffer._ +import util.MurmurLongHash3 import core.Symbols.Symbol import ast.tpd import Decorators._ @@ -25,8 +26,8 @@ class TastyPickler { buf.length + natSize(buf.length) } - val uuidLow: Long = longHash(nameBuffer.bytes) - val uuidHi: Long = sections.iterator.map(x => longHash(x._2.bytes)).fold(0L)(_ ^ _) + val uuidLow: Long = MurmurLongHash3.bytesHash(nameBuffer.bytes) + val uuidHi: Long = sections.iterator.map(x => MurmurLongHash3.bytesHash(x._2.bytes)).fold(0L)(_ ^ _) val headerBuffer = { val buf = new TastyBuffer(header.length + 24) @@ -71,8 +72,4 @@ class TastyPickler { var addrOfSym: Symbol => Option[Addr] = (_ => None) val treePkl = new TreePickler(this) - - private def longHash(arr: Array[Byte], i: Int = 0, acc: Long = 1): Long = - if (i < arr.length) longHash(arr, i + 1, 31L * acc + arr(i)) else acc - } diff --git a/compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala b/compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala new file mode 100644 index 000000000000..1da998a02e9c --- /dev/null +++ b/compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala @@ -0,0 +1,261 @@ +/* __ *\ +** ________ ___ / / ___
Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +/* Copied and adapted from scala.util.hashing.MurmurHash3 to use Longs */ + +package dotty.tools.dotc.util + +import java.lang.Long.{ rotateLeft => rotl } + +private[util] class MurmurLongHash3 { + /** Mix in a block of data into an intermediate hash value. */ + final def mix(hash: Long, data: Long): Long = { + var h = mixLast(hash, data) + h = rotl(h, 13) + h * 5L + 0x96cd1c3532ac3b17L + } + + /** May optionally be used as the last mixing step. Is a little bit faster than mix, + * as it does no further mixing of the resulting hash. For the last element this is not + * necessary as the hash is thoroughly mixed during finalization anyway. */ + final def mixLast(hash: Long, data: Long): Long = { + var k = data + + k *= 0x239b961bab0e9789L + k = rotl(k, 15) + k *= 0x38b34ae5a1e38b93L + + hash ^ k + } + + /** Finalize a hash to incorporate the length and make sure all bits avalanche. */ + final def finalizeHash(hash: Long, length: Long): Long = avalanche(hash ^ length) + + /** Force all bits of the hash to avalanche. Used for finalizing the hash. */ + private final def avalanche(hash: Long): Long = { + var h = hash + + h ^= h >>> 33 + h *= 0xff51afd7ed558ccdL + h ^= h >>> 33 + h *= 0xc4ceb9fe1a85ec53L + h ^= h >>> 33 + + h + } + + /** Compute the hash of a product */ + final def productHash(x: Product, seed: Long): Long = { + val arr = x.productArity + // Case objects have the hashCode inlined directly into the + // synthetic hashCode method, but this method should still give + // a correct result if passed a case object. 
+ if (arr == 0) { + x.productPrefix.hashCode + } + else { + var h = seed + var i = 0 + while (i < arr) { + h = mix(h, x.productElement(i).##) + i += 1 + } + finalizeHash(h, arr) + } + } + + /** Compute the hash of a string */ + final def stringHash(str: String, seed: Long): Long = { + var h = seed + var i = 0 + while (i + 1 < str.length) { + val data = (str.charAt(i) << 16) + str.charAt(i + 1) + h = mix(h, data) + i += 2 + } + if (i < str.length) h = mixLast(h, str.charAt(i).toLong) + finalizeHash(h, str.length) + } + + /** Compute a hash that is symmetric in its arguments - that is a hash + * where the order of appearance of elements does not matter. + * This is useful for hashing sets, for example. + */ + final def unorderedHash(xs: TraversableOnce[Any], seed: Long): Long = { + var a, b, n = 0L + var c = 1 + xs foreach { x => + val h = x.## + a += h + b ^= h + if (h != 0) c *= h + n += 1 + } + var h = seed + h = mix(h, a) + h = mix(h, b) + h = mixLast(h, c) + finalizeHash(h, n) + } + /** Compute a hash that depends on the order of its arguments. + */ + final def orderedHash(xs: TraversableOnce[Any], seed: Long): Long = { + var n = 0 + var h = seed + xs foreach { x => + h = mix(h, x.##) + n += 1 + } + finalizeHash(h, n) + } + + /** Compute the hash of an array. + */ + final def arrayHash[@specialized T](a: Array[T], seed: Long): Long = { + var h = seed + var i = 0 + while (i < a.length) { + h = mix(h, a(i).##) + i += 1 + } + finalizeHash(h, a.length) + } + + /** Compute the hash of a byte array. Faster than arrayHash, because + * it hashes 4 bytes at once. 
+ */ + final def bytesHash(data: Array[Byte], seed: Long): Long = { + var len = data.length + var h = seed + + // Body + var i = 0 + while(len >= 8) { + var k = data(i + 0) & 0xFF + k |= (data(i + 1) & 0xFF) << 8 + k |= (data(i + 2) & 0xFF) << 16 + k |= (data(i + 3) & 0xFF) << 24 + k |= (data(i + 4) & 0xFF) << 32 + k |= (data(i + 5) & 0xFF) << 40 + k |= (data(i + 6) & 0xFF) << 48 + k |= (data(i + 7) & 0xFF) << 56 + + h = mix(h, k) + + i += 8 + len -= 8 + } + + // Tail + var k = 0 + if(len == 7) k ^= (data(i + 6) & 0xFF) << 48 + if(len >= 6) k ^= (data(i + 5) & 0xFF) << 40 + if(len >= 5) k ^= (data(i + 4) & 0xFF) << 32 + if(len >= 4) k ^= (data(i + 3) & 0xFF) << 24 + if(len >= 3) k ^= (data(i + 2) & 0xFF) << 16 + if(len >= 2) k ^= (data(i + 1) & 0xFF) << 8 + if(len >= 1) { + k ^= (data(i + 0) & 0xFF) + h = mixLast(h, k) + } + + // Finalization + finalizeHash(h, data.length) + } + + final def listHash(xs: scala.collection.immutable.List[_], seed: Long): Long = { + var n = 0 + var h = seed + var elems = xs + while (!elems.isEmpty) { + val head = elems.head + val tail = elems.tail + h = mix(h, head.##) + n += 1 + elems = tail + } + finalizeHash(h, n) + } +} + +/** + * An implementation of Austin Appleby's MurmurHash 3 algorithm + * (MurmurHash3_x86_32). This object contains methods that hash + * values of various types as well as means to construct `Hashing` + * objects. + * + * This algorithm is designed to generate well-distributed non-cryptographic + * hashes. It is designed to hash data in 32 bit chunks (ints). + * + * The mix method needs to be called at each step to update the intermediate + * hash value. For the last chunk to incorporate into the hash mixLast may + * be used instead, which is slightly faster. Finally finalizeHash needs to + * be called to compute the final hash value. 
+ * + * This is based on the earlier MurmurHash3 code by Rex Kerr, but the + * MurmurHash3 algorithm was since changed by its creator Austin Appleby + * to remedy some weaknesses and improve performance. This represents the + * latest and supposedly final version of the algortihm (revision 136). + * + * @see [[http://code.google.com/p/smhasher]] + */ +object MurmurLongHash3 extends MurmurLongHash3 { + final val arraySeed = 0x3c074a61 + final val stringSeed = 0xf7ca7fd2 + final val productSeed = 0xcafebabe + final val symmetricSeed = 0xb592f7ae + final val traversableSeed = 0xe73a8b15 + final val seqSeed = "Seq".hashCode + final val mapSeed = "Map".hashCode + final val setSeed = "Set".hashCode + + def arrayHash[@specialized T](a: Array[T]): Long = arrayHash(a, arraySeed) + def bytesHash(data: Array[Byte]): Long = bytesHash(data, arraySeed) + def orderedHash(xs: TraversableOnce[Any]): Long = orderedHash(xs, symmetricSeed) + def productHash(x: Product): Long = productHash(x, productSeed) + def stringHash(x: String): Long = stringHash(x, stringSeed) + def unorderedHash(xs: TraversableOnce[Any]): Long = unorderedHash(xs, traversableSeed) + + /** To offer some potential for optimization. 
+ */ + def seqHash(xs: scala.collection.Seq[_]): Long = xs match { + case xs: List[_] => listHash(xs, seqSeed) + case xs => orderedHash(xs, seqSeed) + } + + def mapHash(xs: scala.collection.Map[_, _]): Long = unorderedHash(xs, mapSeed) + def setHash(xs: scala.collection.Set[_]): Long = unorderedHash(xs, setSeed) + + /* Need to adapt Hashing[_] + class ArrayHashing[@specialized T] extends Hashing[Array[T]] { + def hash(a: Array[T]) = arrayHash(a) + } + + def arrayHashing[@specialized T] = new ArrayHashing[T] + + def bytesHashing = new Hashing[Array[Byte]] { + def hash(data: Array[Byte]) = bytesHash(data) + } + + def orderedHashing = new Hashing[TraversableOnce[Any]] { + def hash(xs: TraversableOnce[Any]) = orderedHash(xs) + } + + def productHashing = new Hashing[Product] { + def hash(x: Product) = productHash(x) + } + + def stringHashing = new Hashing[String] { + def hash(x: String) = stringHash(x) + } + + def unorderedHashing = new Hashing[TraversableOnce[Any]] { + def hash(xs: TraversableOnce[Any]) = unorderedHash(xs) + } + */ +} From 54560a66d674358638154603b54013c7fe9d8d4c Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Thu, 27 Apr 2017 10:49:04 +0200 Subject: [PATCH 4/8] Port and use 64-bit CityHash for tasty uuid. 
--- .../tools/dotc/core/tasty/TastyPickler.scala | 6 +- .../src/dotty/tools/dotc/util/CityHash.scala | 186 +++++++++++++ .../tools/dotc/util/MurmurLongHash3.scala | 261 ------------------ 3 files changed, 189 insertions(+), 264 deletions(-) create mode 100644 compiler/src/dotty/tools/dotc/util/CityHash.scala delete mode 100644 compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 094fca964314..1fa4fd995bd4 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -6,7 +6,7 @@ package tasty import TastyFormat._ import collection.mutable import TastyBuffer._ -import util.MurmurLongHash3 +import util.CityHash import core.Symbols.Symbol import ast.tpd import Decorators._ @@ -26,8 +26,8 @@ class TastyPickler { buf.length + natSize(buf.length) } - val uuidLow: Long = MurmurLongHash3.bytesHash(nameBuffer.bytes) - val uuidHi: Long = sections.iterator.map(x => MurmurLongHash3.bytesHash(x._2.bytes)).fold(0L)(_ ^ _) + val uuidLow: Long = CityHash.bytesHash(nameBuffer.bytes) + val uuidHi: Long = sections.iterator.map(x => CityHash.bytesHash(x._2.bytes)).fold(0L)(_ ^ _) val headerBuffer = { val buf = new TastyBuffer(header.length + 24) diff --git a/compiler/src/dotty/tools/dotc/util/CityHash.scala b/compiler/src/dotty/tools/dotc/util/CityHash.scala new file mode 100644 index 000000000000..17551592fc90 --- /dev/null +++ b/compiler/src/dotty/tools/dotc/util/CityHash.scala @@ -0,0 +1,186 @@ +package dotty.tools.dotc.util + +/* Ported from https://github.com/google/cityhash/blob/master/src/city.cc */ +private[util] class CityHash { + + // Some primes between 2^63 and 2^64 for various uses. 
+ final val k0 = 0xc3a5c85c97cb3127L + final val k1 = 0xb492b66fbe98f273L + final val k2 = 0x9ae16a3b2f90404fL + + protected final def cityHash64(data: Array[Byte]): Long = { + implicit val implicitData: Array[Byte] = data + var s = 0 + var len = data.length + if (len <= 32) { + if (len <= 16) hashLen0to16(len) + else hashLen17to32(len) + } else if (len <= 64) { + hashLen33to64(len) + } else { + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + var x: Long = fetch64(s + len - 40) + var y: Long = fetch64(s + len - 16) + fetch64(s + len - 56) + var z: Long = hashLen16(fetch64(s + len - 48) + len, fetch64(s + len - 24)) + var v: (Long, Long) = weakHashLen32WithSeeds(s + len - 64, len, z) + var w: (Long, Long) = weakHashLen32WithSeeds(s + len - 32, y + k1, x) + x = x * k1 + fetch64(s) + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. + len = (len - 1) & ~63 + do { + x = rotate(x + y + v._1 + fetch64(s + 8), 37) * k1 + y = rotate(y + v._2 + fetch64(s + 48), 42) * k1 + x ^= w._2 + y += v._1 + fetch64(s + 40) + z = rotate(z + w._1, 33) * k1 + v = weakHashLen32WithSeeds(s, v._2 * k1, x + w._1) + w = weakHashLen32WithSeeds(s + 32, z + w._2, y + fetch64(s + 16)) + val tmp = z + z = x + x = tmp + s += 64 + len -= 64 + } while (len != 0) + + hashLen16(hashLen16(v._1, w._1) + shiftMix(y) * k1 + z,hashLen16(v._2, w._2) + x) + } + } + + private final def hashLen0to16(len: Int)(implicit data: Array[Byte]): Long = { + if (len >= 8) { + val mul: Long = k2 + len * 2 + val a: Long = fetch64(0) + k2 + val b: Long = fetch64(len - 8) + val c: Long = rotate(b, 37) * mul + a + val d: Long = (rotate(a, 25) + b) * mul + hashLen16(c, d, mul) + } else if (len >= 4) { + val mul = k2 + len * 2 + val a = fetch32(0) + hashLen16(len + (a << 3), fetch32(len - 4), mul) + } else if (len > 0) { + val a: Byte = data(0) + val b: Byte = data(len >> 1) + val c: Byte = data(len - 1) + val y: Int = 
a.toInt + (b.toInt << 8) + val z: Int = len + (c.toInt << 2) + shiftMix(y * k2 ^ z * k0) * k2 + } else { + k2 + } + } + + // This probably works well for 16-byte strings as well, but it may be overkill + // in that case. + private final def hashLen17to32(len: Int)(implicit data: Array[Byte]): Long = { + val mul: Long = k2 + len * 2 + val a: Long = fetch64(0) * k1 + val b: Long = fetch64(8) + val c: Long = fetch64(len - 8) * mul + val d: Long = fetch64(len - 16) * k2 + hashLen16(rotate(a + b, 43) + rotate(c, 30) + d, a + rotate(b + k2, 18) + c, mul) + } + + /** Return an 8-byte hash for 33 to 64 bytes. */ + private final def hashLen33to64(len: Int)(implicit data: Array[Byte]): Long = { + val mul: Long = k2 + len * 2 + val a = fetch64(0) * k2 + val b = fetch64(8) + val c = fetch64(len - 24) + val d = fetch64(len - 32) + val e = fetch64(16) * k2 + val f = fetch64(24) * 9 + val g = fetch64(len - 8) + val h = fetch64(len - 16) * mul + val u = rotate(a + g, 43) + (rotate(b, 30) + c) * 9 + val v = ((a + g) ^ d) + f + 1 + val w = bswap64((u + v) * mul) + h + val x = rotate(e + f, 42) + c + val y = (bswap64((v + w) * mul) + g) * mul + val z = e + f + c + val a2 = bswap64((x + z) * mul + y) + b + shiftMix((z + a2) * mul + d + h) * mul + x + } + + private final def hashLen16(hi: Long, lo: Long): Long = { + val kMul = 0x9ddfea08eb382d69L + var a = (lo ^ hi) * kMul + a ^= (a >> 47) + var b = (hi ^ a) * kMul + b ^= (b >> 47) + b * kMul + } + + private final def hashLen16(u: Long, v: Long, mul: Long): Long = { + // Murmur-inspired hashing. + var a = (u ^ v) * mul + a ^= (a >> 47) + a = (v ^ a) * mul + a ^= (a >> 47) + a * mul + } + + /** Return a 16-byte hash for 48 bytes. Quick and dirty. + * Callers do best to use "random-looking" values for a and b. 
+ */ + private final def weakHashLen32WithSeeds(w: Long, x: Long, y: Long, z: Long, a0: Long, b0: Long): (Long, Long) = { + var a = a0 + var b = b0 + a += w + b = rotate(b + a + z, 21) + val c: Long = a + a += x + a += y + b += rotate(a, 44) + (a + z, b + c) + } + + /** Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. */ + private final def weakHashLen32WithSeeds(s: Int, a: Long, b: Long)(implicit data: Array[Byte]): (Long, Long) = + weakHashLen32WithSeeds(fetch64(s), fetch64(s + 8), fetch64(s + 16), fetch64(s + 24), a, b) + + private final def fetch64(idx: Int)(implicit data: Array[Byte]): Long = { + var x: Long = data(idx) + x = data(idx + 1) | (x << 8) + x = data(idx + 2) | (x << 8) + x = data(idx + 3) | (x << 8) + x = data(idx + 4) | (x << 8) + x = data(idx + 5) | (x << 8) + x = data(idx + 6) | (x << 8) + data(idx + 7) | (x << 8) + } + + private final def fetch32(idx: Int)(implicit data: Array[Byte]): Long = { + var x: Int = data(idx) + x = data(idx + 1) | (x << 8) + x = data(idx + 2) | (x << 8) + data(idx + 3) | (x << 8) + } + + private final def bswap64(x: Long): Long = { + ((x & 0xff00000000000000L) >> 56) | + ((x & 0x00ff000000000000L) >> 40) | + ((x & 0x0000ff0000000000L) >> 24) | + ((x & 0x000000ff00000000L) >> 8) | + ((x & 0x00000000ff000000L) << 8) | + ((x & 0x0000000000ff0000L) << 24) | + ((x & 0x000000000000ff00L) << 40) | + ((x & 0x00000000000000ffL) << 56) + } + + // Bitwise right rotate. Normally this will compile to a single + // instruction, especially if the shift is a manifest constant. + private final def rotate(v: Long, shift: Int): Long = { + // Avoid shifting by 64: doing so yields an undefined result. 
+ if (shift == 0) v else (v >> shift) | (v << (64 - shift)) + } + + private final def shiftMix(v: Long): Long = v ^ (v >> 47) + +} + +object CityHash extends CityHash { + def bytesHash(data: Array[Byte]): Long = cityHash64(data) +} diff --git a/compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala b/compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala deleted file mode 100644 index 1da998a02e9c..000000000000 --- a/compiler/src/dotty/tools/dotc/util/MurmurLongHash3.scala +++ /dev/null @@ -1,261 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -/* Copied and adapted from scala.util.hashing.MurmurHash3 to use Longs */ - -package dotty.tools.dotc.util - -import java.lang.Long.{ rotateLeft => rotl } - -private[util] class MurmurLongHash3 { - /** Mix in a block of data into an intermediate hash value. */ - final def mix(hash: Long, data: Long): Long = { - var h = mixLast(hash, data) - h = rotl(h, 13) - h * 5L + 0x96cd1c3532ac3b17L - } - - /** May optionally be used as the last mixing step. Is a little bit faster than mix, - * as it does no further mixing of the resulting hash. For the last element this is not - * necessary as the hash is thoroughly mixed during finalization anyway. */ - final def mixLast(hash: Long, data: Long): Long = { - var k = data - - k *= 0x239b961bab0e9789L - k = rotl(k, 15) - k *= 0x38b34ae5a1e38b93L - - hash ^ k - } - - /** Finalize a hash to incorporate the length and make sure all bits avalanche. */ - final def finalizeHash(hash: Long, length: Long): Long = avalanche(hash ^ length) - - /** Force all bits of the hash to avalanche. Used for finalizing the hash. 
*/ - private final def avalanche(hash: Long): Long = { - var h = hash - - h ^= h >>> 33 - h *= 0xff51afd7ed558ccdL - h ^= h >>> 33 - h *= 0xc4ceb9fe1a85ec53L - h ^= h >>> 33 - - h - } - - /** Compute the hash of a product */ - final def productHash(x: Product, seed: Long): Long = { - val arr = x.productArity - // Case objects have the hashCode inlined directly into the - // synthetic hashCode method, but this method should still give - // a correct result if passed a case object. - if (arr == 0) { - x.productPrefix.hashCode - } - else { - var h = seed - var i = 0 - while (i < arr) { - h = mix(h, x.productElement(i).##) - i += 1 - } - finalizeHash(h, arr) - } - } - - /** Compute the hash of a string */ - final def stringHash(str: String, seed: Long): Long = { - var h = seed - var i = 0 - while (i + 1 < str.length) { - val data = (str.charAt(i) << 16) + str.charAt(i + 1) - h = mix(h, data) - i += 2 - } - if (i < str.length) h = mixLast(h, str.charAt(i).toLong) - finalizeHash(h, str.length) - } - - /** Compute a hash that is symmetric in its arguments - that is a hash - * where the order of appearance of elements does not matter. - * This is useful for hashing sets, for example. - */ - final def unorderedHash(xs: TraversableOnce[Any], seed: Long): Long = { - var a, b, n = 0L - var c = 1 - xs foreach { x => - val h = x.## - a += h - b ^= h - if (h != 0) c *= h - n += 1 - } - var h = seed - h = mix(h, a) - h = mix(h, b) - h = mixLast(h, c) - finalizeHash(h, n) - } - /** Compute a hash that depends on the order of its arguments. - */ - final def orderedHash(xs: TraversableOnce[Any], seed: Long): Long = { - var n = 0 - var h = seed - xs foreach { x => - h = mix(h, x.##) - n += 1 - } - finalizeHash(h, n) - } - - /** Compute the hash of an array. 
- */ - final def arrayHash[@specialized T](a: Array[T], seed: Long): Long = { - var h = seed - var i = 0 - while (i < a.length) { - h = mix(h, a(i).##) - i += 1 - } - finalizeHash(h, a.length) - } - - /** Compute the hash of a byte array. Faster than arrayHash, because - * it hashes 4 bytes at once. - */ - final def bytesHash(data: Array[Byte], seed: Long): Long = { - var len = data.length - var h = seed - - // Body - var i = 0 - while(len >= 8) { - var k = data(i + 0) & 0xFF - k |= (data(i + 1) & 0xFF) << 8 - k |= (data(i + 2) & 0xFF) << 16 - k |= (data(i + 3) & 0xFF) << 24 - k |= (data(i + 4) & 0xFF) << 32 - k |= (data(i + 5) & 0xFF) << 40 - k |= (data(i + 6) & 0xFF) << 48 - k |= (data(i + 7) & 0xFF) << 56 - - h = mix(h, k) - - i += 8 - len -= 8 - } - - // Tail - var k = 0 - if(len == 7) k ^= (data(i + 6) & 0xFF) << 48 - if(len >= 6) k ^= (data(i + 5) & 0xFF) << 40 - if(len >= 5) k ^= (data(i + 4) & 0xFF) << 32 - if(len >= 4) k ^= (data(i + 3) & 0xFF) << 24 - if(len >= 3) k ^= (data(i + 2) & 0xFF) << 16 - if(len >= 2) k ^= (data(i + 1) & 0xFF) << 8 - if(len >= 1) { - k ^= (data(i + 0) & 0xFF) - h = mixLast(h, k) - } - - // Finalization - finalizeHash(h, data.length) - } - - final def listHash(xs: scala.collection.immutable.List[_], seed: Long): Long = { - var n = 0 - var h = seed - var elems = xs - while (!elems.isEmpty) { - val head = elems.head - val tail = elems.tail - h = mix(h, head.##) - n += 1 - elems = tail - } - finalizeHash(h, n) - } -} - -/** - * An implementation of Austin Appleby's MurmurHash 3 algorithm - * (MurmurHash3_x86_32). This object contains methods that hash - * values of various types as well as means to construct `Hashing` - * objects. - * - * This algorithm is designed to generate well-distributed non-cryptographic - * hashes. It is designed to hash data in 32 bit chunks (ints). - * - * The mix method needs to be called at each step to update the intermediate - * hash value. 
For the last chunk to incorporate into the hash mixLast may - * be used instead, which is slightly faster. Finally finalizeHash needs to - * be called to compute the final hash value. - * - * This is based on the earlier MurmurHash3 code by Rex Kerr, but the - * MurmurHash3 algorithm was since changed by its creator Austin Appleby - * to remedy some weaknesses and improve performance. This represents the - * latest and supposedly final version of the algortihm (revision 136). - * - * @see [[http://code.google.com/p/smhasher]] - */ -object MurmurLongHash3 extends MurmurLongHash3 { - final val arraySeed = 0x3c074a61 - final val stringSeed = 0xf7ca7fd2 - final val productSeed = 0xcafebabe - final val symmetricSeed = 0xb592f7ae - final val traversableSeed = 0xe73a8b15 - final val seqSeed = "Seq".hashCode - final val mapSeed = "Map".hashCode - final val setSeed = "Set".hashCode - - def arrayHash[@specialized T](a: Array[T]): Long = arrayHash(a, arraySeed) - def bytesHash(data: Array[Byte]): Long = bytesHash(data, arraySeed) - def orderedHash(xs: TraversableOnce[Any]): Long = orderedHash(xs, symmetricSeed) - def productHash(x: Product): Long = productHash(x, productSeed) - def stringHash(x: String): Long = stringHash(x, stringSeed) - def unorderedHash(xs: TraversableOnce[Any]): Long = unorderedHash(xs, traversableSeed) - - /** To offer some potential for optimization. 
- */ - def seqHash(xs: scala.collection.Seq[_]): Long = xs match { - case xs: List[_] => listHash(xs, seqSeed) - case xs => orderedHash(xs, seqSeed) - } - - def mapHash(xs: scala.collection.Map[_, _]): Long = unorderedHash(xs, mapSeed) - def setHash(xs: scala.collection.Set[_]): Long = unorderedHash(xs, setSeed) - - /* Need to adapt Hashing[_] - class ArrayHashing[@specialized T] extends Hashing[Array[T]] { - def hash(a: Array[T]) = arrayHash(a) - } - - def arrayHashing[@specialized T] = new ArrayHashing[T] - - def bytesHashing = new Hashing[Array[Byte]] { - def hash(data: Array[Byte]) = bytesHash(data) - } - - def orderedHashing = new Hashing[TraversableOnce[Any]] { - def hash(xs: TraversableOnce[Any]) = orderedHash(xs) - } - - def productHashing = new Hashing[Product] { - def hash(x: Product) = productHash(x) - } - - def stringHashing = new Hashing[String] { - def hash(x: String) = stringHash(x) - } - - def unorderedHashing = new Hashing[TraversableOnce[Any]] { - def hash(xs: TraversableOnce[Any]) = unorderedHash(xs) - } - */ -} From 5cbd788e293086df9921055d94b3628f6ed59d33 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Fri, 28 Apr 2017 13:47:01 +0200 Subject: [PATCH 5/8] Add copyright --- .../src/dotty/tools/dotc/util/CityHash.scala | 32 ++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/compiler/src/dotty/tools/dotc/util/CityHash.scala b/compiler/src/dotty/tools/dotc/util/CityHash.scala index 17551592fc90..eeb5de75f7f8 100644 --- a/compiler/src/dotty/tools/dotc/util/CityHash.scala +++ b/compiler/src/dotty/tools/dotc/util/CityHash.scala @@ -1,6 +1,36 @@ +// Ported from https://github.com/google/cityhash/blob/master/src/city.cc +// +// Copyright (c) 2011 Google, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// This file provides CityHash64() and related functions. +// +// It's probably possible to create even faster hash functions by +// writing a program that systematically explores some of the space of +// possible hash functions, by using SIMD instructions, or by +// compromising on hash quality. + package dotty.tools.dotc.util -/* Ported from https://github.com/google/cityhash/blob/master/src/city.cc */ private[util] class CityHash { // Some primes between 2^63 and 2^64 for various uses. From c73fd4b0073de3599b3bcd36186977c286b54076 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Wed, 3 May 2017 11:50:12 +0200 Subject: [PATCH 6/8] Fix byte to long casts. 
--- .../src/dotty/tools/dotc/util/CityHash.scala | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/util/CityHash.scala b/compiler/src/dotty/tools/dotc/util/CityHash.scala index eeb5de75f7f8..97a415a8b932 100644 --- a/compiler/src/dotty/tools/dotc/util/CityHash.scala +++ b/compiler/src/dotty/tools/dotc/util/CityHash.scala @@ -34,9 +34,15 @@ package dotty.tools.dotc.util private[util] class CityHash { // Some primes between 2^63 and 2^64 for various uses. - final val k0 = 0xc3a5c85c97cb3127L - final val k1 = 0xb492b66fbe98f273L - final val k2 = 0x9ae16a3b2f90404fL + private val k0 = 0xc3a5c85c97cb3127L + private val k1 = 0xb492b66fbe98f273L + private val k2 = 0x9ae16a3b2f90404fL + + protected final def cityHash64WithSeed(data: Array[Byte], seed: Long): Long = + cityHash64WithSeeds(data, k2, seed) + + protected final def cityHash64WithSeeds(data: Array[Byte], seed0: Long, seed1: Long): Long = + hashLen16(cityHash64(data) - seed0, seed1) protected final def cityHash64(data: Array[Byte]): Long = { implicit val implicitData: Array[Byte] = data @@ -91,11 +97,11 @@ private[util] class CityHash { val a = fetch32(0) hashLen16(len + (a << 3), fetch32(len - 4), mul) } else if (len > 0) { - val a: Byte = data(0) - val b: Byte = data(len >> 1) - val c: Byte = data(len - 1) - val y: Int = a.toInt + (b.toInt << 8) - val z: Int = len + (c.toInt << 2) + val a: Int = data(0) & 0xFF + val b: Int = data(len >> 1) & 0xFF + val c: Int = data(len - 1) & 0xFF + val y: Int = a + (b << 8) + val z: Int = len + (c << 2) shiftMix(y * k2 ^ z * k0) * k2 } else { k2 @@ -172,21 +178,21 @@ private[util] class CityHash { weakHashLen32WithSeeds(fetch64(s), fetch64(s + 8), fetch64(s + 16), fetch64(s + 24), a, b) private final def fetch64(idx: Int)(implicit data: Array[Byte]): Long = { - var x: Long = data(idx) - x = data(idx + 1) | (x << 8) - x = data(idx + 2) | (x << 8) - x = data(idx + 3) | (x << 8) - x = data(idx + 4) | (x << 8) - x 
= data(idx + 5) | (x << 8) - x = data(idx + 6) | (x << 8) - data(idx + 7) | (x << 8) + var x: Long = data(idx) & 0xFFL + x = data(idx + 1) & 0xFFL | (x << 8) + x = data(idx + 2) & 0xFFL | (x << 8) + x = data(idx + 3) & 0xFFL | (x << 8) + x = data(idx + 4) & 0xFFL | (x << 8) + x = data(idx + 5) & 0xFFL | (x << 8) + x = data(idx + 6) & 0xFFL | (x << 8) + data(idx + 7) & 0xFFL | (x << 8) } private final def fetch32(idx: Int)(implicit data: Array[Byte]): Long = { - var x: Int = data(idx) - x = data(idx + 1) | (x << 8) - x = data(idx + 2) | (x << 8) - data(idx + 3) | (x << 8) + var x: Int = data(idx) & 0xFF + x = data(idx + 1) & 0xFF | (x << 8) + x = data(idx + 2) & 0xFF | (x << 8) + data(idx + 3) & 0xFF | (x << 8) } private final def bswap64(x: Long): Long = { From eed4b9b427d2a00822eaf05a63920fa719c79e19 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Wed, 3 May 2017 12:07:28 +0200 Subject: [PATCH 7/8] Add test to idempotency blacklist. --- compiler/test/dotty/tools/dotc/CompilationTests.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler/test/dotty/tools/dotc/CompilationTests.scala b/compiler/test/dotty/tools/dotc/CompilationTests.scala index bff15db1be84..162b93d87067 100644 --- a/compiler/test/dotty/tools/dotc/CompilationTests.scala +++ b/compiler/test/dotty/tools/dotc/CompilationTests.scala @@ -242,9 +242,11 @@ class CompilationTests extends ParallelTesting { var failed = 0 var total = 0 val blacklisted = Set( + // Bridges on collections in different order. Second one in scala2 order. "pos/Map/scala/collection/immutable/Map", "pos/Map/scala/collection/immutable/AbstractMap", - "pos/t1203a/NodeSeq" + "pos/t1203a/NodeSeq", + "pos/i2345/Whatever" ) def checkIdempotency(): Unit = { val groupedBytecodeFiles: List[(Path, Path, Path, Path)] = { From 000b4324fcd8375f867f0979908ea98aa7d373a1 Mon Sep 17 00:00:00 2001 From: Nicolas Stucki Date: Wed, 3 May 2017 12:06:14 +0200 Subject: [PATCH 8/8] Implement TastyHash based on PJW hash. 
--- .../tools/dotc/core/tasty/TastyPickler.scala | 24 +- .../src/dotty/tools/dotc/util/CityHash.scala | 222 ------------------ 2 files changed, 21 insertions(+), 225 deletions(-) delete mode 100644 compiler/src/dotty/tools/dotc/util/CityHash.scala diff --git a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala index 1fa4fd995bd4..9806470cb926 100644 --- a/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala +++ b/compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala @@ -6,7 +6,6 @@ package tasty import TastyFormat._ import collection.mutable import TastyBuffer._ -import util.CityHash import core.Symbols.Symbol import ast.tpd import Decorators._ @@ -26,8 +25,8 @@ class TastyPickler { buf.length + natSize(buf.length) } - val uuidLow: Long = CityHash.bytesHash(nameBuffer.bytes) - val uuidHi: Long = sections.iterator.map(x => CityHash.bytesHash(x._2.bytes)).fold(0L)(_ ^ _) + val uuidLow: Long = pjwHash64(nameBuffer.bytes) + val uuidHi: Long = sections.iterator.map(x => pjwHash64(x._2.bytes)).fold(0L)(_ ^ _) val headerBuffer = { val buf = new TastyBuffer(header.length + 24) @@ -72,4 +71,23 @@ class TastyPickler { var addrOfSym: Symbol => Option[Addr] = (_ => None) val treePkl = new TreePickler(this) + + /** Returns a non-cryptographic 64-bit hash of the array. 
+ * + * from https://en.wikipedia.org/wiki/PJW_hash_function#Implementation + */ + private def pjwHash64(data: Array[Byte]): Long = { + var h = 0L + var high = 0L + var i = 0 + while (i < data.length) { + h = (h << 4) + data(i) + high = h & 0xF0000000L + if (high != 0) + h ^= high >> 24 + h &= ~high + i += 1 + } + h + } } diff --git a/compiler/src/dotty/tools/dotc/util/CityHash.scala b/compiler/src/dotty/tools/dotc/util/CityHash.scala deleted file mode 100644 index 97a415a8b932..000000000000 --- a/compiler/src/dotty/tools/dotc/util/CityHash.scala +++ /dev/null @@ -1,222 +0,0 @@ -// Ported from https://github.com/google/cityhash/blob/master/src/city.cc -// -// Copyright (c) 2011 Google, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -// -// CityHash, by Geoff Pike and Jyrki Alakuijala -// -// This file provides CityHash64() and related functions. 
-// -// It's probably possible to create even faster hash functions by -// writing a program that systematically explores some of the space of -// possible hash functions, by using SIMD instructions, or by -// compromising on hash quality. - -package dotty.tools.dotc.util - -private[util] class CityHash { - - // Some primes between 2^63 and 2^64 for various uses. - private val k0 = 0xc3a5c85c97cb3127L - private val k1 = 0xb492b66fbe98f273L - private val k2 = 0x9ae16a3b2f90404fL - - protected final def cityHash64WithSeed(data: Array[Byte], seed: Long): Long = - cityHash64WithSeeds(data, k2, seed) - - protected final def cityHash64WithSeeds(data: Array[Byte], seed0: Long, seed1: Long): Long = - hashLen16(cityHash64(data) - seed0, seed1) - - protected final def cityHash64(data: Array[Byte]): Long = { - implicit val implicitData: Array[Byte] = data - var s = 0 - var len = data.length - if (len <= 32) { - if (len <= 16) hashLen0to16(len) - else hashLen17to32(len) - } else if (len <= 64) { - hashLen33to64(len) - } else { - // For strings over 64 bytes we hash the end first, and then as we - // loop we keep 56 bytes of state: v, w, x, y, and z. - var x: Long = fetch64(s + len - 40) - var y: Long = fetch64(s + len - 16) + fetch64(s + len - 56) - var z: Long = hashLen16(fetch64(s + len - 48) + len, fetch64(s + len - 24)) - var v: (Long, Long) = weakHashLen32WithSeeds(s + len - 64, len, z) - var w: (Long, Long) = weakHashLen32WithSeeds(s + len - 32, y + k1, x) - x = x * k1 + fetch64(s) - - // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
- len = (len - 1) & ~63 - do { - x = rotate(x + y + v._1 + fetch64(s + 8), 37) * k1 - y = rotate(y + v._2 + fetch64(s + 48), 42) * k1 - x ^= w._2 - y += v._1 + fetch64(s + 40) - z = rotate(z + w._1, 33) * k1 - v = weakHashLen32WithSeeds(s, v._2 * k1, x + w._1) - w = weakHashLen32WithSeeds(s + 32, z + w._2, y + fetch64(s + 16)) - val tmp = z - z = x - x = tmp - s += 64 - len -= 64 - } while (len != 0) - - hashLen16(hashLen16(v._1, w._1) + shiftMix(y) * k1 + z,hashLen16(v._2, w._2) + x) - } - } - - private final def hashLen0to16(len: Int)(implicit data: Array[Byte]): Long = { - if (len >= 8) { - val mul: Long = k2 + len * 2 - val a: Long = fetch64(0) + k2 - val b: Long = fetch64(len - 8) - val c: Long = rotate(b, 37) * mul + a - val d: Long = (rotate(a, 25) + b) * mul - hashLen16(c, d, mul) - } else if (len >= 4) { - val mul = k2 + len * 2 - val a = fetch32(0) - hashLen16(len + (a << 3), fetch32(len - 4), mul) - } else if (len > 0) { - val a: Int = data(0) & 0xFF - val b: Int = data(len >> 1) & 0xFF - val c: Int = data(len - 1) & 0xFF - val y: Int = a + (b << 8) - val z: Int = len + (c << 2) - shiftMix(y * k2 ^ z * k0) * k2 - } else { - k2 - } - } - - // This probably works well for 16-byte strings as well, but it may be overkill - // in that case. - private final def hashLen17to32(len: Int)(implicit data: Array[Byte]): Long = { - val mul: Long = k2 + len * 2 - val a: Long = fetch64(0) * k1 - val b: Long = fetch64(8) - val c: Long = fetch64(len - 8) * mul - val d: Long = fetch64(len - 16) * k2 - hashLen16(rotate(a + b, 43) + rotate(c, 30) + d, a + rotate(b + k2, 18) + c, mul) - } - - /** Return an 8-byte hash for 33 to 64 bytes. 
*/ - private final def hashLen33to64(len: Int)(implicit data: Array[Byte]): Long = { - val mul: Long = k2 + len * 2 - val a = fetch64(0) * k2 - val b = fetch64(8) - val c = fetch64(len - 24) - val d = fetch64(len - 32) - val e = fetch64(16) * k2 - val f = fetch64(24) * 9 - val g = fetch64(len - 8) - val h = fetch64(len - 16) * mul - val u = rotate(a + g, 43) + (rotate(b, 30) + c) * 9 - val v = ((a + g) ^ d) + f + 1 - val w = bswap64((u + v) * mul) + h - val x = rotate(e + f, 42) + c - val y = (bswap64((v + w) * mul) + g) * mul - val z = e + f + c - val a2 = bswap64((x + z) * mul + y) + b - shiftMix((z + a2) * mul + d + h) * mul + x - } - - private final def hashLen16(hi: Long, lo: Long): Long = { - val kMul = 0x9ddfea08eb382d69L - var a = (lo ^ hi) * kMul - a ^= (a >> 47) - var b = (hi ^ a) * kMul - b ^= (b >> 47) - b * kMul - } - - private final def hashLen16(u: Long, v: Long, mul: Long): Long = { - // Murmur-inspired hashing. - var a = (u ^ v) * mul - a ^= (a >> 47) - a = (v ^ a) * mul - a ^= (a >> 47) - a * mul - } - - /** Return a 16-byte hash for 48 bytes. Quick and dirty. - * Callers do best to use "random-looking" values for a and b. - */ - private final def weakHashLen32WithSeeds(w: Long, x: Long, y: Long, z: Long, a0: Long, b0: Long): (Long, Long) = { - var a = a0 - var b = b0 - a += w - b = rotate(b + a + z, 21) - val c: Long = a - a += x - a += y - b += rotate(a, 44) - (a + z, b + c) - } - - /** Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 
*/ - private final def weakHashLen32WithSeeds(s: Int, a: Long, b: Long)(implicit data: Array[Byte]): (Long, Long) = - weakHashLen32WithSeeds(fetch64(s), fetch64(s + 8), fetch64(s + 16), fetch64(s + 24), a, b) - - private final def fetch64(idx: Int)(implicit data: Array[Byte]): Long = { - var x: Long = data(idx) & 0xFFL - x = data(idx + 1) & 0xFFL | (x << 8) - x = data(idx + 2) & 0xFFL | (x << 8) - x = data(idx + 3) & 0xFFL | (x << 8) - x = data(idx + 4) & 0xFFL | (x << 8) - x = data(idx + 5) & 0xFFL | (x << 8) - x = data(idx + 6) & 0xFFL | (x << 8) - data(idx + 7) & 0xFFL | (x << 8) - } - - private final def fetch32(idx: Int)(implicit data: Array[Byte]): Long = { - var x: Int = data(idx) & 0xFF - x = data(idx + 1) & 0xFF | (x << 8) - x = data(idx + 2) & 0xFF | (x << 8) - data(idx + 3) & 0xFF | (x << 8) - } - - private final def bswap64(x: Long): Long = { - ((x & 0xff00000000000000L) >> 56) | - ((x & 0x00ff000000000000L) >> 40) | - ((x & 0x0000ff0000000000L) >> 24) | - ((x & 0x000000ff00000000L) >> 8) | - ((x & 0x00000000ff000000L) << 8) | - ((x & 0x0000000000ff0000L) << 24) | - ((x & 0x000000000000ff00L) << 40) | - ((x & 0x00000000000000ffL) << 56) - } - - // Bitwise right rotate. Normally this will compile to a single - // instruction, especially if the shift is a manifest constant. - private final def rotate(v: Long, shift: Int): Long = { - // Avoid shifting by 64: doing so yields an undefined result. - if (shift == 0) v else (v >> shift) | (v << (64 - shift)) - } - - private final def shiftMix(v: Long): Long = v ^ (v >> 47) - -} - -object CityHash extends CityHash { - def bytesHash(data: Array[Byte]): Long = cityHash64(data) -}