Skip to content

Some improvements to util.HashSet and HashMap #9865

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions compiler/src/dotty/tools/dotc/core/Definitions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -845,8 +845,6 @@ class Definitions {
@tu lazy val Not_value: Symbol = NotClass.companionModule.requiredMethod(nme.value)

@tu lazy val ValueOfClass: ClassSymbol = requiredClass("scala.ValueOf")
@tu lazy val StatsModule: Symbol = requiredModule("dotty.tools.dotc.util.Stats")
@tu lazy val Stats_doRecord: Symbol = StatsModule.requiredMethod("doRecord")

@tu lazy val FromDigitsClass: ClassSymbol = requiredClass("scala.util.FromDigits")
@tu lazy val FromDigits_WithRadixClass: ClassSymbol = requiredClass("scala.util.FromDigits.WithRadix")
Expand Down
3 changes: 3 additions & 0 deletions compiler/src/dotty/tools/dotc/core/NameKinds.scala
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ object NameKinds {
case class QualInfo(name: SimpleName) extends Info with QualifiedInfo {
override def map(f: SimpleName => SimpleName): NameInfo = new QualInfo(f(name))
override def toString: String = s"$infoString $name"
override def hashCode = scala.runtime.ScalaRunTime._hashCode(this) * 31 + kind.hashCode
}

def apply(qual: TermName, name: SimpleName): TermName =
Expand Down Expand Up @@ -173,6 +174,7 @@ object NameKinds {
type ThisInfo = NumberedInfo
case class NumberedInfo(val num: Int) extends Info with NameKinds.NumberedInfo {
override def toString: String = s"$infoString $num"
override def hashCode = scala.runtime.ScalaRunTime._hashCode(this) * 31 + kind.hashCode
}
def apply(qual: TermName, num: Int): TermName =
qual.derived(new NumberedInfo(num))
Expand Down Expand Up @@ -371,6 +373,7 @@ object NameKinds {
case class SignedInfo(sig: Signature) extends Info {
assert(sig ne Signature.NotAMethod)
override def toString: String = s"$infoString $sig"
override def hashCode = scala.runtime.ScalaRunTime._hashCode(this) * 31 + kind.hashCode
}
type ThisInfo = SignedInfo

Expand Down
47 changes: 36 additions & 11 deletions compiler/src/dotty/tools/dotc/transform/Instrumentation.scala
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
package dotty.tools.dotc
package dotty.tools
package dotc
package transform

import core._
Expand Down Expand Up @@ -27,33 +28,57 @@ class Instrumentation extends MiniPhase { thisPhase =>
override def isEnabled(using Context) =
ctx.settings.Yinstrument.value

private val namesOfInterest = List(
"::", "+=", "toString", "newArray", "box", "toCharArray",
"map", "flatMap", "filter", "withFilter", "collect", "foldLeft", "foldRight", "take",
"reverse", "mapConserve", "mapconserve", "filterConserve", "zip",
"denotsNamed", "lookup", "lookupEntry", "lookupAll", "toList")
private var namesToRecord: Set[Name] = _
private val collectionNamesOfInterest = List(
"map", "flatMap", "filter", "filterNot", "withFilter", "collect", "flatten", "foldLeft", "foldRight", "take",
"reverse", "zip", "++", ":::", ":+", "distinct", "dropRight", "takeRight", "groupBy", "groupMap", "init", "inits",
"interect", "mkString", "partition", "reverse_:::", "scanLeft", "scanRight",
"sortBy", "sortWith", "sorted", "span", "splitAt", "takeWhile", "transpose", "unzip", "unzip3",
"updated", "zipAll", "zipWithIndex",
"mapConserve", "mapconserve", "filterConserve", "zipWithConserve", "mapWithIndexConserve"
)

private val namesOfInterest = collectionNamesOfInterest ++ List(
"::", "+=", "toString", "newArray", "box", "toCharArray", "termName", "typeName",
"slice", "staticRef", "requiredClass")

private var consName: TermName = _
private var consEqName: TermName = _
private var namesToRecord: Set[Name] = _
private var collectionNamesToRecord: Set[Name] = _
private var Stats_doRecord: Symbol = _
private var Stats_doRecordSize: Symbol = _
private var CollectionIterableClass: ClassSymbol = _

override def prepareForUnit(tree: Tree)(using Context): Context =
namesToRecord = namesOfInterest.map(_.toTermName).toSet
collectionNamesToRecord = collectionNamesOfInterest.map(_.toTermName).toSet
val StatsModule = requiredModule("dotty.tools.dotc.util.Stats")
Stats_doRecord = StatsModule.requiredMethod("doRecord")
Stats_doRecordSize = StatsModule.requiredMethod("doRecordSize")
CollectionIterableClass = requiredClass("scala.collection.Iterable")
ctx

private def record(category: String, tree: Tree)(using Context): Tree = {
val key = Literal(Constant(s"$category@${tree.sourcePos.show}"))
ref(defn.Stats_doRecord).appliedTo(key, Literal(Constant(1)))
ref(Stats_doRecord).appliedTo(key, Literal(Constant(1)))
}

private def recordSize(tree: Apply)(using Context): Tree = tree.fun match
case sel @ Select(qual, name)
if collectionNamesToRecord.contains(name)
&& qual.tpe.widen.derivesFrom(CollectionIterableClass) =>
val key = Literal(Constant(s"totalSize/${name} in ${qual.tpe.widen.classSymbol.name}@${tree.sourcePos.show}"))
val qual1 = ref(Stats_doRecordSize).appliedTo(key, qual).cast(qual.tpe.widen)
cpy.Apply(tree)(cpy.Select(sel)(qual1, name), tree.args)
case _ =>
tree

private def ok(using Context) =
!ctx.owner.ownersIterator.exists(_.name.toString.startsWith("Stats"))

override def transformApply(tree: Apply)(using Context): Tree = tree.fun match {
case Select(nu: New, _) =>
cpy.Block(tree)(record(i"alloc/${nu.tpe}", tree) :: Nil, tree)
case ref: RefTree if namesToRecord.contains(ref.name) && ok =>
cpy.Block(tree)(record(i"call/${ref.name}", tree) :: Nil, tree)
cpy.Block(tree)(record(i"call/${ref.name}", tree) :: Nil, recordSize(tree))
case _ =>
tree
}
Expand Down
8 changes: 5 additions & 3 deletions compiler/src/dotty/tools/dotc/util/GenericHashMap.scala
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,11 @@ abstract class GenericHashMap[Key, Value]
k = keyAt(idx)
k != null
do
val eidx = index(hash(k))
if isDense
|| index(hole - index(hash(k))) < limit * 2
// hash(k) is then logically at or before hole; can be moved forward to fill hole
|| index(eidx - (hole + 2)) > index(idx - (hole + 2))
// entry `e` at `idx` can move unless `index(hash(e))` is in
// the (ring-)interval [hole + 2 .. idx]
then
setKey(hole, k)
setValue(hole, valueAt(idx))
Expand Down Expand Up @@ -156,7 +158,7 @@ abstract class GenericHashMap[Key, Value]
protected def growTable(): Unit =
val oldTable = table
val newLength =
if oldTable.length == DenseLimit then DenseLimit * 2 * roundToPower(capacityMultiple)
if table.length == DenseLimit * 2 then table.length * roundToPower(capacityMultiple)
Copy link
Contributor

@liufengyun liufengyun Sep 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't quite understand why we need the if here, instead of simply growing the table, since all calls to growTable is guarded by if used > limit. Is it to just create a larger array at the size DenseLimit * 2 (instead of simply doubling the size)?

Meanwhile, if we change the code here, do we need similar changes in HashSet.growTable?

else table.length
allocate(newLength)
copyFrom(oldTable)
Expand Down
7 changes: 6 additions & 1 deletion compiler/src/dotty/tools/dotc/util/HashMap.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@ extends GenericHashMap[Key, Value](initialCapacity, capacityMultiple):
/** Hashcode is left-shifted by 1, so lowest bit is not lost
* when taking the index.
*/
final def hash(x: Key): Int = x.hashCode << 1
final def hash(key: Key): Int =
val h = key.hashCode
// Part of the MurmurHash3 32 bit finalizer
val i = (h ^ (h >>> 16)) * 0x85EBCA6B
val j = (i ^ (i >>> 13)) & 0x7FFFFFFF
(if j==0 then 0x41081989 else j) << 1

final def isEqual(x: Key, y: Key): Boolean = x.equals(y)

Expand Down
15 changes: 11 additions & 4 deletions compiler/src/dotty/tools/dotc/util/HashSet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,13 @@ class HashSet[T](initialCapacity: Int = 8, capacityMultiple: Int = 2) extends Mu

protected def isDense = limit < DenseLimit

/** Hashcode, by defualt `x.hashCode`, can be overridden */
protected def hash(x: T): Int = x.hashCode
/** Hashcode, by default a processed `x.hashCode`, can be overridden */
protected def hash(key: T): Int =
val h = key.hashCode
// Part of the MurmurHash3 32 bit finalizer
val i = (h ^ (h >>> 16)) * 0x85EBCA6B
val j = (i ^ (i >>> 13)) & 0x7FFFFFFF
if j==0 then 0x41081989 else j

/** Hashcode, by default `equals`, can be overridden */
protected def isEqual(x: T, y: T): Boolean = x.equals(y)
Expand Down Expand Up @@ -109,9 +114,11 @@ class HashSet[T](initialCapacity: Int = 8, capacityMultiple: Int = 2) extends Mu
e = entryAt(idx)
e != null
do
val eidx = index(hash(e))
if isDense
|| index(hole - index(hash(e))) < limit
// hash(k) is then logically at or before hole; can be moved forward to fill hole
|| index(eidx - (hole + 1)) > index(idx - (hole + 1))
// entry `e` at `idx` can move unless `index(hash(e))` is in
// the (ring-)interval [hole + 1 .. idx]
then
setEntry(hole, e)
hole = idx
Expand Down
4 changes: 4 additions & 0 deletions compiler/src/dotty/tools/dotc/util/Stats.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ import collection.mutable
hits(name) += n
}

def doRecordSize(fn: String, coll: scala.collection.Iterable[_]): coll.type =
doRecord(fn, coll.size)
coll

inline def trackTime[T](fn: String)(inline op: T): T =
if (enabled) doTrackTime(fn)(op) else op

Expand Down
71 changes: 71 additions & 0 deletions tests/run-with-compiler/settest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
trait Generator[+T]:
self =>
def generate: T
def map[S](f: T => S) = new Generator[S]:
def generate: S = f(self.generate)
def flatMap[S](f: T => Generator[S]) = new Generator[S]:
def generate: S = f(self.generate).generate

object Generator:
val NumLimit = 300
val Iterations = 10000

given integers as Generator[Int]:
val rand = new java.util.Random
def generate = rand.nextInt()

given booleans as Generator[Boolean] =
integers.map(x => x > 0)

def range(end: Int): Generator[Int] =
integers.map(x => (x % end).abs)

enum Op:
case Lookup, Update, Remove
export Op._

given ops as Generator[Op] =
range(10).map {
case 0 | 1 | 2 | 3 => Lookup
case 4 | 5 | 6 | 7 => Update
case 8 | 9 => Remove
}

val nums: Generator[Integer] = range(NumLimit).map(Integer(_))

@main def Test =
import Generator._

val set1 = dotty.tools.dotc.util.HashSet[Int]()
val set2 = scala.collection.mutable.HashSet[Int]()

def checkSame() =
assert(set1.size == set2.size)
for e <- set1.iterator do
assert(set2.contains(e))
for e <- set2.iterator do
assert(set1.contains(e))

def lookupTest(num: Integer) =
val res1 = set1.contains(num)
val res2 = set2.contains(num)
assert(res1 == res2)

def updateTest(num: Integer) =
lookupTest(num)
set1 += num
set2 += num
checkSame()

def removeTest(num: Integer) =
//println(s"test remove $num")
set1 -= num
set2 -= num
checkSame()

for i <- 0 until Iterations do
val num = nums.generate
Generator.ops.generate match
case Lookup => lookupTest(num)
case Update => updateTest(num)
case Remove => removeTest(num)