From 82d4a010bb9278bd73da66b3452bc5c1f88f3579 Mon Sep 17 00:00:00 2001 From: Antoine Gourlay Date: Mon, 6 Oct 2014 14:50:29 +0200 Subject: [PATCH] SI-8879 fix quadratic reading time in StreamReader StreamReader.nextEol used to loop all the way to Eol every time an element was read. That's very costly when lines are long. Furthermore, it used to call PagedSeq.length, forcing PagedSeq to load the whole input in memory, even when a single character was read. nextEol is now saved as part of the state of StreamReader, and is passed to child readers when created (as long as we do not read past the end of the line). Thus it computed only once per line, whatever the length. With the example in the ticket (SI-8879), we get: * before: User time (seconds): 82.12 System time (seconds): 0.07 Elapsed (wall clock) time (h:mm:ss or m:ss): 1:21.52 * after: User time (seconds): 1.05 System time (seconds): 0.06 Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.68 * for comparison, using PagedSeqReader directly: User time (seconds): 1.06 System time (seconds): 0.06 Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.69 `isDefinedAt` is used instead of `length` so that pages beyond the tested index do not need to be read. The test only tests this part. --- .../util/parsing/input/StreamReader.scala | 24 ++++++----- .../scala/util/parsing/combinator/t8879.scala | 42 +++++++++++++++++++ 2 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 src/test/scala/scala/util/parsing/combinator/t8879.scala diff --git a/src/main/scala/scala/util/parsing/input/StreamReader.scala b/src/main/scala/scala/util/parsing/input/StreamReader.scala index 384ecf0c..1883187d 100644 --- a/src/main/scala/scala/util/parsing/input/StreamReader.scala +++ b/src/main/scala/scala/util/parsing/input/StreamReader.scala @@ -45,27 +45,29 @@ object StreamReader { * @author Miles Sabin * @author Martin Odersky */ -sealed class StreamReader(seq: PagedSeq[Char], off: Int, lnum: Int) extends PagedSeqReader(seq, off) { - import StreamReader._ +sealed class StreamReader private (seq: PagedSeq[Char], off: Int, lnum: Int, nextEol0: Int) extends PagedSeqReader(seq, off) { + def this(seq: PagedSeq[Char], off: Int, lnum: Int) = this(seq, off, lnum, -1) + + import StreamReader.EofCh override def rest: StreamReader = - if (off == seq.length) this + if (!seq.isDefinedAt(off)) this else if (seq(off) == '\n') - new StreamReader(seq.slice(off + 1), 0, lnum + 1) - else new StreamReader(seq, off + 1, lnum) + new StreamReader(seq.slice(off + 1), 0, lnum + 1, -1) + else new StreamReader(seq, off + 1, lnum, nextEol0) - private def nextEol = { + private def nextEol = if (nextEol0 == -1) { var i = off - while (i < seq.length && seq(i) != '\n' && seq(i) != EofCh) i += 1 + while (seq.isDefinedAt(i) && seq(i) != '\n' && seq(i) != EofCh) i += 1 i - } + } else nextEol0 override def drop(n: Int): StreamReader = { val eolPos = nextEol - if (eolPos < off + n && eolPos < seq.length) - new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1).drop(off + n - (eolPos + 1)) + if (eolPos < off + n && seq.isDefinedAt(eolPos)) + new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1, -1).drop(off + n - (eolPos + 1)) else - new StreamReader(seq, off + n, lnum) + new StreamReader(seq, off + n, lnum, eolPos) } override def pos: Position = new Position { diff --git a/src/test/scala/scala/util/parsing/combinator/t8879.scala b/src/test/scala/scala/util/parsing/combinator/t8879.scala new file mode 100644 index 00000000..2be5a563 --- /dev/null +++ b/src/test/scala/scala/util/parsing/combinator/t8879.scala @@ -0,0 +1,42 @@ +import scala.util.parsing.input._ +import scala.collection.immutable.PagedSeq + +import org.junit.Test +import org.junit.Assert.fail + +class t8879 { + + @Test + def test: Unit = { + val testPagedSeq = { + var nbpage = 0 + def more(data: Array[Char], start: Int, len: Int): Int = { + if (nbpage < 1) { + var i = 0 + while (i < len && nbpage < 3) { + if (i % 100 != 0) { + data(start + i) = 'a' + } else { + data(start + i) = '\n' + } + i += 1 + } + if (i == 0) -1 else { + nbpage += 1 + i + } + } else { + fail("Should not read more than 1 page!") + 0 + } + } + + new PagedSeq(more(_: Array[Char], _: Int, _: Int)) + } + + val s = new StreamReader(testPagedSeq, 0, 1) + + // should not trigger reading of the second page + s.drop(20) + } +}