Merge pull request #79 from DarkDimius/noxml

odersky · odersky · commit 1dfe6567fee9 · 2014-03-19T09:53:46.000+01:00
Remove dependency on scala-xml.
diff --git a/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala b/src/dotty/tools/dotc/parsing/MarkupParserCommon.scala
@@ -8,21 +8,16 @@
 package dotty.tools.dotc
 package parsing
 
-import scala.xml._
-import scala.xml.parsing._
+import Utility._
+import scala.reflect.internal.Chars.SU
 
-import scala.io.Source
-import scala.xml.dtd._
-import scala.annotation.switch
-import Utility.Escapes.{ pairs => unescape }
 
-import Utility.SU
 
 /** This is not a public trait - it contains common code shared
  *  between the library level XML parser and the compiler's.
  *  All members should be accessed through those.
  */
-private[dotty] trait MarkupParserCommon extends TokenTests {
+private[dotty] trait MarkupParserCommon {
   protected def unreachable = scala.sys.error("Cannot be reached.")
 
   // type HandleType       // MarkupHandler, SymbolicXMLBuilder
diff --git a/src/dotty/tools/dotc/parsing/MarkupParsers.scala b/src/dotty/tools/dotc/parsing/MarkupParsers.scala
@@ -5,15 +5,12 @@ package parsing
 import scala.collection.mutable
 import mutable.{ Buffer, ArrayBuffer, ListBuffer }
 import scala.util.control.ControlThrowable
-import util.SourceFile
-import scala.xml.{ Text, TextBuffer }
-import scala.xml.Utility.{ isNameStart, isNameChar, isSpace }
-import scala.reflect.internal.Chars.{ SU, LF }
+import scala.reflect.internal.Chars.SU
 import Parsers._
 import util.Positions._
 import core._
-import ast.Trees._
 import Constants._
+import Utility._
 
 
 // XXX/Note: many/most of the functions in here are almost direct cut and pastes
@@ -50,7 +47,7 @@ object MarkupParsers {
 
   class MarkupParser(parser: Parser, final val preserveWS: Boolean) extends MarkupParserCommon {
 
-    import Tokens.{ EMPTY, LBRACE, RBRACE }
+    import Tokens.{ LBRACE, RBRACE }
 
     type PositionType = Position
     type InputType    = CharArrayReader
@@ -181,11 +178,20 @@ object MarkupParsers {
     }
 
     def appendText(pos: Position, ts: Buffer[Tree], txt: String): Unit = {
-      val toAppend =
-        if (preserveWS) Seq(txt)
-        else TextBuffer.fromString(txt).toText map (_.text)
+      def append(t: String) = ts append handle.text(pos, t)
 
-      toAppend foreach (t => ts append handle.text(pos, t))
+      if (preserveWS) append(txt)
+      else {
+        val sb = new StringBuilder()
+
+        txt foreach { c =>
+          if (!isSpace(c)) sb append c
+          else if (sb.isEmpty || !isSpace(sb.last)) sb append ' '
+        }
+
+        val trimmed = sb.toString.trim
+        if (!trimmed.isEmpty) append(trimmed)
+      }
     }
 
     /** adds entity/character to ts as side-effect
diff --git a/src/dotty/tools/dotc/parsing/Parsers.scala b/src/dotty/tools/dotc/parsing/Parsers.scala
@@ -13,14 +13,10 @@ import Flags._
 import Contexts._
 import Names._
 import ast.Trees._
-import ast.TreeInfo
 import Decorators._
 import StdNames._
 import util.Positions._
-import Types._
 import Constants._
-import NameOps._
-import util.Chars._
 import ScriptParsers._
 import annotation.switch
 import util.DotClass
diff --git a/src/dotty/tools/dotc/parsing/Scanners.scala b/src/dotty/tools/dotc/parsing/Scanners.scala
@@ -2,17 +2,17 @@ package dotty.tools
 package dotc
 package parsing
 
-import Tokens._
 import core.Names._, core.Contexts._, core.Decorators._, util.Positions._
 import core.StdNames._
 import util.SourceFile
 import java.lang.Character.isDigit
 import scala.reflect.internal.Chars._
 import Tokens._
 import scala.annotation.{ switch, tailrec }
-import scala.collection.{ mutable, immutable }
-import mutable.{ ListBuffer, ArrayBuffer }
-import scala.xml.Utility.isNameStart
+import scala.collection.mutable
+import mutable.ListBuffer
+import Utility.isNameStart
+
 
 object Scanners {
 
diff --git a/src/dotty/tools/dotc/parsing/ScriptParsers.scala b/src/dotty/tools/dotc/parsing/ScriptParsers.scala
@@ -2,21 +2,9 @@ package dotty.tools
 package dotc
 package parsing
 
-import util.{ SourceFile, FreshNameCreator }
+import util.SourceFile
 import core._
-import Flags._
 import Contexts._
-import Names._
-import ast.Trees._
-import Decorators._
-import StdNames._
-import util.Chars.isScalaLetter
-import util.Positions._
-import Types._
-import Constants._
-import NameOps._
-import scala.reflect.internal.Chars._
-import annotation.switch
 import Parsers._
 
 
diff --git a/src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala b/src/dotty/tools/dotc/parsing/SymbolicXMLBuilder.scala
@@ -2,12 +2,11 @@ package dotty.tools
 package dotc
 package parsing
 
-import scala.collection.{ mutable, immutable }
+import scala.collection.mutable
 import scala.xml.{ EntityRef, Text }
-import scala.xml.XML.{ xmlns }
 import core._
 import Flags.Mutable
-import Names._, NameOps._, StdNames._, Decorators._, ast.Trees._, ast.{tpd, untpd}, Constants._
+import Names._, StdNames._, ast.Trees._, ast.{tpd, untpd}
 import Symbols._, Contexts._
 import util.Positions._
 import Parsers.Parser
@@ -203,7 +202,7 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(implicit ctx: Cont
 
     /** Extract all the namespaces from the attribute map. */
     val namespaces: List[Tree] =
-      for (z <- attrMap.keys.toList ; if z startsWith xmlns) yield {
+      for (z <- attrMap.keys.toList ; if z startsWith "xmlns") yield {
         val ns = splitPrefix(z) match {
           case (Some(_), rest)  => rest
           case _                => null
diff --git a/src/dotty/tools/dotc/parsing/Tokens.scala b/src/dotty/tools/dotc/parsing/Tokens.scala
@@ -2,9 +2,7 @@ package dotty.tools
 package dotc
 package parsing
 
-import collection.mutable
 import collection.immutable.BitSet
-import scala.annotation.switch
 
 object Tokens {
 
diff --git a/src/dotty/tools/dotc/parsing/Utility.scala b/src/dotty/tools/dotc/parsing/Utility.scala
@@ -0,0 +1,169 @@
+package dotty.tools.dotc.parsing
+
+import scala.collection.mutable
+
+
+/**
+ * The `Utility` object provides the XML helpers the parser needs: attribute value
+ * and character reference parsing, plus whitespace and name character tests.
+ *
+ * @author Burak Emir
+ */
+object Utility {
+  import scala.reflect.internal.Chars.SU
+
+  private val unescMap = Map(
+    "lt"    -> '<',
+    "gt"    -> '>',
+    "amp"   -> '&',
+    "quot"  -> '"',
+    "apos"  -> '\''
+  )
+
+  /**
+   * Appends the character for the predefined entity `ref` to `s`: `amp`
+   * becomes `&amp;`, `lt` becomes `&lt;`, etc.
+   *
+   * @return    `'''null'''` if `ref` was not a predefined entity.
+   */
+  private final def unescape(ref: String, s: StringBuilder): StringBuilder =
+    ((unescMap get ref) map (s append _)).orNull
+
+  def parseAttributeValue[T](value: String, text: String => T, entityRef: String => T): List[T] = {
+    val sb  = new StringBuilder
+    var rfb: StringBuilder = null
+    val nb = new mutable.ListBuffer[T]()
+
+    val it = value.iterator
+    while (it.hasNext) {
+      var c = it.next()
+      // entity! flush buffer into text node
+      if (c == '&') {
+        c = it.next()
+        if (c == '#') {
+          c = it.next()
+          val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)})
+          sb.append(theChar)
+        }
+        else {
+          if (rfb eq null) rfb = new StringBuilder()
+          rfb append c
+          c = it.next()
+          while (c != ';') {
+            rfb.append(c)
+            c = it.next()
+          }
+          val ref = rfb.toString()
+          rfb.clear()
+          unescape(ref,sb) match {
+            case null =>
+              if (!sb.isEmpty) {  // flush buffer
+                nb += text(sb.toString())
+                sb.clear()
+              }
+              nb += entityRef(ref) // add entityref
+            case _ =>
+          }
+        }
+      }
+      else sb append c
+    }
+
+    if(!sb.isEmpty) // flush buffer
+      nb += text(sb.toString())
+
+    nb.toList
+  }
+
+  /**
+   * {{{
+   *   CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
+   *             | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   * }}}
+   * See [66] of XML 1.0 specification.
+   */
+  def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
+    val hex  = (ch() == 'x') && { nextch(); true }
+    val base = if (hex) 16 else 10
+    var i = 0
+    while (ch() != ';') {
+      ch() match {
+        case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+          i = i * base + ch().asDigit
+        case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
+           | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
+          if (! hex)
+            reportSyntaxError("hex char not allowed in decimal char ref\n" +
+                              "Did you mean to write &#x ?")
+          else
+            i = i * base + ch().asDigit
+        case SU =>
+          reportTruncatedError("")
+        case _ =>
+          reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
+      }
+      nextch()
+    }
+    new String(Array(i), 0, 1)
+  }
+
+  /** {{{
+   *  (#x20 | #x9 | #xD | #xA)
+   *  }}} */
+  final def isSpace(ch: Char): Boolean = ch match {
+    case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true
+    case _                                         => false
+  }
+  /** {{{
+   *  (#x20 | #x9 | #xD | #xA)+
+   *  }}} */
+  final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace)
+
+  /** {{{
+   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+   *             | CombiningChar | Extender
+   *  }}}
+   *  See [4] and Appendix B of XML 1.0 specification.
+  */
+  def isNameChar(ch: Char) = {
+    import java.lang.Character._
+    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+
+    isNameStart(ch) || (getType(ch).toByte match {
+      case COMBINING_SPACING_MARK |
+              ENCLOSING_MARK | NON_SPACING_MARK |
+              MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+      case _                                         => ".-:" contains ch
+    })
+  }
+
+  /** {{{
+   *  NameStart ::= ( Letter | '_' )
+   *  }}}
+   *  where Letter means in one of the Unicode general
+   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+   *
+   *  We do not allow a name to start with `:`.
+   *  See [3] and Appendix B of XML 1.0 specification
+   */
+  def isNameStart(ch: Char) = {
+    import java.lang.Character._
+
+    getType(ch).toByte match {
+      case LOWERCASE_LETTER |
+              UPPERCASE_LETTER | OTHER_LETTER |
+              TITLECASE_LETTER | LETTER_NUMBER => true
+      case _                                   => ch == '_'
+    }
+  }
+
+  /** {{{
+   *  Name ::= ( Letter | '_' ) (NameChar)*
+   *  }}}
+   *  See [5] of XML 1.0 specification.
+   */
+  def isName(s: String) =
+    s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)
+
+}
+