|
| 1 | +/* __ *\ |
| 2 | +** ________ ___ / / ___ Scala API ** |
| 3 | +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** |
| 4 | +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** |
| 5 | +** /____/\___/_/ |_/____/_/ | | ** |
| 6 | +** |/ ** |
| 7 | +\* */ |
| 8 | +package dotty.tools.dotc |
| 9 | +package parsing |
| 10 | +package xml |
| 11 | + |
| 12 | +import Utility._ |
| 13 | +import util.Chars.SU |
| 14 | + |
| 15 | + |
| 16 | + |
/** This is not a public trait - it contains common code shared
 *  between the library level XML parser and the compiler's.
 *  All members should be accessed through those.
 */
private[dotty] trait MarkupParserCommon {
  /** Marker for code paths that control flow can never reach; raises an error if evaluated. */
  protected def unreachable: Nothing = scala.sys.error("Cannot be reached.")

  // type HandleType       // MarkupHandler, SymbolicXMLBuilder
  type InputType           // Source, CharArrayReader
  type PositionType        // Int, Position
  type ElementType         // NodeSeq, Tree
  type NamespaceType       // NamespaceBinding, Any
  type AttributesType      // (MetaData, NamespaceBinding), mutable.Map[String, Tree]

  /** Supplied by the concrete parser. Called by `xTag` once the element name
   *  and any whitespace following it have been consumed.
   */
  def mkAttributes(name: String, pscope: NamespaceType): AttributesType

  /** Supplied by the concrete parser. Called by `xProcInstr` with a position,
   *  the instruction's name and the text collected before the closing `?>`.
   */
  def mkProcInstr(position: PositionType, name: String, text: String): ElementType

  /** parse a start or empty tag.
   *  [40] STag         ::= '<' Name { S Attribute } [S]
   *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
   *
   *  Reading starts at the element name.
   *
   *  @param pscope the namespace scope handed on to `mkAttributes`
   *  @return the element name paired with the result of `mkAttributes`
   */
  protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
    val name = xName
    xSpaceOpt()

    (name, mkAttributes(name, pscope))
  }

  /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'?>' {Char})]'?>'
   *
   *  see [15]
   *
   *  Reads the name, skips optional whitespace, then collects everything up to
   *  the closing `?>` and hands it to `mkProcInstr` together with `tmppos`.
   */
  def xProcInstr: ElementType = {
    val target = xName
    xSpaceOpt()
    xTakeUntil(mkProcInstr(_, target, _), () => tmppos, "?>")
  }

  /** attribute value, terminated by either `'` or `"`. value may not contain `<`.
   *
   *  If a `<` is met, a syntax error is reported and the empty string is
   *  returned without consuming the `<`. Meeting `SU` triggers `truncatedError`.
   *
   *  @param endCh either `'` or `"`
   *  @return the raw characters read before `endCh`; `endCh` itself is consumed
   */
  def xAttributeValue(endCh: Char): String = {
    val buf = new StringBuilder
    while (ch != endCh) {
      // well-formedness constraint
      if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
      else if (ch == SU) truncatedError("")
      else buf.append(ch_returning_nextch)
    }
    // step over the closing delimiter
    ch_returning_nextch
    // @todo: normalize attribute value
    buf.toString
  }

  /** Reads an attribute value whose delimiter is the character obtained from
   *  `ch_returning_nextch`, then normalizes the raw text with
   *  `normalizeAttributeValue`.
   */
  def xAttributeValue(): String = {
    val raw = xAttributeValue(ch_returning_nextch)
    // well-formedness constraint
    normalizeAttributeValue(raw)
  }

  /** Collects characters from `it` up to, but not including, the first `end`.
   *  The `end` character is consumed. Fails with an error if `it` runs out
   *  before `end` is seen.
   */
  private def takeUntilChar(it: Iterator[Char], end: Char): String = {
    val collected = new StringBuilder
    while (it.hasNext) it.next() match {
      case `end` => return collected.toString
      case other => collected.append(other)
    }
    scala.sys.error("Expected '%s'".format(end))
  }

  /** [42]  '<' xmlEndTag ::=  '<' '/' Name S? '>'
   *
   *  Reading starts at the '/'. `errorNoEnd` is invoked when the name read
   *  differs from `startName`.
   */
  def xEndTag(startName: String): Unit = {
    xToken('/')
    if (xName != startName)
      errorNoEnd(startName)

    xSpaceOpt()
    xToken('>')
  }

  /** actually, Name ::= (Letter | '_' | ':') (NameChar)*  but starting with ':' cannot happen
   *  Name ::= (Letter | '_') (NameChar)*
   *
   *  see  [5] of XML 1.0 specification
   *
   *  pre-condition:  ch != ':' // assured by definition of XMLSTART token
   *  post-condition: name does neither start, nor end in ':'
   *
   *  If the current character cannot start a name, a syntax error is reported
   *  and the empty string is returned. A trailing ':' is reported as a syntax
   *  error and dropped from the result.
   */
  def xName: String = {
    if (ch == SU)
      truncatedError("")
    else if (!isNameStart(ch))
      return errorAndResult("name expected, but char '%s' cannot start a name".format(ch), "")

    val buf = new StringBuilder

    // The first character is known to be a name start; keep consuming while
    // name characters follow. Written as a plain `while` (rather than
    // `do ... while`) so that it is valid in both Scala 2 and Scala 3.
    buf.append(ch_returning_nextch)
    while (isNameChar(ch))
      buf.append(ch_returning_nextch)

    if (buf.last == ':') {
      reportSyntaxError("name cannot end in ':'")
      buf.toString.dropRight(1)
    }
    else buf.toString
  }

  /** Maps an entity name (the text between '&' and ';') to its replacement.
   *  Unknown names are returned unchanged, re-wrapped as `&name;`.
   */
  private def attr_unescape(s: String) = s match {
    case "lt"     => "<"
    case "gt"     => ">"
    case "amp"    => "&"
    case "apos"   => "'"
    case "quot"   => "\""
    case "quote"  => "\""
    case _        => "&" + s + ";"
  }

  /** Normalizes a raw attribute value:
   *   - each space, tab, newline or carriage return becomes a single space;
   *   - `&#...;` character references are replaced via `xCharRef`;
   *   - other `&name;` references are replaced via `attr_unescape`.
   *
   *  see spec 3.3.3
   */
  private def normalizeAttributeValue(attval: String): String = {
    val buf = new StringBuilder
    val it = attval.iterator.buffered

    while (it.hasNext) it.next() match {
      case ' ' | '\t' | '\n' | '\r' =>
        buf.append(" ")
      // `hasNext` guard: without it, a value ending in a bare '&' would call
      // `head` on an exhausted iterator. Such input now falls through to the
      // entity branch below, which fails with the "Expected ';'" error.
      case '&' if it.hasNext && it.head == '#' =>
        it.next()
        buf.append(xCharRef(it))
      case '&' =>
        buf.append(attr_unescape(takeUntilChar(it, ';')))
      case c =>
        buf.append(c)
    }

    buf.toString
  }

  /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
   *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
   *
   *  see [66]
   *
   *  Delegates to `Utility.parseCharRef`, reading through the supplied
   *  `ch` / `nextch` accessors.
   */
  def xCharRef(ch: () => Char, nextch: () => Unit): String =
    Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)

  /** Variant of `xCharRef` that reads its characters from `it`.
   *  The first character is taken from `it` immediately.
   */
  def xCharRef(it: Iterator[Char]): String = {
    var c = it.next()
    Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _)
  }

  /** Variant of `xCharRef` that reads from this parser's own `ch` / `nextch`. */
  def xCharRef: String = xCharRef(() => ch, () => nextch())

  /** Create a lookahead reader which does not influence the input */
  def lookahead(): BufferedIterator[Char]

  /** The library and compiler parsers had the interesting distinction of
   *  different behavior for nextch (a function for which there are a total
   *  of two plausible behaviors, so we know the design space was fully
   *  explored.) One of them returned the value of nextch before the increment
   *  and one of them the new value.  So to unify code we have to at least
   *  temporarily abstract over the nextchs.
   */
  def ch: Char
  def nextch(): Unit
  protected def ch_returning_nextch: Char
  def eof: Boolean

  // def handle: HandleType
  var tmppos: PositionType

  def xHandleError(that: Char, msg: String): Unit
  def reportSyntaxError(str: String): Unit
  def reportSyntaxError(pos: Int, str: String): Unit

  def truncatedError(msg: String): Nothing
  def errorNoEnd(tag: String): Nothing

  /** Reports `msg` as a syntax error and then returns `x`. */
  protected def errorAndResult[T](msg: String, x: T): T = {
    reportSyntaxError(msg)
    x
  }

  /** Consumes `that` if it is the current character; otherwise hands a
   *  message to `xHandleError`.
   */
  def xToken(that: Char): Unit = {
    if (ch == that) nextch()
    else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
  }

  /** Applies the single-character `xToken` to each character of `that`, in order. */
  def xToken(that: Seq[Char]): Unit = that.foreach(c => xToken(c))

  /** scan [S] '=' [S]*/
  def xEQ(): Unit = { xSpaceOpt(); xToken('='); xSpaceOpt() }

  /** skip optional space S? */
  def xSpaceOpt(): Unit = while (isSpace(ch) && !eof) nextch()

  /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
  def xSpace(): Unit =
    if (isSpace(ch)) { nextch(); xSpaceOpt() }
    else xHandleError(ch, "whitespace expected")

  /** Apply a function and return the passed value */
  def returning[T](x: T)(f: T => Unit): T = { f(x); x }

  /** Execute body with a variable saved and restored after execution.
   *  `getter` is evaluated once, before `body`; `setter` receives that value
   *  afterwards, even if `body` throws.
   */
  def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
    val saved = getter
    try body
    finally setter(saved)
  }

  /** Take characters from input stream until given String "until"
   *  is seen.  Once seen, the accumulated characters are passed
   *  along with the current Position to the supplied handler function.
   *
   *  The terminator is consumed but is not part of the accumulated text.
   *  Meeting `SU` before the terminator triggers `truncatedError`.
   */
  protected def xTakeUntil[T](
    handler: (PositionType, String) => T,
    positioner: () => PositionType,
    until: String): T =
  {
    val sb = new StringBuilder
    val head = until.head
    val rest = until.tail

    while (true) {
      // `peek` consumes the whole terminator (current char included) on success
      if (ch == head && peek(rest))
        return handler(positioner(), sb.toString)
      else if (ch == SU)
        truncatedError("")  // throws TruncatedXMLControl in compiler

      sb.append(ch)
      nextch()
    }
    unreachable
  }

  /** Create a non-destructive lookahead reader and see if the head
   *  of the input would match the given String.  If yes, return true
   *  and drop the entire String from input; if no, return false
   *  and leave input unchanged.
   *
   *  On a match the real reader is advanced `lookingFor.length + 1` times:
   *  once for the original character the caller already matched, plus once
   *  per lookahead character.
   */
  private def peek(lookingFor: String): Boolean = {
    val matched = lookahead().take(lookingFor.length).sameElements(lookingFor.iterator)
    // drop the chars from the real reader (all lookahead + orig)
    if (matched) (0 to lookingFor.length).foreach(_ => nextch())
    matched
  }
}
0 commit comments