diff --git a/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala b/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala index 8a66b5abca8a..696a8ba53102 100644 --- a/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala +++ b/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala @@ -253,6 +253,10 @@ private sealed trait XSettings: } val XmacroSettings: Setting[List[String]] = MultiStringSetting("-Xmacro-settings", "setting1,setting2,..settingN", "List of settings which exposed to the macros") + + // XML parsing options + //"Convert PCData to Text and coalesce sibling nodes" + val Xxml = ChoiceSetting("-Xxml", "property", "Configure XML parsing.", List("coalescing"), "coalescing") end XSettings /** -Y "Forking" as in forked tongue or "Private" settings */ diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala index 28f02b7db2a0..c6099570da99 100644 --- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala @@ -523,7 +523,7 @@ object Parsers { */ lazy val xmlp: xml.MarkupParsers.MarkupParser = { myFirstXmlPos = source.atSpan(Span(in.offset)) - new MarkupParser(this, true) + new MarkupParser(this, preserveWS = true, isCoalescing = ctx.settings.Xxml.value == "coalescing") } /** The position of the first XML literal encountered while parsing, @@ -532,7 +532,7 @@ object Parsers { def firstXmlPos: SourcePosition = myFirstXmlPos private var myFirstXmlPos: SourcePosition = NoSourcePosition - object symbXMLBuilder extends xml.SymbolicXMLBuilder(this, true) // DEBUG choices + object symbXMLBuilder extends xml.SymbolicXMLBuilder(this, preserveWS = true, isCoalescing = ctx.settings.Xxml.value == "coalescing") def xmlLiteral() : Tree = xmlp.xLiteral def xmlLiteralPattern() : Tree = xmlp.xLiteralPattern diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala 
b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala index 2c6c5361e51c..7edf50d539ed 100644 --- a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala +++ b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala @@ -232,7 +232,7 @@ private[dotty] trait MarkupParserCommon { val rest = until.tail while (true) { - if (ch == head && peek(rest)) + if ch == head && peek(rest) then return handler(positioner(), sb.toString) else if (ch == SU) truncatedError("") // throws TruncatedXMLControl in compiler diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala index 591042961dbb..440fb35c60c8 100644 --- a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala +++ b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala @@ -5,6 +5,7 @@ package xml import scala.language.unsafeNulls +import scala.annotation.tailrec import scala.collection.mutable import mutable.{ Buffer, ArrayBuffer, ListBuffer } import scala.util.control.ControlThrowable @@ -16,10 +17,12 @@ import Constants._ import util.SourceFile import Utility._ +import SymbolicXMLBuilder.* + // XXX/Note: many/most of the functions in here are almost direct cut and pastes // from another file - scala.xml.parsing.MarkupParser, it looks like. -// (It was like that when I got here.) They used to be commented "[Duplicate]" butx +// (It was like that when I got here.) They used to be commented "[Duplicate]" but // since approximately all of them were, I snipped it as noise. As far as I can // tell this wasn't for any particularly good reason, but slightly different // compiler and library parser interfaces meant it would take some setup. 
@@ -49,7 +52,7 @@ object MarkupParsers { override def getMessage: String = "input ended while parsing XML" } - class MarkupParser(parser: Parser, final val preserveWS: Boolean)(implicit src: SourceFile) extends MarkupParserCommon { + class MarkupParser(parser: Parser, final val preserveWS: Boolean, isCoalescing: Boolean)(implicit src: SourceFile) extends MarkupParserCommon { import Tokens.{ LBRACE, RBRACE } @@ -68,17 +71,18 @@ object MarkupParsers { if (ch == SU) throw TruncatedXMLControl else reportSyntaxError(msg) - var input : CharArrayReader = _ + var input: CharArrayReader = _ def lookahead(): BufferedIterator[Char] = (input.buf drop input.charOffset).iterator.buffered import parser.{ symbXMLBuilder => handle } - def curOffset : Int = input.charOffset - 1 - var tmppos : Span = NoSpan + def curOffset: Int = input.lastCharOffset + + var tmppos: Span = NoSpan def ch: Char = input.ch /** this method assign the next character to ch and advances in input */ - def nextch(): Unit = { input.nextChar() } + def nextch(): Unit = input.nextChar() protected def ch_returning_nextch: Char = { val result = ch; input.nextChar(); result @@ -181,22 +185,17 @@ object MarkupParsers { xTakeUntil(handle.comment, () => Span(start, curOffset, start), "-->") } - def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit = { - def append(t: String) = ts append handle.text(span, t) - - if (preserveWS) append(txt) - else { - val sb = new StringBuilder() - - txt foreach { c => - if (!isSpace(c)) sb append c - else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' - } - - val trimmed = sb.toString.trim - if (!trimmed.isEmpty) append(trimmed) - } - } + def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit = + val clean = + if preserveWS then txt + else + val sb = StringBuilder() + txt foreach { c => + if !isSpace(c) then sb += c + else if sb.isEmpty || !isSpace(sb.last) then sb += ' ' + } + sb.toString.trim + if !clean.isEmpty then ts += handle.text(span, clean) /** adds 
entity/character to ts as side-effect * @precond ch == '&' @@ -226,48 +225,74 @@ object MarkupParsers { if (xCheckEmbeddedBlock) ts append xEmbeddedExpr else appendText(p, ts, xText) - /** Returns true if it encounters an end tag (without consuming it), - * appends trees to ts as side-effect. - * - * @param ts ... - * @return ... + /** At an open angle-bracket, detects an end tag + * or consumes CDATA, comment, PI or element. + * Trees are appended to `ts` as a side-effect. + * @return true if an end tag was detected (it is left unconsumed) + */ - private def content_LT(ts: ArrayBuffer[Tree]): Boolean = { - if (ch == '/') - return true // end tag - - val toAppend = ch match { - case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment - case '?' => nextch() ; xProcInstr // PI - case _ => element // child node + private def content_LT(ts: ArrayBuffer[Tree]): Boolean = + (ch == '/') || { + val toAppend = ch match + case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment + case '?' 
=> nextch() ; xProcInstr // PI + case _ => element // child node + ts += toAppend + false } - ts append toAppend - false - } - - def content: Buffer[Tree] = { + def content: Buffer[Tree] = val ts = new ArrayBuffer[Tree] - while (true) { - if (xEmbeddedBlock) - ts append xEmbeddedExpr - else { + @tailrec def loopContent(): Unit = + if xEmbeddedBlock then + ts += xEmbeddedExpr + loopContent() + else tmppos = Span(curOffset) - ch match { - // end tag, cdata, comment, pi or child node - case '<' => nextch() ; if (content_LT(ts)) return ts - // either the character '{' or an embedded scala block } - case '{' => content_BRACE(tmppos, ts) // } - // EntityRef or CharRef - case '&' => content_AMP(ts) - case SU => return ts - // text content - here xEmbeddedBlock might be true - case _ => appendText(tmppos, ts, xText) - } - } - } - unreachable - } + ch match + case '<' => // end tag, cdata, comment, pi or child node + nextch() + if !content_LT(ts) then loopContent() + case '{' => // literal brace or embedded Scala block + content_BRACE(tmppos, ts) + loopContent() + case '&' => // EntityRef or CharRef + content_AMP(ts) + loopContent() + case SU => () + case _ => // text content - here xEmbeddedBlock might be true + appendText(tmppos, ts, xText) + loopContent() + end if + // merge text sections and strip attachments + def coalesce(): ArrayBuffer[Tree] = + def copy() = + val buf = ArrayBuffer.empty[Tree] + val acc = StringBuilder() + var pos: PositionType = NoSpan + def emit() = + if acc.nonEmpty then + appendText(pos, buf, acc.toString) + acc.clear() + for t <- ts do + t.getAttachment(TextAttacheKey) match { + case Some(ta) => + if acc.isEmpty then pos = ta.span + acc append ta.text + case _ => + emit() + buf += t + } + emit() + buf + end copy + // begin + val res = if ts.count(_.hasAttachment(TextAttacheKey)) > 1 then copy() else ts + for t <- res do t.removeAttachment(TextAttacheKey) + res + end coalesce + loopContent() + if isCoalescing then coalesce() else ts + end content 
/** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag * | xmlTag1 '/' '>' @@ -299,24 +324,19 @@ object MarkupParsers { /** parse character data. * precondition: xEmbeddedBlock == false (we are not in a scala block) */ - private def xText: String = { + private def xText: String = assert(!xEmbeddedBlock, "internal error: encountered embedded block") - val buf = new StringBuilder - def done = buf.toString - - while (ch != SU) { - if (ch == '}') { - if (charComingAfter(nextch()) == '}') nextch() - else errorBraces() - } - - buf append ch - nextch() - if (xCheckEmbeddedBlock || ch == '<' || ch == '&') - return done - } - done - } + val buf = StringBuilder() + if (ch != SU) + while + if ch == '}' then + if charComingAfter(nextch()) == '}' then nextch() + else errorBraces() + buf += ch + nextch() + !(ch == SU || xCheckEmbeddedBlock || ch == '<' || ch == '&') + do () + buf.toString /** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */ inline private def xLiteralCommon(f: () => Tree, ifTruncated: String => Unit): Tree = { @@ -329,9 +349,9 @@ object MarkupParsers { case c @ TruncatedXMLControl => ifTruncated(c.getMessage) case c @ (MissingEndTagControl | ConfusedAboutBracesControl) => - parser.syntaxError(c.getMessage + debugLastElem + ">", debugLastPos) + parser.syntaxError(s"${c.getMessage}$debugLastElem>", debugLastPos) case _: ArrayIndexOutOfBoundsException => - parser.syntaxError("missing end tag in XML literal for <%s>" format debugLastElem, debugLastPos) + parser.syntaxError(s"missing end tag in XML literal for <$debugLastElem>", debugLastPos) } finally parser.in.resume(saved) @@ -342,14 +362,13 @@ object MarkupParsers { } /** Use a lookahead parser to run speculative body, and return the first char afterward. 
*/ - private def charComingAfter(body: => Unit): Char = { + private def charComingAfter(body: => Unit): Char = try { input = input.lookaheadReader() body ch } finally input = parser.in - } /** xLiteral = element { element } * @return Scala representation of this xml literal @@ -369,7 +388,7 @@ object MarkupParsers { while { xSpaceOpt() nextch() - ts.append(element) + content_LT(ts) charComingAfter(xSpaceOpt()) == '<' } do () handle.makeXMLseq(Span(start, curOffset, start), ts) @@ -431,7 +450,7 @@ object MarkupParsers { * | Name [S] '/' '>' */ def xPattern: Tree = { - var start = curOffset + val start = curOffset val qname = xName debugLastStartElement = (start, qname) :: debugLastStartElement xSpaceOpt() @@ -453,11 +472,11 @@ object MarkupParsers { if (ch != '/') ts append xPattern // child else return false // terminate - case '{' => // embedded Scala patterns - while (ch == '{') { - nextch() + case '{' if xCheckEmbeddedBlock => // embedded Scala patterns, if not double brace + while ts ++= xScalaPatterns - } + xCheckEmbeddedBlock + do () assert(!xEmbeddedBlock, "problem with embedded block") case SU => diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala b/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala index 0e70cc077fa4..fe277254a162 100644 --- a/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala +++ b/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala @@ -12,6 +12,7 @@ import Flags.Mutable import Names._, StdNames._, ast.Trees._, ast.{tpd, untpd} import Symbols._, Contexts._ import util.Spans._ +import util.Property import Parsers.Parser /** This class builds instance of `Tree` that represent XML. 
@@ -25,12 +26,13 @@ import Parsers.Parser * @author Burak Emir * @version 1.0 */ -class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) { +class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean, isCoalescing: Boolean)(using Context) { import Constants.Constant import untpd._ import parser.atSpan + import SymbolicXMLBuilder.* private[parsing] var isPattern: Boolean = _ @@ -115,8 +117,9 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) { // create scala.xml.Text here <: scala.xml.Node final def text(span: Span, txt: String): Tree = atSpan(span) { - if (isPattern) makeTextPat(const(txt)) - else makeText1(const(txt)) + val t = if isPattern then makeTextPat(const(txt)) else makeText1(const(txt)) + if isCoalescing then t.putAttachment(TextAttacheKey, TextAttache(span, txt)) + t } def makeTextPat(txt: Tree): Apply = Apply(_scala_xml__Text, List(txt)) @@ -259,3 +262,8 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) { atSpan(span.toSynthetic)(new XMLBlock(nsResult, new XMLBlock(attrResult, body))) } } +object SymbolicXMLBuilder: + val TextAttacheKey: Property.Key[TextAttache] = Property.Key[TextAttache]() + /** Attachment for trees deriving from text nodes (Text, CData, entities). Used for coalescing. */ + case class TextAttache(span: Span, text: String) +end SymbolicXMLBuilder diff --git a/tests/neg/t2275a.scala b/tests/neg/t2275a.scala new file mode 100644 index 000000000000..6b80935e6772 --- /dev/null +++ b/tests/neg/t2275a.scala @@ -0,0 +1,7 @@ +object Test { + if (true) { +
<br> // error maybe this tag isn't closed // error + }else{ // error // error in XML content, use double brace + <span>{"louenesee"}</span> + } +} // anypos-error diff --git a/tests/untried/neg/t2275a.scala b/tests/untried/neg/t2275a.scala deleted file mode 100644 index 8e25a38fee16..000000000000 --- a/tests/untried/neg/t2275a.scala +++ /dev/null @@ -1,7 +0,0 @@ -object Test { - if (true) { - <br>
- }else{ - <span>{"louenesee"}</span> - } -}