diff --git a/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala b/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala
index 8a66b5abca8a..696a8ba53102 100644
--- a/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala
+++ b/compiler/src/dotty/tools/dotc/config/ScalaSettings.scala
@@ -253,6 +253,10 @@ private sealed trait XSettings:
}
val XmacroSettings: Setting[List[String]] = MultiStringSetting("-Xmacro-settings", "setting1,setting2,..settingN", "List of settings which exposed to the macros")
+
+ // XML parsing options
+ // "coalescing": convert PCData to Text and coalesce sibling nodes
+ val Xxml = ChoiceSetting("-Xxml", "property", "Configure XML parsing.", List("coalescing"), "coalescing")
end XSettings
/** -Y "Forking" as in forked tongue or "Private" settings */
diff --git a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala
index 28f02b7db2a0..c6099570da99 100644
--- a/compiler/src/dotty/tools/dotc/parsing/Parsers.scala
+++ b/compiler/src/dotty/tools/dotc/parsing/Parsers.scala
@@ -523,7 +523,7 @@ object Parsers {
*/
lazy val xmlp: xml.MarkupParsers.MarkupParser = {
myFirstXmlPos = source.atSpan(Span(in.offset))
- new MarkupParser(this, true)
+ new MarkupParser(this, preserveWS = true, isCoalescing = ctx.settings.Xxml.value == "coalescing")
}
/** The position of the first XML literal encountered while parsing,
@@ -532,7 +532,7 @@ object Parsers {
def firstXmlPos: SourcePosition = myFirstXmlPos
private var myFirstXmlPos: SourcePosition = NoSourcePosition
- object symbXMLBuilder extends xml.SymbolicXMLBuilder(this, true) // DEBUG choices
+ object symbXMLBuilder extends xml.SymbolicXMLBuilder(this, preserveWS = true, isCoalescing = ctx.settings.Xxml.value == "coalescing")
def xmlLiteral() : Tree = xmlp.xLiteral
def xmlLiteralPattern() : Tree = xmlp.xLiteralPattern
diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala
index 2c6c5361e51c..7edf50d539ed 100644
--- a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala
+++ b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParserCommon.scala
@@ -232,7 +232,7 @@ private[dotty] trait MarkupParserCommon {
val rest = until.tail
while (true) {
- if (ch == head && peek(rest))
+ if ch == head && peek(rest) then
return handler(positioner(), sb.toString)
else if (ch == SU)
truncatedError("") // throws TruncatedXMLControl in compiler
diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala
index 591042961dbb..440fb35c60c8 100644
--- a/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala
+++ b/compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala
@@ -5,6 +5,7 @@ package xml
import scala.language.unsafeNulls
+import scala.annotation.tailrec
import scala.collection.mutable
import mutable.{ Buffer, ArrayBuffer, ListBuffer }
import scala.util.control.ControlThrowable
@@ -16,10 +17,12 @@ import Constants._
import util.SourceFile
import Utility._
+import SymbolicXMLBuilder.*
+
// XXX/Note: many/most of the functions in here are almost direct cut and pastes
// from another file - scala.xml.parsing.MarkupParser, it looks like.
-// (It was like that when I got here.) They used to be commented "[Duplicate]" butx
+// (It was like that when I got here.) They used to be commented "[Duplicate]" but
// since approximately all of them were, I snipped it as noise. As far as I can
// tell this wasn't for any particularly good reason, but slightly different
// compiler and library parser interfaces meant it would take some setup.
@@ -49,7 +52,7 @@ object MarkupParsers {
override def getMessage: String = "input ended while parsing XML"
}
- class MarkupParser(parser: Parser, final val preserveWS: Boolean)(implicit src: SourceFile) extends MarkupParserCommon {
+ class MarkupParser(parser: Parser, final val preserveWS: Boolean, isCoalescing: Boolean)(implicit src: SourceFile) extends MarkupParserCommon {
import Tokens.{ LBRACE, RBRACE }
@@ -68,17 +71,18 @@ object MarkupParsers {
if (ch == SU) throw TruncatedXMLControl
else reportSyntaxError(msg)
- var input : CharArrayReader = _
+ var input: CharArrayReader = _
def lookahead(): BufferedIterator[Char] =
(input.buf drop input.charOffset).iterator.buffered
import parser.{ symbXMLBuilder => handle }
- def curOffset : Int = input.charOffset - 1
- var tmppos : Span = NoSpan
+ def curOffset: Int = input.lastCharOffset
+
+ var tmppos: Span = NoSpan
def ch: Char = input.ch
/** this method assign the next character to ch and advances in input */
- def nextch(): Unit = { input.nextChar() }
+ def nextch(): Unit = input.nextChar()
protected def ch_returning_nextch: Char = {
val result = ch; input.nextChar(); result
@@ -181,22 +185,17 @@ object MarkupParsers {
xTakeUntil(handle.comment, () => Span(start, curOffset, start), "-->")
}
- def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit = {
- def append(t: String) = ts append handle.text(span, t)
-
- if (preserveWS) append(txt)
- else {
- val sb = new StringBuilder()
-
- txt foreach { c =>
- if (!isSpace(c)) sb append c
- else if (sb.isEmpty || !isSpace(sb.last)) sb append ' '
- }
-
- val trimmed = sb.toString.trim
- if (!trimmed.isEmpty) append(trimmed)
- }
- }
+ def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit = // append txt to ts as a text node, normalizing whitespace unless preserveWS
+ val clean =
+ if preserveWS then txt // keep the text verbatim
+ else
+ val sb = StringBuilder()
+ txt foreach { c =>
+ if !isSpace(c) then sb += c
+ else if sb.isEmpty || !isSpace(sb.last) then sb += ' ' // collapse each run of whitespace into one space
+ }
+ sb.toString.trim // drop the single leading/trailing space the collapse may leave
+ if !clean.isEmpty then ts += handle.text(span, clean) // text that is empty after cleaning produces no node
/** adds entity/character to ts as side-effect
* @precond ch == '&'
@@ -226,48 +225,74 @@ object MarkupParsers {
if (xCheckEmbeddedBlock) ts append xEmbeddedExpr
else appendText(p, ts, xText)
- /** Returns true if it encounters an end tag (without consuming it),
- * appends trees to ts as side-effect.
- *
- * @param ts ...
- * @return ...
+ /** At an open angle-bracket, detects an end tag
+ * or consumes CDATA, comment, PI or element.
+ * Trees are appended to `ts` as a side-effect.
+ * @return true if an end tag is detected (without consuming it)
*/
- private def content_LT(ts: ArrayBuffer[Tree]): Boolean = {
- if (ch == '/')
- return true // end tag
-
- val toAppend = ch match {
- case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment
- case '?' => nextch() ; xProcInstr // PI
- case _ => element // child node
+ private def content_LT(ts: ArrayBuffer[Tree]): Boolean =
+ (ch == '/') || {
+ val toAppend = ch match
+ case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment
+ case '?' => nextch() ; xProcInstr // PI
+ case _ => element // child node
+ ts += toAppend
+ false
}
- ts append toAppend
- false
- }
-
- def content: Buffer[Tree] = {
+ def content: Buffer[Tree] =
val ts = new ArrayBuffer[Tree]
- while (true) {
- if (xEmbeddedBlock)
- ts append xEmbeddedExpr
- else {
+ @tailrec def loopContent(): Unit =
+ if xEmbeddedBlock then
+ ts += xEmbeddedExpr
+ loopContent()
+ else
tmppos = Span(curOffset)
- ch match {
- // end tag, cdata, comment, pi or child node
- case '<' => nextch() ; if (content_LT(ts)) return ts
- // either the character '{' or an embedded scala block }
- case '{' => content_BRACE(tmppos, ts) // }
- // EntityRef or CharRef
- case '&' => content_AMP(ts)
- case SU => return ts
- // text content - here xEmbeddedBlock might be true
- case _ => appendText(tmppos, ts, xText)
- }
- }
- }
- unreachable
- }
+ ch match
+ case '<' => // end tag, cdata, comment, pi or child node
+ nextch()
+ if !content_LT(ts) then loopContent()
+ case '{' => // literal brace or embedded Scala block
+ content_BRACE(tmppos, ts)
+ loopContent()
+ case '&' => // EntityRef or CharRef
+ content_AMP(ts)
+ loopContent()
+ case SU => ()
+ case _ => // text content - here xEmbeddedBlock might be true
+ appendText(tmppos, ts, xText)
+ loopContent()
+ end if
+ // Merge each run of adjacent text trees in `ts` into a single text node, then strip the TextAttache attachments.
+ def coalesce(): ArrayBuffer[Tree] =
+ def copy() = // rebuild the children, merging consecutive attachment-carrying trees
+ val buf = ArrayBuffer.empty[Tree]
+ val acc = StringBuilder() // text accumulated from the current run
+ var pos: PositionType = NoSpan // span of the first tree in the current run
+ def emit() = // flush the accumulated run, if any, as one text node
+ if acc.nonEmpty then
+ appendText(pos, buf, acc.toString)
+ acc.clear()
+ for t <- ts do
+ t.getAttachment(TextAttacheKey) match {
+ case Some(ta) =>
+ if acc.isEmpty then pos = ta.span // a new run starts here: remember its span
+ acc append ta.text
+ case _ => // a tree without the attachment ends the current run
+ emit()
+ buf += t
+ }
+ emit() // flush a run that reaches the end of ts
+ buf
+ end copy
+ // body of coalesce: copying is only needed when at least two trees carry a text attachment
+ val res = if ts.count(_.hasAttachment(TextAttacheKey)) > 1 then copy() else ts
+ for t <- res do t.removeAttachment(TextAttacheKey) // attachments are removed from every returned tree
+ res
+ end coalesce
+ loopContent()
+ if isCoalescing then coalesce() else ts
+ end content
/** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag
* | xmlTag1 '/' '>'
@@ -299,24 +324,19 @@ object MarkupParsers {
/** parse character data.
* precondition: xEmbeddedBlock == false (we are not in a scala block)
*/
- private def xText: String = {
+ private def xText: String =
assert(!xEmbeddedBlock, "internal error: encountered embedded block")
- val buf = new StringBuilder
- def done = buf.toString
-
- while (ch != SU) {
- if (ch == '}') {
- if (charComingAfter(nextch()) == '}') nextch()
- else errorBraces()
- }
-
- buf append ch
- nextch()
- if (xCheckEmbeddedBlock || ch == '<' || ch == '&')
- return done
- }
- done
- }
+ val buf = StringBuilder()
+ if (ch != SU)
+ while
+ if ch == '}' then
+ if charComingAfter(nextch()) == '}' then nextch()
+ else errorBraces()
+ buf += ch
+ nextch()
+ !(ch == SU || xCheckEmbeddedBlock || ch == '<' || ch == '&')
+ do ()
+ buf.toString
/** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */
inline private def xLiteralCommon(f: () => Tree, ifTruncated: String => Unit): Tree = {
@@ -329,9 +349,9 @@ object MarkupParsers {
case c @ TruncatedXMLControl =>
ifTruncated(c.getMessage)
case c @ (MissingEndTagControl | ConfusedAboutBracesControl) =>
- parser.syntaxError(c.getMessage + debugLastElem + ">", debugLastPos)
+ parser.syntaxError(s"${c.getMessage}$debugLastElem>", debugLastPos)
case _: ArrayIndexOutOfBoundsException =>
- parser.syntaxError("missing end tag in XML literal for <%s>" format debugLastElem, debugLastPos)
+ parser.syntaxError(s"missing end tag in XML literal for <$debugLastElem>", debugLastPos)
}
finally parser.in.resume(saved)
@@ -342,14 +362,13 @@ object MarkupParsers {
}
/** Use a lookahead parser to run speculative body, and return the first char afterward. */
- private def charComingAfter(body: => Unit): Char = {
+ private def charComingAfter(body: => Unit): Char =
try {
input = input.lookaheadReader()
body
ch
}
finally input = parser.in
- }
/** xLiteral = element { element }
* @return Scala representation of this xml literal
@@ -369,7 +388,7 @@ object MarkupParsers {
while {
xSpaceOpt()
nextch()
- ts.append(element)
+ content_LT(ts)
charComingAfter(xSpaceOpt()) == '<'
} do ()
handle.makeXMLseq(Span(start, curOffset, start), ts)
@@ -431,7 +450,7 @@ object MarkupParsers {
* | Name [S] '/' '>'
*/
def xPattern: Tree = {
- var start = curOffset
+ val start = curOffset
val qname = xName
debugLastStartElement = (start, qname) :: debugLastStartElement
xSpaceOpt()
@@ -453,11 +472,11 @@ object MarkupParsers {
if (ch != '/') ts append xPattern // child
else return false // terminate
- case '{' => // embedded Scala patterns
- while (ch == '{') {
- nextch()
+ case '{' if xCheckEmbeddedBlock => // embedded Scala patterns, if not double brace
+ while
ts ++= xScalaPatterns
- }
+ xCheckEmbeddedBlock
+ do ()
assert(!xEmbeddedBlock, "problem with embedded block")
case SU =>
diff --git a/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala b/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala
index 0e70cc077fa4..fe277254a162 100644
--- a/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala
+++ b/compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala
@@ -12,6 +12,7 @@ import Flags.Mutable
import Names._, StdNames._, ast.Trees._, ast.{tpd, untpd}
import Symbols._, Contexts._
import util.Spans._
+import util.Property
import Parsers.Parser
/** This class builds instance of `Tree` that represent XML.
@@ -25,12 +26,13 @@ import Parsers.Parser
* @author Burak Emir
* @version 1.0
*/
-class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) {
+class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean, isCoalescing: Boolean)(using Context) {
import Constants.Constant
import untpd._
import parser.atSpan
+ import SymbolicXMLBuilder.*
private[parsing] var isPattern: Boolean = _
@@ -115,8 +117,9 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) {
// create scala.xml.Text here <: scala.xml.Node
final def text(span: Span, txt: String): Tree = atSpan(span) {
- if (isPattern) makeTextPat(const(txt))
- else makeText1(const(txt))
+ val t = if isPattern then makeTextPat(const(txt)) else makeText1(const(txt))
+ if isCoalescing then t.putAttachment(TextAttacheKey, TextAttache(span, txt))
+ t
}
def makeTextPat(txt: Tree): Apply = Apply(_scala_xml__Text, List(txt))
@@ -259,3 +262,8 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) {
atSpan(span.toSynthetic)(new XMLBlock(nsResult, new XMLBlock(attrResult, body)))
}
}
+object SymbolicXMLBuilder:
+ val TextAttacheKey: Property.Key[TextAttache] = Property.Key[TextAttache]() // key under which a TextAttache is stored on a tree
+ /** Attachment for trees deriving from text nodes (Text, CData, entities): records the node's span and text so that adjacent text can be coalesced. */
+ case class TextAttache(span: Span, text: String)
+end SymbolicXMLBuilder
diff --git a/tests/neg/t2275a.scala b/tests/neg/t2275a.scala
new file mode 100644
index 000000000000..6b80935e6772
--- /dev/null
+++ b/tests/neg/t2275a.scala
@@ -0,0 +1,7 @@
+object Test {
+ if (true) {
+ <br> // error maybe this tag isn't closed // error
+ }else{ // error // error in XML content, use double brace
+ {"louenesee"}
+ }
+} // anypos-error
diff --git a/tests/untried/neg/t2275a.scala b/tests/untried/neg/t2275a.scala
deleted file mode 100644
index 8e25a38fee16..000000000000
--- a/tests/untried/neg/t2275a.scala
+++ /dev/null
@@ -1,7 +0,0 @@
-object Test {
- if (true) {
-
- }else{
- {"louenesee"}
- }
-}