Skip to content

Commit b4e5de7

Browse files
committed
CDATA support
Adds `-Xxml:coalescing` to support cross-compilation until XML support is dropped. Forward port of scala/scala@4df81aa.
1 parent f2af157 commit b4e5de7

File tree

4 files changed

+113
-82
lines changed

4 files changed

+113
-82
lines changed

compiler/src/dotty/tools/dotc/config/ScalaSettings.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,10 @@ private sealed trait XSettings:
253253
}
254254

255255
val XmacroSettings: Setting[List[String]] = MultiStringSetting("-Xmacro-settings", "setting1,setting2,..settingN", "List of settings which exposed to the macros")
256+
257+
// XML parsing options
258+
// "coalescing": convert PCData to Text and coalesce sibling nodes
259+
val Xxml = ChoiceSetting("-Xxml", "property", "Configure XML parsing.", List("coalescing"), "coalescing")
256260
end XSettings
257261

258262
/** -Y "Forking" as in forked tongue or "Private" settings */

compiler/src/dotty/tools/dotc/parsing/Parsers.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ object Parsers {
523523
*/
524524
lazy val xmlp: xml.MarkupParsers.MarkupParser = {
525525
myFirstXmlPos = source.atSpan(Span(in.offset))
526-
new MarkupParser(this, true)
526+
new MarkupParser(this, preserveWS = true, isCoalescing = ctx.settings.Xxml.value == "coalescing")
527527
}
528528

529529
/** The position of the first XML literal encountered while parsing,
@@ -532,7 +532,7 @@ object Parsers {
532532
def firstXmlPos: SourcePosition = myFirstXmlPos
533533
private var myFirstXmlPos: SourcePosition = NoSourcePosition
534534

535-
object symbXMLBuilder extends xml.SymbolicXMLBuilder(this, true) // DEBUG choices
535+
object symbXMLBuilder extends xml.SymbolicXMLBuilder(this, preserveWS = true, isCoalescing = ctx.settings.Xxml.value == "coalescing")
536536

537537
def xmlLiteral() : Tree = xmlp.xLiteral
538538
def xmlLiteralPattern() : Tree = xmlp.xLiteralPattern

compiler/src/dotty/tools/dotc/parsing/xml/MarkupParsers.scala

Lines changed: 96 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package xml
55

66
import scala.language.unsafeNulls
77

8+
import scala.annotation.tailrec
89
import scala.collection.mutable
910
import mutable.{ Buffer, ArrayBuffer, ListBuffer }
1011
import scala.util.control.ControlThrowable
@@ -16,10 +17,12 @@ import Constants._
1617
import util.SourceFile
1718
import Utility._
1819

20+
import SymbolicXMLBuilder.*
21+
1922

2023
// XXX/Note: many/most of the functions in here are almost direct cut and pastes
2124
// from another file - scala.xml.parsing.MarkupParser, it looks like.
22-
// (It was like that when I got here.) They used to be commented "[Duplicate]" butx
25+
// (It was like that when I got here.) They used to be commented "[Duplicate]" but
2326
// since approximately all of them were, I snipped it as noise. As far as I can
2427
// tell this wasn't for any particularly good reason, but slightly different
2528
// compiler and library parser interfaces meant it would take some setup.
@@ -49,7 +52,7 @@ object MarkupParsers {
4952
override def getMessage: String = "input ended while parsing XML"
5053
}
5154

52-
class MarkupParser(parser: Parser, final val preserveWS: Boolean)(implicit src: SourceFile) extends MarkupParserCommon {
55+
class MarkupParser(parser: Parser, final val preserveWS: Boolean, isCoalescing: Boolean)(implicit src: SourceFile) extends MarkupParserCommon {
5356

5457
import Tokens.{ LBRACE, RBRACE }
5558

@@ -182,22 +185,17 @@ object MarkupParsers {
182185
xTakeUntil(handle.comment, () => Span(start, curOffset, start), "-->")
183186
}
184187

185-
def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit = {
186-
def append(t: String) = ts append handle.text(span, t)
187-
188-
if (preserveWS) append(txt)
189-
else {
190-
val sb = new StringBuilder()
191-
192-
txt foreach { c =>
193-
if (!isSpace(c)) sb append c
194-
else if (sb.isEmpty || !isSpace(sb.last)) sb append ' '
195-
}
196-
197-
val trimmed = sb.toString.trim
198-
if (!trimmed.isEmpty) append(trimmed)
199-
}
200-
}
188+
def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit =
189+
val clean =
190+
if preserveWS then txt
191+
else
192+
val sb = StringBuilder()
193+
txt foreach { c =>
194+
if !isSpace(c) then sb += c
195+
else if sb.isEmpty || !isSpace(sb.last) then sb += ' '
196+
}
197+
sb.toString.trim
198+
if !clean.isEmpty then ts += handle.text(span, clean)
201199

202200
/** adds entity/character to ts as side-effect
203201
* @precond ch == '&'
@@ -227,48 +225,74 @@ object MarkupParsers {
227225
if (xCheckEmbeddedBlock) ts append xEmbeddedExpr
228226
else appendText(p, ts, xText)
229227

230-
/** Returns true if it encounters an end tag (without consuming it),
231-
* appends trees to ts as side-effect.
232-
*
233-
* @param ts ...
234-
* @return ...
228+
/** At an open angle-bracket, detects an end tag
229+
* or consumes CDATA, comment, PI or element.
230+
* Trees are appended to `ts` as a side-effect.
231+
@return true if an end tag is detected (without consuming it)
235232
*/
236-
private def content_LT(ts: ArrayBuffer[Tree]): Boolean = {
237-
if (ch == '/')
238-
return true // end tag
239-
240-
val toAppend = ch match {
241-
case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment
242-
case '?' => nextch() ; xProcInstr // PI
243-
case _ => element // child node
233+
private def content_LT(ts: ArrayBuffer[Tree]): Boolean =
234+
(ch == '/') || {
235+
val toAppend = ch match
236+
case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment
237+
case '?' => nextch() ; xProcInstr // PI
238+
case _ => element // child node
239+
ts += toAppend
240+
false
244241
}
245242

246-
ts append toAppend
247-
false
248-
}
249-
250-
def content: Buffer[Tree] = {
243+
def content: Buffer[Tree] =
251244
val ts = new ArrayBuffer[Tree]
252-
while (true) {
253-
if (xEmbeddedBlock)
254-
ts append xEmbeddedExpr
255-
else {
245+
@tailrec def loopContent(): Unit =
246+
if xEmbeddedBlock then
247+
ts += xEmbeddedExpr
248+
loopContent()
249+
else
256250
tmppos = Span(curOffset)
257-
ch match {
258-
// end tag, cdata, comment, pi or child node
259-
case '<' => nextch() ; if (content_LT(ts)) return ts
260-
// either the character '{' or an embedded scala block }
261-
case '{' => content_BRACE(tmppos, ts) // }
262-
// EntityRef or CharRef
263-
case '&' => content_AMP(ts)
264-
case SU => return ts
265-
// text content - here xEmbeddedBlock might be true
266-
case _ => appendText(tmppos, ts, xText)
267-
}
268-
}
269-
}
270-
unreachable
271-
}
251+
ch match
252+
case '<' => // end tag, cdata, comment, pi or child node
253+
nextch()
254+
if !content_LT(ts) then loopContent()
255+
case '{' => // literal brace or embedded Scala block
256+
content_BRACE(tmppos, ts)
257+
loopContent()
258+
case '&' => // EntityRef or CharRef
259+
content_AMP(ts)
260+
loopContent()
261+
case SU => ()
262+
case _ => // text content - here xEmbeddedBlock might be true
263+
appendText(tmppos, ts, xText)
264+
loopContent()
265+
end if
266+
// merge text sections and strip attachments
267+
def coalesce(): ArrayBuffer[Tree] =
268+
def copy() =
269+
val buf = ArrayBuffer.empty[Tree]
270+
val acc = StringBuilder()
271+
var pos: PositionType = NoSpan
272+
def emit() =
273+
if acc.nonEmpty then
274+
appendText(pos, buf, acc.toString)
275+
acc.clear()
276+
for t <- ts do
277+
t.getAttachment(TextAttacheKey) match {
278+
case Some(ta) =>
279+
if acc.isEmpty then pos = ta.span
280+
acc append ta.text
281+
case _ =>
282+
emit()
283+
buf += t
284+
}
285+
emit()
286+
buf
287+
end copy
288+
// begin
289+
val res = if ts.count(_.hasAttachment(TextAttacheKey)) > 1 then copy() else ts
290+
for t <- res do t.removeAttachment(TextAttacheKey)
291+
res
292+
end coalesce
293+
loopContent()
294+
if isCoalescing then coalesce() else ts
295+
end content
272296

273297
/** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag
274298
* | xmlTag1 '/' '>'
@@ -300,24 +324,19 @@ object MarkupParsers {
300324
/** parse character data.
301325
* precondition: xEmbeddedBlock == false (we are not in a scala block)
302326
*/
303-
private def xText: String = {
327+
private def xText: String =
304328
assert(!xEmbeddedBlock, "internal error: encountered embedded block")
305-
val buf = new StringBuilder
306-
def done = buf.toString
307-
308-
while (ch != SU) {
309-
if (ch == '}') {
310-
if (charComingAfter(nextch()) == '}') nextch()
311-
else errorBraces()
312-
}
313-
314-
buf append ch
315-
nextch()
316-
if (xCheckEmbeddedBlock || ch == '<' || ch == '&')
317-
return done
318-
}
319-
done
320-
}
329+
val buf = StringBuilder()
330+
if (ch != SU)
331+
while
332+
if ch == '}' then
333+
if charComingAfter(nextch()) == '}' then nextch()
334+
else errorBraces()
335+
buf += ch
336+
nextch()
337+
!(ch == SU || xCheckEmbeddedBlock || ch == '<' || ch == '&')
338+
do ()
339+
buf.toString
321340

322341
/** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */
323342
inline private def xLiteralCommon(f: () => Tree, ifTruncated: String => Unit): Tree = {
@@ -369,7 +388,7 @@ object MarkupParsers {
369388
while {
370389
xSpaceOpt()
371390
nextch()
372-
ts.append(element)
391+
content_LT(ts)
373392
charComingAfter(xSpaceOpt()) == '<'
374393
} do ()
375394
handle.makeXMLseq(Span(start, curOffset, start), ts)
@@ -453,11 +472,11 @@ object MarkupParsers {
453472
if (ch != '/') ts append xPattern // child
454473
else return false // terminate
455474

456-
case '{' => // embedded Scala patterns
457-
while (ch == '{') {
458-
nextch()
475+
case '{' if xCheckEmbeddedBlock => // embedded Scala patterns, if not double brace
476+
while
459477
ts ++= xScalaPatterns
460-
}
478+
xCheckEmbeddedBlock
479+
do ()
461480
assert(!xEmbeddedBlock, "problem with embedded block")
462481

463482
case SU =>

compiler/src/dotty/tools/dotc/parsing/xml/SymbolicXMLBuilder.scala

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import Flags.Mutable
1212
import Names._, StdNames._, ast.Trees._, ast.{tpd, untpd}
1313
import Symbols._, Contexts._
1414
import util.Spans._
15+
import util.Property
1516
import Parsers.Parser
1617

1718
/** This class builds instance of `Tree` that represent XML.
@@ -25,12 +26,13 @@ import Parsers.Parser
2526
* @author Burak Emir
2627
* @version 1.0
2728
*/
28-
class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) {
29+
class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean, isCoalescing: Boolean)(using Context) {
2930

3031
import Constants.Constant
3132
import untpd._
3233

3334
import parser.atSpan
35+
import SymbolicXMLBuilder.*
3436

3537
private[parsing] var isPattern: Boolean = _
3638

@@ -115,8 +117,9 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) {
115117

116118
// create scala.xml.Text here <: scala.xml.Node
117119
final def text(span: Span, txt: String): Tree = atSpan(span) {
118-
if (isPattern) makeTextPat(const(txt))
119-
else makeText1(const(txt))
120+
val t = if isPattern then makeTextPat(const(txt)) else makeText1(const(txt))
121+
if isCoalescing then t.putAttachment(TextAttacheKey, TextAttache(span, txt))
122+
t
120123
}
121124

122125
def makeTextPat(txt: Tree): Apply = Apply(_scala_xml__Text, List(txt))
@@ -259,3 +262,8 @@ class SymbolicXMLBuilder(parser: Parser, preserveWS: Boolean)(using Context) {
259262
atSpan(span.toSynthetic)(new XMLBlock(nsResult, new XMLBlock(attrResult, body)))
260263
}
261264
}
265+
object SymbolicXMLBuilder:
266+
val TextAttacheKey: Property.Key[TextAttache] = Property.Key[TextAttache]()
267+
/** Attachment for trees deriving from text nodes (Text, CData, entities). Used for coalescing. */
268+
case class TextAttache(span: Span, text: String)
269+
end SymbolicXMLBuilder

0 commit comments

Comments
 (0)