@@ -5,6 +5,7 @@ package xml
5
5
6
6
import scala .language .unsafeNulls
7
7
8
+ import scala .annotation .tailrec
8
9
import scala .collection .mutable
9
10
import mutable .{ Buffer , ArrayBuffer , ListBuffer }
10
11
import scala .util .control .ControlThrowable
@@ -16,10 +17,12 @@ import Constants._
16
17
import util .SourceFile
17
18
import Utility ._
18
19
20
+ import SymbolicXMLBuilder .*
21
+
19
22
20
23
// XXX/Note: many/most of the functions in here are almost direct cut and pastes
21
24
// from another file - scala.xml.parsing.MarkupParser, it looks like.
22
- // (It was like that when I got here.) They used to be commented "[Duplicate]" butx
25
+ // (It was like that when I got here.) They used to be commented "[Duplicate]" but
23
26
// since approximately all of them were, I snipped it as noise. As far as I can
24
27
// tell this wasn't for any particularly good reason, but slightly different
25
28
// compiler and library parser interfaces meant it would take some setup.
@@ -49,7 +52,7 @@ object MarkupParsers {
49
52
override def getMessage : String = " input ended while parsing XML"
50
53
}
51
54
52
- class MarkupParser (parser : Parser , final val preserveWS : Boolean )(implicit src : SourceFile ) extends MarkupParserCommon {
55
+ class MarkupParser (parser : Parser , final val preserveWS : Boolean , isCoalescing : Boolean )(implicit src : SourceFile ) extends MarkupParserCommon {
53
56
54
57
import Tokens .{ LBRACE , RBRACE }
55
58
@@ -182,22 +185,17 @@ object MarkupParsers {
182
185
xTakeUntil(handle.comment, () => Span (start, curOffset, start), " -->" )
183
186
}
184
187
185
/** Appends `txt` to `ts` as a text tree built by `handle.text(span, _)`.
 *
 *  When `preserveWS` is false, every run of whitespace is squeezed to a single
 *  blank and the result is trimmed. Nothing is appended if the text that is
 *  left is empty.
 *
 *  @param span position passed on to `handle.text`
 *  @param ts   buffer that receives the text tree as a side-effect
 *  @param txt  raw character data
 */
def appendText(span: Span, ts: Buffer[Tree], txt: String): Unit =
  // Collapses whitespace runs to one blank, then strips both ends.
  def squeezed: String =
    val out = StringBuilder()
    for c <- txt do
      if isSpace(c) then
        // only the first whitespace character of a run leaves a blank behind
        if out.isEmpty || !isSpace(out.last) then out += ' '
      else
        out += c
    out.toString.trim

  val text = if preserveWS then txt else squeezed
  if text.nonEmpty then ts += handle.text(span, text)
201
199
202
200
/** adds entity/character to ts as side-effect
203
201
* @precond ch == '&'
@@ -227,48 +225,74 @@ object MarkupParsers {
227
225
if (xCheckEmbeddedBlock) ts append xEmbeddedExpr
228
226
else appendText(p, ts, xText)
229
227
230
/** Handles whatever follows a `<` that the caller has already stepped past.
 *
 *  A `/` means an end tag: it is reported but not consumed. Otherwise a
 *  CDATA section or comment (after `!`), a processing instruction (after `?`)
 *  or a child element is parsed, and the resulting tree is appended to `ts`
 *  as a side-effect.
 *
 *  @param ts buffer that receives the parsed tree
 *  @return true if an end tag was detected, false if a tree was appended
 */
private def content_LT(ts: ArrayBuffer[Tree]): Boolean =
  if ch == '/' then true // end tag: left in place for the caller
  else
    val node =
      if ch == '!' then
        // CDATA or Comment
        nextch()
        if ch == '[' then xCharData else xComment
      else if ch == '?' then
        // PI
        nextch()
        xProcInstr
      else
        // child node
        element
    ts += node
    false

250
/** Parses a sequence of content items until an end tag or `SU` is reached.
 *
 *  Embedded expressions, child nodes, brace content, entity/character
 *  references and text are appended to a fresh buffer in the order they
 *  are encountered. If `isCoalescing` is set, the buffer is post-processed
 *  by `coalesce()` before being returned.
 *
 *  @return the trees collected for this content section
 */
def content: Buffer[Tree] =
  val ts = new ArrayBuffer[Tree]

  // Consumes one content item; yields false once an end tag or SU is seen.
  def step(): Boolean =
    if xEmbeddedBlock then
      ts += xEmbeddedExpr
      true
    else
      tmppos = Span(curOffset)
      ch match
        case '<' => // end tag, cdata, comment, pi or child node
          nextch()
          !content_LT(ts)
        case '{' => // literal brace or embedded Scala block
          content_BRACE(tmppos, ts)
          true
        case '&' => // EntityRef or CharRef
          content_AMP(ts)
          true
        case SU =>
          false
        case _ => // text content - here xEmbeddedBlock might be true
          appendText(tmppos, ts, xText)
          true

  @tailrec def loopContent(): Unit =
    if step() then loopContent()

  // merge text sections and strip attachments
  def coalesce(): ArrayBuffer[Tree] =
    // Rebuilds the buffer, joining each run of adjacent text-attached trees
    // into one text tree positioned at the span of the run's first fragment.
    def merged(): ArrayBuffer[Tree] =
      val out = ArrayBuffer.empty[Tree]
      val pending = StringBuilder()
      var start: PositionType = NoSpan
      def flush(): Unit =
        if pending.nonEmpty then
          appendText(start, out, pending.toString)
          pending.clear()
      ts.foreach { t =>
        t.getAttachment(TextAttacheKey) match
          case Some(ta) =>
            if pending.isEmpty then start = ta.span
            pending.append(ta.text)
          case _ =>
            flush()
            out += t
      }
      flush()
      out
    end merged

    // copying is only needed when there is more than one text fragment
    val fragments = ts.count(_.hasAttachment(TextAttacheKey))
    val res = if fragments > 1 then merged() else ts
    res.foreach(_.removeAttachment(TextAttacheKey))
    res
  end coalesce

  loopContent()
  if isCoalescing then coalesce() else ts
end content
272
296
273
297
/** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag
274
298
* | xmlTag1 '/' '>'
@@ -300,24 +324,19 @@ object MarkupParsers {
300
324
/** Reads character data into a string.
 *
 *  Precondition: `xEmbeddedBlock == false` (we are not in a Scala block);
 *  this is asserted on entry.
 *
 *  Unless the input is already at `SU`, at least one character is read.
 *  Reading then stops at `SU`, when `xCheckEmbeddedBlock` reports an embedded
 *  block, or in front of `<` or `&`. On a `}`, `charComingAfter(nextch())` is
 *  consulted: if it yields another `}` the reader advances once more,
 *  otherwise `errorBraces()` is invoked.
 *
 *  @return the characters collected
 */
private def xText: String =
  assert(!xEmbeddedBlock, " internal error: encountered embedded block")
  val text = StringBuilder()
  // evaluated only after a character has been consumed; order matters because
  // xCheckEmbeddedBlock must not be consulted once SU has been reached
  def atBoundary: Boolean =
    ch == SU || xCheckEmbeddedBlock || ch == '<' || ch == '&'

  var more = ch != SU
  while more do
    if ch == '}' then
      if charComingAfter(nextch()) == '}' then nextch()
      else errorBraces()
    text += ch
    nextch()
    more = !atBoundary
  text.toString
321
340
322
341
/** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */
323
342
inline private def xLiteralCommon (f : () => Tree , ifTruncated : String => Unit ): Tree = {
@@ -369,7 +388,7 @@ object MarkupParsers {
369
388
while {
370
389
xSpaceOpt()
371
390
nextch()
372
- ts.append(element )
391
+ content_LT(ts )
373
392
charComingAfter(xSpaceOpt()) == '<'
374
393
} do ()
375
394
handle.makeXMLseq(Span (start, curOffset, start), ts)
@@ -453,11 +472,11 @@ object MarkupParsers {
453
472
if (ch != '/' ) ts append xPattern // child
454
473
else return false // terminate
455
474
456
- case '{' => // embedded Scala patterns
457
- while (ch == '{' ) {
458
- nextch()
475
+ case '{' if xCheckEmbeddedBlock => // embedded Scala patterns, if not double brace
476
+ while
459
477
ts ++= xScalaPatterns
460
- }
478
+ xCheckEmbeddedBlock
479
+ do ()
461
480
assert(! xEmbeddedBlock, " problem with embedded block" )
462
481
463
482
case SU =>
0 commit comments