@@ -107,28 +107,46 @@ func parseTest(
107
107
serializedCaptures. deallocate ( )
108
108
}
109
109
110
+ /// Test delimiter lexing. Takes an input string that starts with a regex
111
+ /// literal. If `ignoreTrailing` is true, there must be additional characters
112
+ /// that follow the literal that are not considered part of it; otherwise the literal must span the entire input. Returns the text of the lexed literal, delimiters included.
113
+ @discardableResult
110
114
func delimiterLexingTest(
111
- _ input: String , file: StaticString = #file, line: UInt = #line
112
- ) {
115
+ _ input: String , ignoreTrailing: Bool = false ,
116
+ file: StaticString = #file, line: UInt = #line
117
+ ) -> String {
113
118
input. withCString ( encodedAs: UTF8 . self) { ptr in
114
119
let endPtr = ptr + input. utf8. count // One past the last UTF-8 byte of `input`.
115
120
let ( contents, delim, end) = try ! lexRegex ( start: ptr, end: endPtr)
116
- XCTAssertEqual ( end, endPtr, file: file, line: line)
121
+ if ignoreTrailing {
122
+ XCTAssertNotEqual ( end, endPtr, file: file, line: line) // Lexing must stop short of the end of the input.
123
+ } else {
124
+ XCTAssertEqual ( end, endPtr, file: file, line: line) // Lexing must consume the entire input.
125
+ }
117
126
118
- let ( parseContents, parseDelim) = droppingRegexDelimiters ( input)
127
+ let rawPtr = UnsafeRawPointer ( ptr)
128
+ let buffer = UnsafeRawBufferPointer ( start: rawPtr, count: end - rawPtr) // Bytes from the start of the input up to where lexing stopped.
129
+ let literal = String ( decoding: buffer, as: UTF8 . self)
130
+
131
+ let ( parseContents, parseDelim) = droppingRegexDelimiters ( literal) // Cross-check the lexer's contents and delimiter against the delimiter-dropping helper.
119
132
XCTAssertEqual ( contents, parseContents, file: file, line: line)
120
133
XCTAssertEqual ( delim, parseDelim, file: file, line: line)
134
+ return literal
121
135
}
122
136
}
123
137
138
+ /// Test parsing an input string with regex delimiters. If `ignoreTrailing` is
139
+ /// true, there must be additional characters that follow the literal that are
140
+ /// not considered part of it.
124
141
func parseWithDelimitersTest(
125
- _ input: String , _ expecting: AST . Node ,
142
+ _ input: String , _ expecting: AST . Node , ignoreTrailing : Bool = false ,
126
143
file: StaticString = #file, line: UInt = #line
127
144
) {
128
145
// First try lexing.
129
- delimiterLexingTest ( input, file: file, line: line)
146
+ let literal = delimiterLexingTest (
147
+ input, ignoreTrailing: ignoreTrailing, file: file, line: line)
130
148
131
- let orig = try ! parseWithDelimiters ( input )
149
+ let orig = try ! parseWithDelimiters ( literal )
132
150
let ast = orig. root
133
151
guard ast == expecting
134
152
|| ast. _dump ( ) == expecting. _dump ( ) // EQ workaround
@@ -1505,6 +1523,63 @@ extension RegexTests {
1505
1523
1506
1524
// Printable ASCII characters.
1507
1525
delimiterLexingTest ( ##"re' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'"## )
1526
+
1527
+ // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter
1528
+ // if it's clear that it's part of the regex syntax.
1529
+
1530
+ parseWithDelimitersTest (
1531
+ #"re'(?'a_bcA0'\')'"# , namedCapture ( " a_bcA0 " , " ' " ) )
1532
+ parseWithDelimitersTest (
1533
+ #"re'(?'a_bcA0-c1A'x*)'"# ,
1534
+ balancedCapture ( name: " a_bcA0 " , priorName: " c1A " , zeroOrMore ( of: " x " ) ) )
1535
+
1536
+ parseWithDelimitersTest (
1537
+ #"re'(?('a_bcA0')x|y)'"# , conditional (
1538
+ . groupMatched( ref ( " a_bcA0 " ) ) , trueBranch: " x " , falseBranch: " y " ) )
1539
+ parseWithDelimitersTest (
1540
+ #"re'(?('+20')\')'"# , conditional (
1541
+ . groupMatched( ref ( plus: 20 ) ) , trueBranch: " ' " , falseBranch: empty ( ) ) )
1542
+
1543
+ parseWithDelimitersTest (
1544
+ #"re'a\k'b0A''"# , concat ( " a " , backreference ( . named( " b0A " ) ) ) )
1545
+ parseWithDelimitersTest (
1546
+ #"re'\k'+2-1''"# , backreference ( . relative( 2 ) , recursionLevel: - 1 ) )
1547
+
1548
+ parseWithDelimitersTest (
1549
+ #"re'a\g'b0A''"# , concat ( " a " , subpattern ( . named( " b0A " ) ) ) )
1550
+ parseWithDelimitersTest (
1551
+ #"re'\g'-1'\''"# , concat ( subpattern ( . relative( - 1 ) ) , " ' " ) )
1552
+
1553
+ parseWithDelimitersTest (
1554
+ #"re'(?C'a*b\c 🔥_ ;')'"# , pcreCallout ( . string( #"a*b\c 🔥_ ;"# ) ) )
1555
+
1556
+ // Fine, because we don't end up skipping.
1557
+ delimiterLexingTest ( #"re'(?'"# )
1558
+ delimiterLexingTest ( #"re'(?('"# )
1559
+ delimiterLexingTest ( #"re'\k'"# )
1560
+ delimiterLexingTest ( #"re'\g'"# )
1561
+ delimiterLexingTest ( #"re'(?C'"# )
1562
+
1563
+ // Not a valid group name, but we can still skip over it.
1564
+ delimiterLexingTest ( #"re'(?'🔥')'"# )
1565
+
1566
+ // Escaped, so don't skip. These will ignore the ending `'` as we've already
1567
+ // closed the literal.
1568
+ parseWithDelimitersTest (
1569
+ #"re'\(?''"# , zeroOrOne ( of: " ( " ) , ignoreTrailing: true
1570
+ )
1571
+ parseWithDelimitersTest (
1572
+ #"re'\\k''"# , concat ( " \\ " , " k " ) , ignoreTrailing: true
1573
+ )
1574
+ parseWithDelimitersTest (
1575
+ #"re'\\g''"# , concat ( " \\ " , " g " ) , ignoreTrailing: true
1576
+ )
1577
+ parseWithDelimitersTest (
1578
+ #"re'\(?C''"# , concat ( zeroOrOne ( of: " ( " ) , " C " ) , ignoreTrailing: true
1579
+ )
1580
+ delimiterLexingTest ( #"re'(\?''"# , ignoreTrailing: true )
1581
+ delimiterLexingTest ( #"re'\(?(''"# , ignoreTrailing: true )
1582
+
1508
1583
// MARK: Parse not-equal
1509
1584
1510
1585
// Make sure dumping output correctly reflects differences in AST.
@@ -1811,6 +1886,12 @@ extension RegexTests {
1811
1886
diagnosticTest ( #"(?<#>)"# , . identifierMustBeAlphaNumeric( . groupName) )
1812
1887
diagnosticTest ( #"(?'1A')"# , . identifierCannotStartWithNumber( . groupName) )
1813
1888
1889
+ // TODO: It might be better if we tried to consume up to the closing `'` and
1890
+ // diagnosed an invalid group name based on that.
1891
+ diagnosticTest ( #"(?'abc ')"# , . expected( " ' " ) )
1892
+
1893
+ diagnosticTest ( " (?'🔥') " , . identifierMustBeAlphaNumeric( . groupName) )
1894
+
1814
1895
diagnosticTest ( #"(?'-')"# , . expectedIdentifier( . groupName) )
1815
1896
diagnosticTest ( #"(?'--')"# , . identifierMustBeAlphaNumeric( . groupName) )
1816
1897
diagnosticTest ( #"(?'a-b-c')"# , . expected( " ' " ) )
@@ -1924,13 +2005,24 @@ extension RegexTests {
1924
2005
}
1925
2006
1926
2007
/// Exercises delimiter lexing failures. Each call pairs an input string with
/// the diagnostic passed alongside it (`.endOfString` or `.unprintableASCII`).
/// NOTE(review): presumably `delimiterLexingDiagnosticTest` asserts that lexing
/// the input produces that diagnostic — confirm against its definition.
func testDelimiterLexingErrors( ) {
2008
+
2009
+ // MARK: Printable ASCII
2010
+
1927
2011
delimiterLexingDiagnosticTest ( #"re'\\#n'"# , . endOfString)
1928
2012
for i : UInt8 in 0x1 ..< 0x20 where i != 0xA && i != 0xD { // U+A & U+D are \n and \r.
1929
2013
delimiterLexingDiagnosticTest ( " re' \( UnicodeScalar ( i) ) ' " , . unprintableASCII)
1930
2014
}
1931
2015
delimiterLexingDiagnosticTest ( " re' \n ' " , . endOfString)
1932
2016
delimiterLexingDiagnosticTest ( " re' \r ' " , . endOfString)
1933
2017
delimiterLexingDiagnosticTest ( " re' \u{7F} ' " , . unprintableASCII)
2018
+
2019
+ // MARK: Delimiter skipping
2020
+
2021
+ delimiterLexingDiagnosticTest ( " re'(?'' " , . endOfString)
2022
+ delimiterLexingDiagnosticTest ( " re'(?'abc' " , . endOfString)
2023
+ delimiterLexingDiagnosticTest ( " re'(?('abc' " , . endOfString)
2024
+ delimiterLexingDiagnosticTest ( #"re'\k'ab_c0+-'"# , . endOfString)
2025
+ delimiterLexingDiagnosticTest ( #"re'\g'ab_c0+-'"# , . endOfString)
1934
2026
}
1935
2027
1936
2028
func testlibswiftDiagnostics( ) {
0 commit comments