@@ -107,28 +107,46 @@ func parseTest(
107
107
serializedCaptures. deallocate ( )
108
108
}
109
109
110
+ /// Test delimiter lexing. Takes an input string that starts with a regex
111
+ /// literal. If `ignoreTrailing` is true, the input must contain additional characters
112
+ /// after the literal that are not considered part of it; otherwise the literal must span the whole input. Returns the portion of `input` that was lexed as the literal.
113
+ @discardableResult
110
114
func delimiterLexingTest(
111
- _ input: String , file: StaticString = #file, line: UInt = #line
112
- ) {
115
+ _ input: String , ignoreTrailing: Bool = false ,
116
+ file: StaticString = #file, line: UInt = #line
117
+ ) -> String {
113
118
input. withCString ( encodedAs: UTF8 . self) { ptr in
114
119
let endPtr = ptr + input. utf8. count
115
120
let ( contents, delim, end) = try ! lexRegex ( start: ptr, end: endPtr)
116
- XCTAssertEqual ( end, endPtr, file: file, line: line)
121
+ if ignoreTrailing { // Trailing characters expected: the lexer must stop before the end of the input.
122
+ XCTAssertNotEqual ( end, endPtr, file: file, line: line)
123
+ } else { // No trailing characters: the lexer must consume the entire input.
124
+ XCTAssertEqual ( end, endPtr, file: file, line: line)
125
+ }
117
126
118
- let ( parseContents, parseDelim) = droppingRegexDelimiters ( input)
127
+ let rawPtr = UnsafeRawPointer ( ptr)
128
+ let buffer = UnsafeRawBufferPointer ( start: rawPtr, count: end - rawPtr) // Bytes from the start of the input up to where the lexer stopped.
129
+ let literal = String ( decoding: buffer, as: UTF8 . self)
130
+
131
+ let ( parseContents, parseDelim) = droppingRegexDelimiters ( literal) // Compare against only the lexed literal, not any trailing input.
119
132
XCTAssertEqual ( contents, parseContents, file: file, line: line)
120
133
XCTAssertEqual ( delim, parseDelim, file: file, line: line)
134
+ return literal
121
135
}
122
136
}
123
137
138
+ /// Test parsing an input string with regex delimiters. If `ignoreTrailing` is
139
+ /// true, there may be additional characters that follow the literal that are
140
+ /// not considered part of it.
124
141
func parseWithDelimitersTest(
125
- _ input: String , _ expecting: AST . Node ,
142
+ _ input: String , _ expecting: AST . Node , ignoreTrailing : Bool = false ,
126
143
file: StaticString = #file, line: UInt = #line
127
144
) {
128
145
// First try lexing.
129
- delimiterLexingTest ( input, file: file, line: line)
146
+ let literal = delimiterLexingTest (
147
+ input, ignoreTrailing: ignoreTrailing, file: file, line: line)
130
148
131
- let orig = try ! parseWithDelimiters ( input )
149
+ let orig = try ! parseWithDelimiters ( literal )
132
150
let ast = orig. root
133
151
guard ast == expecting
134
152
|| ast. _dump ( ) == expecting. _dump ( ) // EQ workaround
@@ -1509,6 +1527,63 @@ extension RegexTests {
1509
1527
1510
1528
// Printable ASCII characters.
1511
1529
delimiterLexingTest ( ##"re' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'"## )
1530
+
1531
+ // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter
1532
+ // if it's clear that it's part of the regex syntax.
1533
+
1534
+ parseWithDelimitersTest (
1535
+ #"re'(?'a_bcA0'\')'"# , namedCapture ( " a_bcA0 " , " ' " ) )
1536
+ parseWithDelimitersTest (
1537
+ #"re'(?'a_bcA0-c1A'x*)'"# ,
1538
+ balancedCapture ( name: " a_bcA0 " , priorName: " c1A " , zeroOrMore ( of: " x " ) ) )
1539
+
1540
+ parseWithDelimitersTest (
1541
+ #"re'(?('a_bcA0')x|y)'"# , conditional (
1542
+ . groupMatched( ref ( " a_bcA0 " ) ) , trueBranch: " x " , falseBranch: " y " ) )
1543
+ parseWithDelimitersTest (
1544
+ #"re'(?('+20')\')'"# , conditional (
1545
+ . groupMatched( ref ( plus: 20 ) ) , trueBranch: " ' " , falseBranch: empty ( ) ) )
1546
+
1547
+ parseWithDelimitersTest (
1548
+ #"re'a\k'b0A''"# , concat ( " a " , backreference ( . named( " b0A " ) ) ) )
1549
+ parseWithDelimitersTest (
1550
+ #"re'\k'+2-1''"# , backreference ( . relative( 2 ) , recursionLevel: - 1 ) )
1551
+
1552
+ parseWithDelimitersTest (
1553
+ #"re'a\g'b0A''"# , concat ( " a " , subpattern ( . named( " b0A " ) ) ) )
1554
+ parseWithDelimitersTest (
1555
+ #"re'\g'-1'\''"# , concat ( subpattern ( . relative( - 1 ) ) , " ' " ) )
1556
+
1557
+ parseWithDelimitersTest (
1558
+ #"re'(?C'a*b\c 🔥_ ;')'"# , pcreCallout ( . string( #"a*b\c 🔥_ ;"# ) ) )
1559
+
1560
+ // Fine, because we don't end up skipping.
1561
+ delimiterLexingTest ( #"re'(?'"# )
1562
+ delimiterLexingTest ( #"re'(?('"# )
1563
+ delimiterLexingTest ( #"re'\k'"# )
1564
+ delimiterLexingTest ( #"re'\g'"# )
1565
+ delimiterLexingTest ( #"re'(?C'"# )
1566
+
1567
+ // Not a valid group name, but we can still skip over it.
1568
+ delimiterLexingTest ( #"re'(?'🔥')'"# )
1569
+
1570
+ // Escaped, so don't skip. These will ignore the ending `'` as we've already
1571
+ // closed the literal.
1572
+ parseWithDelimitersTest (
1573
+ #"re'\(?''"# , zeroOrOne ( of: " ( " ) , ignoreTrailing: true
1574
+ )
1575
+ parseWithDelimitersTest (
1576
+ #"re'\\k''"# , concat ( " \\ " , " k " ) , ignoreTrailing: true
1577
+ )
1578
+ parseWithDelimitersTest (
1579
+ #"re'\\g''"# , concat ( " \\ " , " g " ) , ignoreTrailing: true
1580
+ )
1581
+ parseWithDelimitersTest (
1582
+ #"re'\(?C''"# , concat ( zeroOrOne ( of: " ( " ) , " C " ) , ignoreTrailing: true
1583
+ )
1584
+ delimiterLexingTest ( #"re'(\?''"# , ignoreTrailing: true )
1585
+ delimiterLexingTest ( #"re'\(?(''"# , ignoreTrailing: true )
1586
+
1512
1587
// MARK: Parse not-equal
1513
1588
1514
1589
// Make sure dumping output correctly reflects differences in AST.
@@ -1815,6 +1890,12 @@ extension RegexTests {
1815
1890
diagnosticTest ( #"(?<#>)"# , . identifierMustBeAlphaNumeric( . groupName) )
1816
1891
diagnosticTest ( #"(?'1A')"# , . identifierCannotStartWithNumber( . groupName) )
1817
1892
1893
+ // TODO: It might be better if we tried to consume up to the closing `'` and
1894
+ // diagnosed an invalid group name based on that.
1895
+ diagnosticTest ( #"(?'abc ')"# , . expected( " ' " ) )
1896
+
1897
+ diagnosticTest ( " (?'🔥') " , . identifierMustBeAlphaNumeric( . groupName) )
1898
+
1818
1899
diagnosticTest ( #"(?'-')"# , . expectedIdentifier( . groupName) )
1819
1900
diagnosticTest ( #"(?'--')"# , . identifierMustBeAlphaNumeric( . groupName) )
1820
1901
diagnosticTest ( #"(?'a-b-c')"# , . expected( " ' " ) )
@@ -1928,13 +2009,24 @@ extension RegexTests {
1928
2009
}
1929
2010
1930
2011
/// Feeds malformed regex literals to `delimiterLexingDiagnosticTest` together with the delimiter-lexing error each one is expected to produce (helper defined elsewhere; presumably it asserts that lexing fails with that error).
func testDelimiterLexingErrors( ) {
2012
+
2013
+ // MARK: Printable ASCII
2014
+
1931
2015
delimiterLexingDiagnosticTest ( #"re'\\#n'"# , . endOfString)
1932
2016
for i : UInt8 in 0x1 ..< 0x20 where i != 0xA && i != 0xD { // U+A & U+D are \n and \r; they are expected to give `.endOfString` instead and are checked separately below.
1933
2017
delimiterLexingDiagnosticTest ( " re' \( UnicodeScalar ( i) ) ' " , . unprintableASCII)
1934
2018
}
1935
2019
delimiterLexingDiagnosticTest ( " re' \n ' " , . endOfString)
1936
2020
delimiterLexingDiagnosticTest ( " re' \r ' " , . endOfString)
1937
2021
delimiterLexingDiagnosticTest ( " re' \u{7F} ' " , . unprintableASCII)
2022
+
2023
+ // MARK: Delimiter skipping
2024
+
2025
+ delimiterLexingDiagnosticTest ( " re'(?'' " , . endOfString) // NOTE(review): in these cases the last `'` is presumably skipped as part of regex syntax, so no closing delimiter is found — confirm against the lexer.
2026
+ delimiterLexingDiagnosticTest ( " re'(?'abc' " , . endOfString)
2027
+ delimiterLexingDiagnosticTest ( " re'(?('abc' " , . endOfString)
2028
+ delimiterLexingDiagnosticTest ( #"re'\k'ab_c0+-'"# , . endOfString)
2029
+ delimiterLexingDiagnosticTest ( #"re'\g'ab_c0+-'"# , . endOfString)
1938
2030
}
1939
2031
1940
2032
func testlibswiftDiagnostics( ) {
0 commit comments