Skip to content

Commit a2bacbe

Browse files
authored
Merge pull request #104 from hamishknight/dotting-and-crossing
2 parents e6ec173 + 451a9ac commit a2bacbe

File tree

5 files changed

+135
-62
lines changed

5 files changed

+135
-62
lines changed

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 85 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -100,18 +100,35 @@ extension Source {
100100
}
101101
}
102102

103-
mutating func tryEatNonEmpty(_ c: Char) throws -> Bool {
104-
guard !isEmpty else { throw ParseError.expected(String(c)) }
105-
return tryEat(c)
106-
}
107-
108103
mutating func tryEatNonEmpty<C: Collection>(sequence c: C) throws -> Bool
109104
where C.Element == Char
110105
{
111-
guard !isEmpty else { throw ParseError.expected(String(c)) }
106+
_ = try recordLoc { src in
107+
guard !src.isEmpty else { throw ParseError.expected(String(c)) }
108+
}
112109
return tryEat(sequence: c)
113110
}
114111

112+
mutating func tryEatNonEmpty(_ c: Char) throws -> Bool {
113+
try tryEatNonEmpty(sequence: String(c))
114+
}
115+
116+
/// Attempt to make a series of lexing steps in `body`, returning `nil` if
117+
/// unsuccessful, which will revert the source back to its previous state. If
118+
/// an error is thrown, the source will not be reverted.
119+
mutating func tryEating<T>(
120+
_ body: (inout Source) throws -> T?
121+
) rethrows -> T? {
122+
// We don't revert the source if an error is thrown, as it's useful to
123+
// maintain the source location in that case.
124+
let current = self
125+
guard let result = try body(&self) else {
126+
self = current
127+
return nil
128+
}
129+
return result
130+
}
131+
115132
/// Throws an expected ASCII character error if not matched
116133
mutating func expectASCII() throws -> Located<Character> {
117134
try recordLoc { src in
@@ -277,7 +294,7 @@ extension Source {
277294
return try Source.validateUnicodeScalar(str, .octal)
278295

279296
default:
280-
throw ParseError.misc("TODO: Or is this an assert?")
297+
fatalError("Unexpected scalar start")
281298
}
282299
}
283300
}
@@ -295,16 +312,11 @@ extension Source {
295312
if src.tryEat("+") { return .oneOrMore }
296313
if src.tryEat("?") { return .zeroOrOne }
297314

298-
// FIXME: Actually, PCRE treats empty as literal `{}`...
299-
// But Java 8 errors out?
300-
if src.tryEat("{") {
301-
// FIXME: Erm, PCRE parses as literal if no lowerbound...
302-
let amt = try src.expectRange()
303-
try src.expect("}")
304-
return amt.value // FIXME: track actual range...
315+
return try src.tryEating { src in
316+
guard src.tryEat("{"), let range = try src.lexRange(), src.tryEat("}")
317+
else { return nil }
318+
return range.value
305319
}
306-
307-
return nil
308320
}
309321
guard let amt = amt else { return nil }
310322

@@ -317,56 +329,56 @@ extension Source {
317329
return (amt, kind)
318330
}
319331

320-
/// Consume a range
332+
/// Try to consume a range, returning `nil` if unsuccessful.
321333
///
322334
/// Range -> ',' <Int> | <Int> ',' <Int>? | <Int>
323335
/// | ExpRange
324336
/// ExpRange -> '..<' <Int> | '...' <Int>
325337
/// | <Int> '..<' <Int> | <Int> '...' <Int>?
326-
mutating func expectRange() throws -> Located<Quant.Amount> {
338+
mutating func lexRange() throws -> Located<Quant.Amount>? {
327339
try recordLoc { src in
328-
// TODO: lex positive numbers, more specifically...
329-
330-
let lowerOpt = try src.lexNumber()
331-
332-
// ',' or '...' or '..<' or nothing
333-
let closedRange: Bool?
334-
if src.tryEat(",") {
335-
closedRange = true
336-
} else if src.experimentalRanges && src.tryEat(".") {
337-
try src.expect(".")
338-
if src.tryEat(".") {
340+
try src.tryEating { src in
341+
let lowerOpt = try src.lexNumber()
342+
343+
// ',' or '...' or '..<' or nothing
344+
// TODO: We ought to try and consume whitespace here and emit a
345+
// diagnostic for the user warning them that it would cause the range to
346+
// be treated as literal.
347+
let closedRange: Bool?
348+
if src.tryEat(",") {
339349
closedRange = true
350+
} else if src.experimentalRanges && src.tryEat(".") {
351+
try src.expect(".")
352+
if src.tryEat(".") {
353+
closedRange = true
354+
} else {
355+
try src.expect("<")
356+
closedRange = false
357+
}
340358
} else {
341-
try src.expect("<")
342-
closedRange = false
359+
closedRange = nil
343360
}
344-
} else {
345-
closedRange = nil
346-
}
347-
// FIXME: wait, why `try!` ?
348-
let upperOpt: Located<Int>?
349-
if let u = try! src.lexNumber() {
350-
upperOpt = (closedRange == true) ? u : Located(u.value-1, u.location)
351-
} else {
352-
upperOpt = nil
353-
}
354361

355-
switch (lowerOpt, closedRange, upperOpt) {
356-
case let (l?, nil, nil):
357-
return .exactly(l)
358-
case let (l?, true, nil):
359-
return .nOrMore(l)
360-
case let (nil, _, u?):
361-
return .upToN(u)
362-
case let (l?, _, u?):
363-
// FIXME: source location tracking
364-
return .range(l, u)
365-
366-
case let (nil, nil, u) where u != nil:
367-
fatalError("Not possible")
368-
default:
369-
throw ParseError.misc("Invalid range")
362+
let upperOpt = try src.lexNumber()?.map { upper in
363+
// If we have an open range, the upper bound should be adjusted down.
364+
closedRange == true ? upper : upper - 1
365+
}
366+
367+
switch (lowerOpt, closedRange, upperOpt) {
368+
case let (l?, nil, nil):
369+
return .exactly(l)
370+
case let (l?, true, nil):
371+
return .nOrMore(l)
372+
case let (nil, _?, u?):
373+
return .upToN(u)
374+
case let (l?, _?, u?):
375+
return .range(l, u)
376+
377+
case (nil, nil, _?):
378+
fatalError("Didn't lex lower bound, but lexed upper bound?")
379+
default:
380+
return nil
381+
}
370382
}
371383
}
372384
}
@@ -393,12 +405,24 @@ extension Source {
393405
try lexUntil(eating: String(end))
394406
}
395407

396-
/// Expect a linear run of non-nested non-empty content
408+
/// Expect a linear run of non-nested non-empty content ending with a given
409+
/// delimiter. If `ignoreEscaped` is true, escaped characters will not be
410+
/// considered for the ending delimiter.
397411
private mutating func expectQuoted(
398-
endingWith end: String
412+
endingWith end: String, ignoreEscaped: Bool = false
399413
) throws -> Located<String> {
400414
try recordLoc { src in
401-
let result = try src.lexUntil(eating: end).value
415+
let result = try src.lexUntil { src in
416+
if try src.tryEatNonEmpty(sequence: end) {
417+
return true
418+
}
419+
// Ignore escapes if we're allowed to. lexUntil will consume the next
420+
// character.
421+
if ignoreEscaped, src.tryEat("\\") {
422+
try src.expectNonEmpty()
423+
}
424+
return false
425+
}.value
402426
guard !result.isEmpty else {
403427
throw ParseError.misc("Expected non-empty contents")
404428
}
@@ -412,7 +436,7 @@ extension Source {
412436
///
413437
/// With `SyntaxOptions.experimentalQuotes`, also accepts
414438
///
415-
/// ExpQuote -> '"' [^"]* '"'
439+
/// ExpQuote -> '"' ('\"' | [^"])* '"'
416440
///
417441
/// Future: Experimental quotes are full fledged Swift string literals
418442
///
@@ -424,8 +448,7 @@ extension Source {
424448
return try src.expectQuoted(endingWith: #"\E"#).value
425449
}
426450
if src.experimentalQuotes, src.tryEat("\"") {
427-
// TODO: escaped `"`, etc...
428-
return try src.expectQuoted(endingWith: "\"").value
451+
return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
429452
}
430453
return nil
431454
}

Sources/_MatchingEngine/Regex/Parse/SourceLocation.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ extension Source {
7676
// externally?
7777
self.init(v, .fake)
7878
}
79+
80+
public func map<U>(_ fn: (T) throws -> U) rethrows -> Located<U> {
81+
Located<U>(try fn(value), location)
82+
}
7983
}
8084
}
8185
extension AST {

Tests/RegexTests/LexTests.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@ extension RegexTests {
133133
_ = try $0.lexGroupStart()
134134
}
135135

136+
diagnose(#"\Qab"#, expecting: .expected("\\E")) { _ = try $0.lexQuote() }
137+
diagnose(#"\Qab\"#, expecting: .expected("\\E")) { _ = try $0.lexQuote() }
138+
diagnose(#""ab"#, expecting: .expected("\""), .experimental) { _ = try $0.lexQuote() }
139+
diagnose(#""ab\""#, expecting: .expected("\""), .experimental) { _ = try $0.lexQuote() }
140+
diagnose(#""ab\"#, expecting: .unexpectedEndOfInput, .experimental) { _ = try $0.lexQuote() }
141+
136142
// TODO: want to dummy print out source ranges, etc, test that.
137143
}
138144

Tests/RegexTests/MatchTests.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,10 @@ extension RegexTests {
181181
#"a{1,2}?"#, input: "123aaaxyz", match: "a")
182182
matchTest(
183183
#"a{1,2}?x"#, input: "123aaaxyz", match: "aax")
184+
matchTest(
185+
#"xa{0}y"#, input: "123aaaxyz", match: "xy")
186+
matchTest(
187+
#"xa{0,0}y"#, input: "123aaaxyz", match: "xy")
184188

185189
matchTest("a.*", input: "dcba", match: "a")
186190

Tests/RegexTests/ParseTests.swift

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,14 @@ extension RegexTests {
414414
#"a\Q \Q \\.\Eb"#,
415415
concat("a", quote(#" \Q \\."#), "b"))
416416

417+
parseTest(#"a" ."b"#, concat("a", quote(" ."), "b"),
418+
syntax: .experimental)
419+
parseTest(#"a" .""b""#, concat("a", quote(" ."), quote("b")),
420+
syntax: .experimental)
421+
parseTest(#"a" .\"\"b""#, concat("a", quote(" .\"\"b")),
422+
syntax: .experimental)
423+
parseTest(#""\"""#, quote("\""), syntax: .experimental)
424+
417425
// MARK: Comments
418426

419427
parseTest(
@@ -440,6 +448,34 @@ extension RegexTests {
440448
parseTest(
441449
#"a{1,2}?"#,
442450
quantRange(.reluctant, 1...2, "a"))
451+
parseTest(
452+
#"a{0}"#,
453+
exactly(.eager, 0, "a"))
454+
parseTest(
455+
#"a{0,0}"#,
456+
quantRange(.eager, 0...0, "a"))
457+
458+
// Make sure ranges get treated as literal if invalid.
459+
parseTest("{", "{")
460+
parseTest("{,", concat("{", ","))
461+
parseTest("{}", concat("{", "}"))
462+
parseTest("{,}", concat("{", ",", "}"))
463+
parseTest("{,6", concat("{", ",", "6"))
464+
parseTest("{6", concat("{", "6"))
465+
parseTest("{6,", concat("{", "6", ","))
466+
parseTest("{+", oneOrMore(.eager, "{"))
467+
parseTest("{6,+", concat("{", "6", oneOrMore(.eager, ",")))
468+
parseTest("x{", concat("x", "{"))
469+
parseTest("x{}", concat("x", "{", "}"))
470+
parseTest("x{,}", concat("x", "{", ",", "}"))
471+
parseTest("x{,6", concat("x", "{", ",", "6"))
472+
parseTest("x{6", concat("x", "{", "6"))
473+
parseTest("x{6,", concat("x", "{", "6", ","))
474+
parseTest("x{+", concat("x", oneOrMore(.eager, "{")))
475+
parseTest("x{6,+", concat("x", "{", "6", oneOrMore(.eager, ",")))
476+
477+
// TODO: We should emit a diagnostic for this.
478+
parseTest("x{3, 5}", concat("x", "{", "3", ",", " ", "5", "}"))
443479

444480
// MARK: Groups
445481

0 commit comments

Comments
 (0)