Skip to content

Commit 6e8d176

Browse files
committed
Move SyntaxOptions from Source to ParsingContext
Also plumb `context` parameters through more lexing methods. This is necessary because `(?x)` will be able to change the syntax options while parsing is in progress.
1 parent e7cf183 commit 6e8d176

File tree

4 files changed

+68
-49
lines changed

4 files changed

+68
-49
lines changed

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ extension Source {
332332
/// Quantifier -> ('*' | '+' | '?' | '{' Range '}') QuantKind?
333333
/// QuantKind -> '?' | '+'
334334
///
335-
mutating func lexQuantifier() throws -> (
335+
mutating func lexQuantifier(context: ParsingContext) throws -> (
336336
Located<Quant.Amount>, Located<Quant.Kind>
337337
)? {
338338
let amt: Located<Quant.Amount>? = try recordLoc { src in
@@ -341,7 +341,9 @@ extension Source {
341341
if src.tryEat("?") { return .zeroOrOne }
342342

343343
return try src.tryEating { src in
344-
guard src.tryEat("{"), let range = try src.lexRange(), src.tryEat("}")
344+
guard src.tryEat("{"),
345+
let range = try src.lexRange(context: context),
346+
src.tryEat("}")
345347
else { return nil }
346348
return range.value
347349
}
@@ -363,7 +365,7 @@ extension Source {
363365
/// | ExpRange
364366
/// ExpRange -> '..<' <Int> | '...' <Int>
365367
/// | <Int> '..<' <Int> | <Int> '...' <Int>?
366-
mutating func lexRange() throws -> Located<Quant.Amount>? {
368+
mutating func lexRange(context: ParsingContext) throws -> Located<Quant.Amount>? {
367369
try recordLoc { src in
368370
try src.tryEating { src in
369371
let lowerOpt = try src.lexNumber()
@@ -375,7 +377,7 @@ extension Source {
375377
let closedRange: Bool?
376378
if src.tryEat(",") {
377379
closedRange = true
378-
} else if src.experimentalRanges && src.tryEat(".") {
380+
} else if context.experimentalRanges && src.tryEat(".") {
379381
try src.expect(".")
380382
if src.tryEat(".") {
381383
closedRange = true
@@ -477,12 +479,12 @@ extension Source {
477479
///
478480
/// TODO: Need to support some escapes
479481
///
480-
mutating func lexQuote() throws -> AST.Quote? {
482+
mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? {
481483
let str = try recordLoc { src -> String? in
482484
if src.tryEat(sequence: #"\Q"#) {
483485
return try src.expectQuoted(endingWith: #"\E"#).value
484486
}
485-
if src.experimentalQuotes, src.tryEat("\"") {
487+
if context.experimentalQuotes, src.tryEat("\"") {
486488
return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
487489
}
488490
return nil
@@ -501,12 +503,12 @@ extension Source {
501503
///
502504
/// TODO: Swift-style nested comments, line-ending comments, etc
503505
///
504-
mutating func lexComment() throws -> AST.Trivia? {
506+
mutating func lexComment(context: ParsingContext) throws -> AST.Trivia? {
505507
let trivia: Located<String>? = try recordLoc { src in
506508
if src.tryEat(sequence: "(?#") {
507509
return try src.expectQuoted(endingWith: ")").value
508510
}
509-
if src.experimentalComments, src.tryEat(sequence: "/*") {
511+
if context.experimentalComments, src.tryEat(sequence: "/*") {
510512
return try src.expectQuoted(endingWith: "*/").value
511513
}
512514
return nil
@@ -517,16 +519,34 @@ extension Source {
517519

518520
/// Try to consume non-semantic whitespace as trivia
519521
///
522+
/// Whitespace -> ' '+
523+
///
520524
/// Does nothing unless `SyntaxOptions.nonSemanticWhitespace` is set
521-
mutating func lexNonSemanticWhitespace() throws -> AST.Trivia? {
522-
guard syntax.ignoreWhitespace else { return nil }
525+
mutating func lexNonSemanticWhitespace(
526+
context: ParsingContext
527+
) throws -> AST.Trivia? {
528+
guard context.ignoreWhitespace else { return nil }
523529
let trivia: Located<String>? = recordLoc { src in
524530
src.tryEatPrefix { $0 == " " }?.string
525531
}
526532
guard let trivia = trivia else { return nil }
527533
return AST.Trivia(trivia)
528534
}
529535

536+
/// Try to consume trivia.
537+
///
538+
/// Trivia -> Comment | Whitespace
539+
///
540+
mutating func lexTrivia(context: ParsingContext) throws -> AST.Trivia? {
541+
if let comment = try lexComment(context: context) {
542+
return comment
543+
}
544+
if let whitespace = try lexNonSemanticWhitespace(context: context) {
545+
return whitespace
546+
}
547+
return nil
548+
}
549+
530550
/// Try to lex a matching option.
531551
///
532552
/// MatchingOption -> 'i' | 'J' | 'm' | 'n' | 's' | 'U' | 'x' | 'xx' | 'w'
@@ -752,6 +772,7 @@ extension Source {
752772
/// comments, like quotes, cannot be quantified.
753773
///
754774
mutating func lexGroupStart(
775+
context: ParsingContext
755776
) throws -> Located<AST.Group.Kind>? {
756777
try recordLoc { src in
757778
try src.tryEating { src in
@@ -816,7 +837,7 @@ extension Source {
816837
}
817838

818839
// (_:)
819-
if src.experimentalCaptures && src.tryEat(sequence: "_:") {
840+
if context.experimentalCaptures && src.tryEat(sequence: "_:") {
820841
return .nonCapture
821842
}
822843
// TODO: (name:)
@@ -951,9 +972,12 @@ extension Source {
951972
///
952973
/// GroupConditionalStart -> '(?' GroupStart
953974
///
954-
mutating func lexGroupConditionalStart() throws -> Located<AST.Group.Kind>? {
975+
mutating func lexGroupConditionalStart(
976+
context: ParsingContext
977+
) throws -> Located<AST.Group.Kind>? {
955978
try tryEating { src in
956-
guard src.tryEat(sequence: "(?"), let group = try src.lexGroupStart()
979+
guard src.tryEat(sequence: "(?"),
980+
let group = try src.lexGroupStart(context: context)
957981
else { return nil }
958982

959983
// Implicitly scoped groups are not supported here.

Sources/_MatchingEngine/Regex/Parse/Parse.swift

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,17 @@ Lexical analysis provides the following:
5353

5454
struct ParsingContext {
5555
/// Whether we're currently parsing in a custom character class.
56-
var isInCustomCharacterClass = false
56+
fileprivate(set) var isInCustomCharacterClass = false
5757

5858
/// Tracks the number of group openings we've seen, to disambiguate the '\n'
5959
/// syntax as a backreference or an octal sequence.
60-
fileprivate var priorGroupCount = 0
60+
private var priorGroupCount = 0
6161

6262
/// A set of used group names.
63-
fileprivate var usedGroupNames = Set<String>()
63+
private var usedGroupNames = Set<String>()
64+
65+
/// The syntax options currently set.
66+
fileprivate(set) var syntax: SyntaxOptions
6467

6568
fileprivate mutating func recordGroup(_ g: AST.Group.Kind) {
6669
// TODO: Needs to track group number resets (?|...).
@@ -70,8 +73,9 @@ struct ParsingContext {
7073
}
7174
}
7275

73-
private init() {}
74-
static var none: ParsingContext { .init() }
76+
init(syntax: SyntaxOptions) {
77+
self.syntax = syntax
78+
}
7579

7680
/// Check whether a given reference refers to a prior group.
7781
func isPriorGroupRef(_ ref: AST.Reference.Kind) -> Bool {
@@ -88,13 +92,22 @@ struct ParsingContext {
8892

8993
private struct Parser {
9094
var source: Source
91-
var context: ParsingContext = .none
95+
var context: ParsingContext
9296

93-
init(_ source: Source) {
97+
init(_ source: Source, syntax: SyntaxOptions) {
9498
self.source = source
99+
self.context = ParsingContext(syntax: syntax)
95100
}
96101
}
97102

103+
extension ParsingContext {
104+
var experimentalRanges: Bool { syntax.contains(.experimentalRanges) }
105+
var experimentalCaptures: Bool { syntax.contains(.experimentalCaptures) }
106+
var experimentalQuotes: Bool { syntax.contains(.experimentalQuotes) }
107+
var experimentalComments: Bool { syntax.contains(.experimentalComments) }
108+
var ignoreWhitespace: Bool { syntax.contains(.nonSemanticWhitespace) }
109+
}
110+
98111
// Diagnostics
99112
extension Parser {
100113
mutating func report(
@@ -182,24 +195,20 @@ extension Parser {
182195
// TODO: refactor loop body into function
183196
let _start = source.currentPosition
184197

185-
// Trivia -> `lexComment` | `lexNonSemanticWhitespace`
186-
if let triv = try source.lexComment() {
187-
result.append(.trivia(triv))
188-
continue
189-
}
190-
if let triv = try source.lexNonSemanticWhitespace() {
198+
// Trivia -> `lexTrivia`
199+
if let triv = try source.lexTrivia(context: context) {
191200
result.append(.trivia(triv))
192201
continue
193202
}
194203

195204
// Quote -> `lexQuote`
196-
if let quote = try source.lexQuote() {
205+
if let quote = try source.lexQuote(context: context) {
197206
result.append(.quote(quote))
198207
continue
199208
}
200209
// Quantification -> QuantOperand Quantifier?
201210
if let operand = try parseQuantifierOperand() {
202-
if let (amt, kind) = try source.lexQuantifier() {
211+
if let (amt, kind) = try source.lexQuantifier(context: context) {
203212
let location = loc(_start)
204213
guard operand.isQuantifiable else {
205214
throw Source.LocatedError(ParseError.notQuantifiable, location)
@@ -333,7 +342,7 @@ extension Parser {
333342
if let cond = try source.lexKnownConditionalStart(context: context) {
334343
return try parseConditionalBranches(start: _start, cond)
335344
}
336-
if let kind = try source.lexGroupConditionalStart() {
345+
if let kind = try source.lexGroupConditionalStart(context: context) {
337346
let groupStart = kind.location.start
338347
let group = try parseGroupBody(start: groupStart, kind)
339348
return try parseConditionalBranches(
@@ -346,7 +355,7 @@ extension Parser {
346355
}
347356

348357
// Check if we have the start of a group '('.
349-
if let kind = try source.lexGroupStart() {
358+
if let kind = try source.lexGroupStart(context: context) {
350359
return .group(try parseGroupBody(start: _start, kind))
351360
}
352361

@@ -435,7 +444,7 @@ extension Parser {
435444
}
436445

437446
// Quoted sequence.
438-
if let quote = try source.lexQuote() {
447+
if let quote = try source.lexQuote(context: context) {
439448
members.append(.quote(quote))
440449
continue
441450
}
@@ -463,8 +472,8 @@ public func parse<S: StringProtocol>(
463472
_ regex: S, _ syntax: SyntaxOptions
464473
) throws -> AST where S.SubSequence == Substring
465474
{
466-
let source = Source(String(regex), syntax)
467-
var parser = Parser(source)
475+
let source = Source(String(regex))
476+
var parser = Parser(source, syntax: syntax)
468477
return try parser.parse()
469478
}
470479

Sources/_MatchingEngine/Regex/Parse/Source.swift

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,13 @@
1717
public struct Source {
1818
var input: Input
1919
var bounds: Range<Input.Index>
20-
var syntax: SyntaxOptions
2120

2221
// TODO: source should hold outer collection and range, at least
2322
// for error reporting if nothing else
2423

25-
init(_ str: Input, _ syntax: SyntaxOptions) {
24+
init(_ str: Input) {
2625
self.input = str
2726
self.bounds = str.startIndex ..< str.endIndex
28-
self.syntax = syntax
2927
}
3028

3129
subscript(_ range: Range<Input.Index>) -> Input.SubSequence { input[range] }
@@ -43,18 +41,6 @@ extension Source {
4341
public typealias Position = String.Index
4442
}
4543

46-
// MARK: - Syntax
47-
48-
extension Source {
49-
var experimentalRanges: Bool { syntax.contains(.experimentalRanges) }
50-
var experimentalCaptures: Bool { syntax.contains(.experimentalCaptures) }
51-
var experimentalQuotes: Bool { syntax.contains(.experimentalQuotes) }
52-
var experimentalComments: Bool { syntax.contains(.experimentalComments) }
53-
var nonSemanticWhitespace: Bool {
54-
syntax.contains(.nonSemanticWhitespace)
55-
}
56-
}
57-
5844
// MARK: - Source as a peekable consumer
5945

6046
extension Source {

Tests/RegexTests/LexTests.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ func diagnose(
2020
_ syntax: SyntaxOptions = .traditional,
2121
_ f: (inout Source) throws -> ()
2222
) {
23-
var src = Source(input, syntax)
23+
var src = Source(input)
2424
do {
2525
try f(&src)
2626
XCTFail("""

0 commit comments

Comments
 (0)