From 73980e1be049353aa1217dc6cd155a9813b2e0a5 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Mon, 31 Jan 2022 11:29:39 +0000 Subject: [PATCH 1/3] Move SyntaxOptions from Source to ParserContext And plumb through `context` parameters for more lexing methods. This is necessary as `(?x)` will be able to affect the syntax options set during parsing. --- .../Regex/Parse/LexicalAnalysis.swift | 50 ++++++++++++++----- .../_MatchingEngine/Regex/Parse/Parse.swift | 49 ++++++++++-------- .../_MatchingEngine/Regex/Parse/Source.swift | 16 +----- Tests/RegexTests/LexTests.swift | 2 +- 4 files changed, 68 insertions(+), 49 deletions(-) diff --git a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift index 18b536005..afefbf5d9 100644 --- a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift +++ b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift @@ -332,7 +332,7 @@ extension Source { /// Quantifier -> ('*' | '+' | '?' | '{' Range '}') QuantKind? /// QuantKind -> '?' | '+' /// - mutating func lexQuantifier() throws -> ( + mutating func lexQuantifier(context: ParsingContext) throws -> ( Located, Located )? { let amt: Located? = try recordLoc { src in @@ -341,7 +341,9 @@ extension Source { if src.tryEat("?") { return .zeroOrOne } return try src.tryEating { src in - guard src.tryEat("{"), let range = try src.lexRange(), src.tryEat("}") + guard src.tryEat("{"), + let range = try src.lexRange(context: context), + src.tryEat("}") else { return nil } return range.value } @@ -363,7 +365,7 @@ extension Source { /// | ExpRange /// ExpRange -> '..<' | '...' /// | '..<' | '...' ? - mutating func lexRange() throws -> Located? { + mutating func lexRange(context: ParsingContext) throws -> Located? { try recordLoc { src in try src.tryEating { src in let lowerOpt = try src.lexNumber() @@ -375,7 +377,7 @@ extension Source { let closedRange: Bool? if src.tryEat(",") { closedRange = true - } else if src.experimentalRanges && src.tryEat(".") { + } else if context.experimentalRanges && src.tryEat(".") { try src.expect(".") if src.tryEat(".") { closedRange = true @@ -477,12 +479,12 @@ extension Source { /// /// TODO: Need to support some escapes /// - mutating func lexQuote() throws -> AST.Quote? { + mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? { let str = try recordLoc { src -> String? in if src.tryEat(sequence: #"\Q"#) { return try src.expectQuoted(endingWith: #"\E"#).value } - if src.experimentalQuotes, src.tryEat("\"") { + if context.experimentalQuotes, src.tryEat("\"") { return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value } return nil @@ -501,12 +503,12 @@ extension Source { /// /// TODO: Swift-style nested comments, line-ending comments, etc /// - mutating func lexComment() throws -> AST.Trivia? { + mutating func lexComment(context: ParsingContext) throws -> AST.Trivia? { let trivia: Located? = try recordLoc { src in if src.tryEat(sequence: "(?#") { return try src.expectQuoted(endingWith: ")").value } - if src.experimentalComments, src.tryEat(sequence: "/*") { + if context.experimentalComments, src.tryEat(sequence: "/*") { return try src.expectQuoted(endingWith: "*/").value } return nil @@ -517,9 +519,13 @@ extension Source { /// Try to consume non-semantic whitespace as trivia /// + /// Whitespace -> ' '+ + /// /// Does nothing unless `SyntaxOptions.nonSemanticWhitespace` is set - mutating func lexNonSemanticWhitespace() throws -> AST.Trivia? 
{ - guard syntax.ignoreWhitespace else { return nil } + mutating func lexNonSemanticWhitespace( + context: ParsingContext + ) throws -> AST.Trivia? { + guard context.ignoreWhitespace else { return nil } let trivia: Located? = recordLoc { src in src.tryEatPrefix { $0 == " " }?.string } @@ -527,6 +533,20 @@ extension Source { return AST.Trivia(trivia) } + /// Try to consume trivia. + /// + /// Trivia -> Comment | Whitespace + /// + mutating func lexTrivia(context: ParsingContext) throws -> AST.Trivia? { + if let comment = try lexComment(context: context) { + return comment + } + if let whitespace = try lexNonSemanticWhitespace(context: context) { + return whitespace + } + return nil + } + /// Try to lex a matching option. /// /// MatchingOption -> 'i' | 'J' | 'm' | 'n' | 's' | 'U' | 'x' | 'xx' | 'w' @@ -761,6 +781,7 @@ extension Source { /// comments, like quotes, cannot be quantified. /// mutating func lexGroupStart( + context: ParsingContext ) throws -> Located? { try recordLoc { src in try src.tryEating { src in @@ -825,7 +846,7 @@ extension Source { } // (_:) - if src.experimentalCaptures && src.tryEat(sequence: "_:") { + if context.experimentalCaptures && src.tryEat(sequence: "_:") { return .nonCapture } // TODO: (name:) @@ -960,9 +981,12 @@ extension Source { /// /// GroupConditionalStart -> '(?' GroupStart /// - mutating func lexGroupConditionalStart() throws -> Located? { + mutating func lexGroupConditionalStart( + context: ParsingContext + ) throws -> Located? { try tryEating { src in - guard src.tryEat(sequence: "(?"), let group = try src.lexGroupStart() + guard src.tryEat(sequence: "(?"), + let group = try src.lexGroupStart(context: context) else { return nil } // Implicitly scoped groups are not supported here. diff --git a/Sources/_MatchingEngine/Regex/Parse/Parse.swift b/Sources/_MatchingEngine/Regex/Parse/Parse.swift index 08c8cf77e..4d6221e24 100644 --- a/Sources/_MatchingEngine/Regex/Parse/Parse.swift +++ b/Sources/_MatchingEngine/Regex/Parse/Parse.swift @@ -53,14 +53,17 @@ Lexical analysis provides the following: struct ParsingContext { /// Whether we're currently parsing in a custom character class. - var isInCustomCharacterClass = false + fileprivate(set) var isInCustomCharacterClass = false /// Tracks the number of group openings we've seen, to disambiguate the '\n' /// syntax as a backreference or an octal sequence. - fileprivate var priorGroupCount = 0 + private var priorGroupCount = 0 /// A set of used group names. - fileprivate var usedGroupNames = Set() + private var usedGroupNames = Set() + + /// The syntax options currently set. + fileprivate(set) var syntax: SyntaxOptions fileprivate mutating func recordGroup(_ g: AST.Group.Kind) { // TODO: Needs to track group number resets (?|...). @@ -70,8 +73,9 @@ struct ParsingContext { } } - private init() {} - static var none: ParsingContext { .init() } + init(syntax: SyntaxOptions) { + self.syntax = syntax + } /// Check whether a given reference refers to a prior group. 
func isPriorGroupRef(_ ref: AST.Reference.Kind) -> Bool { @@ -88,13 +92,22 @@ struct ParsingContext { private struct Parser { var source: Source - var context: ParsingContext = .none + var context: ParsingContext - init(_ source: Source) { + init(_ source: Source, syntax: SyntaxOptions) { self.source = source + self.context = ParsingContext(syntax: syntax) } } +extension ParsingContext { + var experimentalRanges: Bool { syntax.contains(.experimentalRanges) } + var experimentalCaptures: Bool { syntax.contains(.experimentalCaptures) } + var experimentalQuotes: Bool { syntax.contains(.experimentalQuotes) } + var experimentalComments: Bool { syntax.contains(.experimentalComments) } + var ignoreWhitespace: Bool { syntax.contains(.nonSemanticWhitespace) } +} + // Diagnostics extension Parser { mutating func report( @@ -182,24 +195,20 @@ extension Parser { // TODO: refactor loop body into function let _start = source.currentPosition - // Trivia -> `lexComment` | `lexNonSemanticWhitespace` - if let triv = try source.lexComment() { - result.append(.trivia(triv)) - continue - } - if let triv = try source.lexNonSemanticWhitespace() { + // Trivia -> `lexTrivia` + if let triv = try source.lexTrivia(context: context) { result.append(.trivia(triv)) continue } // Quote -> `lexQuote` - if let quote = try source.lexQuote() { + if let quote = try source.lexQuote(context: context) { result.append(.quote(quote)) continue } // Quantification -> QuantOperand Quantifier? if let operand = try parseQuantifierOperand() { - if let (amt, kind) = try source.lexQuantifier() { + if let (amt, kind) = try source.lexQuantifier(context: context) { let location = loc(_start) guard operand.isQuantifiable else { throw Source.LocatedError(ParseError.notQuantifiable, location) @@ -333,7 +342,7 @@ extension Parser { if let cond = try source.lexKnownConditionalStart(context: context) { return try parseConditionalBranches(start: _start, cond) } - if let kind = try source.lexGroupConditionalStart() { + if let kind = try source.lexGroupConditionalStart(context: context) { let groupStart = kind.location.start let group = try parseGroupBody(start: groupStart, kind) return try parseConditionalBranches( @@ -346,7 +355,7 @@ extension Parser { } // Check if we have the start of a group '('. - if let kind = try source.lexGroupStart() { + if let kind = try source.lexGroupStart(context: context) { return .group(try parseGroupBody(start: _start, kind)) } @@ -435,7 +444,7 @@ extension Parser { } // Quoted sequence. 
- if let quote = try source.lexQuote() { + if let quote = try source.lexQuote(context: context) { members.append(.quote(quote)) continue } @@ -463,8 +472,8 @@ public func parse( _ regex: S, _ syntax: SyntaxOptions ) throws -> AST where S.SubSequence == Substring { - let source = Source(String(regex), syntax) - var parser = Parser(source) + let source = Source(String(regex)) + var parser = Parser(source, syntax: syntax) return try parser.parse() } diff --git a/Sources/_MatchingEngine/Regex/Parse/Source.swift b/Sources/_MatchingEngine/Regex/Parse/Source.swift index 260ee3b63..11bd8152f 100644 --- a/Sources/_MatchingEngine/Regex/Parse/Source.swift +++ b/Sources/_MatchingEngine/Regex/Parse/Source.swift @@ -17,15 +17,13 @@ public struct Source { var input: Input var bounds: Range - var syntax: SyntaxOptions // TODO: source should hold outer collection and range, at least // for error reporting if nothing else - init(_ str: Input, _ syntax: SyntaxOptions) { + init(_ str: Input) { self.input = str self.bounds = str.startIndex ..< str.endIndex - self.syntax = syntax } subscript(_ range: Range) -> Input.SubSequence { input[range] } @@ -43,18 +41,6 @@ extension Source { public typealias Position = String.Index } -// MARK: - Syntax - -extension Source { - var experimentalRanges: Bool { syntax.contains(.experimentalRanges) } - var experimentalCaptures: Bool { syntax.contains(.experimentalCaptures) } - var experimentalQuotes: Bool { syntax.contains(.experimentalQuotes) } - var experimentalComments: Bool { syntax.contains(.experimentalComments) } - var nonSemanticWhitespace: Bool { - syntax.contains(.nonSemanticWhitespace) - } -} - // MARK: - Source as a peekable consumer extension Source { diff --git a/Tests/RegexTests/LexTests.swift b/Tests/RegexTests/LexTests.swift index e908a11da..ce82f8b8f 100644 --- a/Tests/RegexTests/LexTests.swift +++ b/Tests/RegexTests/LexTests.swift @@ -22,7 +22,7 @@ func diagnose( file: StaticString = #file, line: UInt = #line ) { - var src = Source(input, syntax) + var src = Source(input) do { try f(&src) XCTFail(""" From 7d139cd360f5f8602defe8ebd27389e42954de6b Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Mon, 31 Jan 2022 11:29:40 +0000 Subject: [PATCH 2/3] Parse extended syntax If the `(?x)` or `(?xx)` options are active in a given scope, treat whitespace as non-semantic, including in custom character classes. Additionally parse end-of-line comments `# comment`. --- .../Regex/AST/CustomCharClass.swift | 19 +++ .../Regex/AST/MatchingOptions.swift | 19 ++- .../Regex/AST/Quantification.swift | 9 +- .../Regex/Parse/LexicalAnalysis.swift | 57 ++++++- .../_MatchingEngine/Regex/Parse/Parse.swift | 37 +++- .../Regex/Parse/SyntaxOptions.swift | 23 ++- .../Regex/Printing/DumpAST.swift | 10 +- .../Regex/Printing/PrintAsCanonical.swift | 13 +- .../Regex/Printing/PrintAsPattern.swift | 3 + .../_StringProcessing/CharacterClass.swift | 4 + .../_StringProcessing/ConsumerInterface.swift | 30 +++- Tests/RegexTests/ParseTests.swift | 161 ++++++++++++++++++ 12 files changed, 359 insertions(+), 26 deletions(-) diff --git a/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift b/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift index 3044d45bd..614048f0a 100644 --- a/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift +++ b/Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift @@ -42,6 +42,9 @@ extension AST { /// the contents should be interpreted literally. case quote(Quote) + /// Trivia such as non-semantic whitespace. 
+ case trivia(Trivia) + /// A binary operator applied to sets of members `abc&&def` case setOperation([Member], Located, [Member]) } @@ -81,6 +84,7 @@ extension CustomCC.Member { case .range(let r): return r case .atom(let a): return a case .quote(let q): return q + case .trivia(let t): return t case .setOperation(let lhs, let op, let rhs): return (lhs, op, rhs) } } @@ -88,4 +92,19 @@ extension CustomCC.Member { func `as`(_ t: T.Type = T.self) -> T? { _associatedValue as? T } + + public var isTrivia: Bool { + if case .trivia = self { return true } + return false + } +} + +extension AST.CustomCharacterClass { + /// Strip trivia from the character class members. This does not recurse into + /// nested custom character classes. + public var strippingTriviaShallow: Self { + var copy = self + copy.members = copy.members.filter { !$0.isTrivia } + return copy + } } diff --git a/Sources/_MatchingEngine/Regex/AST/MatchingOptions.swift b/Sources/_MatchingEngine/Regex/AST/MatchingOptions.swift index cd1c08e0f..f2b86d032 100644 --- a/Sources/_MatchingEngine/Regex/AST/MatchingOptions.swift +++ b/Sources/_MatchingEngine/Regex/AST/MatchingOptions.swift @@ -50,6 +50,16 @@ extension AST { self.location = location } + /// If this is either the regular or extra extended syntax option. + public var isAnyExtended: Bool { + switch kind { + case .extended, .extraExtended: + return true + default: + return false + } + } + public var isTextSegmentMode: Bool { switch kind { case .textSegmentGraphemeMode, .textSegmentWordMode: @@ -93,6 +103,10 @@ extension AST { self.minusLoc = minusLoc self.removing = removing } + + /// Whether this set of matching options first resets the options before + /// adding onto them. + public var resetsCurrentOptions: Bool { caretLoc != nil } } } @@ -102,7 +116,10 @@ extension AST.MatchingOption: _ASTPrintable { extension AST.MatchingOptionSequence: _ASTPrintable { public var _dumpBase: String { - "adding: \(adding), removing: \(removing), hasCaret: \(caretLoc != nil)" + """ + adding: \(adding), removing: \(removing), \ + resetsCurrentOptions: \(resetsCurrentOptions) + """ } } diff --git a/Sources/_MatchingEngine/Regex/AST/Quantification.swift b/Sources/_MatchingEngine/Regex/AST/Quantification.swift index 941794935..f2189cb38 100644 --- a/Sources/_MatchingEngine/Regex/AST/Quantification.swift +++ b/Sources/_MatchingEngine/Regex/AST/Quantification.swift @@ -17,16 +17,23 @@ extension AST { public let child: AST.Node public let location: SourceLocation + /// Any trivia intermixed between the operand and the quantifier, as well + /// as between the quantifier characters themselves. This can occur in + /// extended syntax mode where PCRE permits e.g `x * +`. + public let trivia: [AST.Trivia] + public init( _ amount: Located, _ kind: Located, _ child: AST.Node, - _ r: SourceLocation + _ r: SourceLocation, + trivia: [AST.Trivia] ) { self.amount = amount self.kind = kind self.child = child self.location = r + self.trivia = trivia } @frozen diff --git a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift index afefbf5d9..9595dc420 100644 --- a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift +++ b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift @@ -332,9 +332,15 @@ extension Source { /// Quantifier -> ('*' | '+' | '?' | '{' Range '}') QuantKind? /// QuantKind -> '?' | '+' /// - mutating func lexQuantifier(context: ParsingContext) throws -> ( - Located, Located - )? 
{ + mutating func lexQuantifier( + context: ParsingContext + ) throws -> (Located, Located, [AST.Trivia])? { + var trivia: [AST.Trivia] = [] + + if let t = try lexNonSemanticWhitespace(context: context) { + trivia.append(t) + } + let amt: Located? = try recordLoc { src in if src.tryEat("*") { return .zeroOrMore } if src.tryEat("+") { return .oneOrMore } @@ -350,13 +356,18 @@ extension Source { } guard let amt = amt else { return nil } + // PCRE allows non-semantic whitespace here in extended syntax mode. + if let t = try lexNonSemanticWhitespace(context: context) { + trivia.append(t) + } + let kind: Located = recordLoc { src in if src.tryEat("?") { return .reluctant } if src.tryEat("+") { return .possessive } return .eager } - return (amt, kind) + return (amt, kind, trivia) } /// Try to consume a range, returning `nil` if unsuccessful. @@ -501,6 +512,10 @@ extension Source { /// /// ExpComment -> '/*' (!'*/' .)* '*/' /// + /// With `SyntaxOptions.endOfLineComments` + /// + /// EndOfLineComment -> '#' .* + /// /// TODO: Swift-style nested comments, line-ending comments, etc /// mutating func lexComment(context: ParsingContext) throws -> AST.Trivia? { @@ -511,6 +526,13 @@ extension Source { if context.experimentalComments, src.tryEat(sequence: "/*") { return try src.expectQuoted(endingWith: "*/").value } + if context.endOfLineComments, src.tryEat("#") { + // TODO: If we ever support multi-line regex literals, this will need + // to be updated to stop at a newline. Note though that PCRE specifies + // that the newline it matches against can be controlled by the global + // matching options e.g `(*CR)`, `(*ANY)`, ... + return src.lexUntil(\.isEmpty).value + } return nil } guard let trivia = trivia else { return nil } @@ -519,15 +541,36 @@ extension Source { /// Try to consume non-semantic whitespace as trivia /// - /// Whitespace -> ' '+ + /// Whitespace -> WhitespaceChar+ /// /// Does nothing unless `SyntaxOptions.nonSemanticWhitespace` is set mutating func lexNonSemanticWhitespace( context: ParsingContext ) throws -> AST.Trivia? { guard context.ignoreWhitespace else { return nil } + + func isWhitespace(_ c: Character) -> Bool { + // This is a list of characters that PCRE treats as whitespace when + // compiled with Unicode support. It is a subset of the characters with + // the `.isWhitespace` property. ICU appears to also follow this list. + // Oniguruma and .NET follow a subset of this list. + // + // FIXME: PCRE only treats space and tab characters as whitespace when + // inside a custom character class (and only treats whitespace as + // non-semantic there for the extra-extended `(?xx)` mode). If we get a + // strict-PCRE mode, we'll need to add a case for that. + switch c { + case " ", "\u{9}"..."\u{D}", // space, \t, \n, vertical tab, \f, \r + "\u{85}", "\u{200E}", // next line, left-to-right mark + "\u{200F}", "\u{2028}", // right-to-left-mark, line separator + "\u{2029}": // paragraph separator + return true + default: + return false + } + } let trivia: Located? = recordLoc { src in - src.tryEatPrefix { $0 == " " }?.string + src.tryEatPrefix(isWhitespace)?.string } guard let trivia = trivia else { return nil } return AST.Trivia(trivia) @@ -1631,7 +1674,7 @@ extension Source { var name: Located? if src.tryEat(":") { // TODO: PCRE allows escaped delimiters or '\Q...\E' sequences in the - // name under PCRE2_ALT_VERBNAMES. + // name under PCRE2_ALT_VERBNAMES. It also allows whitespace under (?x). 
name = try src.expectQuoted(endingWith: ")", eatEnding: false) } try src.expect(")") diff --git a/Sources/_MatchingEngine/Regex/Parse/Parse.swift b/Sources/_MatchingEngine/Regex/Parse/Parse.swift index 4d6221e24..84c703068 100644 --- a/Sources/_MatchingEngine/Regex/Parse/Parse.swift +++ b/Sources/_MatchingEngine/Regex/Parse/Parse.swift @@ -106,6 +106,7 @@ extension ParsingContext { var experimentalQuotes: Bool { syntax.contains(.experimentalQuotes) } var experimentalComments: Bool { syntax.contains(.experimentalComments) } var ignoreWhitespace: Bool { syntax.contains(.nonSemanticWhitespace) } + var endOfLineComments: Bool { syntax.contains(.endOfLineComments) } } // Diagnostics @@ -208,12 +209,14 @@ extension Parser { } // Quantification -> QuantOperand Quantifier? if let operand = try parseQuantifierOperand() { - if let (amt, kind) = try source.lexQuantifier(context: context) { + if let (amt, kind, trivia) = + try source.lexQuantifier(context: context) { let location = loc(_start) guard operand.isQuantifiable else { throw Source.LocatedError(ParseError.notQuantifiable, location) } - result.append(.quantification(.init(amt, kind, operand, location))) + result.append(.quantification( + .init(amt, kind, operand, location, trivia: trivia))) } else { result.append(operand) } @@ -270,6 +273,28 @@ extension Parser { ) throws -> AST.Group { context.recordGroup(kind.value) + // Check if we're introducing or removing extended syntax. + // TODO: PCRE differentiates between (?x) and (?xx) where only the latter + // handles non-semantic whitespace in a custom character class. Other + // engines such as Oniguruma, Java, and ICU do this under (?x). Therefore, + // treat (?x) and (?xx) as the same option here. If we ever get a strict + // PCRE mode, we will need to change this to handle that. + let currentSyntax = context.syntax + if case .changeMatchingOptions(let c, isIsolated: _) = kind.value { + if c.resetsCurrentOptions { + context.syntax.remove(.extendedSyntax) + } + if c.adding.contains(where: \.isAnyExtended) { + context.syntax.insert(.extendedSyntax) + } + if c.removing.contains(where: \.isAnyExtended) { + context.syntax.remove(.extendedSyntax) + } + } + defer { + context.syntax = currentSyntax + } + let child = try parseNode() // An implicit scoped group has already consumed its closing paren. if !kind.value.hasImplicitScope { @@ -449,6 +474,14 @@ extension Parser { continue } + // Lex non-semantic whitespace if we're allowed. + // TODO: ICU allows end-of-line comments in custom character classes, + // which we ought to support if we want to support multi-line regex. + if let trivia = try source.lexNonSemanticWhitespace(context: context) { + members.append(.trivia(trivia)) + continue + } + guard let atom = try source.lexAtom(context: context) else { break } // Range between atoms. diff --git a/Sources/_MatchingEngine/Regex/Parse/SyntaxOptions.swift b/Sources/_MatchingEngine/Regex/Parse/SyntaxOptions.swift index 457232da5..5135d8ec1 100644 --- a/Sources/_MatchingEngine/Regex/Parse/SyntaxOptions.swift +++ b/Sources/_MatchingEngine/Regex/Parse/SyntaxOptions.swift @@ -23,11 +23,19 @@ public struct SyntaxOptions: OptionSet { /// `'a \. b' == '/a\.b/'` public static var nonSemanticWhitespace: Self { Self(1 << 0) } + /// `abc # comment` + public static var endOfLineComments: Self { Self(1 << 1) } + + /// `(?x)` `(?xx)` + public static var extendedSyntax: Self { + [.endOfLineComments, .nonSemanticWhitespace] + } + /// `'a "." b' == '/a\Q.\Eb/'` /// /// NOTE: Currently, this means we have raw quotes. 
/// Better would be to have real Swift string delimiter parsing logic. - public static var experimentalQuotes: Self { Self(1 << 1) } + public static var experimentalQuotes: Self { Self(1 << 2) } /// `'a /* comment */ b' == '/a(?#. comment )b/'` /// @@ -35,7 +43,7 @@ public struct SyntaxOptions: OptionSet { /// Traditional comments can't have `)`, not even escaped in them either, we /// can. Traditional comments can have `*/` in them, we can't without /// escaping. We don't currently do escaping. - public static var experimentalComments: Self { Self(1 << 2) } + public static var experimentalComments: Self { Self(1 << 3) } /// ``` /// 'a{n...m}' == '/a{n,m}/' @@ -44,11 +52,11 @@ public struct SyntaxOptions: OptionSet { /// 'a{...m}' == '/a{,m}/' /// 'a{...*)` /// `(_: .*)` == `(?:.*)` - public static var experimentalCaptures: Self { Self(1 << 4) } + public static var experimentalCaptures: Self { Self(1 << 5) } /* @@ -59,10 +67,9 @@ public struct SyntaxOptions: OptionSet { public static var traditional: Self { Self(0) } - public static var experimental: Self { Self(~0) } - - public var ignoreWhitespace: Bool { - contains(.nonSemanticWhitespace) + public static var experimental: Self { + // Experimental syntax enables everything except end-of-line comments. + Self(~0).subtracting(.endOfLineComments) } // TODO: Probably want to model strict-PCRE etc. options too. diff --git a/Sources/_MatchingEngine/Regex/Printing/DumpAST.swift b/Sources/_MatchingEngine/Regex/Printing/DumpAST.swift index a130fb5a0..47142407a 100644 --- a/Sources/_MatchingEngine/Regex/Printing/DumpAST.swift +++ b/Sources/_MatchingEngine/Regex/Printing/DumpAST.swift @@ -52,6 +52,9 @@ extension _ASTPrintable { if $0.isTrivia { return nil } return $0._dump() }.joined(separator: ",") + if sub.isEmpty { + return "\(_dumpBase)" + } return "\(_dumpBase)(\(sub))" } } @@ -287,7 +290,11 @@ extension AST.Quantification: _ASTPrintable { extension AST.CustomCharacterClass: _ASTNode { public var _dumpBase: String { - "customCharacterClass(\(members))" + // Exclude trivia for now, as we don't want it to appear when performing + // comparisons of dumped output in tests. + // TODO: We should eventually have some way of filtering out trivia for + // tests, so that it can appear in regular dumps. + return "customCharacterClass(\(strippingTriviaShallow.members))" } } @@ -298,6 +305,7 @@ extension AST.CustomCharacterClass.Member: _ASTPrintable { case .atom(let a): return "\(a)" case .range(let r): return "\(r)" case .quote(let q): return "\(q)" + case .trivia(let t): return "\(t)" case .setOperation(let lhs, let op, let rhs): return "op \(lhs) \(op.value) \(rhs)" } diff --git a/Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift b/Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift index b6f0759b2..4888975f3 100644 --- a/Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift +++ b/Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift @@ -91,9 +91,7 @@ extension PrettyPrinter { output(q._canonicalBase) case let .trivia(t): - // TODO: We might want to output comments... 
- _ = t - output("") + output(t._canonicalBase) case let .atom(a): output(a._canonicalBase) @@ -135,6 +133,8 @@ extension PrettyPrinter { output(a._canonicalBase) case .quote(let q): output(q._canonicalBase) + case .trivia(let t): + output(t._canonicalBase) case .setOperation: output("/* TODO: set operation \(self) */") } @@ -315,3 +315,10 @@ extension AST.GlobalMatchingOption.Kind { extension AST.GlobalMatchingOption { var _canonicalBase: String { "(*\(kind._canonicalBase))"} } + +extension AST.Trivia { + var _canonicalBase: String { + // TODO: We might want to output comments... + "" + } +} diff --git a/Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift b/Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift index 8a6367af6..5f00986c0 100644 --- a/Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift +++ b/Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift @@ -180,6 +180,9 @@ extension PrettyPrinter { } case .quote(let q): print("// TODO: quote \(q.literal._quoted) in custom character classes (should we split it?)") + case .trivia(let t): + // TODO: We might want to output comments... + _ = t case .setOperation: print("// TODO: Set operation: \(member)") } diff --git a/Sources/_StringProcessing/CharacterClass.swift b/Sources/_StringProcessing/CharacterClass.swift index e1e7dbe9c..92b2a76b9 100644 --- a/Sources/_StringProcessing/CharacterClass.swift +++ b/Sources/_StringProcessing/CharacterClass.swift @@ -420,6 +420,10 @@ extension AST.CustomCharacterClass { // Decompose quoted literal into literal characters. result += q.literal.map { .character($0) } + case .trivia: + // Not semantically important. + break + case .setOperation(let lhs, let op, let rhs): // FIXME: CharacterClass wasn't designed for set operations with // multiple components in each operand, we should fix that. For now, diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 87a910279..a444368c3 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -26,10 +26,31 @@ struct Unsupported: Error, CustomStringConvertible { func unsupported( _ s: String, file: StaticString = #file, - line: UInt = #line + line: Int = #line ) -> Unsupported { return Unsupported( - message: s, file: String(describing: file), line: Int(line)) + message: s, file: String(describing: file), line: line) +} + +struct Unreachable: Error, CustomStringConvertible { + var message: String + var file: String + var line: Int + + var description: String { """ + Unreachable: '\(message)' + \(file):\(line) + """ + } +} + +func unreachable( + _ s: String, + file: StaticString = #file, + line: Int = #line +) -> Unreachable { + return Unreachable( + message: s, file: String(describing: file), line: line) } extension AST.Node { @@ -166,6 +187,9 @@ extension AST.CustomCharacterClass.Member { return nil } + case .trivia: + throw unreachable("Should have been stripped by caller") + case .setOperation(let lhs, let op, let rhs): // TODO: We should probably have a component type // instead of a members array... 
for now we reconstruct @@ -216,7 +240,7 @@ extension AST.CustomCharacterClass { _ opts: MatchingOptions ) throws -> Program.ConsumeFunction { // NOTE: Easy way to implement, obviously not performant - let consumers = try members.map { + let consumers = try strippingTriviaShallow.members.map { try $0.generateConsumer(opts) } return { input, bounds in diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 4c17bc5dc..1346835fb 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -1272,11 +1272,167 @@ extension RegexTests { parseTest("[(*CR)]", charClass("(", "*", "C", "R", ")")) + // MARK: Trivia + + parseTest("[(?#abc)]", charClass("(", "?", "#", "a", "b", "c", ")")) + parseTest("# abc", concat("#", " ", "a", "b", "c")) + + parseTest("(?x) # hello", changeMatchingOptions(matchingOptions( + adding: .extended), isIsolated: true, empty())) + parseTest("(?xx) # hello", changeMatchingOptions(matchingOptions( + adding: .extraExtended), isIsolated: true, empty())) + parseTest("(?x) \\# abc", changeMatchingOptions(matchingOptions( + adding: .extended), isIsolated: true, concat("#", "a", "b", "c"))) + parseTest("(?xx) \\ ", changeMatchingOptions(matchingOptions( + adding: .extraExtended), isIsolated: true, concat(" "))) + + // End of line comments aren't applicable in custom char classes. + // TODO: ICU supports this. + parseTest( + "(?x)[ # abc]", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + charClass("#", "a", "b", "c")) + ) + + parseTest( + "(?x)a b c[d e f]", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "b", "c", charClass("d", "e", "f"))) + ) + parseTest( + "(?xx)a b c[d e f]", changeMatchingOptions( + matchingOptions(adding: .extraExtended), isIsolated: true, + concat("a", "b", "c", charClass("d", "e", "f"))) + ) + parseTest( + "(?x)a b c(?-x)d e f", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "b", "c", + changeMatchingOptions(matchingOptions(removing: .extended), + isIsolated: true, concat("d", " ", "e", " ", "f")))) + ) + parseTest( + "(?x)a b c(?-xx)d e f", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "b", "c", + changeMatchingOptions(matchingOptions(removing: .extraExtended), + isIsolated: true, concat("d", " ", "e", " ", "f")))) + ) + parseTest( + "(?xx)a b c(?-x)d e f", changeMatchingOptions( + matchingOptions(adding: .extraExtended), isIsolated: true, + concat("a", "b", "c", + changeMatchingOptions(matchingOptions(removing: .extended), + isIsolated: true, concat("d", " ", "e", " ", "f")))) + ) + parseTest( + "(?x)a b c(?^i)d e f", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "b", "c", + changeMatchingOptions(unsetMatchingOptions(adding: .caseInsensitive), + isIsolated: true, concat("d", " ", "e", " ", "f")))) + ) + parseTest( + "(?x)a b c(?^x)d e f", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "b", "c", + changeMatchingOptions(unsetMatchingOptions(adding: .extended), + isIsolated: true, concat("d", "e", "f")))) + ) + parseTest( + "(?:(?x)a b c)d e f", concat(nonCapture(changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "b", "c"))), "d", " ", "e", " ", "f") + ) + parseTest( + "(?x:a b c)# hi", concat(changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: false, + concat("a", "b", "c")), 
"#", " ", "h", "i") + ) + + parseTest( + "(?x-x)a b c", changeMatchingOptions( + matchingOptions(adding: .extended, removing: .extended), isIsolated: true, + concat("a", " ", "b", " ", "c")) + ) + parseTest( + "(?xxx-x)a b c", changeMatchingOptions( + matchingOptions(adding: .extraExtended, .extended, removing: .extended), isIsolated: true, + concat("a", " ", "b", " ", "c")) + ) + parseTest( + "(?xx-i)a b c", changeMatchingOptions( + matchingOptions(adding: .extraExtended, removing: .caseInsensitive), isIsolated: true, + concat("a", "b", "c")) + ) + + // PCRE states that whitespace seperating quantifiers is permitted under + // extended syntax http://pcre.org/current/doc/html/pcre2api.html#SEC20 + parseTest( + "(?x)a *", + changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + zeroOrMore(.eager, "a")) + ) + parseTest( + "(?x)a + ?", + changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + oneOrMore(.reluctant, "a")) + ) + parseTest( + "(?x)a {2,4}", + changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + quantRange(.eager, 2 ... 4, "a")) + ) + + // PCRE states that whitespace won't be ignored within a range. + // http://pcre.org/current/doc/html/pcre2api.html#SEC20 + // TODO: We ought to warn on this, and produce a range anyway. + parseTest( + "(?x)a{1, 3}", + changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + concat("a", "{", "1", ",", "3", "}")) + ) + + // Test that we cover the list of whitespace characters covered by PCRE. + parseTest( + "(?x)a\t\u{A}\u{B}\u{C}\u{D}\u{85}\u{200E}\u{200F}\u{2028}\u{2029} b", + changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, concat("a", "b")) + ) + parseTest( + "(?x)[a\t\u{A}\u{B}\u{C}\u{D}\u{85}\u{200E}\u{200F}\u{2028}\u{2029} b]", + changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, charClass("a", "b")) + ) + // MARK: Parse with delimiters parseWithDelimitersTest("'/a b/'", concat("a", " ", "b")) parseWithDelimitersTest("'|a b|'", concat("a", "b")) + parseWithDelimitersTest("'|[a b]|'", charClass("a", "b")) + parseWithDelimitersTest( + "'|(?-x)[a b]|'", changeMatchingOptions( + matchingOptions(removing: .extended), isIsolated: true, + charClass("a", " ", "b")) + ) + parseWithDelimitersTest("'|[[a ] b]|'", charClass(charClass("a"), "b")) + + // Non-semantic whitespace between quantifier characters for consistency + // with PCRE. + parseWithDelimitersTest("'|a * ?|'", zeroOrMore(.reluctant, "a")) + + // End-of-line comments aren't enabled by default in experimental syntax. + parseWithDelimitersTest("'|#abc|'", concat("#", "a", "b", "c")) + parseWithDelimitersTest("'|(?x)#abc|'", changeMatchingOptions( + matchingOptions(adding: .extended), isIsolated: true, + empty()) + ) + parseWithDelimitersTest("'|||'", alt(empty(), empty())) parseWithDelimitersTest("'||||'", alt(empty(), empty(), empty())) parseWithDelimitersTest("'|a||'", alt("a", empty())) @@ -1285,6 +1441,7 @@ extension RegexTests { // Make sure dumping output correctly reflects differences in AST. 
parseNotEqualTest(#"abc"#, #"abd"#) + parseNotEqualTest(#" "#, #""#) parseNotEqualTest(#"[\p{Any}]"#, #"[[:Any:]]"#) @@ -1303,6 +1460,8 @@ extension RegexTests { parseNotEqualTest(#"([a-c&&e]*)+"#, #"([a-d&&e]*)+"#) + parseNotEqualTest(#"[abc]"#, #"[a b c]"#) + parseNotEqualTest(#"\1"#, #"\10"#) parseNotEqualTest("(?^:)", ("(?-:)")) @@ -1584,6 +1743,8 @@ extension RegexTests { diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName)) diagnosticTest(#"(?'a-b-c')"#, .expected("'")) + diagnosticTest("(?x)(? : )", .unknownGroupKind("? ")) + // MARK: Matching options diagnosticTest(#"(?^-"#, .cannotRemoveMatchingOptionsAfterCaret) From c389e5a21a7d5d9ec4951b46adc8fa0dc4a2bb08 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Mon, 31 Jan 2022 11:29:41 +0000 Subject: [PATCH 3/3] Tweak quantifier AST builders Because of the unlabeled parameters, callers were never able to use the `.eager` default. Give the child parameter an argument label so they can. --- Sources/_StringProcessing/ASTBuilder.swift | 24 ++--- Sources/_StringProcessing/RegexDSL/DSL.swift | 6 +- Tests/RegexTests/ParseTests.swift | 101 +++++++++---------- Tests/RegexTests/SyntaxOptionsTests.swift | 18 ++-- 4 files changed, 73 insertions(+), 76 deletions(-) diff --git a/Sources/_StringProcessing/ASTBuilder.swift b/Sources/_StringProcessing/ASTBuilder.swift index dda007ca6..bbc199d27 100644 --- a/Sources/_StringProcessing/ASTBuilder.swift +++ b/Sources/_StringProcessing/ASTBuilder.swift @@ -242,51 +242,51 @@ func quant( _ child: AST.Node ) -> AST.Node { .quantification(.init( - .init(faking: amount), .init(faking: kind), child, .fake)) + .init(faking: amount), .init(faking: kind), child, .fake, trivia: [])) } func zeroOrMore( _ kind: AST.Quantification.Kind = .eager, - _ child: AST.Node + of child: AST.Node ) -> AST.Node { quant(.zeroOrMore, kind, child) } func zeroOrOne( _ kind: AST.Quantification.Kind = .eager, - _ child: AST.Node + of child: AST.Node ) -> AST.Node { quant(.zeroOrOne, kind, child) } func oneOrMore( _ kind: AST.Quantification.Kind = .eager, - _ child: AST.Node + of child: AST.Node ) -> AST.Node { quant(.oneOrMore, kind, child) } func exactly( - _ kind: AST.Quantification.Kind = .eager, _ i: Int, - _ child: AST.Node + _ kind: AST.Quantification.Kind = .eager, + of child: AST.Node ) -> AST.Node { quant(.exactly(.init(faking: i)), kind, child) } func nOrMore( - _ kind: AST.Quantification.Kind = .eager, _ i: Int, - _ child: AST.Node + _ kind: AST.Quantification.Kind = .eager, + of child: AST.Node ) -> AST.Node { quant(.nOrMore(.init(faking: i)), kind, child) } func upToN( - _ kind: AST.Quantification.Kind = .eager, _ i: Int, - _ child: AST.Node + _ kind: AST.Quantification.Kind = .eager, + of child: AST.Node ) -> AST.Node { quant(.upToN(.init(faking: i)), kind, child) } func quantRange( - _ kind: AST.Quantification.Kind = .eager, _ r: ClosedRange, - _ child: AST.Node + _ kind: AST.Quantification.Kind = .eager, + of child: AST.Node ) -> AST.Node { let lower = AST.Located(faking: r.lowerBound) let upper = AST.Located(faking: r.upperBound) diff --git a/Sources/_StringProcessing/RegexDSL/DSL.swift b/Sources/_StringProcessing/RegexDSL/DSL.swift index f39ae20a7..fde14a674 100644 --- a/Sources/_StringProcessing/RegexDSL/DSL.swift +++ b/Sources/_StringProcessing/RegexDSL/DSL.swift @@ -67,7 +67,7 @@ public struct OneOrMore: RegexProtocolWithComponent { public init(component: Component) { self.regex = .init(ast: - oneOrMore(.eager, component.regex.ast.root) + oneOrMore(of: component.regex.ast.root) ) } @@ -93,7 +93,7 
@@ public struct Repeat< public init(component: Component) { self.regex = .init(ast: - zeroOrMore(.eager, component.regex.ast.root)) + zeroOrMore(of: component.regex.ast.root)) } public init(@RegexBuilder _ content: () -> Component) { @@ -116,7 +116,7 @@ public struct Optionally: RegexProtocolWithComponent { public init(component: Component) { self.regex = .init(ast: - zeroOrOne(.eager, component.regex.ast.root)) + zeroOrOne(of: component.regex.ast.root)) } public init(@RegexBuilder _ content: () -> Component) { diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 1346835fb..4722ec57c 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -201,7 +201,7 @@ extension RegexTests { "abc", concat("a", "b", "c")) parseTest( #"abc\+d*"#, - concat("a", "b", "c", "+", zeroOrMore(.eager, "d"))) + concat("a", "b", "c", "+", zeroOrMore(of: "d"))) parseTest( "a(b)", concat("a", capture("b")), captures: .atom()) @@ -211,31 +211,31 @@ extension RegexTests { concat( "a", "b", "c", oneOrMore( - .eager, nonCapture(concat("d", "e"))), - "f", "g", "h", zeroOrMore(.eager, "i"), "k"), + of: nonCapture(concat("d", "e"))), + "f", "g", "h", zeroOrMore(of: "i"), "k"), "j")) parseTest( "a(?:b|c)?d", concat("a", zeroOrOne( - .eager, nonCapture(alt("b", "c"))), "d")) + of: nonCapture(alt("b", "c"))), "d")) parseTest( "a?b??c+d+?e*f*?", concat( - zeroOrOne(.eager, "a"), zeroOrOne(.reluctant, "b"), - oneOrMore(.eager, "c"), oneOrMore(.reluctant, "d"), - zeroOrMore(.eager, "e"), zeroOrMore(.reluctant, "f"))) + zeroOrOne(of: "a"), zeroOrOne(.reluctant, of: "b"), + oneOrMore(of: "c"), oneOrMore(.reluctant, of: "d"), + zeroOrMore(of: "e"), zeroOrMore(.reluctant, of: "f"))) parseTest( "(.)*(.*)", concat( - zeroOrMore(.eager, capture(atom(.any))), - capture(zeroOrMore(.eager, atom(.any)))), + zeroOrMore(of: capture(atom(.any))), + capture(zeroOrMore(of: atom(.any)))), captures: .tuple([.array(.atom()), .atom()])) parseTest( "((.))*((.)?)", concat( - zeroOrMore(.eager, capture(capture(atom(.any)))), - capture(zeroOrOne(.eager, capture(atom(.any))))), + zeroOrMore(of: capture(capture(atom(.any)))), + capture(zeroOrOne(of: capture(atom(.any))))), captures: .tuple([ .array(.atom()), .array(.atom()), .atom(), .optional(.atom()) ])) @@ -247,7 +247,7 @@ extension RegexTests { parseTest( "a|b?c", - alt("a", concat(zeroOrOne(.eager, "b"), "c"))) + alt("a", concat(zeroOrOne(of: "b"), "c"))) parseTest( "(a|b)c", concat(capture(alt("a", "b")), "c"), @@ -419,7 +419,7 @@ extension RegexTests { parseTest( #"[a[bc]de&&[^bc]\d]+"#, - oneOrMore(.eager, charClass( + oneOrMore(of: charClass( .setOperation( ["a", charClass("b", "c"), "d", "e"], .init(faking: .intersection), @@ -448,13 +448,13 @@ extension RegexTests { parseTest( "a&&b", concat("a", "&", "&", "b")) parseTest( - "&?", zeroOrOne(.eager, "&")) + "&?", zeroOrOne(of: "&")) parseTest( - "&&?", concat("&", zeroOrOne(.eager, "&"))) + "&&?", concat("&", zeroOrOne(of: "&"))) parseTest( - "--+", concat("-", oneOrMore(.eager, "-"))) + "--+", concat("-", oneOrMore(of: "-"))) parseTest( - "~~*", concat("~", zeroOrMore(.eager, "~"))) + "~~*", concat("~", zeroOrMore(of: "~"))) // MARK: Quotes @@ -496,25 +496,25 @@ extension RegexTests { parseTest( #"a{1,2}"#, - quantRange(.eager, 1...2, "a")) + quantRange(1...2, of: "a")) parseTest( #"a{,2}"#, - upToN(.eager, 2, "a")) + upToN(2, of: "a")) parseTest( #"a{2,}"#, - nOrMore(.eager, 2, "a")) + nOrMore(2, of: "a")) parseTest( #"a{1}"#, - exactly(.eager, 1, "a")) + exactly(1, of: 
"a")) parseTest( #"a{1,2}?"#, - quantRange(.reluctant, 1...2, "a")) + quantRange(1...2, .reluctant, of: "a")) parseTest( #"a{0}"#, - exactly(.eager, 0, "a")) + exactly(0, of: "a")) parseTest( #"a{0,0}"#, - quantRange(.eager, 0...0, "a")) + quantRange(0...0, of: "a")) // Make sure ranges get treated as literal if invalid. parseTest("{", "{") @@ -524,16 +524,16 @@ extension RegexTests { parseTest("{,6", concat("{", ",", "6")) parseTest("{6", concat("{", "6")) parseTest("{6,", concat("{", "6", ",")) - parseTest("{+", oneOrMore(.eager, "{")) - parseTest("{6,+", concat("{", "6", oneOrMore(.eager, ","))) + parseTest("{+", oneOrMore(of: "{")) + parseTest("{6,+", concat("{", "6", oneOrMore(of: ","))) parseTest("x{", concat("x", "{")) parseTest("x{}", concat("x", "{", "}")) parseTest("x{,}", concat("x", "{", ",", "}")) parseTest("x{,6", concat("x", "{", ",", "6")) parseTest("x{6", concat("x", "{", "6")) parseTest("x{6,", concat("x", "{", "6", ",")) - parseTest("x{+", concat("x", oneOrMore(.eager, "{"))) - parseTest("x{6,+", concat("x", "{", "6", oneOrMore(.eager, ","))) + parseTest("x{+", concat("x", oneOrMore(of: "{"))) + parseTest("x{6,+", concat("x", "{", "6", oneOrMore(of: ","))) // TODO: We should emit a diagnostic for this. parseTest("x{3, 5}", concat("x", "{", "3", ",", " ", "5", "}")) @@ -915,14 +915,11 @@ extension RegexTests { parseTest(#"\N{abc}"#, atom(.namedCharacter("abc"))) parseTest(#"[\N{abc}]"#, charClass(atom_m(.namedCharacter("abc")))) - parseTest( - #"\N{abc}+"#, - oneOrMore(.eager, - atom(.namedCharacter("abc")))) + parseTest(#"\N{abc}+"#, oneOrMore(of: atom(.namedCharacter("abc")))) parseTest( #"\N {2}"#, - concat(atom(.escaped(.notNewline)), - exactly(.eager, 2, " "))) + concat(atom(.escaped(.notNewline)), exactly(2, of: " ")) + ) parseTest(#"\N{AA}"#, atom(.namedCharacter("AA"))) parseTest(#"\N{U+AA}"#, scalar("\u{AA}")) @@ -945,7 +942,7 @@ extension RegexTests { parseTest(#"[\p{C}]"#, charClass(prop_m(.generalCategory(.other)))) parseTest( #"\p{C}+"#, - oneOrMore(.eager, prop(.generalCategory(.other)))) + oneOrMore(of: prop(.generalCategory(.other)))) parseTest(#"\p{Lx}"#, prop(.other(key: nil, value: "Lx"))) parseTest(#"\p{gcL}"#, prop(.other(key: nil, value: "gcL"))) @@ -1064,7 +1061,7 @@ extension RegexTests { captures: .atom(name: "a1") ) - parseTest(#"(?(1))?"#, zeroOrOne(.eager, conditional( + parseTest(#"(?(1))?"#, zeroOrOne(of: conditional( .groupMatched(ref(1)), trueBranch: empty(), falseBranch: empty()))) parseTest(#"(?(R)a|b)"#, conditional( @@ -1108,9 +1105,9 @@ extension RegexTests { parseTest(#"(?((a)?(b))(a)+|b)"#, conditional( groupCondition(.capture, concat( - zeroOrOne(.eager, capture("a")), capture("b") + zeroOrOne(of: capture("a")), capture("b") )), - trueBranch: oneOrMore(.eager, capture("a")), + trueBranch: oneOrMore(of: capture("a")), falseBranch: "b" ), captures: .tuple([ .atom(), .optional(.atom()), .atom(), .optional(.array(.atom())) @@ -1118,9 +1115,9 @@ extension RegexTests { parseTest(#"(?(?:(a)?(b))(a)+|b)"#, conditional( groupCondition(.nonCapture, concat( - zeroOrOne(.eager, capture("a")), capture("b") + zeroOrOne(of: capture("a")), capture("b") )), - trueBranch: oneOrMore(.eager, capture("a")), + trueBranch: oneOrMore(of: capture("a")), falseBranch: "b" ), captures: .tuple([ .optional(.atom()), .atom(), .optional(.array(.atom())) @@ -1190,10 +1187,10 @@ extension RegexTests { // MARK: Backtracking directives - parseTest("(*ACCEPT)?", zeroOrOne(.eager, backtrackingDirective(.accept))) + parseTest("(*ACCEPT)?", zeroOrOne(of: 
backtrackingDirective(.accept))) parseTest( "(*ACCEPT:a)??", - zeroOrOne(.reluctant, backtrackingDirective(.accept, name: "a")) + zeroOrOne(.reluctant, of: backtrackingDirective(.accept, name: "a")) ) parseTest("(*:a)", backtrackingDirective(.mark, name: "a")) parseTest("(*MARK:a)", backtrackingDirective(.mark, name: "a")) @@ -1208,17 +1205,17 @@ extension RegexTests { parseTest("(?~)", absentRepeater(empty())) parseTest("(?~abc)", absentRepeater(concat("a", "b", "c"))) - parseTest("(?~a+)", absentRepeater(oneOrMore(.eager, "a"))) + parseTest("(?~a+)", absentRepeater(oneOrMore(of: "a"))) parseTest("(?~~)", absentRepeater("~")) parseTest("(?~a|b|c)", absentRepeater(alt("a", "b", "c"))) parseTest("(?~(a))", absentRepeater(capture("a")), captures: .empty) - parseTest("(?~)*", zeroOrMore(.eager, absentRepeater(empty()))) + parseTest("(?~)*", zeroOrMore(of: absentRepeater(empty()))) parseTest("(?~|abc)", absentStopper(concat("a", "b", "c"))) - parseTest("(?~|a+)", absentStopper(oneOrMore(.eager, "a"))) + parseTest("(?~|a+)", absentStopper(oneOrMore(of: "a"))) parseTest("(?~|~)", absentStopper("~")) parseTest("(?~|(a))", absentStopper(capture("a")), captures: .empty) - parseTest("(?~|a){2}", exactly(.eager, 2, absentStopper("a"))) + parseTest("(?~|a){2}", exactly(2, of: absentStopper("a"))) parseTest("(?~|a|b)", absentExpression("a", "b")) parseTest("(?~|~|~)", absentExpression("~", "~")) @@ -1227,13 +1224,13 @@ extension RegexTests { parseTest("(?~|(a)|(?:(b)|c))", absentExpression( capture("a"), nonCapture(alt(capture("b"), "c")) ), captures: .optional(.atom())) - parseTest("(?~|a|b)?", zeroOrOne(.eager, absentExpression("a", "b"))) + parseTest("(?~|a|b)?", zeroOrOne(of: absentExpression("a", "b"))) parseTest("(?~|)", absentRangeClear()) // TODO: It's not really clear what this means, but Oniguruma parses it... // Maybe we should diagnose it? - parseTest("(?~|)+", oneOrMore(.eager, absentRangeClear())) + parseTest("(?~|)+", oneOrMore(of: absentRangeClear())) // MARK: Global matching options @@ -1372,19 +1369,19 @@ extension RegexTests { "(?x)a *", changeMatchingOptions( matchingOptions(adding: .extended), isIsolated: true, - zeroOrMore(.eager, "a")) + zeroOrMore(of: "a")) ) parseTest( "(?x)a + ?", changeMatchingOptions( matchingOptions(adding: .extended), isIsolated: true, - oneOrMore(.reluctant, "a")) + oneOrMore(.reluctant, of: "a")) ) parseTest( "(?x)a {2,4}", changeMatchingOptions( matchingOptions(adding: .extended), isIsolated: true, - quantRange(.eager, 2 ... 4, "a")) + quantRange(2 ... 4, of: "a")) ) // PCRE states that whitespace won't be ignored within a range. @@ -1424,7 +1421,7 @@ extension RegexTests { // Non-semantic whitespace between quantifier characters for consistency // with PCRE. - parseWithDelimitersTest("'|a * ?|'", zeroOrMore(.reluctant, "a")) + parseWithDelimitersTest("'|a * ?|'", zeroOrMore(.reluctant, of: "a")) // End-of-line comments aren't enabled by default in experimental syntax. 
parseWithDelimitersTest("'|#abc|'", concat("#", "a", "b", "c")) diff --git a/Tests/RegexTests/SyntaxOptionsTests.swift b/Tests/RegexTests/SyntaxOptionsTests.swift index d6e0d6bc5..881eb0cbc 100644 --- a/Tests/RegexTests/SyntaxOptionsTests.swift +++ b/Tests/RegexTests/SyntaxOptionsTests.swift @@ -15,7 +15,7 @@ import XCTest private let dplus = oneOrMore( - .eager, atom(.escaped(.decimalDigit))) + of: atom(.escaped(.decimalDigit))) private let dotAST = concat( dplus, ".", dplus, ".", dplus, ".", dplus) private let dotASTQuoted = concat( @@ -61,34 +61,34 @@ extension RegexTests { func testExperimentalRanges() { parseTest( #"a{1,2}"#, - quantRange(.eager, 1...2, "a")) + quantRange(1...2, of: "a")) parseTest( #"a{1...2}"#, - quantRange(.eager, 1...2, "a"), + quantRange(1...2, of: "a"), syntax: .experimentalRanges) parseTest( #"a{1..<3}"#, - quantRange(.eager, 1...2, "a"), + quantRange(1...2, of: "a"), syntax: .experimentalRanges) parseTest( #"a{,2}"#, - upToN(.eager, 2, "a")) + upToN(2, of: "a")) parseTest( #"a{...2}"#, - upToN(.eager, 2, "a"), + upToN(2, of: "a"), syntax: .experimental) parseTest( #"a{..<3}"#, - upToN(.eager, 2, "a"), + upToN(2, of: "a"), syntax: .experimental) parseTest( #"a{1,}"#, - nOrMore(.eager, 1, "a")) + nOrMore(1, of: "a")) parseTest( #"a{1...}"#, - nOrMore(.eager, 1, "a"), + nOrMore(1, of: "a"), syntax: .experimental) }