diff --git a/Sources/CMakeLists.txt b/Sources/CMakeLists.txt index 7b4e5e3ed..0cd79e24c 100644 --- a/Sources/CMakeLists.txt +++ b/Sources/CMakeLists.txt @@ -2,5 +2,4 @@ add_subdirectory(RegexBuilder) add_subdirectory(_RegexParser) add_subdirectory(_StringProcessing) -add_subdirectory(Prototypes) add_subdirectory(VariadicsGenerator) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index df4168c2d..2622639fb 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -1,15 +1,45 @@ -import _StringProcessing +@_spi(RegexBenchmark) import _StringProcessing +@_implementationOnly import _RegexParser import Foundation -protocol RegexBenchmark { +protocol RegexBenchmark: Debug { var name: String { get } func run() - func debug() } -struct Benchmark: RegexBenchmark { +protocol SwiftRegexBenchmark: RegexBenchmark { + var regex: Regex { get set } + var pattern: String? { get } +} + +extension SwiftRegexBenchmark { + mutating func compile() { + let _ = regex._forceAction(.recompile) + } + mutating func parse() -> Bool { + guard let s = pattern else { + return false + } + + do { + let _ = try _RegexParser.parse(s, .traditional) + return true + } catch { + return false + } + } + mutating func enableTracing() { + let _ = regex._forceAction(.addOptions(.enableTracing)) + } + mutating func enableMetrics() { + let _ = regex._forceAction(.addOptions([.enableMetrics])) + } +} + +struct Benchmark: SwiftRegexBenchmark { let name: String - let regex: Regex + var regex: Regex + let pattern: String? let type: MatchType let target: String @@ -52,11 +82,12 @@ struct NSBenchmark: RegexBenchmark { } /// A benchmark running a regex on strings in input set -struct InputListBenchmark: RegexBenchmark { +struct InputListBenchmark: SwiftRegexBenchmark { let name: String - let regex: Regex + var regex: Regex + let pattern: String? 
let targets: [String] - + func run() { for target in targets { blackHole(target.wholeMatch(of: regex)) @@ -78,7 +109,7 @@ struct InputListNSBenchmark: RegexBenchmark { func range(in target: String) -> NSRange { NSRange(target.startIndex.. BenchmarkRunner { - var benchmark = BenchmarkRunner("RegexBench", samples, quiet) + mutating func registerDefault() { // -- start of registrations -- - benchmark.addReluctantQuant() - benchmark.addCSS() - benchmark.addNotFound() - benchmark.addGraphemeBreak() - benchmark.addHangulSyllable() - // benchmark.addHTML() // Disabled due to \b being unusably slow - benchmark.addEmail() - benchmark.addCustomCharacterClasses() - benchmark.addBuiltinCC() - benchmark.addUnicode() - benchmark.addLiteralSearch() - benchmark.addDiceNotation() - benchmark.addErrorMessages() - benchmark.addIpAddress() + self.addReluctantQuant() + self.addCSS() + self.addNotFound() + self.addGraphemeBreak() + self.addHangulSyllable() + // self.addHTML() // Disabled due to \b being unusably slow + self.addEmail() + self.addCustomCharacterClasses() + self.addBuiltinCC() + self.addUnicode() + self.addLiteralSearch() + self.addDiceNotation() + self.addErrorMessages() + self.addIpAddress() // -- end of registrations -- - return benchmark } } diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift new file mode 100644 index 000000000..ae9c5ded2 --- /dev/null +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -0,0 +1,277 @@ +import Foundation + +extension BenchmarkRunner { + /// Attempts to save the results to the given path + func save(to savePath: String) throws { + let url = URL(fileURLWithPath: savePath, isDirectory: false) + let parent = url.deletingLastPathComponent() + if !FileManager.default.fileExists(atPath: parent.path) { + try! 
FileManager.default.createDirectory( + atPath: parent.path, + withIntermediateDirectories: true) + } + print("Saving result to \(url.path)") + try results.save(to: url) + } + + /// Attempts to load the results from the given save file + mutating func load(from savePath: String) throws { + let url = URL(fileURLWithPath: savePath) + let result = try SuiteResult.load(from: url) + self.results = result + print("Loaded results from \(url.path)") + } + + /// Compare this runner's results against the results stored in the given file path + func compare( + against compareFilePath: String, + showChart: Bool, + saveTo: String? + ) throws { + let compareFileURL = URL(fileURLWithPath: compareFilePath) + let compareResult = try SuiteResult.load(from: compareFileURL) + let compareFile = compareFileURL.lastPathComponent + + let comparisons = results + .compare(with: compareResult) + .filter({!$0.name.contains("_NS")}) + .filter({$0.diff != nil}) + displayComparisons( + comparisons, + showChart, + against: "saved benchmark result " + compareFile) + if let saveFile = saveTo { + try saveComparisons(comparisons, path: saveFile) + } + } + + // Compile times are often very short (5-20µs) so results are likely to be + // very affected by background tasks. 
This is primarily for making sure + // there aren't any catastrophic changes in compile times + func compareCompileTimes( + against compareFilePath: String, + showChart: Bool + ) throws { + let compareFileURL = URL(fileURLWithPath: compareFilePath) + let compareResult = try SuiteResult.load(from: compareFileURL) + let compareFile = compareFileURL.lastPathComponent + + let compileTimeComparisons = results + .compareCompileTimes(with: compareResult) + .filter({!$0.name.contains("_NS")}) + .filter({$0.diff != nil}) + print("Comparing estimated compile times") + displayComparisons( + compileTimeComparisons, + false, + against: "saved benchmark result " + compareFile) + } + + /// Compares Swift Regex benchmark results against NSRegularExpression + func compareWithNS(showChart: Bool, saveTo: String?) throws { + let comparisons = results.compareWithNS().filter({$0.diff != nil}) + displayComparisons( + comparisons, + showChart, + against: "NSRegularExpression (via CrossBenchmark)") + if let saveFile = saveTo { + try saveComparisons(comparisons, path: saveFile) + } + } + + func displayComparisons( + _ comparisons: [BenchmarkResult.Comparison], + _ showChart: Bool, + against: String + ) { + let regressions = comparisons.filter({$0.diff!.seconds > 0}) + .sorted(by: {(a,b) in a.diff!.seconds > b.diff!.seconds}) + let improvements = comparisons.filter({$0.diff!.seconds < 0}) + .sorted(by: {(a,b) in a.diff!.seconds < b.diff!.seconds}) + + print("Comparing against \(against)") + print("=== Regressions ======================================================================") + for item in regressions { + print(item) + } + + print("=== Improvements =====================================================================") + for item in improvements { + print(item) + } + + #if os(macOS) && canImport(Charts) + if showChart { + print(""" + === Comparison chart ================================================================= + Press Control-C to close... 
+ """) + BenchmarkResultApp.comparisons = comparisons + BenchmarkResultApp.main() + } + #endif + } + + func saveComparisons( + _ comparisons: [BenchmarkResult.Comparison], + path: String + ) throws { + let url = URL(fileURLWithPath: path, isDirectory: false) + let parent = url.deletingLastPathComponent() + if !FileManager.default.fileExists(atPath: parent.path) { + try! FileManager.default.createDirectory( + atPath: parent.path, + withIntermediateDirectories: true) + } + + var contents = "name,latest,baseline,diff,percentage\n" + for comparison in comparisons { + contents += comparison.asCsv + "\n" + } + print("Saving comparisons as .csv to \(path)") + try contents.write(to: url, atomically: true, encoding: String.Encoding.utf8) + } +} + +struct Measurement: Codable, CustomStringConvertible { + let median: Time + let stdev: Double + let samples: Int + + init(results: [Time]) { + let sorted = results.sorted() + self.samples = sorted.count + self.median = sorted[samples/2] + let sum = results.reduce(0.0) {acc, next in acc + next.seconds} + let mean = sum / Double(samples) + let squareDiffs = results.reduce(0.0) { acc, next in + acc + pow(next.seconds - mean, 2) + } + self.stdev = (squareDiffs / Double(samples)).squareRoot() + } + + var description: String { + return "\(median) (stdev: \(Time(stdev)), N = \(samples))" + } +} + +struct BenchmarkResult: Codable, CustomStringConvertible { + let runtime: Measurement + let compileTime: Measurement? + let parseTime: Measurement? 
+ + var description: String { + var base = " > run time: \(runtime.description)" + if let compileTime = compileTime { + base += "\n > compile time: \(compileTime)" + } + if let parseTime = parseTime { + base += "\n > parse time: \(parseTime)" + } + return base + } +} + +extension BenchmarkResult { + struct Comparison: Identifiable, CustomStringConvertible { + var id = UUID() + var name: String + var baseline: Measurement + var latest: Measurement + + var latestTime: Time { latest.median } + var baselineTime: Time { baseline.median } + var diff: Time? { + if Stats.tTest(baseline, latest) { + return latestTime - baselineTime + } + return nil + } + var normalizedDiff: Double { + latestTime.seconds/baselineTime.seconds + } + + var description: String { + guard let diff = diff else { + return "- \(name) N/A" + } + let percentage = (1000 * diff.seconds / baselineTime.seconds).rounded()/10 + let len = max(40 - name.count, 1) + let nameSpacing = String(repeating: " ", count: len) + return "- \(name)\(nameSpacing)\(latestTime)\t\(baselineTime)\t\(diff)\t\t\(percentage)%" + } + + var asCsv: String { + guard let diff = diff else { + return "\(name),N/A" + } + let percentage = (1000 * diff.seconds / baselineTime.seconds).rounded()/10 + return "\"\(name)\",\(latestTime.seconds),\(baselineTime.seconds),\(diff.seconds),\(percentage)%" + } + } +} + +struct SuiteResult { + var results: [String: BenchmarkResult] = [:] + + mutating func add(name: String, result: BenchmarkResult) { + results.updateValue(result, forKey: name) + } + + func compare(with other: SuiteResult) -> [BenchmarkResult.Comparison] { + var comparisons: [BenchmarkResult.Comparison] = [] + for latest in results { + if let otherVal = other.results[latest.key] { + comparisons.append( + .init(name: latest.key, + baseline: otherVal.runtime, latest: latest.value.runtime)) + } + } + return comparisons + } + + /// Compares with the NSRegularExpression benchmarks generated by CrossBenchmark + func compareWithNS() -> 
[BenchmarkResult.Comparison] { + var comparisons: [BenchmarkResult.Comparison] = [] + for latest in results { + let key = latest.key + CrossBenchmark.nsSuffix + if let nsResult = results[key] { + comparisons.append( + .init(name: latest.key, + baseline: nsResult.runtime, latest: latest.value.runtime)) + } + } + return comparisons + } + + func compareCompileTimes( + with other: SuiteResult + ) -> [BenchmarkResult.Comparison] { + var comparisons: [BenchmarkResult.Comparison] = [] + for latest in results { + if let baseline = other.results[latest.key], + let baselineTime = baseline.compileTime, + let latestTime = latest.value.compileTime { + comparisons.append( + .init(name: latest.key, + baseline: baselineTime, + latest: latestTime)) + } + } + return comparisons + } +} + +extension SuiteResult: Codable { + func save(to url: URL) throws { + let encoder = JSONEncoder() + let data = try encoder.encode(self) + try data.write(to: url, options: .atomic) + } + + static func load(from url: URL) throws -> SuiteResult { + let decoder = JSONDecoder() + let data = try Data(contentsOf: url) + return try decoder.decode(SuiteResult.self, from: data) + } +} diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index 78953bdd6..1a62858c1 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -1,4 +1,5 @@ import Foundation +@_spi(RegexBenchmark) import _StringProcessing struct BenchmarkRunner { let suiteName: String @@ -7,60 +8,99 @@ struct BenchmarkRunner { let samples: Int var results: SuiteResult = SuiteResult() let quiet: Bool - - init(_ suiteName: String, _ n: Int, _ quiet: Bool) { - self.suiteName = suiteName - self.samples = n - self.quiet = quiet + let enableTracing: Bool + let enableMetrics: Bool + + // Forcibly include firstMatch benchmarks for all CrossBenchmarks + let includeFirstOverride: Bool + + mutating func register(_ benchmark: some RegexBenchmark) { + 
suite.append(benchmark) } - mutating func register(_ new: some RegexBenchmark) { - suite.append(new) + mutating func register(_ benchmark: some SwiftRegexBenchmark) { + var benchmark = benchmark + if enableTracing { + benchmark.enableTracing() + } + if enableMetrics { + benchmark.enableMetrics() + } + suite.append(benchmark) } - mutating func measure(benchmark: some RegexBenchmark, samples: Int) -> BenchmarkResult { + func medianMeasure( + samples: Int, + closure: () -> Void + ) -> Measurement { + // FIXME: use suspendingclock? var times: [Time] = [] - - // initial run to make sure the regex has been compiled - // todo: measure compile times, or at least how much this first run - // differs from the later ones - benchmark.run() - - // fixme: use suspendingclock? for _ in 0.. BenchmarkResult { + // Initial run to make sure the regex has been compiled + benchmark.run() - times.sort() - let median = times[samples/2] - let mean = times.reduce(0.0, {acc, next in acc + next.seconds}) / Double(times.count) - let stdev = (times.reduce(0.0, {acc, next in acc + pow(next.seconds - mean, 2)}) / Double(times.count)).squareRoot() - return BenchmarkResult(median, stdev, samples) + // Measure compilation time for Swift regex + let compileTime: Measurement? + let parseTime: Measurement? + if benchmark is SwiftRegexBenchmark { + var benchmark = benchmark as! 
SwiftRegexBenchmark + compileTime = medianMeasure(samples: samples) { benchmark.compile() } + // Can't parse if we don't have an input string (ie a builder regex) + if benchmark.pattern != nil { + parseTime = medianMeasure(samples: samples) { let _ = benchmark.parse() } + } else { + parseTime = nil + } + + } else { + compileTime = nil + parseTime = nil + } + + let runtime = medianMeasure(samples: samples) { benchmark.run() } + return BenchmarkResult( + runtime: runtime, + compileTime: compileTime, + parseTime: parseTime) } mutating func run() { print("Running") for b in suite { var result = measure(benchmark: b, samples: samples) - if !quiet { - print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev)))") - } - - if result.stdev > Stats.maxAllowedStdev { - print("Warning: Standard deviation > \(Time(Stats.maxAllowedStdev)) for \(b.name)") - print("N = \(samples), median: \(result.median), stdev: \(Time(result.stdev))") + if result.runtimeIsTooVariant { + print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)") + print(result.runtime) print("Rerunning \(b.name)") - result = measure(benchmark: b, samples: result.samples*2) - print("N = \(result.samples), median: \(result.median), stdev: \(Time(result.stdev))") - if result.stdev > Stats.maxAllowedStdev { + result = measure(benchmark: b, samples: result.runtime.samples*2) + print(result.runtime) + if result.runtimeIsTooVariant { fatalError("Benchmark \(b.name) is too variant") } } + if result.compileTime?.median ?? .zero > Time.millisecond { + print("Warning: Abnormally high compilation time, what happened?") + } + if result.parseTime?.median ?? 
.zero > Time.millisecond { + print("Warning: Abnormally high parse time, what happened?") + } + if !quiet { + print("- \(b.name)\n\(result)") + } self.results.add(name: b.name, result: result) } } @@ -69,104 +109,8 @@ struct BenchmarkRunner { print("Debugging") print("========================") for b in suite { - let result = measure(benchmark: b, samples: samples) - print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev)))") b.debug() print("========================") } } } - -extension BenchmarkRunner { - - func save(to savePath: String) throws { - let url = URL(fileURLWithPath: savePath, isDirectory: false) - let parent = url.deletingLastPathComponent() - if !FileManager.default.fileExists(atPath: parent.path) { - try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true) - } - print("Saving result to \(url.path)") - try results.save(to: url) - } - - func compare(against compareFilePath: String) throws { - let compareFileURL = URL(fileURLWithPath: compareFilePath) - let compareResult = try SuiteResult.load(from: compareFileURL) - let compareFile = compareFileURL.lastPathComponent - - let diff = results - .compare(with: compareResult) - .filter({(name, _) in !name.contains("_NS")}) - let regressions = diff.filter({(_, change) in change.seconds > 0}) - .sorted(by: {(a,b) in a.1 > b.1}) - let improvements = diff.filter({(_, change) in change.seconds < 0}) - .sorted(by: {(a,b) in a.1 < b.1}) - - print("Comparing against benchmark result file \(compareFile)") - print("=== Regressions ======================================================================") - func printComparison(name: String, diff: Time) { - let oldVal = compareResult.results[name]!.median - let newVal = results.results[name]!.median - let percentage = (1000 * diff.seconds / oldVal.seconds).rounded()/10 - let len = max(40 - name.count, 1) - let nameSpacing = String(repeating: " ", count: len) - print("- 
\(name)\(nameSpacing)\(newVal)\t\(oldVal)\t\(diff)\t\t\(percentage)%") - } - - for item in regressions { - printComparison(name: item.key, diff: item.value) - } - - print("=== Improvements =====================================================================") - for item in improvements { - printComparison(name: item.key, diff: item.value) - } - } -} - -struct BenchmarkResult: Codable { - let median: Time - let stdev: Double - let samples: Int - - init(_ median: Time, _ stdev: Double, _ samples: Int) { - self.median = median - self.stdev = stdev - self.samples = samples - } -} - -struct SuiteResult { - var results: [String: BenchmarkResult] = [:] - - mutating func add(name: String, result: BenchmarkResult) { - results.updateValue(result, forKey: name) - } - - func compare(with other: SuiteResult) -> [String: Time] { - var output: [String: Time] = [:] - for item in results { - if let otherVal = other.results[item.key] { - let diff = item.value.median - otherVal.median - if Stats.tTest(item.value, otherVal) { - output.updateValue(diff, forKey: item.key) - } - } - } - return output - } -} - -extension SuiteResult: Codable { - func save(to url: URL) throws { - let encoder = JSONEncoder() - let data = try encoder.encode(self) - try data.write(to: url, options: .atomic) - } - - static func load(from url: URL) throws -> SuiteResult { - let decoder = JSONDecoder() - let data = try Data(contentsOf: url) - return try decoder.decode(SuiteResult.self, from: data) - } -} diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index 8ef351329..77ebff47b 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -10,6 +10,9 @@ struct Runner: ParsableCommand { @Flag(help: "Debug benchmark regexes") var debug = false + + @Option(help: "Load results from this file instead of rerunning") + var load: String? @Option(help: "The file results should be saved to") var save: String? 
@@ -17,15 +20,56 @@ struct Runner: ParsableCommand { @Option(help: "The result file to compare against") var compare: String? + @Option(help: "Compare compile times with the given results file") + var compareCompileTime: String? + + @Flag(help: "Show comparison chart") + var showChart: Bool = false + + @Flag(help: "Compare with NSRegularExpression") + var compareWithNS: Bool = false + + @Option(help: "Save comparison results as csv") + var saveComparison: String? + @Flag(help: "Quiet mode") var quiet = false @Flag(help: "Exclude running NSRegex benchmarks") var excludeNs = false + + @Flag(help: """ +Enable tracing of the engine (warning: lots of output). Prints out processor state each cycle - mutating func run() throws { - var runner = BenchmarkRunner.makeRunner(samples, quiet) +Note: swift-experimental-string-processing must be built with processor measurements enabled +swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED + +""") + var enableTracing: Bool = false + + @Flag(help: """ +Enable engine metrics (warning: lots of output). 
Prints out cycle count, instruction counts, number of backtracks +Note: swift-experimental-string-processing must be built with processor measurements enabled +swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED + +""") + var enableMetrics: Bool = false + + @Flag(help: "Include firstMatch benchmarks in CrossBenchmark (off by default)") + var includeFirst: Bool = false + + mutating func run() throws { + var runner = BenchmarkRunner( + suiteName: "DefaultRegexSuite", + samples: samples, + quiet: quiet, + enableTracing: enableTracing, + enableMetrics: enableMetrics, + includeFirstOverride: includeFirst) + + runner.registerDefault() + if !self.specificBenchmarks.isEmpty { runner.suite = runner.suite.filter { b in specificBenchmarks.contains { pattern in @@ -35,17 +79,35 @@ struct Runner: ParsableCommand { } if debug { runner.debug() + return + } + + if let loadFile = load { + try runner.load(from: loadFile) } else { if excludeNs { runner.suite = runner.suite.filter { b in !b.name.contains("NS") } } runner.run() - if let compareFile = compare { - try runner.compare(against: compareFile) - } - if let saveFile = save { - try runner.save(to: saveFile) - } + } + if let saveFile = save { + try runner.save(to: saveFile) + } + if saveComparison != nil && compareWithNS && compare != nil { + print("Unable to save both comparison results, specify only one compare operation") + return + } + if compareWithNS { + try runner.compareWithNS(showChart: showChart, saveTo: saveComparison) + } + if let compareFile = compare { + try runner.compare( + against: compareFile, + showChart: showChart, + saveTo: saveComparison) + } + if let compareFile = compareCompileTime { + try runner.compareCompileTimes(against: compareFile, showChart: showChart) } } } diff --git a/Sources/RegexBenchmark/Debug.swift b/Sources/RegexBenchmark/Debug.swift index fcd11f7ca..1171247e4 100644 --- a/Sources/RegexBenchmark/Debug.swift +++ b/Sources/RegexBenchmark/Debug.swift @@ -1,12 +1,21 @@ import 
Foundation +protocol Debug { + func debug() +} + +extension Debug { + var maxStringLengthForPrint: Int { 1000 } + var maxMatchCountForPrint: Int { 100 } +} + extension Benchmark { func debug() { switch type { case .whole: let result = target.wholeMatch(of: regex) if let match = result { - if match.0.count > 100 { + if match.0.count > maxStringLengthForPrint { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -22,7 +31,7 @@ extension Benchmark { } print("- Total matches: \(results.count)") - if results.count > 10 { + if results.count > maxMatchCountForPrint { print("# Too many matches, not printing") let avgLen = results.map({result in String(target[result.range]).count}) .reduce(0.0, {$0 + Double($1)}) / Double(results.count) @@ -32,7 +41,7 @@ extension Benchmark { } for match in results { - if match.0.count > 100 { + if match.0.count > maxStringLengthForPrint { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -42,7 +51,7 @@ extension Benchmark { case .first: let result = target.firstMatch(of: regex) if let match = result { - if match.0.count > 100 { + if match.0.count > maxStringLengthForPrint { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -56,6 +65,7 @@ extension Benchmark { } extension NSBenchmark { + func debug() { switch type { case .allMatches: @@ -66,13 +76,13 @@ extension NSBenchmark { } print("- Total matches: \(results.count)") - if results.count > 10 { + if results.count > maxMatchCountForPrint { print("# Too many matches, not printing") return } for m in results { - if m.range.length > 100 { + if m.range.length > maxStringLengthForPrint { print("- Match: len = \(m.range.length)") } else { print("- Match: \(target[Range(m.range, in: target)!])") @@ -81,7 +91,7 @@ extension NSBenchmark { case .first: let result = regex.firstMatch(in: target, range: range) if let match = result { - if match.range.length > 100 { + if match.range.length > 
maxStringLengthForPrint { print("- Match: len = \(match.range.length)") } else { print("- Match: \(target[Range(match.range, in: target)!])") diff --git a/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift b/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift index 2f971b4e6..61d7b197f 100644 --- a/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift +++ b/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift @@ -15,42 +15,49 @@ extension BenchmarkRunner { register(Benchmark( name: "BasicCCC", regex: try! Regex(basic), + pattern: basic, type: .allMatches, target: input)) register(Benchmark( name: "BasicRangeCCC", regex: try! Regex(basicRange), + pattern: basicRange, type: .allMatches, target: input)) register(Benchmark( name: "CaseInsensitiveCCC", regex: try! Regex(caseInsensitive), + pattern: caseInsensitive, type: .allMatches, target: input)) register(Benchmark( name: "InvertedCCC", regex: try! Regex(inverted), + pattern: inverted, type: .allMatches, target: input)) register(Benchmark( name: "SubtractionCCC", regex: try! Regex(subtraction), + pattern: subtraction, type: .allMatches, target: input)) register(Benchmark( name: "IntersectionCCC", regex: try! Regex(intersection), + pattern: intersection, type: .allMatches, target: input)) register(Benchmark( name: "symDiffCCC", regex: try! 
Regex(symmetricDifference), + pattern: symmetricDifference, type: .allMatches, target: input)) } diff --git a/Sources/RegexBenchmark/Suite/LiteralSearch.swift b/Sources/RegexBenchmark/Suite/LiteralSearch.swift index 1f48f9945..32cf60a7d 100644 --- a/Sources/RegexBenchmark/Suite/LiteralSearch.swift +++ b/Sources/RegexBenchmark/Suite/LiteralSearch.swift @@ -3,7 +3,7 @@ import _StringProcessing extension BenchmarkRunner { mutating func addLiteralSearch() { let searchNotFound = CrossBenchmark(baseName: "LiteralSearchNotFound", regex: "magic_string_to_search_for", input: Inputs.graphemeBreakData) - let search = CrossBenchmark(baseName: "LiteralSearch", regex: "aatcgaagcagtcttctaacacccttagaaaagcaaacactattgaatactgccgccgca", input: Inputs.graphemeBreakData) + let search = CrossBenchmark(baseName: "LiteralSearch", regex: "HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH", input: Inputs.graphemeBreakData) searchNotFound.register(&self) search.register(&self) } diff --git a/Sources/RegexBenchmark/Suite/Unicode.swift b/Sources/RegexBenchmark/Suite/Unicode.swift index 5944ab2ca..46afda712 100644 --- a/Sources/RegexBenchmark/Suite/Unicode.swift +++ b/Sources/RegexBenchmark/Suite/Unicode.swift @@ -4,11 +4,11 @@ extension BenchmarkRunner { mutating func addUnicode() { // tagged unicode: unicode characters surrounded by html tags // use the same html regex, uses backreference + reluctant quantification - let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"# - let taggedEmojis = CrossBenchmark( - baseName: "TaggedEmojis", - regex: tags, - input: Inputs.taggedEmojis) +// let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"# // disabled due to \b being unusably slow +// let taggedEmojis = CrossBenchmark( +// baseName: "TaggedEmojis", +// regex: tags, +// input: Inputs.taggedEmojis) // Now actually matching emojis let emoji = #"(๐Ÿ˜ƒ|๐Ÿ˜€|๐Ÿ˜ณ|๐Ÿ˜ฒ|๐Ÿ˜ฆ|๐Ÿ˜Š|๐Ÿ™Š|๐Ÿ˜˜|๐Ÿ˜|๐Ÿ˜ณ|๐Ÿ˜’){2,5}"# @@ -18,7 +18,7 @@ extension BenchmarkRunner { regex: emoji, input: Inputs.taggedEmojis) - // 
taggedEmojis.register(&self) // disabled due to \b being unusably slow + // taggedEmojis.register(&self) emojiRegex.register(&self) } } diff --git a/Sources/RegexBenchmark/Utils/Stats.swift b/Sources/RegexBenchmark/Utils/Stats.swift index c5c46eef9..0cc9156a4 100644 --- a/Sources/RegexBenchmark/Utils/Stats.swift +++ b/Sources/RegexBenchmark/Utils/Stats.swift @@ -3,10 +3,10 @@ import Foundation enum Stats {} extension Stats { - // 500µs, maybe this should be a % of the runtime for each benchmark? - static let maxAllowedStdev = 500e-6 + // Maximum allowed standard deviation is 5% of the median runtime + static let maxAllowedStdev = 0.05 - static func tTest(_ a: BenchmarkResult, _ b: BenchmarkResult) -> Bool { + static func tTest(_ a: Measurement, _ b: Measurement) -> Bool { // Student's t-test // Since we should generally have similar variances across runs let n1 = Double(a.samples) @@ -18,3 +18,9 @@ extension Stats { return abs(tVal) > 2 } } + +extension BenchmarkResult { + var runtimeIsTooVariant: Bool { + runtime.stdev > Stats.maxAllowedStdev * runtime.median.seconds + } +} diff --git a/Sources/RegexBuilder/CMakeLists.txt b/Sources/RegexBuilder/CMakeLists.txt index c2a0d9738..77a8340ce 100644 --- a/Sources/RegexBuilder/CMakeLists.txt +++ b/Sources/RegexBuilder/CMakeLists.txt @@ -1,9 +1,10 @@ add_library(RegexBuilder + Algorithms.swift Anchor.swift Builder.swift + CharacterClass.swift DSL.swift - Match.swift Variadics.swift) target_compile_options(RegexBuilder PRIVATE -enable-library-evolution diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index ea52c28f3..08c7d347e 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -15,27 +15,39 @@ @available(SwiftStdlib 5.7, *) public struct CharacterClass { internal var ccc: DSLTree.CustomCharacterClass + /// The builtin character class, if this CharacterClass is representable by one + internal var builtin: 
DSLTree.Atom.CharacterClass? init(_ ccc: DSLTree.CustomCharacterClass) { self.ccc = ccc + self.builtin = nil } - init(unconverted atom: DSLTree._AST.Atom) { - self.ccc = .init(members: [.atom(.unconverted(atom))]) + init(builtin: DSLTree.Atom.CharacterClass) { + self.ccc = .init(members: [.atom(.characterClass(builtin))]) + self.builtin = builtin } } @available(SwiftStdlib 5.7, *) extension CharacterClass: RegexComponent { public var regex: Regex { - _RegexFactory().customCharacterClass(ccc) + if let cc = builtin { + return _RegexFactory().characterClass(cc) + } else { + return _RegexFactory().customCharacterClass(ccc) + } } } @available(SwiftStdlib 5.7, *) extension CharacterClass { public var inverted: CharacterClass { - CharacterClass(ccc.inverted) + if let inv = builtin?.inverted { + return CharacterClass(builtin: inv) + } else { + return CharacterClass(ccc.inverted) + } } } @@ -50,15 +62,15 @@ extension RegexComponent where Self == CharacterClass { } public static var anyGraphemeCluster: CharacterClass { - .init(unconverted: ._anyGrapheme) + .init(builtin: .anyGrapheme) } public static var whitespace: CharacterClass { - .init(unconverted: ._whitespace) + .init(builtin: .whitespace) } public static var digit: CharacterClass { - .init(unconverted: ._digit) + .init(builtin: .digit) } public static var hexDigit: CharacterClass { @@ -70,19 +82,19 @@ extension RegexComponent where Self == CharacterClass { } public static var horizontalWhitespace: CharacterClass { - .init(unconverted: ._horizontalWhitespace) + .init(builtin: .horizontalWhitespace) } public static var newlineSequence: CharacterClass { - .init(unconverted: ._newlineSequence) + .init(builtin: .newlineSequence) } public static var verticalWhitespace: CharacterClass { - .init(unconverted: ._verticalWhitespace) + .init(builtin: .verticalWhitespace) } public static var word: CharacterClass { - .init(unconverted: ._word) + .init(builtin: .word) } } diff --git a/Sources/RegexBuilder/Variadics.swift 
b/Sources/RegexBuilder/Variadics.swift index c1e380144..7336f0a30 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -705,10 +705,10 @@ extension Optionally { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -741,10 +741,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -768,10 +768,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -793,11 +793,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_disfavoredOverload @@ -816,10 +816,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring, R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -873,10 +873,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -898,10 +898,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -921,11 +921,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -942,10 +942,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -999,10 +999,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1024,10 +1024,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1047,11 +1047,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1068,10 +1068,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1125,10 +1125,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1150,10 +1150,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3), Component.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1173,11 +1173,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1194,10 +1194,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1251,10 +1251,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1276,10 +1276,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1299,11 +1299,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1320,10 +1320,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1377,10 +1377,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1402,10 +1402,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1425,11 +1425,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1446,10 +1446,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1503,10 +1503,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1528,10 +1528,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1551,11 +1551,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1572,10 +1572,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1629,10 +1629,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1654,10 +1654,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1677,11 +1677,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1698,10 +1698,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1755,10 +1755,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1780,10 +1780,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1803,11 +1803,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1824,10 +1824,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1881,10 +1881,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1906,10 +1906,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1929,11 +1929,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1950,10 +1950,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -2007,10 +2007,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -2032,10 +2032,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -2055,11 +2055,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -2076,10 +2076,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2124,10 +2124,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2147,10 +2147,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2170,10 +2170,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3), Component.RegexOutput == (W, C1, C2, C3) { let factory = 
makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2193,10 +2193,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2216,10 +2216,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2239,10 +2239,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2262,10 +2262,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7), Component.RegexOutput == (W, 
C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2285,10 +2285,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2308,10 +2308,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2331,10 +2331,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -3142,41 +3142,41 @@ extension Capture { @_disfavoredOverload @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ 
componentBuilder: () -> R ) where RegexOutput == (Substring, W), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_disfavoredOverload @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_disfavoredOverload @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_disfavoredOverload @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3185,22 +3185,22 @@ extension TryCapture { @_disfavoredOverload @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_disfavoredOverload @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3272,38 +3272,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - 
@RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3311,21 +3311,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3397,38 +3397,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3436,21 +3436,21 @@ extension 
Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3522,38 +3522,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, 
C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3561,21 +3561,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3647,38 +3647,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + 
self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3686,21 +3686,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3772,38 +3772,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - 
@RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3811,21 +3811,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3897,38 +3897,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - 
self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3936,21 +3936,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4022,38 +4022,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component(), 
reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4061,21 +4061,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4147,38 +4147,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, 
C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4186,21 +4186,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4272,38 +4272,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == 
(W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4311,21 +4311,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4397,38 +4397,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, 
NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4436,21 +4436,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index faab75762..8ddaee145 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -406,10 +406,10 @@ struct VariadicsGenerator: ParsableCommand { @_alwaysEmitIntoClient public init<\(params.genericParams)>( _ behavior: RegexRepetitionBehavior? 
= nil, - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(params.whereClauseForInit) { let factory = makeFactory() - self.init(factory.\(kind.astQuantifierAmount)(component(), behavior)) + self.init(factory.\(kind.astQuantifierAmount)(componentBuilder(), behavior)) } } @@ -436,7 +436,7 @@ struct VariadicsGenerator: ParsableCommand { let groupName = "Local" func node(builder: Bool) -> String { """ - component\(builder ? "()" : "") + component\(builder ? "Builder()" : "") """ } @@ -478,7 +478,7 @@ struct VariadicsGenerator: ParsableCommand { \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams)>( - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(whereClauseForInit) { let factory = makeFactory() self.init(factory.atomicNonCapturing(\(node(builder: true)))) @@ -514,11 +514,11 @@ struct VariadicsGenerator: ParsableCommand { @_alwaysEmitIntoClient public init<\(params.genericParams)>( count: Int, - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(params.whereClauseForInit) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } \(params.disfavored)\ @@ -537,10 +537,10 @@ struct VariadicsGenerator: ParsableCommand { public init<\(params.genericParams), R: RangeExpression>( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(params.repeatingWhereClause) { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( - @\(concatBuilderName) _ component: () -> R + @\(concatBuilderName) _ componentBuilder: () -> R ) \(whereClauseRaw) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams)>( as reference: Reference, - @\(concatBuilderName) _ component: () -> R + @\(concatBuilderName) _ componentBuilder: () -> R ) \(whereClauseRaw) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( as reference: Reference, - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -763,22 +763,22 @@ struct VariadicsGenerator: ParsableCommand { \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> 
NewCapture? ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( as reference: Reference, - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } diff --git a/Sources/_RegexParser/CMakeLists.txt b/Sources/_RegexParser/CMakeLists.txt index 48856b453..1df1b767c 100644 --- a/Sources/_RegexParser/CMakeLists.txt +++ b/Sources/_RegexParser/CMakeLists.txt @@ -9,13 +9,15 @@ add_library(_RegexParser AST/Group.swift AST/MatchingOptions.swift AST/Quantification.swift + Parse/CaptureList.swift Parse/CaptureStructure.swift Parse/CharacterPropertyClassification.swift + Parse/CompilerInterface.swift Parse/DelimiterLexing.swift Parse/Diagnostics.swift Parse/LexicalAnalysis.swift - Parse/Mocking.swift Parse/Parse.swift + Parse/Sema.swift Parse/Source.swift Parse/SourceLocation.swift Parse/SyntaxOptions.swift diff --git a/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift b/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift index dd142f016..4d86f9d93 100644 --- a/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift +++ b/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift @@ -31,7 +31,7 @@ public struct Delimiter: Hashable { switch kind { case .forwardSlash: return poundCount > 0 - case .experimental, .reSingleQuote, .rxSingleQuote: + case .experimental: return false } } @@ -47,15 +47,11 @@ extension Delimiter { enum Kind: Hashable, CaseIterable { case forwardSlash case experimental - case reSingleQuote - case rxSingleQuote var 
openingAndClosing: (opening: String, closing: String) { switch self { case .forwardSlash: return ("/", "/") case .experimental: return ("#|", "|#") - case .reSingleQuote: return ("re'", "'") - case .rxSingleQuote: return ("rx'", "'") } } var opening: String { openingAndClosing.opening } @@ -67,7 +63,7 @@ extension Delimiter { switch self { case .forwardSlash: return true - case .experimental, .reSingleQuote, .rxSingleQuote: + case .experimental: return false } } @@ -150,14 +146,6 @@ fileprivate struct DelimiterLexer { slice(at: cursor, count) } - /// Return the slice of `count` bytes preceding the current cursor, or `nil` - /// if there are fewer than `count` bytes before the cursor. - func sliceBehind(_ count: Int) -> UnsafeRawBufferPointer? { - let priorCursor = cursor - count - guard priorCursor >= start else { return nil } - return slice(at: priorCursor, count) - } - /// Advance the cursor `n` bytes. mutating func advanceCursor(_ n: Int = 1) { cursor += n @@ -186,86 +174,6 @@ fileprivate struct DelimiterLexer { return true } - /// Attempt to skip over a closing delimiter character that is unlikely to be - /// the actual closing delimiter. - mutating func trySkipDelimiter(_ delimiter: Delimiter) { - // Only the closing `'` for re'...'/rx'...' can potentially be skipped over. - switch delimiter.kind { - case .forwardSlash, .experimental: - return - case .reSingleQuote, .rxSingleQuote: - break - } - guard load() == ascii("'") else { return } - - /// Need to look for a prefix of `(?`, `(?(`, `\k`, `\g`, `(?C`, as those - /// are the cases that could use single quotes. Note that none of these - /// would be valid regex endings anyway. - let calloutPrefix = "(?C" - let prefix = ["(?", "(?(", #"\k"#, #"\g"#, calloutPrefix].first { prior in - guard let priorSlice = sliceBehind(prior.utf8.count), - priorSlice.elementsEqual(prior.utf8) - else { return false } - - // Make sure the slice isn't preceded by a '\', as that invalidates this - // analysis. 
- if let prior = sliceBehind(priorSlice.count + 1) { - return prior[0] != ascii("\\") - } - return true - } - guard let prefix = prefix else { return } - let isCallout = prefix == calloutPrefix - - func isPossiblyGroupReference(_ c: UInt8) -> Bool { - // If this is an ASCII character, make sure it's for a group name. Leave - // other UTF-8 encoded scalars alone, this should at least catch cases - // where we run into a symbol such as `{`, `.`, `;` that would indicate - // we've likely advanced out of the bounds of the regex. - let scalar = UnicodeScalar(c) - guard scalar.isASCII else { return true } - switch scalar { - // Include '-' and '+' which may be used in recursion levels and relative - // references. - case "A"..."Z", "a"..."z", "0"..."9", "_", "-", "+": - return true - default: - return false - } - } - - // Make a note of the current lexing position, as we may need to revert - // back to it. - let originalCursor = cursor - advanceCursor() - - // Try skip over what would be the contents of a group identifier/reference. - while let next = load() { - // Found the ending, we're done. Return so we can continue to lex to the - // real delimiter. - if next == ascii("'") { - advanceCursor() - return - } - - // If this isn't a callout, make sure we have something that could be a - // group reference. We limit the character set here to improve diagnostic - // behavior in the case where the literal is actually unterminated. We - // ideally don't want to go wandering off into Swift source code. We can't - // do the same for callouts, as they take arbitrary strings. - guard isCallout || isPossiblyGroupReference(next) else { break } - do { - try advance() - } catch { - break - } - } - // We bailed out, either because we ran into something that didn't look like - // an identifier, or we reached the end of the line. Revert back to the - // original guess of delimiter. 
- cursor = originalCursor - } - /// Attempt to eat a particular closing delimiter, returning the contents of /// the literal, and ending pointer, or `nil` if this is not a delimiter /// ending. @@ -401,10 +309,6 @@ fileprivate struct DelimiterLexer { } } while true { - // Check to see if we're at a character that looks like a delimiter, but - // likely isn't. In such a case, we can attempt to skip over it. - trySkipDelimiter(delimiter) - // Try to lex the closing delimiter. if let (contents, end) = try tryEatEnding(delimiter, contentsStart: contentsStart) { diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index 0aae031d5..d9b6f23a0 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -672,9 +672,7 @@ fileprivate func defaultSyntaxOptions( return [.multilineCompilerLiteral, .extendedSyntax] } return .traditional - case .reSingleQuote: - return .traditional - case .experimental, .rxSingleQuote: + case .experimental: return .experimental } } diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index e8c92f2b5..e0a6c7465 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -22,17 +22,19 @@ extension Compiler { /// This is used to determine whether to apply initial options. 
var hasEmittedFirstMatchableAtom = false - private let compileOptions: CompileOptions + private let compileOptions: _CompileOptions fileprivate var optimizationsEnabled: Bool { !compileOptions.contains(.disableOptimizations) } init( options: MatchingOptions, - compileOptions: CompileOptions, + compileOptions: _CompileOptions, captureList: CaptureList ) { self.options = options self.compileOptions = compileOptions self.builder.captureList = captureList + self.builder.enableTracing = compileOptions.contains(.enableTracing) + self.builder.enableMetrics = compileOptions.contains(.enableMetrics) } } } @@ -74,6 +76,9 @@ fileprivate extension Compiler.ByteCodeGen { emitMatchScalar(s) } + case let .characterClass(cc): + emitCharacterClass(cc) + case let .assertion(kind): try emitAssertion(kind) @@ -81,7 +86,8 @@ fileprivate extension Compiler.ByteCodeGen { try emitBackreference(ref.ast) case let .symbolicReference(id): - builder.buildUnresolvedReference(id: id) + builder.buildUnresolvedReference( + id: id, isScalarMode: options.semanticLevel == .unicodeScalar) case let .changeMatchingOptions(optionSequence): if !hasEmittedFirstMatchableAtom { @@ -109,7 +115,7 @@ fileprivate extension Compiler.ByteCodeGen { } // Fast path for eliding boundary checks for an all ascii quoted literal - if optimizationsEnabled && s.allSatisfy(\.isASCII) { + if optimizationsEnabled && s.allSatisfy(\.isASCII) && !s.isEmpty { let lastIdx = s.unicodeScalars.indices.last! 
for idx in s.unicodeScalars.indices { let boundaryCheck = idx == lastIdx @@ -140,155 +146,34 @@ fileprivate extension Compiler.ByteCodeGen { guard let i = n.value else { throw Unreachable("Expected a value") } - builder.buildBackreference(.init(i)) + builder.buildBackreference( + .init(i), isScalarMode: options.semanticLevel == .unicodeScalar) case .named(let name): - try builder.buildNamedReference(name) + try builder.buildNamedReference( + name, isScalarMode: options.semanticLevel == .unicodeScalar) case .relative: throw Unsupported("Backreference kind: \(ref)") } } - mutating func emitStartOfLine() { - builder.buildAssert { [semanticLevel = options.semanticLevel] - (_, _, input, pos, subjectBounds) in - if pos == subjectBounds.lowerBound { return true } - switch semanticLevel { - case .graphemeCluster: - return input[input.index(before: pos)].isNewline - case .unicodeScalar: - return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline - } - } - } - - mutating func emitEndOfLine() { - builder.buildAssert { [semanticLevel = options.semanticLevel] - (_, _, input, pos, subjectBounds) in - if pos == subjectBounds.upperBound { return true } - switch semanticLevel { - case .graphemeCluster: - return input[pos].isNewline - case .unicodeScalar: - return input.unicodeScalars[pos].isNewline - } - } - } - mutating func emitAssertion( _ kind: DSLTree.Atom.Assertion ) throws { - // FIXME: Depends on API model we have... We may want to - // think through some of these with API interactions in mind - // - // This might break how we use `bounds` for both slicing - // and things like `firstIndex`, that is `firstIndex` may - // need to supply both a slice bounds and a per-search bounds. 
- switch kind { - case .startOfSubject: - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.lowerBound - } - - case .endOfSubjectBeforeNewline: - builder.buildAssert { [semanticLevel = options.semanticLevel] - (_, _, input, pos, subjectBounds) in - if pos == subjectBounds.upperBound { return true } - switch semanticLevel { - case .graphemeCluster: - return input.index(after: pos) == subjectBounds.upperBound - && input[pos].isNewline - case .unicodeScalar: - return input.unicodeScalars.index(after: pos) == subjectBounds.upperBound - && input.unicodeScalars[pos].isNewline - } - } - - case .endOfSubject: - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.upperBound - } - - case .resetStartOfMatch: - // FIXME: Figure out how to communicate this out + if kind == .resetStartOfMatch { throw Unsupported(#"\K (reset/keep assertion)"#) - - case .firstMatchingPositionInSubject: - // TODO: We can probably build a nice model with API here - - // FIXME: This needs to be based on `searchBounds`, - // not the `subjectBounds` given as an argument here - builder.buildAssert { (_, _, input, pos, subjectBounds) in false } - - case .textSegment: - builder.buildAssert { (_, _, input, pos, _) in - // FIXME: Grapheme or word based on options - input.isOnGraphemeClusterBoundary(pos) - } - - case .notTextSegment: - builder.buildAssert { (_, _, input, pos, _) in - // FIXME: Grapheme or word based on options - !input.isOnGraphemeClusterBoundary(pos) - } - - case .startOfLine: - emitStartOfLine() - - case .endOfLine: - emitEndOfLine() - - case .caretAnchor: - if options.anchorsMatchNewlines { - emitStartOfLine() - } else { - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.lowerBound - } - } - - case .dollarAnchor: - if options.anchorsMatchNewlines { - emitEndOfLine() - } else { - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.upperBound - } - } - - case 
.wordBoundary: - builder.buildAssert { [options] - (cache, maxIndex, input, pos, subjectBounds) in - if options.usesSimpleUnicodeBoundaries { - // TODO: How should we handle bounds? - return _CharacterClassModel.word.isBoundary( - input, - at: pos, - bounds: subjectBounds, - with: options - ) - } else { - return input.isOnWordBoundary(at: pos, using: &cache, &maxIndex) - } - } - - case .notWordBoundary: - builder.buildAssert { [options] - (cache, maxIndex, input, pos, subjectBounds) in - if options.usesSimpleUnicodeBoundaries { - // TODO: How should we handle bounds? - return !_CharacterClassModel.word.isBoundary( - input, - at: pos, - bounds: subjectBounds, - with: options - ) - } else { - return !input.isOnWordBoundary(at: pos, using: &cache, &maxIndex) - } - } } + builder.buildAssert( + by: kind, + options.anchorsMatchNewlines, + options.usesSimpleUnicodeBoundaries, + options.usesASCIIWord, + options.semanticLevel) } - + + mutating func emitCharacterClass(_ cc: DSLTree.Atom.CharacterClass) { + builder.buildMatchBuiltin(model: cc.asRuntimeModel(options)) + } + mutating func emitMatchScalar(_ s: UnicodeScalar) { assert(options.semanticLevel == .unicodeScalar) if options.isCaseInsensitive && s.properties.isCased { @@ -337,27 +222,16 @@ fileprivate extension Compiler.ByteCodeGen { case .graphemeCluster: builder.buildAdvance(1) case .unicodeScalar: - // TODO: builder.buildAdvanceUnicodeScalar(1) - builder.buildConsume { input, bounds in - input.unicodeScalars.index(after: bounds.lowerBound) - } + builder.buildAdvanceUnicodeScalar(1) } } mutating func emitAnyNonNewline() { switch options.semanticLevel { case .graphemeCluster: - builder.buildConsume { input, bounds in - input[bounds.lowerBound].isNewline - ? nil - : input.index(after: bounds.lowerBound) - } + builder.buildConsumeNonNewline() case .unicodeScalar: - builder.buildConsume { input, bounds in - input[bounds.lowerBound].isNewline - ? 
nil - : input.unicodeScalars.index(after: bounds.lowerBound) - } + builder.buildConsumeScalarNonNewline() } } @@ -591,6 +465,10 @@ fileprivate extension Compiler.ByteCodeGen { let minTrips = low assert((extraTrips ?? 1) >= 0) + if tryEmitFastQuant(child, updatedKind, minTrips, extraTrips) { + return + } + // The below is a general algorithm for bounded and unbounded // quantification. It can be specialized when the min // is 0 or 1, or when extra trips is 1 or unbounded. @@ -775,6 +653,80 @@ fileprivate extension Compiler.ByteCodeGen { builder.label(exit) } + /// Specialized quantification instruction for repetition of certain nodes in grapheme semantic mode + /// Allowed nodes are: + /// - single ascii scalar .char + /// - ascii .customCharacterClass + /// - single grapheme consuming built in character classes + /// - .any, .anyNonNewline, .dot + mutating func tryEmitFastQuant( + _ child: DSLTree.Node, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) -> Bool { + guard optimizationsEnabled + && minTrips <= QuantifyPayload.maxStorableTrips + && extraTrips ?? 
0 <= QuantifyPayload.maxStorableTrips + && options.semanticLevel == .graphemeCluster + && kind != .reluctant else { + return false + } + switch child { + case .customCharacterClass(let ccc): + // ascii only custom character class + guard let bitset = ccc.asAsciiBitset(options) else { + return false + } + builder.buildQuantify(bitset: bitset, kind, minTrips, extraTrips) + + case .atom(let atom): + switch atom { + case .char(let c): + // Single scalar ascii value character + guard let val = c._singleScalarAsciiValue else { + return false + } + builder.buildQuantify(asciiChar: val, kind, minTrips, extraTrips) + + case .any: + builder.buildQuantifyAny( + matchesNewlines: true, kind, minTrips, extraTrips) + case .anyNonNewline: + builder.buildQuantifyAny( + matchesNewlines: false, kind, minTrips, extraTrips) + case .dot: + builder.buildQuantifyAny( + matchesNewlines: options.dotMatchesNewline, kind, minTrips, extraTrips) + + case .characterClass(let cc): + // Custom character class that consumes a single grapheme + let model = cc.asRuntimeModel(options) + guard model.consumesSingleGrapheme else { + return false + } + builder.buildQuantify( + model: model, + kind, + minTrips, + extraTrips) + default: + return false + } + case .convertedRegexLiteral(let node, _): + return tryEmitFastQuant(node, kind, minTrips, extraTrips) + case .nonCapturingGroup(let groupKind, let node): + // .nonCapture nonCapturingGroups are ignored during compilation + guard groupKind.ast == .nonCapture else { + return false + } + return tryEmitFastQuant(node, kind, minTrips, extraTrips) + default: + return false + } + return true + } + /// Coalesce any adjacent scalar members in a custom character class together. /// This is required in order to produce correct grapheme matching behavior. 
func coalescingCustomCharacterClassMembers( @@ -907,10 +859,10 @@ fileprivate extension Compiler.ByteCodeGen { } else { builder.buildMatchAsciiBitset(asciiBitset) } - } else { - let consumer = try ccc.generateConsumer(options) - builder.buildConsume(by: consumer) + return } + let consumer = try ccc.generateConsumer(options) + builder.buildConsume(by: consumer) } mutating func emitConcatenation(_ children: [DSLTree.Node]) throws { @@ -1061,6 +1013,8 @@ extension DSLTree.Node { case .atom(let atom): switch atom { case .changeMatchingOptions, .assertion: return false + // Captures may be nil so backreferences may be zero length matches + case .backreference: return false default: return true } case .trivia, .empty: diff --git a/Sources/_StringProcessing/CMakeLists.txt b/Sources/_StringProcessing/CMakeLists.txt index 963b6074c..ba3e2e03c 100644 --- a/Sources/_StringProcessing/CMakeLists.txt +++ b/Sources/_StringProcessing/CMakeLists.txt @@ -31,42 +31,48 @@ add_library(_StringProcessing Engine/InstPayload.swift Engine/Instruction.swift Engine/MEBuilder.swift + Engine/MEBuiltins.swift Engine/MECapture.swift Engine/MEProgram.swift + Engine/MEQuantify.swift + Engine/Metrics.swift Engine/Processor.swift - Engine/Register.swift + Engine/Registers.swift Engine/Structuralize.swift Engine/Tracing.swift Regex/AnyRegexOutput.swift Regex/ASTConversion.swift Regex/Core.swift - Regex/DSLConsumers.swift + Regex/CustomComponents.swift Regex/DSLTree.swift Regex/Match.swift Regex/Options.swift Unicode/CaseConversion.swift Unicode/CharacterProps.swift Unicode/Comparison.swift - Unicode/Data.swift Unicode/Decoding.swift Unicode/Encodings.swift Unicode/Formatting.swift - Unicode/Graphemes.swift Unicode/NecessaryEvils.swift - Unicode/Normalization.swift + Unicode/NFC.swift Unicode/NumberParsing.swift Unicode/ScalarProps.swift Unicode/Transcoding.swift Unicode/UCD.swift Unicode/Validation.swift + Unicode/WordBreaking.swift + Utility/AsciiBitset.swift Utility/ASTBuilder.swift + 
Utility/Misc.swift Utility/Protocols.swift + Utility/RegexFactory.swift Utility/Traced.swift Utility/TypedIndex.swift Utility/TypedInt.swift + Utility/TypeVerification.swift + _CharacterClassModel.swift ByteCodeGen.swift Capture.swift - CharacterClass.swift Compiler.swift ConsumerInterface.swift Executor.swift diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index b8daa8b21..34b0962d8 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -16,7 +16,7 @@ class Compiler { // TODO: Or are these stored on the tree? var options = MatchingOptions() - private var compileOptions: CompileOptions = .default + private var compileOptions: _CompileOptions = .default init(ast: AST) { self.tree = ast.dslTree @@ -26,7 +26,7 @@ class Compiler { self.tree = tree } - init(tree: DSLTree, compileOptions: CompileOptions) { + init(tree: DSLTree, compileOptions: _CompileOptions) { self.tree = tree self.compileOptions = compileOptions } @@ -107,10 +107,15 @@ func _compileRegex( return Executor(program: program) } -extension Compiler { - struct CompileOptions: OptionSet { - let rawValue: Int - static let disableOptimizations = CompileOptions(rawValue: 1) - static let `default`: CompileOptions = [] +@_spi(RegexBenchmark) +public struct _CompileOptions: OptionSet { + public let rawValue: Int + public init(rawValue: Int) { + self.rawValue = rawValue } + + public static let disableOptimizations = _CompileOptions(rawValue: 1 << 0) + public static let enableTracing = _CompileOptions(rawValue: 1 << 1) + public static let enableMetrics = _CompileOptions(rawValue: 1 << 2) + public static let `default`: _CompileOptions = [] } diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 083781120..3a2731b0a 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -162,6 +162,8 @@ 
extension DSLTree.Atom { case .assertion: // TODO: We could handle, should this be total? return nil + case .characterClass(let cc): + return cc.generateConsumer(opts) case .backreference: // TODO: Should we handle? @@ -182,6 +184,15 @@ extension DSLTree.Atom { } } +extension DSLTree.Atom.CharacterClass { + func generateConsumer(_ opts: MatchingOptions) -> MEProgram.ConsumeFunction { + let model = asRuntimeModel(opts) + return { input, bounds in + model.matches(in: input, at: bounds.lowerBound) + } + } +} + extension String { /// Compares this string to `other` using the loose matching rule UAX44-LM2, /// which ignores case, whitespace, underscores, and nearly all medial @@ -269,16 +280,6 @@ extension AST.Atom { func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction? { - // TODO: Wean ourselves off of this type... - if let cc = self.characterClass?.withMatchLevel( - opts.matchLevel - ) { - return { input, bounds in - // FIXME: should we worry about out of bounds? 
- cc.matches(in: input, at: bounds.lowerBound, with: opts) - } - } - switch kind { case let .scalar(s): assertionFailure( @@ -312,8 +313,11 @@ extension AST.Atom { case .caretAnchor, .dollarAnchor: // handled in emitAssertion return nil + case .escaped: + // handled in emitAssertion and emitCharacterClass + return nil - case .scalarSequence, .escaped, .keyboardControl, .keyboardMeta, + case .scalarSequence, .keyboardControl, .keyboardMeta, .keyboardMetaControl, .backreference, .subpattern, .callout, .backtrackingDirective, .changeMatchingOptions, .invalid: // FIXME: implement diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index 355702ac1..3ebb060c9 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -10,14 +10,12 @@ //===----------------------------------------------------------------------===// extension Processor { - - // TODO: What all do we want to save? Configurable? - // TODO: Do we need to save any registers? - // TODO: Is this the right place to do function stack unwinding? struct SavePoint { var pc: InstructionAddress var pos: Position? - + // Quantifiers may store a range of positions to restore to + var rangeStart: Position? + var rangeEnd: Position? // The end of the call stack, so we can slice it off // when failing inside a call. // @@ -43,7 +41,35 @@ extension Processor { intRegisters: [Int], PositionRegister: [Input.Index] ) { - (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters) + return (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters) + } + + var rangeIsEmpty: Bool { rangeEnd == nil } + + mutating func updateRange(newEnd: Input.Index) { + if rangeStart == nil { + rangeStart = newEnd + } + rangeEnd = newEnd + } + + /// Move the next range position into pos, and remove it from the range + mutating func takePositionFromRange(_ input: Input) { + assert(!rangeIsEmpty) + pos = rangeEnd! 
+ shrinkRange(input) + } + + /// Shrink the range of the save point by one index, essentially dropping the last index + mutating func shrinkRange(_ input: Input) { + assert(!rangeIsEmpty) + if rangeEnd == rangeStart { + // The range is now empty + rangeStart = nil + rangeEnd = nil + } else { + input.formIndex(before: &rangeEnd!) + } } } @@ -54,6 +80,21 @@ extension Processor { SavePoint( pc: pc, pos: addressOnly ? nil : currentPosition, + rangeStart: nil, + rangeEnd: nil, + stackEnd: .init(callStack.count), + captureEnds: storedCaptures, + intRegisters: registers.ints, + posRegisters: registers.positions) + } + + func startQuantifierSavePoint() -> SavePoint { + // Restores to the instruction AFTER the current quantifier instruction + SavePoint( + pc: controller.pc + 1, + pos: nil, + rangeStart: nil, + rangeEnd: nil, stackEnd: .init(callStack.count), captureEnds: storedCaptures, intRegisters: registers.ints, diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift index 12f65a777..6af973919 100644 --- a/Sources/_StringProcessing/Engine/Consume.swift +++ b/Sources/_StringProcessing/Engine/Consume.swift @@ -21,7 +21,8 @@ extension Engine { subjectBounds: bounds, searchBounds: bounds, matchMode: matchMode, - isTracingEnabled: enableTracing) + isTracingEnabled: enableTracing, + shouldMeasureMetrics: enableMetrics) } func makeFirstMatchProcessor( @@ -35,7 +36,8 @@ extension Engine { subjectBounds: subjectBounds, searchBounds: searchBounds, matchMode: .partialFromFront, - isTracingEnabled: enableTracing) + isTracingEnabled: enableTracing, + shouldMeasureMetrics: enableMetrics) } } diff --git a/Sources/_StringProcessing/Engine/Engine.swift b/Sources/_StringProcessing/Engine/Engine.swift index 9e67e4639..a5cb11bd6 100644 --- a/Sources/_StringProcessing/Engine/Engine.swift +++ b/Sources/_StringProcessing/Engine/Engine.swift @@ -13,25 +13,16 @@ // But, we can play around with this. 
struct Engine { - var program: MEProgram + let program: MEProgram // TODO: Pre-allocated register banks var instructions: InstructionList { program.instructions } - var enableTracing: Bool { - get { program.enableTracing } - set { program.enableTracing = newValue } - } + var enableTracing: Bool { program.enableTracing } + var enableMetrics: Bool { program.enableMetrics } - init( - _ program: MEProgram, - enableTracing: Bool? = nil - ) { - var program = program - if let t = enableTracing { - program.enableTracing = t - } + init(_ program: MEProgram) { self.program = program } } diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 42fb86913..f6d5bfcc7 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -9,6 +9,7 @@ // //===----------------------------------------------------------------------===// +@_implementationOnly import _RegexParser extension Instruction { /// An instruction's payload packs operands and destination @@ -51,7 +52,6 @@ extension Instruction.Payload { case element(ElementRegister) case consumer(ConsumeFunctionRegister) case bitset(AsciiBitsetRegister) - case assertion(AssertionFunctionRegister) case addr(InstructionAddress) case capture(CaptureRegister) @@ -196,11 +196,19 @@ extension Instruction.Payload { interpret() } - init(distance: Distance) { - self.init(distance) + init(distance: Distance, isScalarDistance: Bool = false) { + self.init(isScalarDistance ? 1 : 0, distance) } - var distance: Distance { - interpret() + var distance: (isScalarDistance: Bool, Distance) { + let pair: (UInt64, Distance) = interpretPair() + return (isScalarDistance: pair.0 == 1, pair.1) + } + + init(isScalar: Bool) { + self.init(isScalar ? 
1 : 0) + } + var isScalar: Bool { + self.rawValue == 1 } init(bool: BoolRegister) { @@ -225,7 +233,7 @@ extension Instruction.Payload { let pair: (UInt64, AsciiBitsetRegister) = interpretPair() return (isScalar: pair.0 == 1, pair.1) } - + init(consumer: ConsumeFunctionRegister) { self.init(consumer) } @@ -233,13 +241,6 @@ extension Instruction.Payload { interpret() } - init(assertion: AssertionFunctionRegister) { - self.init(assertion) - } - var assertion: AssertionFunctionRegister { - interpret() - } - init(addr: InstructionAddress) { self.init(addr) } @@ -247,6 +248,13 @@ extension Instruction.Payload { interpret() } + init(capture: CaptureRegister, isScalarMode: Bool) { + self.init(isScalarMode ? 1 : 0, capture) + } + var captureAndMode: (isScalarMode: Bool, CaptureRegister) { + let pair: (UInt64, CaptureRegister) = interpretPair() + return (pair.0 == 1, pair.1) + } init(capture: CaptureRegister) { self.init(capture) } @@ -254,7 +262,6 @@ extension Instruction.Payload { interpret() } - // MARK: Packed operand payloads init(immediate: UInt64, int: IntRegister) { @@ -339,5 +346,270 @@ extension Instruction.Payload { ) { interpretPair() } + + // MARK: Struct payloads + + init(_ model: _CharacterClassModel) { + self.init(CharacterClassPayload(model).rawValue) + } + var characterClassPayload: CharacterClassPayload{ + return CharacterClassPayload(rawValue: rawValue & _payloadMask) + } + + init(assertion payload: AssertionPayload) { + self.init(rawValue: payload.rawValue) + } + var assertion: AssertionPayload { + AssertionPayload.init(rawValue: rawValue & _payloadMask) + } + init(quantify: QuantifyPayload) { + self.init(quantify.rawValue) + } + var quantify: QuantifyPayload { + return QuantifyPayload(rawValue: rawValue & _payloadMask) + } +} + +// MARK: Struct definitions +struct QuantifyPayload: RawRepresentable { + let rawValue: UInt64 + enum PayloadType: UInt64 { + case bitset = 0 + case asciiChar = 1 + case any = 2 + case builtin = 4 + } + + // Future work: 
optimize this layout -> payload type should be a fast switch + // The top 8 bits are reserved for the opcode so we have 56 bits to work with + // b55-b38 - Unused + // b38-b35 - Payload type (one of 4 types, stored on 3 bits) + // b35-b27 - minTrips (8 bit int) + // b27-b18 - extraTrips (8 bit value, one bit for nil) + // b18-b16 - Quantification type (one of three types) + // b16-b0 - Payload value (depends on payload type) + static var quantKindShift: UInt64 { 16 } + static var extraTripsShift: UInt64 { 18 } + static var minTripsShift: UInt64 { 27 } + static var typeShift: UInt64 { 35 } + static var maxStorableTrips: UInt64 { (1 << 8) - 1 } + + var quantKindMask: UInt64 { 3 } + var extraTripsMask: UInt64 { 0x1FF } + var minTripsMask: UInt64 { 0xFF } + var typeMask: UInt64 { 7 } + var payloadMask: UInt64 { 0xFF_FF } + + static func packInfoValues( + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int?, + _ type: PayloadType + ) -> UInt64 { + let kindVal: UInt64 + switch kind { + case .eager: + kindVal = 0 + case .reluctant: + kindVal = 1 + case .possessive: + kindVal = 2 + } + let extraTripsVal: UInt64 = extraTrips == nil ? 1 : UInt64(extraTrips!) << 1 + return (kindVal << QuantifyPayload.quantKindShift) + + (extraTripsVal << QuantifyPayload.extraTripsShift) + + (UInt64(minTrips) << QuantifyPayload.minTripsShift) + + (type.rawValue << QuantifyPayload.typeShift) + } + + init(rawValue: UInt64) { + self.rawValue = rawValue + assert(rawValue & _opcodeMask == 0) + } + + init( + bitset: AsciiBitsetRegister, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + assert(bitset.bits <= _payloadMask) + self.rawValue = bitset.bits + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .bitset) + } + + init( + asciiChar: UInt8, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? 
+ ) { + self.rawValue = UInt64(asciiChar) + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .asciiChar) + } + + init( + matchesNewlines: Bool, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + self.rawValue = (matchesNewlines ? 1 : 0) + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .any) + } + + init( + model: _CharacterClassModel, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + assert(model.cc.rawValue < 0xFF) + assert(model.matchLevel != .unicodeScalar) + let packedModel = model.cc.rawValue + + (model.isInverted ? 1 << 9 : 0) + + (model.isStrictASCII ? 1 << 10 : 0) + self.rawValue = packedModel + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .builtin) + } + + var type: PayloadType { + PayloadType(rawValue: (self.rawValue >> QuantifyPayload.typeShift) & 7)! + } + + var quantKind: AST.Quantification.Kind { + switch (self.rawValue >> QuantifyPayload.quantKindShift) & quantKindMask { + case 0: return .eager + case 1: return .reluctant + case 2: return .possessive + default: + fatalError("Unreachable") + } + } + + var minTrips: UInt64 { + (self.rawValue >> QuantifyPayload.minTripsShift) & minTripsMask + } + + var extraTrips: UInt64? { + let val = (self.rawValue >> QuantifyPayload.extraTripsShift) & extraTripsMask + if val == 1 { + return nil + } else { + return val >> 1 + } + } + + var bitset: AsciiBitsetRegister { + TypedInt(self.rawValue & payloadMask) + } + + var asciiChar: UInt8 { + UInt8(asserting: self.rawValue & payloadMask) + } + + var anyMatchesNewline: Bool { + (self.rawValue & 1) == 1 + } + + var builtin: _CharacterClassModel.Representation { + _CharacterClassModel.Representation(rawValue: self.rawValue & 0xFF)! 
+ } + var builtinIsInverted: Bool { + (self.rawValue >> 9) & 1 == 1 + } + var builtinIsStrict: Bool { + (self.rawValue >> 10) & 1 == 1 + } +} + +struct CharacterClassPayload: RawRepresentable { + let rawValue: UInt64 + // Layout: + // Top three bits are isInverted, isStrict, isScalar + // Lower 8 bits are _CCM.Representation + static var invertedBit: UInt64 { 1 << 55 } + static var strictASCIIBit: UInt64 { 1 << 54 } + static var scalarBit: UInt64 { 1 << 53 } + static var ccMask: UInt64 { 0xFF } + init(rawValue: UInt64) { + assert(rawValue & _opcodeMask == 0) + self.rawValue = rawValue + } + init(_ model: _CharacterClassModel) { + let invertedBit = model.isInverted ? CharacterClassPayload.invertedBit : 0 + let strictASCIIBit = model.isStrictASCII ? CharacterClassPayload.strictASCIIBit : 0 + let scalarBit = model.matchLevel == .unicodeScalar ? CharacterClassPayload.scalarBit : 0 + assert(model.cc.rawValue <= CharacterClassPayload.ccMask) + assert(model.cc.rawValue & invertedBit & strictASCIIBit & scalarBit == 0) // Sanity check + self.init(rawValue: model.cc.rawValue | invertedBit | strictASCIIBit | scalarBit) + } + + var isInverted: Bool { + self.rawValue & CharacterClassPayload.invertedBit != 0 + } + /// Represents if the given character class should strictly only match ascii values based on the options given + /// See Oniguruma options: (?D) (?\P) (?S) (?W) + var isStrictASCII: Bool { + self.rawValue & CharacterClassPayload.strictASCIIBit != 0 + } + var isScalarSemantics: Bool { + self.rawValue & CharacterClassPayload.scalarBit != 0 + } + var cc: _CharacterClassModel.Representation { + _CharacterClassModel.Representation.init( + rawValue: self.rawValue & CharacterClassPayload.ccMask).unsafelyUnwrapped + } } +struct AssertionPayload: RawRepresentable { + let rawValue: UInt64 + + init(rawValue: UInt64) { + self.rawValue = rawValue + assert(rawValue & _opcodeMask == 0) + } + + static var anchorBit: UInt64 { 1 << 55 } + static var boundaryBit: UInt64 { 1 << 54 } + 
static var strictASCIIWordBit: UInt64 { 1 << 53 } + static var isScalarBit: UInt64 { 1 << 52 } + static var assertionKindMask: UInt64 { 0xFF } + + init(_ assertion: DSLTree.Atom.Assertion, + _ anchorsMatchNewlines: Bool, + _ usesSimpleUnicodeBoundaries: Bool, + _ usesASCIIWord: Bool, + _ semanticLevel: MatchingOptions.SemanticLevel + ) { + // 4 bits of options + let anchorBit: UInt64 = anchorsMatchNewlines ? AssertionPayload.anchorBit : 0 + let boundaryBit: UInt64 = usesSimpleUnicodeBoundaries ? AssertionPayload.boundaryBit : 0 + let strictASCIIWordBit: UInt64 = usesASCIIWord ? AssertionPayload.strictASCIIWordBit : 0 + let isScalarBit: UInt64 = semanticLevel == .unicodeScalar ? AssertionPayload.isScalarBit : 0 + + // 8 bits for the assertion kind + // Future work: Optimize this layout + let kind = assertion.rawValue + assert(kind <= AssertionPayload.assertionKindMask) + assert(kind & anchorBit & boundaryBit & strictASCIIWordBit & isScalarBit == 0) + self.init(rawValue: kind | anchorBit | boundaryBit | strictASCIIWordBit | isScalarBit) + } + + var kind: DSLTree.Atom.Assertion { + return .init( + rawValue: self.rawValue & AssertionPayload.assertionKindMask).unsafelyUnwrapped + } + var anchorsMatchNewlines: Bool { self.rawValue & AssertionPayload.anchorBit != 0 } + var usesSimpleUnicodeBoundaries: Bool { + self.rawValue & AssertionPayload.boundaryBit != 0 + } + var usesASCIIWord: Bool { self.rawValue & AssertionPayload.strictASCIIWordBit != 0 } + var semanticLevel: MatchingOptions.SemanticLevel { + if self.rawValue & AssertionPayload.isScalarBit != 0 { + return .unicodeScalar + } else { + return .graphemeCluster + } + } +} diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index 8e1a1f294..21ab90a03 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -113,11 +113,19 @@ extension Instruction { /// - Boolean for if we should match by 
scalar value case matchBitset - /// TODO: builtin assertions and anchors - case builtinAssertion - - /// TODO: builtin character classes - case builtinCharacterClass + /// Match against a built-in character class + /// + /// matchBuiltin(_: CharacterClassPayload) + /// + /// Operand: the payload contains + /// - The character class + /// - If it is inverted + /// - If it strictly matches only ascii values + case matchBuiltin + + /// Matches any non newline character + /// Operand: If we are in scalar mode or not + case matchAnyNonNewline // MARK: Extension points @@ -127,16 +135,12 @@ extension Instruction { /// Operand: Consume function register to call. case consumeBy - /// Custom lookaround assertion operation. - /// Triggers a failure if customFunction returns false. + /// Lookaround assertion operation. Performs a zero width assertion based on + /// the assertion type and options stored in the payload /// - /// assert(_ customFunction: ( - /// input: Input, - /// currentPos: Position, - /// bounds: Range - /// ) -> Bool) + /// assert(_:AssertionPayload) /// - /// Operands: destination bool register, assert hook register + /// Operands: AssertionPayload containing assertion type and options case assertBy /// Custom value-creating consume operation. 
@@ -193,6 +197,13 @@ extension Instruction { /// case splitSaving + /// Fused quantify, execute, save instruction + /// Quantifies the stored instruction in an inner loop instead of looping through instructions in processor + /// Only quantifies specific nodes + /// + /// quantify(_:QuantifyPayload) + /// + case quantify /// Begin the given capture /// /// beginCapture(_:CapReg) diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 0b9a91726..4b623fbda 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -14,13 +14,16 @@ extension MEProgram { struct Builder { var instructions: [Instruction] = [] + + // Tracing + var enableTracing = false + var enableMetrics = false var elements = TypedSetVector<Input.Element, _ElementRegister>() var sequences = TypedSetVector<[Input.Element], _SequenceRegister>() var asciiBitsets: [DSLTree.CustomCharacterClass.AsciiBitset] = [] var consumeFunctions: [ConsumeFunction] = [] - var assertionFunctions: [AssertionFunction] = [] var transformFunctions: [TransformFunction] = [] var matcherFunctions: [MatcherFunction] = [] @@ -143,6 +146,19 @@ extension MEProgram.Builder { mutating func buildAdvance(_ n: Distance) { instructions.append(.init(.advance, .init(distance: n))) } + + mutating func buildAdvanceUnicodeScalar(_ n: Distance) { + instructions.append( + .init(.advance, .init(distance: n, isScalarDistance: true))) + } + + mutating func buildConsumeNonNewline() { + instructions.append(.init(.matchAnyNonNewline, .init(isScalar: false))) + } + + mutating func buildConsumeScalarNonNewline() { + instructions.append(.init(.matchAnyNonNewline, .init(isScalar: true))) + } mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) { instructions.append(.init( @@ -171,6 +187,11 @@ extension MEProgram.Builder { instructions.append(.init( .matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: true))) } + + mutating func
buildMatchBuiltin(model: _CharacterClassModel) { + instructions.append(.init( + .matchBuiltin, .init(model))) + } mutating func buildConsume( by p: @escaping MEProgram.ConsumeFunction @@ -180,10 +201,65 @@ extension MEProgram.Builder { } mutating func buildAssert( - by p: @escaping MEProgram.AssertionFunction + by kind: DSLTree.Atom.Assertion, + _ anchorsMatchNewlines: Bool, + _ usesSimpleUnicodeBoundaries: Bool, + _ usesASCIIWord: Bool, + _ semanticLevel: MatchingOptions.SemanticLevel ) { + let payload = AssertionPayload.init( + kind, + anchorsMatchNewlines, + usesSimpleUnicodeBoundaries, + usesASCIIWord, + semanticLevel) instructions.append(.init( - .assertBy, .init(assertion: makeAssertionFunction(p)))) + .assertBy, + .init(assertion: payload))) + } + + mutating func buildQuantify( + bitset: DSLTree.CustomCharacterClass.AsciiBitset, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(bitset: makeAsciiBitset(bitset), kind, minTrips, extraTrips)))) + } + + mutating func buildQuantify( + asciiChar: UInt8, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(asciiChar: asciiChar, kind, minTrips, extraTrips)))) + } + + mutating func buildQuantifyAny( + matchesNewlines: Bool, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(matchesNewlines: matchesNewlines, kind, minTrips, extraTrips)))) + } + + mutating func buildQuantify( + model: _CharacterClassModel, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? 
+ ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(model: model,kind, minTrips, extraTrips)))) } mutating func buildAccept() { @@ -233,22 +309,23 @@ extension MEProgram.Builder { } mutating func buildBackreference( - _ cap: CaptureRegister + _ cap: CaptureRegister, + isScalarMode: Bool ) { instructions.append( - .init(.backreference, .init(capture: cap))) + .init(.backreference, .init(capture: cap, isScalarMode: isScalarMode))) } - mutating func buildUnresolvedReference(id: ReferenceID) { - buildBackreference(.init(0)) + mutating func buildUnresolvedReference(id: ReferenceID, isScalarMode: Bool) { + buildBackreference(.init(0), isScalarMode: isScalarMode) unresolvedReferences[id, default: []].append(lastInstructionAddress) } - mutating func buildNamedReference(_ name: String) throws { + mutating func buildNamedReference(_ name: String, isScalarMode: Bool) throws { guard let index = captureList.indexOfCapture(named: name) else { throw RegexCompilationError.uncapturedReference } - buildBackreference(.init(index)) + buildBackreference(.init(index), isScalarMode: isScalarMode) } // TODO: Mutating because of fail address fixup, drop when @@ -306,7 +383,6 @@ extension MEProgram.Builder { regInfo.positions = nextPositionRegister.rawValue regInfo.bitsets = asciiBitsets.count regInfo.consumeFunctions = consumeFunctions.count - regInfo.assertionFunctions = assertionFunctions.count regInfo.transformFunctions = transformFunctions.count regInfo.matcherFunctions = matcherFunctions.count regInfo.captures = nextCaptureRegister.rawValue @@ -317,10 +393,11 @@ extension MEProgram.Builder { staticSequences: sequences.stored, staticBitsets: asciiBitsets, staticConsumeFunctions: consumeFunctions, - staticAssertionFunctions: assertionFunctions, staticTransformFunctions: transformFunctions, staticMatcherFunctions: matcherFunctions, registerInfo: regInfo, + enableTracing: enableTracing, + enableMetrics: enableMetrics, captureList: captureList, referencedCaptureOffsets: 
referencedCaptureOffsets, initialOptions: initialOptions) @@ -399,8 +476,10 @@ fileprivate extension MEProgram.Builder { throw RegexCompilationError.uncapturedReference } for use in uses { + let (isScalarMode, _) = instructions[use.rawValue].payload.captureAndMode instructions[use.rawValue] = - Instruction(.backreference, .init(capture: .init(offset))) + Instruction(.backreference, + .init(capture: .init(offset), isScalarMode: isScalarMode)) } } } @@ -466,12 +545,6 @@ extension MEProgram.Builder { defer { consumeFunctions.append(f) } return ConsumeFunctionRegister(consumeFunctions.count) } - mutating func makeAssertionFunction( - _ f: @escaping MEProgram.AssertionFunction - ) -> AssertionFunctionRegister { - defer { assertionFunctions.append(f) } - return AssertionFunctionRegister(assertionFunctions.count) - } mutating func makeTransformFunction( _ f: @escaping MEProgram.TransformFunction ) -> TransformRegister { diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index f791da37e..36a6043fe 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -1,13 +1,204 @@ - +@_implementationOnly import _RegexParser // For AssertionKind +extension Character { + var _isHorizontalWhitespace: Bool { + self.unicodeScalars.first?.isHorizontalWhitespace == true + } + var _isNewline: Bool { + self.unicodeScalars.first?.isNewline == true + } +} extension Processor { + mutating func matchBuiltin( + _ cc: _CharacterClassModel.Representation, + _ isInverted: Bool, + _ isStrictASCII: Bool, + _ isScalarSemantics: Bool + ) -> Bool { + guard let next = _doMatchBuiltin( + cc, + isInverted, + isStrictASCII, + isScalarSemantics + ) else { + signalFailure() + return false + } + currentPosition = next + return true + } + + func _doMatchBuiltin( + _ cc: _CharacterClassModel.Representation, + _ isInverted: Bool, + _ isStrictASCII: Bool, + _ isScalarSemantics: Bool + ) -> 
Input.Index? { + guard let char = load(), let scalar = loadScalar() else { + return nil + } + let asciiCheck = (char.isASCII && !isScalarSemantics) + || (scalar.isASCII && isScalarSemantics) + || !isStrictASCII - mutating func builtinAssertion() { - fatalError("TODO: assertions and anchors") + var matched: Bool + var next: Input.Index + switch (isScalarSemantics, cc) { + case (_, .anyGrapheme): + next = input.index(after: currentPosition) + case (_, .anyScalar): + next = input.unicodeScalars.index(after: currentPosition) + case (true, _): + next = input.unicodeScalars.index(after: currentPosition) + case (false, _): + next = input.index(after: currentPosition) + } + + switch cc { + case .any, .anyGrapheme: + matched = true + case .anyScalar: + if isScalarSemantics { + matched = true + } else { + matched = input.isOnGraphemeClusterBoundary(next) + } + case .digit: + if isScalarSemantics { + matched = scalar.properties.numericType != nil && asciiCheck + } else { + matched = char.isNumber && asciiCheck + } + case .horizontalWhitespace: + if isScalarSemantics { + matched = scalar.isHorizontalWhitespace && asciiCheck + } else { + matched = char._isHorizontalWhitespace && asciiCheck + } + case .verticalWhitespace: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + } else { + matched = char._isNewline && asciiCheck + } + case .newlineSequence: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + if matched && scalar == "\r" + && next != input.endIndex && input.unicodeScalars[next] == "\n" { + // Match a full CR-LF sequence even in scalar semantics + input.unicodeScalars.formIndex(after: &next) + } + } else { + matched = char._isNewline && asciiCheck + } + case .whitespace: + if isScalarSemantics { + matched = scalar.properties.isWhitespace && asciiCheck + } else { + matched = char.isWhitespace && asciiCheck + } + case .word: + if isScalarSemantics { + matched = scalar.properties.isAlphabetic && asciiCheck + } else { + matched = 
char.isWordCharacter && asciiCheck + } + } + + if isInverted { + matched.toggle() + } + + guard matched else { + return nil + } + return next + } + + func isAtStartOfLine(_ payload: AssertionPayload) -> Bool { + if currentPosition == subjectBounds.lowerBound { return true } + switch payload.semanticLevel { + case .graphemeCluster: + return input[input.index(before: currentPosition)].isNewline + case .unicodeScalar: + return input.unicodeScalars[input.unicodeScalars.index(before: currentPosition)].isNewline + } + } + + func isAtEndOfLine(_ payload: AssertionPayload) -> Bool { + if currentPosition == subjectBounds.upperBound { return true } + switch payload.semanticLevel { + case .graphemeCluster: + return input[currentPosition].isNewline + case .unicodeScalar: + return input.unicodeScalars[currentPosition].isNewline + } } - mutating func builtinCharacterClass() { - fatalError("TODO: character classes") + mutating func builtinAssert(by payload: AssertionPayload) throws -> Bool { + // Future work: Optimize layout and dispatch + switch payload.kind { + case .startOfSubject: return currentPosition == subjectBounds.lowerBound + + case .endOfSubjectBeforeNewline: + if currentPosition == subjectBounds.upperBound { return true } + switch payload.semanticLevel { + case .graphemeCluster: + return input.index(after: currentPosition) == subjectBounds.upperBound + && input[currentPosition].isNewline + case .unicodeScalar: + return input.unicodeScalars.index(after: currentPosition) == subjectBounds.upperBound + && input.unicodeScalars[currentPosition].isNewline + } + + case .endOfSubject: return currentPosition == subjectBounds.upperBound + + case .resetStartOfMatch: + fatalError("Unreachable, we should have thrown an error during compilation") + + case .firstMatchingPositionInSubject: + return currentPosition == searchBounds.lowerBound + + case .textSegment: return input.isOnGraphemeClusterBoundary(currentPosition) + + case .notTextSegment: return 
!input.isOnGraphemeClusterBoundary(currentPosition) + + case .startOfLine: + return isAtStartOfLine(payload) + case .endOfLine: + return isAtEndOfLine(payload) + + case .caretAnchor: + if payload.anchorsMatchNewlines { + return isAtStartOfLine(payload) + } else { + return currentPosition == subjectBounds.lowerBound + } + + case .dollarAnchor: + if payload.anchorsMatchNewlines { + return isAtEndOfLine(payload) + } else { + return currentPosition == subjectBounds.upperBound + } + + case .wordBoundary: + if payload.usesSimpleUnicodeBoundaries { + // TODO: How should we handle bounds? + return atSimpleBoundary(payload.usesASCIIWord, payload.semanticLevel) + } else { + return input.isOnWordBoundary(at: currentPosition, using: &wordIndexCache, &wordIndexMaxIndex) + } + + case .notWordBoundary: + if payload.usesSimpleUnicodeBoundaries { + // TODO: How should we handle bounds? + return !atSimpleBoundary(payload.usesASCIIWord, payload.semanticLevel) + } else { + return !input.isOnWordBoundary(at: currentPosition, using: &wordIndexCache, &wordIndexMaxIndex) + } + } } } diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 53243cd34..4bea21133 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -88,12 +88,6 @@ extension Processor { } } -extension Processor._StoredCapture: CustomStringConvertible { - var description: String { - return String(describing: self) - } -} - struct MECaptureList { var values: Array<Processor._StoredCapture> var referencedCaptureOffsets: [ReferenceID: Int] diff --git a/Sources/_StringProcessing/Engine/MEProgram.swift b/Sources/_StringProcessing/Engine/MEProgram.swift index d311b4465..67f5a8bc9 100644 --- a/Sources/_StringProcessing/Engine/MEProgram.swift +++ b/Sources/_StringProcessing/Engine/MEProgram.swift @@ -15,14 +15,6 @@ struct MEProgram { typealias Input = String typealias ConsumeFunction = (Input, Range<Input.Index>) -> Input.Index?
- typealias AssertionFunction = - ( - inout Set<String.Index>?, - inout String.Index?, - Input, - Input.Index, - Range<Input.Index> - ) throws -> Bool typealias TransformFunction = (Input, Processor._StoredCapture) throws -> Any? typealias MatcherFunction = @@ -34,14 +26,14 @@ struct MEProgram { var staticSequences: [[Input.Element]] var staticBitsets: [DSLTree.CustomCharacterClass.AsciiBitset] var staticConsumeFunctions: [ConsumeFunction] - var staticAssertionFunctions: [AssertionFunction] var staticTransformFunctions: [TransformFunction] var staticMatcherFunctions: [MatcherFunction] var registerInfo: RegisterInfo - var enableTracing: Bool = false - + var enableTracing: Bool + var enableMetrics: Bool + let captureList: CaptureList let referencedCaptureOffsets: [ReferenceID: Int] diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift new file mode 100644 index 000000000..9d17dc9bd --- /dev/null +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -0,0 +1,125 @@ +extension Processor { + func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { + var next: Input.Index? + switch payload.type { + case .bitset: + next = _doMatchBitset(registers[payload.bitset]) + case .asciiChar: + next = _doMatchScalar( + UnicodeScalar.init(_value: UInt32(payload.asciiChar)), true) + case .builtin: + // We only emit .quantify if it consumes a single character + next = _doMatchBuiltin( + payload.builtin, + payload.builtinIsInverted, + payload.builtinIsStrict, + false) + case .any: + let matched = currentPosition != input.endIndex + && (!input[currentPosition].isNewline || payload.anyMatchesNewline) + next = matched ?
input.index(after: currentPosition) : nil + } + return next + } + + /// Generic quantify instruction interpreter + /// - Handles .eager and .possessive + /// - Handles arbitrary minTrips and extraTrips + mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + var trips = 0 + var extraTrips = payload.extraTrips + var savePoint = startQuantifierSavePoint() + + while true { + if trips >= payload.minTrips { + if extraTrips == 0 { break } + extraTrips = extraTrips.map({$0 - 1}) + if payload.quantKind == .eager { + savePoint.updateRange(newEnd: currentPosition) + } + } + let next = _doQuantifyMatch(payload) + guard let idx = next else { break } + currentPosition = idx + trips += 1 + } + + if trips < payload.minTrips { + signalFailure() + return false + } + + if payload.quantKind == .eager && !savePoint.rangeIsEmpty { + // The last save point has saved the current position, so it's unneeded + savePoint.shrinkRange(input) + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) + } + } + return true + } + + /// Specialized quantify instruction interpreter for * + mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind == .eager + && payload.minTrips == 0 + && payload.extraTrips == nil) + var savePoint = startQuantifierSavePoint() + + while true { + savePoint.updateRange(newEnd: currentPosition) + let next = _doQuantifyMatch(payload) + guard let idx = next else { break } + currentPosition = idx + } + + // The last save point has saved the current position, so it's unneeded + savePoint.shrinkRange(input) + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) + } + return true + } + + /// Specialized quantify instruction interpreter for + + mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind == .eager + && payload.minTrips == 1 + && payload.extraTrips == nil) + var savePoint = startQuantifierSavePoint() + while true { + let next =
_doQuantifyMatch(payload) + guard let idx = next else { break } + currentPosition = idx + savePoint.updateRange(newEnd: currentPosition) + } + + if savePoint.rangeIsEmpty { + signalFailure() + return false + } + // The last save point has saved the current position, so it's unneeded + savePoint.shrinkRange(input) + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) + } + return true + } + + /// Specialized quantify instruction interpreter for ? + mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.minTrips == 0 + && payload.extraTrips == 1) + let next = _doQuantifyMatch(payload) + guard let idx = next else { + return true // matched zero times + } + if payload.quantKind != .possessive { + // Save the zero match + let savePoint = makeSavePoint(currentPC + 1) + savePoints.append(savePoint) + } + currentPosition = idx + return true + } +} diff --git a/Sources/_StringProcessing/Engine/Metrics.swift b/Sources/_StringProcessing/Engine/Metrics.swift new file mode 100644 index 000000000..753c3c3d1 --- /dev/null +++ b/Sources/_StringProcessing/Engine/Metrics.swift @@ -0,0 +1,37 @@ +extension Processor { + struct ProcessorMetrics { + var instructionCounts: [Instruction.OpCode: Int] = [:] + var backtracks: Int = 0 + var resets: Int = 0 + } + + func printMetrics() { + print("===") + print("Total cycle count: \(cycleCount)") + print("Backtracks: \(metrics.backtracks)") + print("Resets: \(metrics.resets)") + print("Instructions:") + let sorted = metrics.instructionCounts + .filter({$0.1 != 0}) + .sorted(by: { (a,b) in a.1 > b.1 }) + for (opcode, count) in sorted { + print("> \(opcode): \(count)") + } + print("===") + } + + mutating func measure() { + let (opcode, _) = fetch().destructure + if metrics.instructionCounts.keys.contains(opcode) { + metrics.instructionCounts[opcode]! 
+= 1 + } else { + metrics.instructionCounts.updateValue(1, forKey: opcode) + } + } + + mutating func measureMetrics() { + if shouldMeasureMetrics { + measure() + } + } +} diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 2be918294..18e355fb5 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -9,6 +9,7 @@ // //===----------------------------------------------------------------------===// + enum MatchMode { case wholeString case partialFromFront @@ -88,6 +89,8 @@ struct Processor { // MARK: Metrics, debugging, etc. var cycleCount = 0 var isTracingEnabled: Bool + let shouldMeasureMetrics: Bool + var metrics: ProcessorMetrics = ProcessorMetrics() } extension Processor { @@ -104,7 +107,8 @@ extension Processor { subjectBounds: Range, searchBounds: Range, matchMode: MatchMode, - isTracingEnabled: Bool + isTracingEnabled: Bool, + shouldMeasureMetrics: Bool ) { self.controller = Controller(pc: 0) self.instructions = program.instructions @@ -113,6 +117,7 @@ extension Processor { self.searchBounds = searchBounds self.matchMode = matchMode self.isTracingEnabled = isTracingEnabled + self.shouldMeasureMetrics = shouldMeasureMetrics self.currentPosition = searchBounds.lowerBound // Initialize registers with end of search bounds @@ -139,7 +144,8 @@ extension Processor { self.state = .inProgress self.failureReason = nil - + + if shouldMeasureMetrics { metrics.resets += 1 } _checkInvariants() } @@ -180,6 +186,18 @@ extension Processor { currentPosition = idx return true } + + // Advances in unicode scalar view + mutating func consumeScalar(_ n: Distance) -> Bool { + guard let idx = input.unicodeScalars.index( + currentPosition, offsetBy: n.rawValue, limitedBy: end + ) else { + signalFailure() + return false + } + currentPosition = idx + return true + } /// Continue matching at the specified index. 
/// @@ -230,31 +248,46 @@ extension Processor { // Match against the current input prefix. Returns whether // it succeeded vs signaling an error. - mutating func matchSeq<C: Collection>( - _ seq: C - ) -> Bool where C.Element == Input.Element { + mutating func matchSeq( + _ seq: Substring, + isScalarMode: Bool + ) -> Bool { + if isScalarMode { + for s in seq.unicodeScalars { + guard matchScalar(s, boundaryCheck: false) else { return false } + } + return true + } + for e in seq { guard match(e) else { return false } } return true } - + func loadScalar() -> Unicode.Scalar? { currentPosition < end ? input.unicodeScalars[currentPosition] : nil } + func _doMatchScalar(_ s: Unicode.Scalar, _ boundaryCheck: Bool) -> Input.Index? { + if s == loadScalar(), + let idx = input.unicodeScalars.index( + currentPosition, + offsetBy: 1, + limitedBy: end), + (!boundaryCheck || input.isOnGraphemeClusterBoundary(idx)) { + return idx + } else { + return nil + } + } + mutating func matchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) -> Bool { - guard s == loadScalar(), - let idx = input.unicodeScalars.index( - currentPosition, - offsetBy: 1, - limitedBy: end), - (!boundaryCheck || input.isOnGraphemeClusterBoundary(idx)) - else { + guard let next = _doMatchScalar(s, boundaryCheck) else { signalFailure() return false } - currentPosition = idx + currentPosition = next return true } @@ -277,17 +310,25 @@ extension Processor { return true } + func _doMatchBitset(_ bitset: DSLTree.CustomCharacterClass.AsciiBitset) -> Input.Index?
{ + if let cur = load(), bitset.matches(char: cur) { + return input.index(after: currentPosition) + } else { + return nil + } + } + // If we have a bitset we know that the CharacterClass only matches against // ascii characters, so check if the current input element is ascii then // check if it is set in the bitset mutating func matchBitset( _ bitset: DSLTree.CustomCharacterClass.AsciiBitset ) -> Bool { - guard let cur = load(), bitset.matches(char: cur) else { + guard let next = _doMatchBitset(bitset) else { signalFailure() return false } - _uncheckedForcedConsumeOne() + currentPosition = next return true } @@ -296,7 +337,7 @@ extension Processor { _ bitset: DSLTree.CustomCharacterClass.AsciiBitset ) -> Bool { guard let curScalar = loadScalar(), - bitset.matches(scalar: curScalar), + bitset.matches(scalar: curScalar), let idx = input.unicodeScalars.index(currentPosition, offsetBy: 1, limitedBy: end) else { signalFailure() return false @@ -305,13 +346,52 @@ extension Processor { return true } + // Matches the next character if it is not a newline + mutating func matchAnyNonNewline() -> Bool { + guard let c = load(), !c.isNewline else { + signalFailure() + return false + } + _uncheckedForcedConsumeOne() + return true + } + + // Matches the next scalar if it is not a newline + mutating func matchAnyNonNewlineScalar() -> Bool { + guard let s = loadScalar(), !s.isNewline else { + signalFailure() + return false + } + input.unicodeScalars.formIndex(after: &currentPosition) + return true + } + mutating func signalFailure() { - guard let (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = - savePoints.popLast()?.destructure - else { + guard !savePoints.isEmpty else { state = .fail return } + let (pc, pos, stackEnd, capEnds, intRegisters, posRegisters): ( + pc: InstructionAddress, + pos: Position?, + stackEnd: CallStackAddress, + captureEnds: [_StoredCapture], + intRegisters: [Int], + PositionRegister: [Input.Index] + ) + + let idx = savePoints.index(before:
savePoints.endIndex) + // If we have a quantifier save point, move the next range position into pos + if !savePoints[idx].rangeIsEmpty { + savePoints[idx].takePositionFromRange(input) + } + // If we have a normal save point or an empty quantifier save point, remove it + if savePoints[idx].rangeIsEmpty { + (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure + } else { + (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints[idx].destructure + } + assert(stackEnd.rawValue <= callStack.count) assert(capEnds.count == storedCaptures.count) @@ -321,6 +401,8 @@ extension Processor { storedCaptures = capEnds registers.ints = intRegisters registers.positions = posRegisters + + if shouldMeasureMetrics { metrics.backtracks += 1 } } mutating func abort(_ e: Error? = nil) { @@ -358,14 +440,21 @@ extension Processor { mutating func cycle() { _checkInvariants() assert(state == .inProgress) - if cycleCount == 0 { trace() } + +#if PROCESSOR_MEASUREMENTS_ENABLED + if cycleCount == 0 { + trace() + measureMetrics() + } defer { cycleCount += 1 trace() + measureMetrics() _checkInvariants() } - let (opcode, payload) = fetch().destructure +#endif + let (opcode, payload) = fetch().destructure switch opcode { case .invalid: fatalError("Invalid program") @@ -435,10 +524,26 @@ extension Processor { signalFailure() case .advance: - if consume(payload.distance) { - controller.step() + let (isScalar, distance) = payload.distance + if isScalar { + if consumeScalar(distance) { + controller.step() + } + } else { + if consume(distance) { + controller.step() + } + } + case .matchAnyNonNewline: + if payload.isScalar { + if matchAnyNonNewlineScalar() { + controller.step() + } + } else { + if matchAnyNonNewline() { + controller.step() + } } - case .match: let (isCaseInsensitive, reg) = payload.elementPayload if isCaseInsensitive { @@ -476,6 +581,36 @@ extension Processor { } } + case .matchBuiltin: + let payload = payload.characterClassPayload + 
if matchBuiltin( + payload.cc, + payload.isInverted, + payload.isStrictASCII, + payload.isScalarSemantics + ) { + controller.step() + } + case .quantify: + let quantPayload = payload.quantify + let matched: Bool + switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.extraTrips) { + case (.reluctant, _, _): + assertionFailure(".reluctant is not supported by .quantify") + return + case (.eager, 0, nil): + matched = runEagerZeroOrMoreQuantify(quantPayload) + case (.eager, 1, nil): + matched = runEagerOneOrMoreQuantify(quantPayload) + case (_, 0, 1): + matched = runZeroOrOneQuantify(quantPayload) + default: + matched = runQuantify(quantPayload) + } + if matched { + controller.step() + } + case .consumeBy: let reg = payload.consumer guard currentPosition < searchBounds.upperBound, @@ -489,16 +624,9 @@ extension Processor { controller.step() case .assertBy: - let reg = payload.assertion - let assertion = registers[reg] + let payload = payload.assertion do { - guard try assertion( - &wordIndexCache, - &wordIndexMaxIndex, - input, - currentPosition, - subjectBounds - ) else { + guard try builtinAssert(by: payload) else { signalFailure() return } @@ -527,8 +655,9 @@ extension Processor { } case .backreference: + let (isScalarMode, capture) = payload.captureAndMode let capNum = Int( - asserting: payload.capture.rawValue) + asserting: capture.rawValue) guard capNum < storedCaptures.count else { fatalError("Should this be an assert?") } @@ -540,23 +669,21 @@ extension Processor { signalFailure() return } - if matchSeq(input[range]) { + if matchSeq(input[range], isScalarMode: isScalarMode) { controller.step() } case .beginCapture: let capNum = Int( asserting: payload.capture.rawValue) + storedCaptures[capNum].startCapture(currentPosition) + controller.step() - storedCaptures[capNum].startCapture(currentPosition) - controller.step() - - case .endCapture: + case .endCapture: let capNum = Int( asserting: payload.capture.rawValue) - - 
storedCaptures[capNum].endCapture(currentPosition) - controller.step() + storedCaptures[capNum].endCapture(currentPosition) + controller.step() case .transformCapture: let (cap, trans) = payload.pairedCaptureTransform @@ -584,14 +711,6 @@ extension Processor { storedCaptures[capNum].registerValue( value, overwriteInitial: sp) controller.step() - - case .builtinAssertion: - builtinAssertion() - - case .builtinCharacterClass: - builtinCharacterClass() } } } - - diff --git a/Sources/_StringProcessing/Engine/Registers.swift b/Sources/_StringProcessing/Engine/Registers.swift index e5d33af8b..69cc3e30a 100644 --- a/Sources/_StringProcessing/Engine/Registers.swift +++ b/Sources/_StringProcessing/Engine/Registers.swift @@ -33,8 +33,6 @@ extension Processor { var consumeFunctions: [MEProgram.ConsumeFunction] - var assertionFunctions: [MEProgram.AssertionFunction] - // Captured-value constructors var transformFunctions: [MEProgram.TransformFunction] @@ -85,9 +83,6 @@ extension Processor.Registers { subscript(_ i: ConsumeFunctionRegister) -> MEProgram.ConsumeFunction { consumeFunctions[i.rawValue] } - subscript(_ i: AssertionFunctionRegister) -> MEProgram.AssertionFunction { - assertionFunctions[i.rawValue] - } subscript(_ i: TransformRegister) -> MEProgram.TransformFunction { transformFunctions[i.rawValue] } @@ -117,9 +112,6 @@ extension Processor.Registers { self.consumeFunctions = program.staticConsumeFunctions assert(consumeFunctions.count == info.consumeFunctions) - self.assertionFunctions = program.staticAssertionFunctions - assert(assertionFunctions.count == info.assertionFunctions) - self.transformFunctions = program.staticTransformFunctions assert(transformFunctions.count == info.transformFunctions) @@ -159,7 +151,6 @@ extension MEProgram { var strings = 0 var bitsets = 0 var consumeFunctions = 0 - var assertionFunctions = 0 var transformFunctions = 0 var matcherFunctions = 0 var ints = 0 diff --git a/Sources/_StringProcessing/Engine/Tracing.swift 
b/Sources/_StringProcessing/Engine/Tracing.swift index 525beec63..725319b00 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -29,27 +29,81 @@ extension Processor: TracedProcessor { extension Instruction: CustomStringConvertible { var description: String { - // TODO: opcode specific rendering - "\(opcode) \(payload)" - } -} - -extension Instruction.Payload: CustomStringConvertible { - var description: String { -// var result = "" -// if hasCondition { -// result += "\(condition) " -// } -// if hasPayload { -// let payload: TypedInt<_Boo> = payload() -// result += payload.description -// } -// return result - - // TODO: Without bit packing our representation, what - // should we do? I'd say a payload cannot be printed - // in isolation of the instruction... - return "\(rawValue)" + switch opcode { + case .advance: + return "\(opcode) \(payload.distance)" + case .assertBy: + return "\(opcode) \(payload.assertion)" + case .backreference: + return "\(opcode) \(payload.capture.rawValue)" + case .beginCapture: + return "\(opcode) \(payload.capture.rawValue)" + case .branch: + return "\(opcode) \(payload.addr)" + case .captureValue: + let (val, cap) = payload.pairedValueCapture + return "\(opcode) vals[\(val)] -> captures[\(cap)]" + case .condBranchSamePosition: + let (addr, pos) = payload.pairedAddrPos + return "\(opcode) \(addr) pos[\(pos)]" + case .condBranchZeroElseDecrement: + let (addr, int) = payload.pairedAddrInt + return "\(opcode) \(addr) int[\(int)]" + case .consumeBy: + return "\(opcode) consumer[\(payload.consumer)]" + case .endCapture: + return "\(opcode) \(payload.capture.rawValue)" + case .match: + let (isCaseInsensitive, reg) = payload.elementPayload + if isCaseInsensitive { + return "matchCaseInsensitive char[\(reg)]" + } else { + return "match char[\(reg)]" + } + case .matchBitset: + let (isScalar, reg) = payload.bitsetPayload + if isScalar { + return "matchBitsetScalar bitset[\(reg)]" + 
} else { + return "matchBitset bitset[\(reg)]" + } + case .matchBuiltin: + let payload = payload.characterClassPayload + return "matchBuiltin \(payload.cc) (\(payload.isInverted))" + case .matchBy: + let (matcherReg, valReg) = payload.pairedMatcherValue + return "\(opcode) match[\(matcherReg)] -> val[\(valReg)]" + case .matchScalar: + let (scalar, caseInsensitive, boundaryCheck) = payload.scalarPayload + if caseInsensitive { + return "matchScalarCaseInsensitive '\(scalar)' boundaryCheck: \(boundaryCheck)" + } else { + return "matchScalar '\(scalar)' boundaryCheck: \(boundaryCheck)" + } + case .moveCurrentPosition: + let reg = payload.position + return "\(opcode) -> pos[\(reg)]" + case .moveImmediate: + let (imm, reg) = payload.pairedImmediateInt + return "\(opcode) \(imm) -> int[\(reg)]" + case .quantify: + let payload = payload.quantify + return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.extraTrips?.description ?? "unbounded" )" + case .save: + let resumeAddr = payload.addr + return "\(opcode) \(resumeAddr)" + case .saveAddress: + let resumeAddr = payload.addr + return "\(opcode) \(resumeAddr)" + case .splitSaving: + let (nextPC, resumeAddr) = payload.pairedAddrAddr + return "\(opcode) saving: \(resumeAddr) jumpingTo: \(nextPC)" + case .transformCapture: + let (cap, trans) = payload.pairedCaptureTransform + return "\(opcode) trans[\(trans)](\(cap))" + default: + return "\(opcode)" + } } } @@ -59,7 +113,13 @@ extension Processor.SavePoint { if let p = self.pos { posStr = "\(input.distance(from: input.startIndex, to: p))" } else { - posStr = "" + if rangeIsEmpty { + posStr = "" + } else { + let startStr = "\(input.distance(from: input.startIndex, to: rangeStart!))" + let endStr = "\(input.distance(from: input.startIndex, to: rangeEnd!))" + posStr = "\(startStr)...\(endStr)" + } } return """ pc: \(self.pc), pos: \(posStr), stackEnd: \(stackEnd) diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 
718d37026..253858d1f 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -15,8 +15,8 @@ struct Executor { // TODO: consider let, for now lets us toggle tracing var engine: Engine - init(program: MEProgram, enablesTracing: Bool = false) { - self.engine = Engine(program, enableTracing: enablesTracing) + init(program: MEProgram) { + self.engine = Engine(program) } @available(SwiftStdlib 5.7, *) @@ -30,7 +30,9 @@ struct Executor { input: input, subjectBounds: subjectBounds, searchBounds: searchBounds) - +#if PROCESSOR_MEASUREMENTS_ENABLED + defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } } +#endif var low = searchBounds.lowerBound let high = searchBounds.upperBound while true { @@ -57,6 +59,9 @@ struct Executor { ) throws -> Regex.Match? { var cpu = engine.makeProcessor( input: input, bounds: subjectBounds, matchMode: mode) +#if PROCESSOR_MEASUREMENTS_ENABLED + defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } } +#endif return try _match(input, from: subjectBounds.lowerBound, using: &cpu) } diff --git a/Sources/_StringProcessing/MatchingOptions.swift b/Sources/_StringProcessing/MatchingOptions.swift index e56b8def2..d511c9f7c 100644 --- a/Sources/_StringProcessing/MatchingOptions.swift +++ b/Sources/_StringProcessing/MatchingOptions.swift @@ -122,18 +122,6 @@ extension MatchingOptions { } } -// Deprecated CharacterClass.MatchLevel API -extension MatchingOptions { - var matchLevel: _CharacterClassModel.MatchLevel { - switch semanticLevel { - case .graphemeCluster: - return .graphemeCluster - case .unicodeScalar: - return .unicodeScalar - } - } -} - // MARK: - Implementation extension MatchingOptions { /// An option that changes the behavior of a regular expression. 
diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index e60a1ce0e..953df6882 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -744,6 +744,41 @@ extension DSLTree.Atom.Assertion { } } +extension DSLTree.Atom.CharacterClass { + var _patternBase: String { + switch self { + case .anyGrapheme: + return ".anyGraphemeCluster" + case .anyUnicodeScalar: + return ".anyUnicodeScalar" + case .digit: + return ".digit" + case .notDigit: + return ".digit.inverted" + case .word: + return ".word" + case .notWord: + return ".word.inverted" + case .horizontalWhitespace: + return ".horizontalWhitespace" + case .notHorizontalWhitespace: + return ".horizontalWhitespace.inverted" + case .newlineSequence: + return ".newlineSequence" + case .notNewline: + return ".newlineSequence.inverted" + case .verticalWhitespace: + return ".verticalWhitespace" + case .notVerticalWhitespace: + return ".vertialWhitespace.inverted" + case .whitespace: + return ".whitespace" + case .notWhitespace: + return ".whitespace.inverted" + } + } +} + extension AST.Atom.CharacterProperty { var isUnprintableProperty: Bool { switch kind { @@ -1212,6 +1247,8 @@ extension DSLTree.Atom { case .assertion(let a): return (a._patternBase, false) + case .characterClass(let cc): + return (cc._patternBase, true) case .backreference(_): return ("/* TODO: backreferences */", false) @@ -1256,6 +1293,8 @@ extension DSLTree.Atom { case .assertion: return "/* TODO: assertions */" + case .characterClass: + return "/* TODO: character classes */" case .backreference: return "/* TODO: backreferences */" case .symbolicReference: diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index 4eb7bc42c..f5b08dd6d 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -168,6 +168,25 @@ 
extension AST.Atom.EscapedBuiltin { default: return nil } } + var dslCharacterClass: DSLTree.Atom.CharacterClass? { + switch self { + case .decimalDigit: return .digit + case .notDecimalDigit: return .notDigit + case .horizontalWhitespace: return .horizontalWhitespace + case .notHorizontalWhitespace: return .notHorizontalWhitespace + case .newlineSequence: return .newlineSequence + case .notNewline: return .notNewline + case .whitespace: return .whitespace + case .notWhitespace: return .notWhitespace + case .verticalTab: return .verticalWhitespace + case .notVerticalTab: return .notVerticalWhitespace + case .wordCharacter: return .word + case .notWordCharacter: return .notWord + case .graphemeCluster: return .anyGrapheme + case .trueAnychar: return .anyUnicodeScalar + default: return nil + } + } } extension AST.Atom { @@ -179,6 +198,12 @@ extension AST.Atom { default: return nil } } + var dslCharacterClass: DSLTree.Atom.CharacterClass? { + switch kind { + case .escaped(let b): return b.dslCharacterClass + default: return nil + } + } } extension AST.Atom { @@ -186,6 +211,10 @@ extension AST.Atom { if let kind = dslAssertionKind { return .assertion(kind) } + + if let cc = dslCharacterClass { + return .characterClass(cc) + } switch self.kind { case let .char(c): return .char(c) @@ -194,9 +223,11 @@ extension AST.Atom { case let .backreference(r): return .backreference(.init(ast: r)) case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq)) - case .escaped(let c) where c.scalarValue != nil: - return .scalar(c.scalarValue!) 
- + case .escaped(let c): + guard let val = c.scalarValue else { + fatalError("Got a .escaped that was not an assertion, character class, or scalar value \(self)") + } + return .scalar(val) default: return .unconverted(.init(ast: self)) } } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 0afe11c77..f58f63de7 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -63,6 +63,7 @@ public struct Regex: RegexComponent { @available(SwiftStdlib 5.7, *) extension Regex { + @available(*, deprecated, renamed: "init(verbatim:)") public init(quoting string: String) { self.init(node: .quotedLiteral(string)) } @@ -82,7 +83,7 @@ extension Regex { let tree: DSLTree /// OptionSet of compiler options for testing purposes - fileprivate var compileOptions: Compiler.CompileOptions = .default + fileprivate var compileOptions: _CompileOptions = .default private final class ProgramBox { let value: MEProgram @@ -136,9 +137,30 @@ extension Regex { } @available(SwiftStdlib 5.7, *) +@_spi(RegexBenchmark) extension Regex { - internal mutating func _setCompilerOptionsForTesting(_ opts: Compiler.CompileOptions) { - program.compileOptions = opts - program._loweredProgramStorage = nil + public enum _RegexInternalAction { + case recompile + case addOptions(_CompileOptions) + } + + /// Internal API for RegexBenchmark + /// Forces the regex to perform the given action, returning if it was successful + public mutating func _forceAction(_ action: _RegexInternalAction) -> Bool { + do { + switch action { + case .addOptions(let opts): + program.compileOptions.insert(opts) + program._loweredProgramStorage = nil + return true + case .recompile: + let _ = try Compiler( + tree: program.tree, + compileOptions: program.compileOptions).emit() + return true + } + } catch { + return false + } } } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift 
index 520f4991a..0a0831706 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -177,6 +177,7 @@ extension DSLTree { /// newlines unless single line mode is enabled. case dot + case characterClass(CharacterClass) case assertion(Assertion) case backreference(_AST.Reference) case symbolicReference(ReferenceID) @@ -189,9 +190,9 @@ extension DSLTree { extension DSLTree.Atom { @_spi(RegexBuilder) - public enum Assertion: Hashable { + public enum Assertion: UInt64, Hashable { /// \A - case startOfSubject + case startOfSubject = 0 /// \Z case endOfSubjectBeforeNewline @@ -231,6 +232,46 @@ extension DSLTree.Atom { /// \B case notWordBoundary } + + @_spi(RegexBuilder) + public enum CharacterClass: Hashable { + case digit + case notDigit + case horizontalWhitespace + case notHorizontalWhitespace + case newlineSequence + case notNewline + case whitespace + case notWhitespace + case verticalWhitespace + case notVerticalWhitespace + case word + case notWord + case anyGrapheme + case anyUnicodeScalar + } +} + +extension DSLTree.Atom.CharacterClass { + @_spi(RegexBuilder) + public var inverted: DSLTree.Atom.CharacterClass? 
{ + switch self { + case .anyGrapheme: return nil + case .anyUnicodeScalar: return nil + case .digit: return .notDigit + case .notDigit: return .digit + case .word: return .notWord + case .notWord: return .word + case .horizontalWhitespace: return .notHorizontalWhitespace + case .notHorizontalWhitespace: return .horizontalWhitespace + case .newlineSequence: return .notNewline + case .notNewline: return .newlineSequence + case .verticalWhitespace: return .notVerticalWhitespace + case .notVerticalWhitespace: return .verticalWhitespace + case .whitespace: return .notWhitespace + case .notWhitespace: return .whitespace + } + } } extension Unicode.GeneralCategory { @@ -767,34 +808,8 @@ extension DSLTree { internal var ast: AST.MatchingOptionSequence } - @_spi(RegexBuilder) public struct Atom { internal var ast: AST.Atom - - // FIXME: The below APIs should be removed once the DSL tree has been - // migrated to use proper DSL atoms for them. - - public static var _anyGrapheme: Self { - .init(ast: .init(.escaped(.graphemeCluster), .fake)) - } - public static var _whitespace: Self { - .init(ast: .init(.escaped(.whitespace), .fake)) - } - public static var _digit: Self { - .init(ast: .init(.escaped(.decimalDigit), .fake)) - } - public static var _horizontalWhitespace: Self { - .init(ast: .init(.escaped(.horizontalWhitespace), .fake)) - } - public static var _newlineSequence: Self { - .init(ast: .init(.escaped(.newlineSequence), .fake)) - } - public static var _verticalWhitespace: Self { - .init(ast: .init(.escaped(.verticalTab), .fake)) - } - public static var _word: Self { - .init(ast: .init(.escaped(.wordCharacter), .fake)) - } } } } @@ -808,7 +823,7 @@ extension DSLTree.Atom { case .changeMatchingOptions, .assertion: return false case .char, .scalar, .any, .anyNonNewline, .dot, .backreference, - .symbolicReference, .unconverted: + .symbolicReference, .unconverted, .characterClass: return true } } diff --git a/Sources/_StringProcessing/Unicode/WordBreaking.swift 
b/Sources/_StringProcessing/Unicode/WordBreaking.swift index 94c311e82..50da079f6 100644 --- a/Sources/_StringProcessing/Unicode/WordBreaking.swift +++ b/Sources/_StringProcessing/Unicode/WordBreaking.swift @@ -12,6 +12,39 @@ @_spi(_Unicode) import Swift +extension Processor { + func atSimpleBoundary( + _ usesAsciiWord: Bool, + _ semanticLevel: MatchingOptions.SemanticLevel + ) -> Bool { + func matchesWord(at i: Input.Index) -> Bool { + switch semanticLevel { + case .graphemeCluster: + let c = input[i] + return c.isWordCharacter && (c.isASCII || !usesAsciiWord) + case .unicodeScalar: + let c = input.unicodeScalars[i] + return (c.properties.isAlphabetic || c == "_") && (c.isASCII || !usesAsciiWord) + } + } + + // FIXME: How should we handle bounds? + // We probably need two concepts + if subjectBounds.isEmpty { return false } + if currentPosition == subjectBounds.lowerBound { + return matchesWord(at: currentPosition) + } + let priorIdx = input.index(before: currentPosition) + if currentPosition == subjectBounds.upperBound { + return matchesWord(at: priorIdx) + } + + let prior = matchesWord(at: priorIdx) + let current = matchesWord(at: currentPosition) + return prior != current + } +} + extension String { func isOnWordBoundary( at i: String.Index, diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index 31245c0f7..e0df906fa 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -58,6 +58,14 @@ public struct _RegexFactory { ) -> Regex { .init(node: .atom(.scalar(scalar))) } + + @_spi(RegexBuilder) + @available(SwiftStdlib 5.7, *) + public func characterClass( + _ cc: DSLTree.Atom.CharacterClass + ) -> Regex { + .init(node: .atom(.characterClass(cc))) + } @_spi(RegexBuilder) @available(SwiftStdlib 5.7, *) diff --git a/Sources/_StringProcessing/Utility/Traced.swift b/Sources/_StringProcessing/Utility/Traced.swift index 
5ae7cd245..112a601b1 100644 --- a/Sources/_StringProcessing/Utility/Traced.swift +++ b/Sources/_StringProcessing/Utility/Traced.swift @@ -80,6 +80,21 @@ extension TracedProcessor { } func formatInput() -> String { + let distanceFromStart = input.distance( + from: input.startIndex, + to: currentPosition) + + // Cut a reasonably sized substring from the input to print + let start = input.index( + currentPosition, + offsetBy: -30, + limitedBy: input.startIndex) ?? input.startIndex + let end = input.index( + currentPosition, + offsetBy: 30, + limitedBy: input.endIndex) ?? input.endIndex + let input = input[start.. enum _ConsumeFunctionRegister {} -/// Used for assertion functions, e.g. anchors etc -typealias AssertionFunctionRegister = TypedInt<_AssertionFunctionRegister> -enum _AssertionFunctionRegister {} - /// Used for capture transforms, etc typealias TransformRegister = TypedInt<_TransformRegister> enum _TransformRegister {} diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index 9f515f220..c5f1f8ecd 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -17,27 +17,38 @@ struct _CharacterClassModel: Hashable { /// The actual character class to match. - var cc: Representation + let cc: Representation /// The level (character or Unicode scalar) at which to match. - var matchLevel: MatchLevel + let matchLevel: MatchingOptions.SemanticLevel + + /// If this character character class only matches ascii characters + let isStrictASCII: Bool /// Whether this character class matches against an inverse, /// e.g \D, \S, [^abc]. 
- var isInverted: Bool = false + let isInverted: Bool + + init( + cc: Representation, + options: MatchingOptions, + isInverted: Bool + ) { + self.cc = cc + self.matchLevel = options.semanticLevel + self.isStrictASCII = cc.isStrictAscii(options: options) + self.isInverted = isInverted + } - // TODO: Split out builtin character classes into their own type? - enum Representation: Hashable { + enum Representation: UInt64, Hashable { /// Any character - case any + case any = 0 /// Any grapheme cluster case anyGrapheme /// Any Unicode scalar case anyScalar /// Character.isDigit case digit - /// Character.isHexDigit - case hexDigit /// Horizontal whitespace: `[:blank:]`, i.e /// `[\p{gc=Space_Separator}\N{CHARACTER TABULATION}] case horizontalWhitespace @@ -50,43 +61,6 @@ struct _CharacterClassModel: Hashable { /// Character.isLetter or Character.isDigit or Character == "_" case word } - - enum MatchLevel: Hashable { - /// Match at the extended grapheme cluster level. - case graphemeCluster - /// Match at the Unicode scalar level. - case unicodeScalar - } - - var scalarSemantic: Self { - var result = self - result.matchLevel = .unicodeScalar - return result - } - - var graphemeClusterSemantic: Self { - var result = self - result.matchLevel = .graphemeCluster - return result - } - - /// Conditionally inverts a character class. - /// - /// - Parameter inversion: Indicates whether to invert the character class. - /// - Returns: The inverted character class if `inversion` is `true`; - /// otherwise, the same character class. - func withInversion(_ inversion: Bool) -> Self { - var copy = self - if inversion { - copy.isInverted.toggle() - } - return copy - } - - /// Inverts a character class. - var inverted: Self { - return withInversion(true) - } /// Returns the end of the match of this character class in the string. /// @@ -94,111 +68,115 @@ struct _CharacterClassModel: Hashable { /// - Parameter at: The index to start matching. 
/// - Parameter options: Options for the match operation. /// - Returns: The index of the end of the match, or `nil` if there is no match. - func matches(in str: String, at i: String.Index, with options: MatchingOptions) -> String.Index? { - switch matchLevel { - case .graphemeCluster: - let c = str[i] - var matched: Bool - var next = str.index(after: i) - switch cc { - case .any, .anyGrapheme: matched = true - case .anyScalar: - matched = true - next = str.unicodeScalars.index(after: i) - case .digit: - matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits) - case .hexDigit: - matched = c.isHexDigit && (c.isASCII || !options.usesASCIIDigits) - case .horizontalWhitespace: - matched = c.unicodeScalars.first?.isHorizontalWhitespace == true - && (c.isASCII || !options.usesASCIISpaces) - case .newlineSequence, .verticalWhitespace: - matched = c.unicodeScalars.first?.isNewline == true - && (c.isASCII || !options.usesASCIISpaces) - case .whitespace: - matched = c.isWhitespace && (c.isASCII || !options.usesASCIISpaces) - case .word: - matched = c.isWordCharacter && (c.isASCII || !options.usesASCIIWord) + func matches( + in input: String, + at currentPosition: String.Index + ) -> String.Index? { + // FIXME: This is only called in custom character classes that contain builtin + // character classes as members (ie: [a\w] or set operations), is there + // any way to avoid that? Can we remove this somehow? 
+ guard currentPosition != input.endIndex else { + return nil + } + let char = input[currentPosition] + let scalar = input.unicodeScalars[currentPosition] + let isScalarSemantics = matchLevel == .unicodeScalar + let asciiCheck = (char.isASCII && !isScalarSemantics) + || (scalar.isASCII && isScalarSemantics) + || !isStrictASCII + + var matched: Bool + var next: String.Index + switch (isScalarSemantics, cc) { + case (_, .anyGrapheme): + next = input.index(after: currentPosition) + case (_, .anyScalar): + // FIXME: This allows us to be not-scalar aligned when in grapheme mode + // Should this even be allowed? + next = input.unicodeScalars.index(after: currentPosition) + case (true, _): + next = input.unicodeScalars.index(after: currentPosition) + case (false, _): + next = input.index(after: currentPosition) + } + + switch cc { + case .any, .anyGrapheme, .anyScalar: + matched = true + case .digit: + if isScalarSemantics { + matched = scalar.properties.numericType != nil && asciiCheck + } else { + matched = char.isNumber && asciiCheck } - if isInverted { - matched.toggle() + case .horizontalWhitespace: + if isScalarSemantics { + matched = scalar.isHorizontalWhitespace && asciiCheck + } else { + matched = char._isHorizontalWhitespace && asciiCheck } - return matched ? 
next : nil - case .unicodeScalar: - let c = str.unicodeScalars[i] - var nextIndex = str.unicodeScalars.index(after: i) - var matched: Bool - switch cc { - case .any: matched = true - case .anyScalar: matched = true - case .anyGrapheme: - matched = true - nextIndex = str.index(after: i) - case .digit: - matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits) - case .hexDigit: - matched = Character(c).isHexDigit && (c.isASCII || !options.usesASCIIDigits) - case .horizontalWhitespace: - matched = c.isHorizontalWhitespace && (c.isASCII || !options.usesASCIISpaces) - case .verticalWhitespace: - matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) - case .newlineSequence: - matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) - if c == "\r" && nextIndex != str.endIndex && str.unicodeScalars[nextIndex] == "\n" { - str.unicodeScalars.formIndex(after: &nextIndex) + case .verticalWhitespace: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + } else { + matched = char._isNewline && asciiCheck + } + case .newlineSequence: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + if matched && scalar == "\r" + && next != input.endIndex && input.unicodeScalars[next] == "\n" { + // Match a full CR-LF sequence even in scalar sematnics + input.unicodeScalars.formIndex(after: &next) } - case .whitespace: - matched = c.properties.isWhitespace && (c.isASCII || !options.usesASCIISpaces) - case .word: - matched = (c.properties.isAlphabetic || c == "_") && (c.isASCII || !options.usesASCIIWord) + } else { + matched = char._isNewline && asciiCheck + } + case .whitespace: + if isScalarSemantics { + matched = scalar.properties.isWhitespace && asciiCheck + } else { + matched = char.isWhitespace && asciiCheck } - if isInverted { - matched.toggle() + case .word: + if isScalarSemantics { + matched = scalar.properties.isAlphabetic && asciiCheck + } else { + matched = char.isWordCharacter && asciiCheck } - return 
matched ? nextIndex : nil + } + if isInverted { + matched.toggle() + } + if matched { + return next + } else { + return nil } } } extension _CharacterClassModel { - static var any: _CharacterClassModel { - .init(cc: .any, matchLevel: .graphemeCluster) - } - - static var anyGrapheme: _CharacterClassModel { - .init(cc: .anyGrapheme, matchLevel: .graphemeCluster) - } - - static var anyUnicodeScalar: _CharacterClassModel { - .init(cc: .any, matchLevel: .unicodeScalar) - } - - static var whitespace: _CharacterClassModel { - .init(cc: .whitespace, matchLevel: .graphemeCluster) - } - - static var digit: _CharacterClassModel { - .init(cc: .digit, matchLevel: .graphemeCluster) - } - - static var hexDigit: _CharacterClassModel { - .init(cc: .hexDigit, matchLevel: .graphemeCluster) - } - - static var horizontalWhitespace: _CharacterClassModel { - .init(cc: .horizontalWhitespace, matchLevel: .graphemeCluster) - } - - static var newlineSequence: _CharacterClassModel { - .init(cc: .newlineSequence, matchLevel: .graphemeCluster) - } - - static var verticalWhitespace: _CharacterClassModel { - .init(cc: .verticalWhitespace, matchLevel: .graphemeCluster) + var consumesSingleGrapheme: Bool { + switch self.cc { + case .anyScalar: return false + default: return true + } } +} - static var word: _CharacterClassModel { - .init(cc: .word, matchLevel: .graphemeCluster) +extension _CharacterClassModel.Representation { + /// Returns true if this CharacterClass should be matched by strict ascii under the given options + func isStrictAscii(options: MatchingOptions) -> Bool { + switch self { + case .digit: return options.usesASCIIDigits + case .horizontalWhitespace: return options.usesASCIISpaces + case .newlineSequence: return options.usesASCIISpaces + case .verticalWhitespace: return options.usesASCIISpaces + case .whitespace: return options.usesASCIISpaces + case .word: return options.usesASCIIWord + default: return false + } } } @@ -209,7 +187,6 @@ extension 
_CharacterClassModel.Representation: CustomStringConvertible { case .anyGrapheme: return "" case .anyScalar: return "" case .digit: return "" - case .hexDigit: return "" case .horizontalWhitespace: return "" case .newlineSequence: return "" case .verticalWhitespace: return "vertical whitespace" @@ -225,102 +202,57 @@ extension _CharacterClassModel: CustomStringConvertible { } } -extension _CharacterClassModel { - func withMatchLevel( - _ level: _CharacterClassModel.MatchLevel - ) -> _CharacterClassModel { - var cc = self - cc.matchLevel = level - return cc - } -} - -extension AST.Atom { - var characterClass: _CharacterClassModel? { - switch kind { - case let .escaped(b): return b.characterClass - - case .property: - // TODO: Would our model type for character classes include - // this? Or does grapheme-semantic mode complicate that? - return nil - - case .dot: - // `.dot` is handled in the matching engine by Compiler.emitDot() and in - // the legacy compiler by the `.any` instruction, which can provide lower - // level instructions than the CharacterClass-generated consumer closure - // - // FIXME: We shouldn't be returning `nil` here, but instead fixing the call - // site to check for any before trying to construct a character class. - return nil - - default: return nil - - } - } - -} - -extension AST.Atom.EscapedBuiltin { - var characterClass: _CharacterClassModel? 
{ +extension DSLTree.Atom.CharacterClass { + /// Converts this DSLTree CharacterClass into our runtime representation + func asRuntimeModel(_ options: MatchingOptions) -> _CharacterClassModel { + let cc: _CharacterClassModel.Representation + var inverted = false switch self { - case .decimalDigit: return .digit - case .notDecimalDigit: return .digit.inverted - - case .horizontalWhitespace: return .horizontalWhitespace + case .digit: + cc = .digit + case .notDigit: + cc = .digit + inverted = true + + case .horizontalWhitespace: + cc = .horizontalWhitespace case .notHorizontalWhitespace: - return .horizontalWhitespace.inverted + cc = .horizontalWhitespace + inverted = true - case .newlineSequence: return .newlineSequence + case .newlineSequence: + cc = .newlineSequence // FIXME: This is more like '.' than inverted '\R', as it is affected // by e.g (*CR). We should therefore really be emitting it through // emitDot(). For now we treat it as semantically invalid. - case .notNewline: return .newlineSequence.inverted - - case .whitespace: return .whitespace - case .notWhitespace: return .whitespace.inverted - - case .verticalTab: return .verticalWhitespace - case .notVerticalTab: return .verticalWhitespace.inverted - - case .wordCharacter: return .word - case .notWordCharacter: return .word.inverted - - case .graphemeCluster: return .anyGrapheme - case .trueAnychar: return .anyUnicodeScalar - - default: - return nil + case .notNewline: + cc = .newlineSequence + inverted = true + + case .whitespace: + cc = .whitespace + case .notWhitespace: + cc = .whitespace + inverted = true + + case .verticalWhitespace: + cc = .verticalWhitespace + case .notVerticalWhitespace: + cc = .verticalWhitespace + inverted = true + + case .word: + cc = .word + case .notWord: + cc = .word + inverted = true + + case .anyGrapheme: + cc = .anyGrapheme + case .anyUnicodeScalar: + cc = .anyScalar } + return _CharacterClassModel(cc: cc, options: options, isInverted: inverted) } } - -extension 
_CharacterClassModel { - // FIXME: Calling on inverted sets wont be the same as the - // inverse of a boundary if at the start or end of the - // string. (Think through what we want: do it ourselves or - // give the caller both options). - func isBoundary( - _ input: String, - at pos: String.Index, - bounds: Range, - with options: MatchingOptions - ) -> Bool { - // FIXME: How should we handle bounds? - // We probably need two concepts - if bounds.isEmpty { return false } - if pos == bounds.lowerBound { - return self.matches(in: input, at: pos, with: options) != nil - } - let priorIdx = input.index(before: pos) - if pos == bounds.upperBound { - return self.matches(in: input, at: priorIdx, with: options) != nil - } - - let prior = self.matches(in: input, at: priorIdx, with: options) != nil - let current = self.matches(in: input, at: pos, with: options) != nil - return prior != current - } - -} diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index 27f8d79cb..752921e19 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -37,7 +37,9 @@ enum DecodedInstr { case matchScalarCaseInsensitive case matchScalarUnchecked case matchBitsetScalar + case matchAnyNonNewline case matchBitset + case matchBuiltin case consumeBy case assertBy case matchBy @@ -46,8 +48,7 @@ enum DecodedInstr { case endCapture case transformCapture case captureValue - case builtinAssertion - case builtinCharacterClass + case quantify } extension DecodedInstr { @@ -56,87 +57,88 @@ extension DecodedInstr { /// /// Must stay in sync with Processor.cycle static func decode(_ instruction: Instruction) -> DecodedInstr { - let (opcode, payload) = instruction.destructure - - switch opcode { - case .invalid: - fatalError("Invalid program") - case .moveImmediate: - return .moveImmediate - case .moveCurrentPosition: - return .moveCurrentPosition - case .branch: - return .branch - case .condBranchZeroElseDecrement: - return 
.condBranchZeroElseDecrement - case .condBranchSamePosition: - return .condBranchSamePosition - case .save: - return .save - case .saveAddress: - return .saveAddress - case .splitSaving: - return .splitSaving - case .clear: - return .clear - case .clearThrough: - return .clearThrough - case .accept: - return .accept - case .fail: - return .fail - case .advance: - return .advance - case .match: - let (isCaseInsensitive, _) = payload.elementPayload - if isCaseInsensitive { - return .matchCaseInsensitive - } else { - return .match - } - case .matchScalar: - let (_, caseInsensitive, boundaryCheck) = payload.scalarPayload - if caseInsensitive { - if boundaryCheck { - return .matchScalarCaseInsensitive - } else { - return .matchScalarCaseInsensitiveUnchecked - } + let (opcode, payload) = instruction.destructure + switch opcode { + case .invalid: + fatalError("Invalid program") + case .moveImmediate: + return .moveImmediate + case .moveCurrentPosition: + return .moveCurrentPosition + case .branch: + return .branch + case .condBranchZeroElseDecrement: + return .condBranchZeroElseDecrement + case .condBranchSamePosition: + return .condBranchSamePosition + case .save: + return .save + case .saveAddress: + return .saveAddress + case .splitSaving: + return .splitSaving + case .clear: + return .clear + case .clearThrough: + return .clearThrough + case .accept: + return .accept + case .fail: + return .fail + case .advance: + return .advance + case .match: + let (isCaseInsensitive, _) = payload.elementPayload + if isCaseInsensitive { + return .matchCaseInsensitive + } else { + return .match + } + case .matchScalar: + let (_, caseInsensitive, boundaryCheck) = payload.scalarPayload + if caseInsensitive { + if boundaryCheck { + return .matchScalarCaseInsensitive } else { - if boundaryCheck { - return .matchScalar - } else { - return .matchScalarUnchecked - } + return .matchScalarCaseInsensitiveUnchecked } - case .matchBitset: - let (isScalar, _) = payload.bitsetPayload - if isScalar 
{ - return .matchBitsetScalar + } else { + if boundaryCheck { + return .matchScalar } else { - return .matchBitset + return .matchScalarUnchecked } - case .consumeBy: - return consumeBy - case .assertBy: - return .assertBy - case .matchBy: - return .matchBy - case .backreference: - return .backreference - case .beginCapture: - return .beginCapture - case .endCapture: - return .endCapture - case .transformCapture: - return .transformCapture - case .captureValue: - return .captureValue - case .builtinAssertion: - return .builtinAssertion - case .builtinCharacterClass: - return .builtinCharacterClass -} + } + case .matchBitset: + let (isScalar, _) = payload.bitsetPayload + if isScalar { + return .matchBitsetScalar + } else { + return .matchBitset + } + case .consumeBy: + return .consumeBy + case .matchAnyNonNewline: + return .matchAnyNonNewline + case .assertBy: + return .assertBy + case .matchBy: + return .matchBy + case .quantify: + return .quantify + case .backreference: + return .backreference + case .beginCapture: + return .beginCapture + case .endCapture: + return .endCapture + case .transformCapture: + return .transformCapture + case .captureValue: + return .captureValue + case .matchBuiltin: + return .matchBuiltin + } } } @@ -308,7 +310,7 @@ extension RegexTests { matchingOptions(adding: [.caseInsensitive])) } - private func expectProgram( + func expectProgram( for regex: String, syntax: SyntaxOptions = .traditional, semanticLevel: RegexSemanticLevel? 
= nil, diff --git a/Tests/RegexTests/LexTests.swift b/Tests/RegexTests/LexTests.swift index 49184deb3..53775e66e 100644 --- a/Tests/RegexTests/LexTests.swift +++ b/Tests/RegexTests/LexTests.swift @@ -96,9 +96,6 @@ extension RegexTests { ("#|abc/#def#", nil), ("#/abc\n/#", nil), ("#/abc\r/#", nil), - - (#"re'abcre\''"#, (#"abcre\'"#, delim(.reSingleQuote))), - (#"re'\'"#, nil) ] for (input, expected) in testCases { diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 5f4c8bb30..2fa9b9381 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -11,7 +11,7 @@ import XCTest @testable import _RegexParser -@testable import _StringProcessing +@testable @_spi(RegexBenchmark) import _StringProcessing import TestSupport struct MatchError: Error { @@ -32,7 +32,7 @@ func _firstMatch( let result = try regex.firstMatch(in: input) if validateOptimizations { - regex._setCompilerOptionsForTesting(.disableOptimizations) + assert(regex._forceAction(.addOptions(.disableOptimizations))) let unoptResult = try regex.firstMatch(in: input) if result != nil && unoptResult == nil { throw MatchError("match not found for unoptimized \(regexStr) in \(input)") @@ -1517,13 +1517,10 @@ extension RegexTests { (" 123", "23"), ("123 456", "23")) - // TODO: \G and \K - do { - let regex = try Regex(#"\Gab"#, as: Substring.self) - XCTExpectFailure { - XCTAssertEqual("abab".matches(of: regex).map(\.output), ["ab", "ab"]) - } - } + // \G and \K + let regex = try Regex(#"\Gab"#, as: Substring.self) + XCTAssertEqual("abab".matches(of: regex).map(\.output), ["ab", "ab"]) + // TODO: Oniguruma \y and \Y firstMatchTests( @@ -1646,6 +1643,11 @@ extension RegexTests { (input: "123x23", match: "23x23"), xfail: true) + // Backreferences in scalar mode + // In scalar mode the backreference should not match + firstMatchTest(#"(.+)\1"#, input: "รฉe\u{301}", match: "รฉe\u{301}") + firstMatchTest(#"(.+)\1"#, input: "รฉe\u{301}", match: nil, 
semanticLevel: .unicodeScalar) + // Backreferences in lookaheads firstMatchTests( #"^(?=.*(.)(.)\2\1).+$"#, @@ -2456,6 +2458,9 @@ extension RegexTests { // case insensitive tests firstMatchTest(#"(?i)abc\u{301}d"#, input: "AbC\u{301}d", match: "AbC\u{301}d", semanticLevel: .unicodeScalar) + + // check that we don't crash on empty strings + firstMatchTest(#"\Q\E"#, input: "", match: "") } func testCase() { @@ -2525,4 +2530,35 @@ extension RegexTests { expectCompletion(regex: #"(a{,4})*"#, in: "aa") expectCompletion(regex: #"((|)+)*"#, in: "aa") } + + func testQuantifyOptimization() throws { + // test that the maximum values for minTrips and extraTrips are handled correctly + let maxStorable = Int(QuantifyPayload.maxStorableTrips) + let maxExtraTrips = "a{,\(maxStorable)}" + expectProgram(for: maxExtraTrips, contains: [.quantify]) + firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) + XCTAssertNil(try Regex(maxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) + + let maxMinTrips = "a{\(maxStorable),}" + expectProgram(for: maxMinTrips, contains: [.quantify]) + firstMatchTest(maxMinTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxMinTrips, input: String(repeating: "a", count: maxStorable - 1), match: nil) + + let maxBothTrips = "a{\(maxStorable),\(maxStorable*2)}" + expectProgram(for: maxBothTrips, contains: [.quantify]) + XCTAssertNil(try Regex(maxBothTrips).wholeMatch(in: String(repeating: "a", count: maxStorable*2 + 1))) + firstMatchTest(maxBothTrips, input: String(repeating: "a", count: maxStorable*2), match: String(repeating: "a", count: maxStorable*2)) + firstMatchTest(maxBothTrips, input: String(repeating: "a", count: maxStorable), match: 
String(repeating: "a", count: maxStorable)) + firstMatchTest(maxBothTrips, input: String(repeating: "a", count: maxStorable - 1), match: nil) + + expectProgram(for: "a{,\(maxStorable+1)}", doesNotContain: [.quantify]) + expectProgram(for: "a{\(maxStorable+1),}", doesNotContain: [.quantify]) + expectProgram(for: "a{\(maxStorable-1),\(maxStorable*2)}", doesNotContain: [.quantify]) + expectProgram(for: "a{\(maxStorable),\(maxStorable*2+1)}", doesNotContain: [.quantify]) + } + + func testFuzzerArtifacts() throws { + expectCompletion(regex: #"(b?)\1*"#, in: "a") + } } diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 84ce361f3..0e7d41eed 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -2151,9 +2151,6 @@ extension RegexTests { parseWithDelimitersTest("##/a b/##", concat("a", " ", "b")) parseWithDelimitersTest("#|a b|#", concat("a", "b")) - parseWithDelimitersTest("re'a b'", concat("a", " ", "b")) - parseWithDelimitersTest("rx'a b'", concat("a", "b")) - parseWithDelimitersTest("#|[a b]|#", charClass("a", "b")) parseWithDelimitersTest( "#|(?-x)[a b]|#", concat( @@ -2176,13 +2173,13 @@ extension RegexTests { parseWithDelimitersTest("#||||#", alt(empty(), empty(), empty())) parseWithDelimitersTest("#|a||#", alt("a", empty())) - parseWithDelimitersTest("re'x*'", zeroOrMore(of: "x")) + parseWithDelimitersTest("/x*/", zeroOrMore(of: "x")) - parseWithDelimitersTest(#"re'๐Ÿ”ฅ๐Ÿ‡ฉ๐Ÿ‡ฐ'"#, concat("๐Ÿ”ฅ", "๐Ÿ‡ฉ๐Ÿ‡ฐ")) - parseWithDelimitersTest(#"re'๐Ÿ”ฅโœ…'"#, concat("๐Ÿ”ฅ", "โœ…")) + parseWithDelimitersTest(#"/๐Ÿ”ฅ๐Ÿ‡ฉ๐Ÿ‡ฐ/"#, concat("๐Ÿ”ฅ", "๐Ÿ‡ฉ๐Ÿ‡ฐ")) + parseWithDelimitersTest(#"/๐Ÿ”ฅโœ…/"#, concat("๐Ÿ”ฅ", "โœ…")) // Printable ASCII characters. 
- delimiterLexingTest(##"re' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'"##) + delimiterLexingTest(##"#/ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~/#"##) // Make sure we can handle a combining accent as first character. parseWithDelimitersTest("/\u{301}/", "\u{301}") @@ -2294,72 +2291,61 @@ extension RegexTests { /# """#, charClass(range_m("a", "b"))) - - // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter - // if it's clear that it's part of the regex syntax. - parseWithDelimitersTest( - #"re'(?'a_bcA0'\')'"#, namedCapture("a_bcA0", "'")) + #"/(?'a_bcA0'\')/"#, namedCapture("a_bcA0", "'")) parseWithDelimitersTest( - #"re'(?'a_bcA0-c1A'x*)'"#, + #"/(?'a_bcA0-c1A'x*)/"#, balancedCapture(name: "a_bcA0", priorName: "c1A", zeroOrMore(of: "x")), unsupported: true) parseWithDelimitersTest( - #"rx' (?'a_bcA0' a b)'"#, concat(namedCapture("a_bcA0", concat("a", "b")))) + #"/ (?'a_bcA0' a b)/"#, concat(" ", namedCapture("a_bcA0", concat(" ", "a", " ", "b")))) parseWithDelimitersTest( - #"re'(?('a_bcA0')x|y)'"#, conditional( + #"/(?('a_bcA0')x|y)/"#, conditional( .groupMatched(ref("a_bcA0")), trueBranch: "x", falseBranch: "y"), unsupported: true ) parseWithDelimitersTest( - #"re'(?('+20')\')'"#, conditional( + #"/(?('+20')\')/"#, conditional( .groupMatched(ref(plus: 20)), trueBranch: "'", falseBranch: empty()), unsupported: true ) parseWithDelimitersTest( - #"re'a\k'b0A''"#, concat("a", backreference(.named("b0A"))), throwsError: .invalidNamedReference("b0A")) + #"/a\k'b0A'/"#, concat("a", backreference(.named("b0A"))), throwsError: .invalidNamedReference("b0A")) parseWithDelimitersTest( - #"re'\k'+2-1''"#, backreference(ref(plus: 2), recursionLevel: -1), + #"/\k'+2-1'/"#, backreference(ref(plus: 2), recursionLevel: -1), unsupported: true ) parseWithDelimitersTest( - #"re'a\g'b0A''"#, concat("a", subpattern(.named("b0A"))), unsupported: true) + 
#"/a\g'b0A'/"#, concat("a", subpattern(.named("b0A"))), unsupported: true) parseWithDelimitersTest( - #"re'\g'-1'\''"#, concat(subpattern(ref(minus: 1)), "'"), unsupported: true) + #"/\g'-1'\'/"#, concat(subpattern(ref(minus: 1)), "'"), unsupported: true) parseWithDelimitersTest( - #"re'(?C'a*b\c ๐Ÿ”ฅ_ ;')'"#, pcreCallout(string: #"a*b\c ๐Ÿ”ฅ_ ;"#), + #"/(?C'a*b\c ๐Ÿ”ฅ_ ;')/"#, pcreCallout(string: #"a*b\c ๐Ÿ”ฅ_ ;"#), unsupported: true) - // Fine, because we don't end up skipping. - delimiterLexingTest(#"re'(?'"#) - delimiterLexingTest(#"re'(?('"#) - delimiterLexingTest(#"re'\k'"#) - delimiterLexingTest(#"re'\g'"#) - delimiterLexingTest(#"re'(?C'"#) + delimiterLexingTest(#"/(?/"#) + delimiterLexingTest(#"/(?(/"#) + delimiterLexingTest(#"/\k/"#) + delimiterLexingTest(#"/\g/"#) + delimiterLexingTest(#"/(?C/"#) - // Not a valid group name, but we can still skip over it. - delimiterLexingTest(#"re'(?'๐Ÿ”ฅ')'"#) + delimiterLexingTest(#"/(?'๐Ÿ”ฅ')/"#) - // Escaped, so don't skip. These will ignore the ending `'` as we've already - // closed the literal. parseWithDelimitersTest( - #"re'\(?''"#, zeroOrOne(of: "("), ignoreTrailing: true - ) + #"/\(?/"#, zeroOrOne(of: "(")) parseWithDelimitersTest( - #"re'\\k''"#, concat("\\", "k"), ignoreTrailing: true - ) + #"/\\k/"#, concat("\\", "k")) parseWithDelimitersTest( - #"re'\\g''"#, concat("\\", "g"), ignoreTrailing: true - ) + #"/\\g/"#, concat("\\", "g")) parseWithDelimitersTest( - #"re'\(?C''"#, concat(zeroOrOne(of: "("), "C"), ignoreTrailing: true - ) - delimiterLexingTest(#"re'(\?''"#, ignoreTrailing: true) - delimiterLexingTest(#"re'\(?(''"#, ignoreTrailing: true) + #"/\(?C/"#, concat(zeroOrOne(of: "("), "C")) + + delimiterLexingTest(#"/(\?/"#) + delimiterLexingTest(#"/\(?(/"#) // MARK: Parse not-equal @@ -3322,21 +3308,17 @@ extension RegexTests { // MARK: Printable ASCII - delimiterLexingDiagnosticTest(#"re'\\#n'"#, .unterminated) for i: UInt8 in 0x1 ..< 0x20 where i != 0xA && i != 0xD { // U+A & U+D are \n and \r. 
- delimiterLexingDiagnosticTest("re'\(UnicodeScalar(i))'", .unprintableASCII) + delimiterLexingDiagnosticTest("/\(UnicodeScalar(i))/", .unprintableASCII) } - delimiterLexingDiagnosticTest("re'\n'", .unterminated) - delimiterLexingDiagnosticTest("re'\r'", .unterminated) - delimiterLexingDiagnosticTest("re'\u{7F}'", .unprintableASCII) - // MARK: Delimiter skipping + // Can only be done if pound signs are used. + delimiterLexingDiagnosticTest("/\n/", .unterminated) + delimiterLexingDiagnosticTest("/\r/", .unterminated) + delimiterLexingDiagnosticTest("/\u{7F}/", .unprintableASCII) - delimiterLexingDiagnosticTest("re'(?''", .unterminated) - delimiterLexingDiagnosticTest("re'(?'abc'", .unterminated) - delimiterLexingDiagnosticTest("re'(?('abc'", .unterminated) - delimiterLexingDiagnosticTest(#"re'\k'ab_c0+-'"#, .unterminated) - delimiterLexingDiagnosticTest(#"re'\g'ab_c0+-'"#, .unterminated) + delimiterLexingDiagnosticTest("/", .unterminated) + delimiterLexingDiagnosticTest("/x", .unterminated) // MARK: Unbalanced extended syntax delimiterLexingDiagnosticTest("#/a/", .unterminated) @@ -3344,9 +3326,6 @@ extension RegexTests { // MARK: Multiline - // Can only be done if pound signs are used. - delimiterLexingDiagnosticTest("/\n/", .unterminated) - // Opening and closing delimiters must be on a newline. delimiterLexingDiagnosticTest("#/a\n/#", .unterminated) delimiterLexingDiagnosticTest("#/\na/#", .multilineClosingNotOnNewline) diff --git a/Utils/createBenchmark.py b/Utils/createBenchmark.py index 7950ea522..6fce1bc69 100644 --- a/Utils/createBenchmark.py +++ b/Utils/createBenchmark.py @@ -46,7 +46,7 @@ def register_benchmark(name): new_file_contents.append(line) else: # add the newest benchmark - new_file_contents.append(f" benchmark.add{name}()\n") + new_file_contents.append(f" self.add{name}()\n") new_file_contents.append(line) # write the new contents