From 49c1d8c753ac909f45b59f78bd93e579146265e4 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Sat, 16 Jul 2022 00:25:20 -0700 Subject: [PATCH 01/33] Add option to show benchmark charts --- Sources/RegexBenchmark/BenchmarkChart.swift | 112 +++++++++++++++++++ Sources/RegexBenchmark/BenchmarkRunner.swift | 30 ++++- Sources/RegexBenchmark/CLI.swift | 9 +- 3 files changed, 146 insertions(+), 5 deletions(-) create mode 100644 Sources/RegexBenchmark/BenchmarkChart.swift diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift new file mode 100644 index 000000000..3bb8e60ad --- /dev/null +++ b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -0,0 +1,112 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +#if os(macOS) + +import Charts +import SwiftUI + +struct BenchmarkChart: View { + struct Comparison: Identifiable { + var id = UUID() + var name: String + var baseline: BenchmarkResult + var latest: BenchmarkResult + } + + var comparisons: [Comparison] + + var body: some View { + VStack(alignment: .leading) { + ForEach(comparisons) { comparison in + let new = comparison.latest.median.seconds + let old = comparison.baseline.median.seconds + Chart { + chartBody( + name: comparison.name, + new: new, + old: old, + sampleCount: comparison.latest.samples) + } + .chartXAxis { + AxisMarks { value in + AxisTick() + AxisValueLabel { + Text(String(format: "%.5fs", value.as(Double.self)!)) + } + } + } + .chartYAxis { + AxisMarks { value in + AxisGridLine() + AxisValueLabel { + HStack { + Text(value.as(String.self)!) 
+ let delta = (new - old) / old * 100 + Text(String(format: "%+.2f%%", delta)) + .foregroundColor(delta <= 0 ? .green : .yellow) + } + } + } + } + .frame(idealHeight: 60) + } + } + } + + @ChartContentBuilder + func chartBody( + name: String, + new: TimeInterval, + old: TimeInterval, + sampleCount: Int + ) -> some ChartContent { + // Baseline bar + BarMark( + x: .value("Time", old), + y: .value("Name", "\(name) (\(sampleCount) samples)")) + .position(by: .value("Kind", "Baseline")) + .foregroundStyle(.gray) + + // Latest result bar + BarMark( + x: .value("Time", new), + y: .value("Name", "\(name) (\(sampleCount) samples)")) + .position(by: .value("Kind", "Latest")) + .foregroundStyle(LinearGradient( + colors: [.accentColor, new - old <= 0 ? .green : .yellow], + startPoint: .leading, + endPoint: .trailing)) + + // Comparison + RuleMark(x: .value("Time", new)) + .foregroundStyle(.gray) + .lineStyle(.init(lineWidth: 0.5, dash: [2])) + } +} + +struct BenchmarkResultApp: App { + static var comparisons: [BenchmarkChart.Comparison]? 
+ + var body: some Scene { + WindowGroup { + if let comparisons = Self.comparisons { + ScrollView { + BenchmarkChart(comparisons: comparisons) + } + } else { + Text("No data") + } + } + } +} + +#endif diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index 78953bdd6..93fbdbe23 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -89,7 +89,7 @@ extension BenchmarkRunner { try results.save(to: url) } - func compare(against compareFilePath: String) throws { + func compare(against compareFilePath: String, showChart: Bool) throws { let compareFileURL = URL(fileURLWithPath: compareFilePath) let compareResult = try SuiteResult.load(from: compareFileURL) let compareFile = compareFileURL.lastPathComponent @@ -121,6 +121,32 @@ extension BenchmarkRunner { for item in improvements { printComparison(name: item.key, diff: item.value) } + + #if os(macOS) + if showChart { + print(""" + === Comparison chart ================================================================= + Press Control-C to close... 
+ """) + BenchmarkResultApp.comparisons = { + var comparisons: [BenchmarkChart.Comparison] = [] + for (name, baseline) in compareResult.results { + if let latest = results.results[name] { + comparisons.append( + .init(name: name, baseline: baseline, latest: latest)) + } + } + return comparisons.sorted { + let delta0 = Float($0.latest.median.seconds - $0.baseline.median.seconds) + / Float($0.baseline.median.seconds) + let delta1 = Float($1.latest.median.seconds - $1.baseline.median.seconds) + / Float($1.baseline.median.seconds) + return delta0 > delta1 + } + }() + BenchmarkResultApp.main() + } + #endif } } @@ -128,7 +154,7 @@ struct BenchmarkResult: Codable { let median: Time let stdev: Double let samples: Int - + init(_ median: Time, _ stdev: Double, _ samples: Int) { self.median = median self.stdev = stdev diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index 8ef351329..8afe9144a 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -17,6 +17,9 @@ struct Runner: ParsableCommand { @Option(help: "The result file to compare against") var compare: String? 
+ @Flag(help: "Show comparison chart") + var showChart: Bool = false + @Flag(help: "Quiet mode") var quiet = false @@ -40,12 +43,12 @@ struct Runner: ParsableCommand { runner.suite = runner.suite.filter { b in !b.name.contains("NS") } } runner.run() - if let compareFile = compare { - try runner.compare(against: compareFile) - } if let saveFile = save { try runner.save(to: saveFile) } + if let compareFile = compare { + try runner.compare(against: compareFile, showChart: showChart) + } } } } From 6ca5cfd5296da5c5cf234bf76a44f2d4e27df797 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 27 Jul 2022 12:55:51 -0700 Subject: [PATCH 02/33] Add new comparison features - Compare with NS regular expression - Save comparison results - Save and compare estimated compile times --- Sources/RegexBenchmark/Benchmark.swift | 9 +- Sources/RegexBenchmark/BenchmarkChart.swift | 11 +- Sources/RegexBenchmark/BenchmarkResults.swift | 224 ++++++++++++++++++ Sources/RegexBenchmark/BenchmarkRunner.swift | 140 +---------- Sources/RegexBenchmark/CLI.swift | 15 +- 5 files changed, 257 insertions(+), 142 deletions(-) create mode 100644 Sources/RegexBenchmark/BenchmarkResults.swift diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index df4168c2d..159228275 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -89,6 +89,9 @@ struct InputListNSBenchmark: RegexBenchmark { /// A benchmark meant to be ran across multiple engines struct CrossBenchmark { + /// Suffix added onto NSRegularExpression benchmarks + static let nsSuffix = "_NS" + /// The base name of the benchmark var baseName: String @@ -127,7 +130,7 @@ struct CrossBenchmark { target: input)) runner.register( NSBenchmark( - name: baseName + "Whole_NS", + name: baseName + "Whole" + CrossBenchmark.nsSuffix, regex: nsRegex, type: .first, target: input)) @@ -140,7 +143,7 @@ struct CrossBenchmark { target: input)) runner.register( NSBenchmark( - name: 
baseName + "All_NS", + name: baseName + "All" + CrossBenchmark.nsSuffix, regex: nsRegex, type: .allMatches, target: input)) @@ -153,7 +156,7 @@ struct CrossBenchmark { target: input)) runner.register( NSBenchmark( - name: baseName + "First_NS", + name: baseName + "First" + CrossBenchmark.nsSuffix, regex: nsRegex, type: .first, target: input)) diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift index 3bb8e60ad..aa4cafff3 100644 --- a/Sources/RegexBenchmark/BenchmarkChart.swift +++ b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -15,14 +15,7 @@ import Charts import SwiftUI struct BenchmarkChart: View { - struct Comparison: Identifiable { - var id = UUID() - var name: String - var baseline: BenchmarkResult - var latest: BenchmarkResult - } - - var comparisons: [Comparison] + var comparisons: [BenchmarkResult.Comparison] var body: some View { VStack(alignment: .leading) { @@ -94,7 +87,7 @@ struct BenchmarkChart: View { } struct BenchmarkResultApp: App { - static var comparisons: [BenchmarkChart.Comparison]? + static var comparisons: [BenchmarkResult.Comparison]? var body: some Scene { WindowGroup { diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift new file mode 100644 index 000000000..88d6896ad --- /dev/null +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -0,0 +1,224 @@ +import Foundation + +extension BenchmarkRunner { + func save(to savePath: String) throws { + let url = URL(fileURLWithPath: savePath, isDirectory: false) + let parent = url.deletingLastPathComponent() + if !FileManager.default.fileExists(atPath: parent.path) { + try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true) + } + print("Saving result to \(url.path)") + try results.save(to: url) + } + + func compare(against compareFilePath: String, showChart: Bool, saveTo: String?) 
throws { + let compareFileURL = URL(fileURLWithPath: compareFilePath) + let compareResult = try SuiteResult.load(from: compareFileURL) + let compareFile = compareFileURL.lastPathComponent + + let comparisons = results + .compare(with: compareResult) + .filter({!$0.name.contains("_NS")}) + .filter({$0.diff != nil}) + displayComparisons(comparisons, showChart, against: "saved benchmark result " + compareFile) + if let saveFile = saveTo { + try saveComparisons(comparisons, path: saveFile) + } + + let compileTimeComparisons = results + .compareCompileTimes(with: compareResult) + .filter({!$0.name.contains("_NS")}) + .filter({$0.diff != nil}) + print("Comparing estimated compile times") + displayComparisons(compileTimeComparisons, false, against: "saved benchmark result " + compareFile) + } + + func compareWithNS(showChart: Bool, saveTo: String?) throws { + let comparisons = results.compareWithNS().filter({$0.diff != nil}) + displayComparisons(comparisons, showChart, against: "NSRegularExpression (via CrossBenchmark)") + if let saveFile = saveTo { + try saveComparisons(comparisons, path: saveFile) + } + } + + func displayComparisons(_ comparisons: [BenchmarkResult.Comparison], _ showChart: Bool, against: String) { + let regressions = comparisons.filter({$0.diff!.seconds > 0}) + .sorted(by: {(a,b) in a.diff!.seconds > b.diff!.seconds}) + let improvements = comparisons.filter({$0.diff!.seconds < 0}) + .sorted(by: {(a,b) in a.diff!.seconds < b.diff!.seconds}) + + print("Comparing against \(against)") + print("=== Regressions ======================================================================") + for item in regressions { + print(item) + } + + print("=== Improvements =====================================================================") + for item in improvements { + print(item) + } + + #if os(macOS) + if showChart { + print(""" + === Comparison chart ================================================================= + Press Control-C to close... 
+ """) + BenchmarkResultApp.comparisons = { + return comparisons.sorted { + let delta0 = Float($0.latest.median.seconds - $0.baseline.median.seconds) + / Float($0.baseline.median.seconds) + let delta1 = Float($1.latest.median.seconds - $1.baseline.median.seconds) + / Float($1.baseline.median.seconds) + return delta0 > delta1 + } + }() + BenchmarkResultApp.main() + } + #endif + } + + func saveComparisons(_ comparisons: [BenchmarkResult.Comparison], path: String) throws { + let url = URL(fileURLWithPath: path, isDirectory: false) + let parent = url.deletingLastPathComponent() + if !FileManager.default.fileExists(atPath: parent.path) { + try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true) + } + + var contents = "name,baseline,latest,diff,percentage\n" + for comparison in comparisons { + contents += comparison.asCsv + "\n" + } + print("Saving comparisons as .csv to \(path)") + try contents.write(to: url, atomically: true, encoding: String.Encoding.utf8) + } +} + +struct BenchmarkResult: Codable { + let median: Time + let estimatedCompileTime: Time + let stdev: Double + let samples: Int + + init(_ initialRunTime: Time, _ median: Time, _ stdev: Double, _ samples: Int) { + self.estimatedCompileTime = initialRunTime - median + self.median = median + self.stdev = stdev + self.samples = samples + } +} + +extension BenchmarkResult { + struct Comparison: Identifiable, CustomStringConvertible { + var id = UUID() + var name: String + var baseline: BenchmarkResult + var latest: BenchmarkResult + var diffCompileTimes: Bool = false + + var diff: Time? 
{ + if diffCompileTimes { + return latest.estimatedCompileTime - baseline.estimatedCompileTime + } + if Stats.tTest(baseline, latest) { + return latest.median - baseline.median + } + return nil + } + + var description: String { + guard let diff = diff else { + return "- \(name) N/A" + } + let oldVal: Time + let newVal: Time + if diffCompileTimes { + oldVal = baseline.estimatedCompileTime + newVal = latest.estimatedCompileTime + } else { + oldVal = baseline.median + newVal = latest.median + } + let percentage = (1000 * diff.seconds / oldVal.seconds).rounded()/10 + let len = max(40 - name.count, 1) + let nameSpacing = String(repeating: " ", count: len) + return "- \(name)\(nameSpacing)\(newVal)\t\(oldVal)\t\(diff)\t\t\(percentage)%" + } + + var asCsv: String { + guard let diff = diff else { + return "\(name),N/A" + } + let oldVal: Time + let newVal: Time + if diffCompileTimes { + oldVal = baseline.estimatedCompileTime + newVal = latest.estimatedCompileTime + } else { + oldVal = baseline.median + newVal = latest.median + } + let percentage = (1000 * diff.seconds / oldVal.seconds).rounded()/10 + return "\"\(name)\",\(newVal.seconds),\(oldVal.seconds),\(diff.seconds),\(percentage)%" + } + } +} + +struct SuiteResult { + var results: [String: BenchmarkResult] = [:] + + mutating func add(name: String, result: BenchmarkResult) { + results.updateValue(result, forKey: name) + } + + /// Compares with the given SuiteResult + func compare(with other: SuiteResult) -> [BenchmarkResult.Comparison] { + var comparisons: [BenchmarkResult.Comparison] = [] + for item in results { + if let otherVal = other.results[item.key] { + comparisons.append( + .init(name: item.key, baseline: item.value, latest: otherVal)) + } + } + return comparisons + } + + /// Compares with the NSRegularExpression benchmarks generated by CrossBenchmark + func compareWithNS() -> [BenchmarkResult.Comparison] { + var comparisons: [BenchmarkResult.Comparison] = [] + for item in results { + let key = item.key + 
CrossBenchmark.nsSuffix + if let nsResult = results[key] { + comparisons.append( + .init(name: item.key, baseline: nsResult, latest: item.value)) + } + } + return comparisons + } + + /// Compares the estimated compile times + func compareCompileTimes(with other: SuiteResult) -> [BenchmarkResult.Comparison] { + var comparisons: [BenchmarkResult.Comparison] = [] + for item in results { + if let otherVal = other.results[item.key] { + comparisons.append( + .init(name: item.key, baseline: item.value, latest: otherVal, diffCompileTimes: true)) + } + } + return comparisons + } +} + +extension SuiteResult: Codable { + func save(to url: URL) throws { + let encoder = JSONEncoder() + let data = try encoder.encode(self) + try data.write(to: url, options: .atomic) + } + + static func load(from url: URL) throws -> SuiteResult { + let decoder = JSONDecoder() + let data = try Data(contentsOf: url) + return try decoder.decode(SuiteResult.self, from: data) + } +} diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index 93fbdbe23..d59184fed 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -22,11 +22,15 @@ struct BenchmarkRunner { var times: [Time] = [] // initial run to make sure the regex has been compiled - // todo: measure compile times, or at least how much this first run - // differs from the later ones + // FIXME: this is a very poor way of estimating compile time + // we should have some sort of interface directly with the engine to measure this + // This also completely breaks when we rerun measure() for variant results + let initialStart = Tick.now benchmark.run() + let initialEnd = Tick.now + let initialRunTime = initialEnd.elapsedTime(since: initialStart) - // fixme: use suspendingclock? + // FIXME: use suspendingclock? for _ in 0.. 
Stats.maxAllowedStdev { print("Warning: Standard deviation > \(Time(Stats.maxAllowedStdev)) for \(b.name)") print("N = \(samples), median: \(result.median), stdev: \(Time(result.stdev))") @@ -60,7 +60,9 @@ struct BenchmarkRunner { fatalError("Benchmark \(b.name) is too variant") } } - + if !quiet { + print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (estimated compile time: \(result.estimatedCompileTime))") + } self.results.add(name: b.name, result: result) } } @@ -76,123 +78,3 @@ struct BenchmarkRunner { } } } - -extension BenchmarkRunner { - - func save(to savePath: String) throws { - let url = URL(fileURLWithPath: savePath, isDirectory: false) - let parent = url.deletingLastPathComponent() - if !FileManager.default.fileExists(atPath: parent.path) { - try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true) - } - print("Saving result to \(url.path)") - try results.save(to: url) - } - - func compare(against compareFilePath: String, showChart: Bool) throws { - let compareFileURL = URL(fileURLWithPath: compareFilePath) - let compareResult = try SuiteResult.load(from: compareFileURL) - let compareFile = compareFileURL.lastPathComponent - - let diff = results - .compare(with: compareResult) - .filter({(name, _) in !name.contains("_NS")}) - let regressions = diff.filter({(_, change) in change.seconds > 0}) - .sorted(by: {(a,b) in a.1 > b.1}) - let improvements = diff.filter({(_, change) in change.seconds < 0}) - .sorted(by: {(a,b) in a.1 < b.1}) - - print("Comparing against benchmark result file \(compareFile)") - print("=== Regressions ======================================================================") - func printComparison(name: String, diff: Time) { - let oldVal = compareResult.results[name]!.median - let newVal = results.results[name]!.median - let percentage = (1000 * diff.seconds / oldVal.seconds).rounded()/10 - let len = max(40 - name.count, 1) - let nameSpacing = String(repeating: " ", count: len) - 
print("- \(name)\(nameSpacing)\(newVal)\t\(oldVal)\t\(diff)\t\t\(percentage)%") - } - - for item in regressions { - printComparison(name: item.key, diff: item.value) - } - - print("=== Improvements =====================================================================") - for item in improvements { - printComparison(name: item.key, diff: item.value) - } - - #if os(macOS) - if showChart { - print(""" - === Comparison chart ================================================================= - Press Control-C to close... - """) - BenchmarkResultApp.comparisons = { - var comparisons: [BenchmarkChart.Comparison] = [] - for (name, baseline) in compareResult.results { - if let latest = results.results[name] { - comparisons.append( - .init(name: name, baseline: baseline, latest: latest)) - } - } - return comparisons.sorted { - let delta0 = Float($0.latest.median.seconds - $0.baseline.median.seconds) - / Float($0.baseline.median.seconds) - let delta1 = Float($1.latest.median.seconds - $1.baseline.median.seconds) - / Float($1.baseline.median.seconds) - return delta0 > delta1 - } - }() - BenchmarkResultApp.main() - } - #endif - } -} - -struct BenchmarkResult: Codable { - let median: Time - let stdev: Double - let samples: Int - - init(_ median: Time, _ stdev: Double, _ samples: Int) { - self.median = median - self.stdev = stdev - self.samples = samples - } -} - -struct SuiteResult { - var results: [String: BenchmarkResult] = [:] - - mutating func add(name: String, result: BenchmarkResult) { - results.updateValue(result, forKey: name) - } - - func compare(with other: SuiteResult) -> [String: Time] { - var output: [String: Time] = [:] - for item in results { - if let otherVal = other.results[item.key] { - let diff = item.value.median - otherVal.median - if Stats.tTest(item.value, otherVal) { - output.updateValue(diff, forKey: item.key) - } - } - } - return output - } -} - -extension SuiteResult: Codable { - func save(to url: URL) throws { - let encoder = JSONEncoder() - let data = 
try encoder.encode(self) - try data.write(to: url, options: .atomic) - } - - static func load(from url: URL) throws -> SuiteResult { - let decoder = JSONDecoder() - let data = try Data(contentsOf: url) - return try decoder.decode(SuiteResult.self, from: data) - } -} diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index 8afe9144a..32bb14bb8 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -19,6 +19,12 @@ struct Runner: ParsableCommand { @Flag(help: "Show comparison chart") var showChart: Bool = false + + @Flag(help: "Compare with NSRegularExpression") + var compareWithNS: Bool = false + + @Option(help: "Save comparison results as csv") + var saveComparison: String? @Flag(help: "Quiet mode") var quiet = false @@ -46,8 +52,15 @@ struct Runner: ParsableCommand { if let saveFile = save { try runner.save(to: saveFile) } + if saveComparison != nil && compareWithNS && compare != nil { + print("Unable to save both comparison results, specify only one compare operation") + return + } + if compareWithNS { + try runner.compareWithNS(showChart: showChart, saveTo: saveComparison) + } if let compareFile = compare { - try runner.compare(against: compareFile, showChart: showChart) + try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison) } } } From 74dd8e667852b414b77cdf48afbd419c2e65ef3f Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 27 Jul 2022 14:11:31 -0700 Subject: [PATCH 03/33] Add loading --- Sources/RegexBenchmark/BenchmarkResults.swift | 13 ++++++- Sources/RegexBenchmark/CLI.swift | 36 +++++++++++-------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index 88d6896ad..c6cdaf539 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -1,6 +1,7 @@ import Foundation extension BenchmarkRunner { + /// 
Attempts to save the results to the given path func save(to savePath: String) throws { let url = URL(fileURLWithPath: savePath, isDirectory: false) let parent = url.deletingLastPathComponent() @@ -10,7 +11,16 @@ extension BenchmarkRunner { print("Saving result to \(url.path)") try results.save(to: url) } - + + /// Attempts to load the results from the given save file + mutating func load(from savePath: String) throws { + let url = URL(fileURLWithPath: savePath) + let result = try SuiteResult.load(from: url) + self.results = result + print("Loaded results from \(url.path)") + } + + /// Compare this runner's results against the results stored in the given file path func compare(against compareFilePath: String, showChart: Bool, saveTo: String?) throws { let compareFileURL = URL(fileURLWithPath: compareFilePath) let compareResult = try SuiteResult.load(from: compareFileURL) @@ -33,6 +43,7 @@ extension BenchmarkRunner { displayComparisons(compileTimeComparisons, false, against: "saved benchmark result " + compareFile) } + /// Compares Swift Regex benchmark results against NSRegularExpression func compareWithNS(showChart: Bool, saveTo: String?) throws { let comparisons = results.compareWithNS().filter({$0.diff != nil}) displayComparisons(comparisons, showChart, against: "NSRegularExpression (via CrossBenchmark)") diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index 32bb14bb8..bb31e2613 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -10,6 +10,9 @@ struct Runner: ParsableCommand { @Flag(help: "Debug benchmark regexes") var debug = false + + @Option(help: "Load results from this file instead of rerunning") + var load: String? @Option(help: "The file results should be saved to") var save: String? 
@@ -34,7 +37,7 @@ struct Runner: ParsableCommand { mutating func run() throws { var runner = BenchmarkRunner.makeRunner(samples, quiet) - + if !self.specificBenchmarks.isEmpty { runner.suite = runner.suite.filter { b in specificBenchmarks.contains { pattern in @@ -44,24 +47,29 @@ struct Runner: ParsableCommand { } if debug { runner.debug() + return + } + + if let loadFile = load { + try runner.load(from: loadFile) } else { if excludeNs { runner.suite = runner.suite.filter { b in !b.name.contains("NS") } } runner.run() - if let saveFile = save { - try runner.save(to: saveFile) - } - if saveComparison != nil && compareWithNS && compare != nil { - print("Unable to save both comparison results, specify only one compare operation") - return - } - if compareWithNS { - try runner.compareWithNS(showChart: showChart, saveTo: saveComparison) - } - if let compareFile = compare { - try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison) - } + } + if let saveFile = save { + try runner.save(to: saveFile) + } + if saveComparison != nil && compareWithNS && compare != nil { + print("Unable to save both comparison results, specify only one compare operation") + return + } + if compareWithNS { + try runner.compareWithNS(showChart: showChart, saveTo: saveComparison) + } + if let compareFile = compare { + try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison) } } } From 662870e19b7644e7a72b2f4dc03954e1ccb0fd66 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 27 Jul 2022 14:51:04 -0700 Subject: [PATCH 04/33] Accidently flipped the order --- Sources/RegexBenchmark/BenchmarkResults.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index c6cdaf539..ed76ca5d9 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -188,7 +188,7 @@ struct SuiteResult { 
for item in results { if let otherVal = other.results[item.key] { comparisons.append( - .init(name: item.key, baseline: item.value, latest: otherVal)) + .init(name: item.key, baseline: otherVal, latest: item.value)) } } return comparisons @@ -213,7 +213,7 @@ struct SuiteResult { for item in results { if let otherVal = other.results[item.key] { comparisons.append( - .init(name: item.key, baseline: item.value, latest: otherVal, diffCompileTimes: true)) + .init(name: item.key, baseline: otherVal, latest: item.value, diffCompileTimes: true)) } } return comparisons From 1b761bee6cf8d77c91b5faf72b2f7b859b4b3029 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 27 Jul 2022 16:54:37 -0700 Subject: [PATCH 05/33] Disable compile time comparisons by default --- Sources/RegexBenchmark/BenchmarkResults.swift | 12 +++++++++--- Sources/RegexBenchmark/CLI.swift | 6 ++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index ed76ca5d9..173f42472 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -34,12 +34,18 @@ extension BenchmarkRunner { if let saveFile = saveTo { try saveComparisons(comparisons, path: saveFile) } - + } + + func compareCompileTimes(against compareFilePath: String, showChart: Bool) throws { + let compareFileURL = URL(fileURLWithPath: compareFilePath) + let compareResult = try SuiteResult.load(from: compareFileURL) + let compareFile = compareFileURL.lastPathComponent + let compileTimeComparisons = results .compareCompileTimes(with: compareResult) .filter({!$0.name.contains("_NS")}) .filter({$0.diff != nil}) - print("Comparing estimated compile times") + print("[Experimental] Comparing estimated compile times") displayComparisons(compileTimeComparisons, false, against: "saved benchmark result " + compareFile) } @@ -96,7 +102,7 @@ extension BenchmarkRunner { try! 
FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true) } - var contents = "name,baseline,latest,diff,percentage\n" + var contents = "name,latest,baseline,diff,percentage\n" for comparison in comparisons { contents += comparison.asCsv + "\n" } diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index bb31e2613..fa0729fd1 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -20,6 +20,9 @@ struct Runner: ParsableCommand { @Option(help: "The result file to compare against") var compare: String? + @Option(help: "Experimental compile time comparison") + var experimentalCompareCompileTimes: String? + @Flag(help: "Show comparison chart") var showChart: Bool = false @@ -71,5 +74,8 @@ struct Runner: ParsableCommand { if let compareFile = compare { try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison) } + if let compareFile = experimentalCompareCompileTimes { + try runner.compareCompileTimes(against: compareFile, showChart: showChart) + } } } From f897844963825746c6f75766f8284f80c5a41996 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Mon, 1 Aug 2022 11:05:03 -0700 Subject: [PATCH 06/33] Rename builder parameter name --- Sources/RegexBuilder/Variadics.swift | 528 +++++++++--------- .../VariadicsGenerator.swift | 40 +- 2 files changed, 284 insertions(+), 284 deletions(-) diff --git a/Sources/RegexBuilder/Variadics.swift b/Sources/RegexBuilder/Variadics.swift index c1e380144..7336f0a30 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -705,10 +705,10 @@ extension Optionally { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -741,10 +741,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -768,10 +768,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -793,11 +793,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_disfavoredOverload @@ -816,10 +816,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring, R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -873,10 +873,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -898,10 +898,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -921,11 +921,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -942,10 +942,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?), Component.RegexOutput == (W, C1), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -999,10 +999,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1024,10 +1024,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1047,11 +1047,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1068,10 +1068,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?), Component.RegexOutput == (W, C1, C2), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1125,10 +1125,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1150,10 +1150,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3), Component.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1173,11 +1173,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1194,10 +1194,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?), Component.RegexOutput == (W, C1, C2, C3), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1251,10 +1251,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1276,10 +1276,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1299,11 +1299,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1320,10 +1320,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?), Component.RegexOutput == (W, C1, C2, C3, C4), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1377,10 +1377,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1402,10 +1402,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1425,11 +1425,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1446,10 +1446,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?), Component.RegexOutput == (W, C1, C2, C3, C4, C5), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1503,10 +1503,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1528,10 +1528,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1551,11 +1551,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1572,10 +1572,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1629,10 +1629,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1654,10 +1654,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1677,11 +1677,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1698,10 +1698,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1755,10 +1755,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1780,10 +1780,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1803,11 +1803,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1824,10 +1824,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -1881,10 +1881,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -1906,10 +1906,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -1929,11 +1929,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -1950,10 +1950,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.zeroOrOne(component(), behavior)) + self.init(factory.zeroOrOne(componentBuilder(), behavior)) } } @@ -2007,10 +2007,10 @@ extension ZeroOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? = nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.zeroOrMore(component(), behavior)) + self.init(factory.zeroOrMore(componentBuilder(), behavior)) } } @@ -2032,10 +2032,10 @@ extension OneOrMore { @_alwaysEmitIntoClient public init( _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.oneOrMore(component(), behavior)) + self.init(factory.oneOrMore(componentBuilder(), behavior)) } } @@ -2055,11 +2055,11 @@ extension Repeat { @_alwaysEmitIntoClient public init( count: Int, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } @_alwaysEmitIntoClient @@ -2076,10 +2076,10 @@ extension Repeat { public init( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1?, C2?, C3?, C4?, C5?, C6?, C7?, C8?, C9?, C10?), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.Bound == Int { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == Substring { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2124,10 +2124,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1), Component.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2147,10 +2147,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2), Component.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2170,10 +2170,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3), Component.RegexOutput == (W, C1, C2, C3) { let factory = 
makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2193,10 +2193,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4), Component.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2216,10 +2216,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5), Component.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2239,10 +2239,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2262,10 +2262,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7), Component.RegexOutput == (W, 
C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2285,10 +2285,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2308,10 +2308,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -2331,10 +2331,10 @@ extension Local { @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> Component + @RegexComponentBuilder _ componentBuilder: () -> Component ) where RegexOutput == (Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), Component.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.atomicNonCapturing(component())) + self.init(factory.atomicNonCapturing(componentBuilder())) } } @available(SwiftStdlib 5.7, *) @@ -3142,41 +3142,41 @@ extension Capture { @_disfavoredOverload @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ 
componentBuilder: () -> R ) where RegexOutput == (Substring, W), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_disfavoredOverload @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_disfavoredOverload @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_disfavoredOverload @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3185,22 +3185,22 @@ extension TryCapture { @_disfavoredOverload @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_disfavoredOverload @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3272,38 +3272,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - 
@RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3311,21 +3311,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3397,38 +3397,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3436,21 +3436,21 @@ extension 
Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3522,38 +3522,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, 
C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3561,21 +3561,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3647,38 +3647,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + 
self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3686,21 +3686,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3772,38 +3772,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - 
@RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3811,21 +3811,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -3897,38 +3897,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - 
self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -3936,21 +3936,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4022,38 +4022,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component(), 
reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4061,21 +4061,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4147,38 +4147,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, 
C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4186,21 +4186,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4272,38 +4272,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == 
(W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4311,21 +4311,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } @@ -4397,38 +4397,38 @@ extension TryCapture { extension Capture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R + @RegexComponentBuilder _ componentBuilder: () -> R ) where RegexOutput == (Substring, W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, 
NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -4436,21 +4436,21 @@ extension Capture { extension TryCapture { @_alwaysEmitIntoClient public init( - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } @_alwaysEmitIntoClient public init( as reference: Reference, - @RegexComponentBuilder _ component: () -> R, + @RegexComponentBuilder _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index faab75762..8ddaee145 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -406,10 +406,10 @@ struct VariadicsGenerator: ParsableCommand { @_alwaysEmitIntoClient public init<\(params.genericParams)>( _ behavior: RegexRepetitionBehavior? 
= nil, - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(params.whereClauseForInit) { let factory = makeFactory() - self.init(factory.\(kind.astQuantifierAmount)(component(), behavior)) + self.init(factory.\(kind.astQuantifierAmount)(componentBuilder(), behavior)) } } @@ -436,7 +436,7 @@ struct VariadicsGenerator: ParsableCommand { let groupName = "Local" func node(builder: Bool) -> String { """ - component\(builder ? "()" : "") + component\(builder ? "Builder()" : "") """ } @@ -478,7 +478,7 @@ struct VariadicsGenerator: ParsableCommand { \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams)>( - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(whereClauseForInit) { let factory = makeFactory() self.init(factory.atomicNonCapturing(\(node(builder: true)))) @@ -514,11 +514,11 @@ struct VariadicsGenerator: ParsableCommand { @_alwaysEmitIntoClient public init<\(params.genericParams)>( count: Int, - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(params.whereClauseForInit) { precondition(count >= 0, "Must specify a positive count") let factory = makeFactory() - self.init(factory.exactly(count, component())) + self.init(factory.exactly(count, componentBuilder())) } \(params.disfavored)\ @@ -537,10 +537,10 @@ struct VariadicsGenerator: ParsableCommand { public init<\(params.genericParams), R: RangeExpression>( _ expression: R, _ behavior: RegexRepetitionBehavior? 
= nil, - @\(concatBuilderName) _ component: () -> Component + @\(concatBuilderName) _ componentBuilder: () -> Component ) \(params.repeatingWhereClause) { let factory = makeFactory() - self.init(factory.repeating(expression.relative(to: 0..( - @\(concatBuilderName) _ component: () -> R + @\(concatBuilderName) _ componentBuilder: () -> R ) \(whereClauseRaw) { let factory = makeFactory() - self.init(factory.capture(component())) + self.init(factory.capture(componentBuilder())) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams)>( as reference: Reference, - @\(concatBuilderName) _ component: () -> R + @\(concatBuilderName) _ componentBuilder: () -> R ) \(whereClauseRaw) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw)) + self.init(factory.capture(componentBuilder(), reference._raw)) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.capture(component(), nil, transform)) + self.init(factory.capture(componentBuilder(), nil, transform)) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( as reference: Reference, - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.capture(component(), reference._raw, transform)) + self.init(factory.capture(componentBuilder(), reference._raw, transform)) } } @@ -763,22 +763,22 @@ struct VariadicsGenerator: ParsableCommand { \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> 
NewCapture? ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.captureOptional(component(), nil, transform)) + self.init(factory.captureOptional(componentBuilder(), nil, transform)) } \(disfavored)\ @_alwaysEmitIntoClient public init<\(genericParams), NewCapture>( as reference: Reference, - @\(concatBuilderName) _ component: () -> R, + @\(concatBuilderName) _ componentBuilder: () -> R, transform: @escaping (W) throws -> NewCapture? ) \(whereClauseTransformed) { let factory = makeFactory() - self.init(factory.captureOptional(component(), reference._raw, transform)) + self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } From 1b7779a70402a3ef100436895ca415a06a739924 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 2 Aug 2022 20:36:00 +0100 Subject: [PATCH 07/33] Remove `re'...'` and `rx'...'` delimiters We didn't end up choosing these, remove their lexing code. `#|...|#` remains to test the experimental syntax. --- .../Regex/Parse/DelimiterLexing.swift | 100 +----------------- Sources/_RegexParser/Regex/Parse/Parse.swift | 4 +- Tests/RegexTests/LexTests.swift | 3 - Tests/RegexTests/ParseTests.swift | 89 ++++++---------- 4 files changed, 37 insertions(+), 159 deletions(-) diff --git a/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift b/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift index dd142f016..4d86f9d93 100644 --- a/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift +++ b/Sources/_RegexParser/Regex/Parse/DelimiterLexing.swift @@ -31,7 +31,7 @@ public struct Delimiter: Hashable { switch kind { case .forwardSlash: return poundCount > 0 - case .experimental, .reSingleQuote, .rxSingleQuote: + case .experimental: return false } } @@ -47,15 +47,11 @@ extension Delimiter { enum Kind: Hashable, CaseIterable { case forwardSlash case experimental - case reSingleQuote - case rxSingleQuote var openingAndClosing: (opening: String, closing: String) { switch self { case .forwardSlash: return 
("/", "/") case .experimental: return ("#|", "|#") - case .reSingleQuote: return ("re'", "'") - case .rxSingleQuote: return ("rx'", "'") } } var opening: String { openingAndClosing.opening } @@ -67,7 +63,7 @@ extension Delimiter { switch self { case .forwardSlash: return true - case .experimental, .reSingleQuote, .rxSingleQuote: + case .experimental: return false } } @@ -150,14 +146,6 @@ fileprivate struct DelimiterLexer { slice(at: cursor, count) } - /// Return the slice of `count` bytes preceding the current cursor, or `nil` - /// if there are fewer than `count` bytes before the cursor. - func sliceBehind(_ count: Int) -> UnsafeRawBufferPointer? { - let priorCursor = cursor - count - guard priorCursor >= start else { return nil } - return slice(at: priorCursor, count) - } - /// Advance the cursor `n` bytes. mutating func advanceCursor(_ n: Int = 1) { cursor += n @@ -186,86 +174,6 @@ fileprivate struct DelimiterLexer { return true } - /// Attempt to skip over a closing delimiter character that is unlikely to be - /// the actual closing delimiter. - mutating func trySkipDelimiter(_ delimiter: Delimiter) { - // Only the closing `'` for re'...'/rx'...' can potentially be skipped over. - switch delimiter.kind { - case .forwardSlash, .experimental: - return - case .reSingleQuote, .rxSingleQuote: - break - } - guard load() == ascii("'") else { return } - - /// Need to look for a prefix of `(?`, `(?(`, `\k`, `\g`, `(?C`, as those - /// are the cases that could use single quotes. Note that none of these - /// would be valid regex endings anyway. - let calloutPrefix = "(?C" - let prefix = ["(?", "(?(", #"\k"#, #"\g"#, calloutPrefix].first { prior in - guard let priorSlice = sliceBehind(prior.utf8.count), - priorSlice.elementsEqual(prior.utf8) - else { return false } - - // Make sure the slice isn't preceded by a '\', as that invalidates this - // analysis. 
- if let prior = sliceBehind(priorSlice.count + 1) { - return prior[0] != ascii("\\") - } - return true - } - guard let prefix = prefix else { return } - let isCallout = prefix == calloutPrefix - - func isPossiblyGroupReference(_ c: UInt8) -> Bool { - // If this is an ASCII character, make sure it's for a group name. Leave - // other UTF-8 encoded scalars alone, this should at least catch cases - // where we run into a symbol such as `{`, `.`, `;` that would indicate - // we've likely advanced out of the bounds of the regex. - let scalar = UnicodeScalar(c) - guard scalar.isASCII else { return true } - switch scalar { - // Include '-' and '+' which may be used in recursion levels and relative - // references. - case "A"..."Z", "a"..."z", "0"..."9", "_", "-", "+": - return true - default: - return false - } - } - - // Make a note of the current lexing position, as we may need to revert - // back to it. - let originalCursor = cursor - advanceCursor() - - // Try skip over what would be the contents of a group identifier/reference. - while let next = load() { - // Found the ending, we're done. Return so we can continue to lex to the - // real delimiter. - if next == ascii("'") { - advanceCursor() - return - } - - // If this isn't a callout, make sure we have something that could be a - // group reference. We limit the character set here to improve diagnostic - // behavior in the case where the literal is actually unterminated. We - // ideally don't want to go wandering off into Swift source code. We can't - // do the same for callouts, as they take arbitrary strings. - guard isCallout || isPossiblyGroupReference(next) else { break } - do { - try advance() - } catch { - break - } - } - // We bailed out, either because we ran into something that didn't look like - // an identifier, or we reached the end of the line. Revert back to the - // original guess of delimiter. 
- cursor = originalCursor - } - /// Attempt to eat a particular closing delimiter, returning the contents of /// the literal, and ending pointer, or `nil` if this is not a delimiter /// ending. @@ -401,10 +309,6 @@ fileprivate struct DelimiterLexer { } } while true { - // Check to see if we're at a character that looks like a delimiter, but - // likely isn't. In such a case, we can attempt to skip over it. - trySkipDelimiter(delimiter) - // Try to lex the closing delimiter. if let (contents, end) = try tryEatEnding(delimiter, contentsStart: contentsStart) { diff --git a/Sources/_RegexParser/Regex/Parse/Parse.swift b/Sources/_RegexParser/Regex/Parse/Parse.swift index 0aae031d5..d9b6f23a0 100644 --- a/Sources/_RegexParser/Regex/Parse/Parse.swift +++ b/Sources/_RegexParser/Regex/Parse/Parse.swift @@ -672,9 +672,7 @@ fileprivate func defaultSyntaxOptions( return [.multilineCompilerLiteral, .extendedSyntax] } return .traditional - case .reSingleQuote: - return .traditional - case .experimental, .rxSingleQuote: + case .experimental: return .experimental } } diff --git a/Tests/RegexTests/LexTests.swift b/Tests/RegexTests/LexTests.swift index 49184deb3..53775e66e 100644 --- a/Tests/RegexTests/LexTests.swift +++ b/Tests/RegexTests/LexTests.swift @@ -96,9 +96,6 @@ extension RegexTests { ("#|abc/#def#", nil), ("#/abc\n/#", nil), ("#/abc\r/#", nil), - - (#"re'abcre\''"#, (#"abcre\'"#, delim(.reSingleQuote))), - (#"re'\'"#, nil) ] for (input, expected) in testCases { diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift index 84ce361f3..0e7d41eed 100644 --- a/Tests/RegexTests/ParseTests.swift +++ b/Tests/RegexTests/ParseTests.swift @@ -2151,9 +2151,6 @@ extension RegexTests { parseWithDelimitersTest("##/a b/##", concat("a", " ", "b")) parseWithDelimitersTest("#|a b|#", concat("a", "b")) - parseWithDelimitersTest("re'a b'", concat("a", " ", "b")) - parseWithDelimitersTest("rx'a b'", concat("a", "b")) - parseWithDelimitersTest("#|[a b]|#", 
charClass("a", "b")) parseWithDelimitersTest( "#|(?-x)[a b]|#", concat( @@ -2176,13 +2173,13 @@ extension RegexTests { parseWithDelimitersTest("#||||#", alt(empty(), empty(), empty())) parseWithDelimitersTest("#|a||#", alt("a", empty())) - parseWithDelimitersTest("re'x*'", zeroOrMore(of: "x")) + parseWithDelimitersTest("/x*/", zeroOrMore(of: "x")) - parseWithDelimitersTest(#"re'🔥🇩🇰'"#, concat("🔥", "🇩🇰")) - parseWithDelimitersTest(#"re'🔥✅'"#, concat("🔥", "✅")) + parseWithDelimitersTest(#"/🔥🇩🇰/"#, concat("🔥", "🇩🇰")) + parseWithDelimitersTest(#"/🔥✅/"#, concat("🔥", "✅")) // Printable ASCII characters. - delimiterLexingTest(##"re' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'"##) + delimiterLexingTest(##"#/ !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~/#"##) // Make sure we can handle a combining accent as first character. parseWithDelimitersTest("/\u{301}/", "\u{301}") @@ -2294,72 +2291,61 @@ extension RegexTests { /# """#, charClass(range_m("a", "b"))) - - // MARK: Delimiter skipping: Make sure we can skip over the ending delimiter - // if it's clear that it's part of the regex syntax. 
- parseWithDelimitersTest( - #"re'(?'a_bcA0'\')'"#, namedCapture("a_bcA0", "'")) + #"/(?'a_bcA0'\')/"#, namedCapture("a_bcA0", "'")) parseWithDelimitersTest( - #"re'(?'a_bcA0-c1A'x*)'"#, + #"/(?'a_bcA0-c1A'x*)/"#, balancedCapture(name: "a_bcA0", priorName: "c1A", zeroOrMore(of: "x")), unsupported: true) parseWithDelimitersTest( - #"rx' (?'a_bcA0' a b)'"#, concat(namedCapture("a_bcA0", concat("a", "b")))) + #"/ (?'a_bcA0' a b)/"#, concat(" ", namedCapture("a_bcA0", concat(" ", "a", " ", "b")))) parseWithDelimitersTest( - #"re'(?('a_bcA0')x|y)'"#, conditional( + #"/(?('a_bcA0')x|y)/"#, conditional( .groupMatched(ref("a_bcA0")), trueBranch: "x", falseBranch: "y"), unsupported: true ) parseWithDelimitersTest( - #"re'(?('+20')\')'"#, conditional( + #"/(?('+20')\')/"#, conditional( .groupMatched(ref(plus: 20)), trueBranch: "'", falseBranch: empty()), unsupported: true ) parseWithDelimitersTest( - #"re'a\k'b0A''"#, concat("a", backreference(.named("b0A"))), throwsError: .invalidNamedReference("b0A")) + #"/a\k'b0A'/"#, concat("a", backreference(.named("b0A"))), throwsError: .invalidNamedReference("b0A")) parseWithDelimitersTest( - #"re'\k'+2-1''"#, backreference(ref(plus: 2), recursionLevel: -1), + #"/\k'+2-1'/"#, backreference(ref(plus: 2), recursionLevel: -1), unsupported: true ) parseWithDelimitersTest( - #"re'a\g'b0A''"#, concat("a", subpattern(.named("b0A"))), unsupported: true) + #"/a\g'b0A'/"#, concat("a", subpattern(.named("b0A"))), unsupported: true) parseWithDelimitersTest( - #"re'\g'-1'\''"#, concat(subpattern(ref(minus: 1)), "'"), unsupported: true) + #"/\g'-1'\'/"#, concat(subpattern(ref(minus: 1)), "'"), unsupported: true) parseWithDelimitersTest( - #"re'(?C'a*b\c 🔥_ ;')'"#, pcreCallout(string: #"a*b\c 🔥_ ;"#), + #"/(?C'a*b\c 🔥_ ;')/"#, pcreCallout(string: #"a*b\c 🔥_ ;"#), unsupported: true) - // Fine, because we don't end up skipping. 
- delimiterLexingTest(#"re'(?'"#) - delimiterLexingTest(#"re'(?('"#) - delimiterLexingTest(#"re'\k'"#) - delimiterLexingTest(#"re'\g'"#) - delimiterLexingTest(#"re'(?C'"#) + delimiterLexingTest(#"/(?/"#) + delimiterLexingTest(#"/(?(/"#) + delimiterLexingTest(#"/\k/"#) + delimiterLexingTest(#"/\g/"#) + delimiterLexingTest(#"/(?C/"#) - // Not a valid group name, but we can still skip over it. - delimiterLexingTest(#"re'(?'🔥')'"#) + delimiterLexingTest(#"/(?'🔥')/"#) - // Escaped, so don't skip. These will ignore the ending `'` as we've already - // closed the literal. parseWithDelimitersTest( - #"re'\(?''"#, zeroOrOne(of: "("), ignoreTrailing: true - ) + #"/\(?/"#, zeroOrOne(of: "(")) parseWithDelimitersTest( - #"re'\\k''"#, concat("\\", "k"), ignoreTrailing: true - ) + #"/\\k/"#, concat("\\", "k")) parseWithDelimitersTest( - #"re'\\g''"#, concat("\\", "g"), ignoreTrailing: true - ) + #"/\\g/"#, concat("\\", "g")) parseWithDelimitersTest( - #"re'\(?C''"#, concat(zeroOrOne(of: "("), "C"), ignoreTrailing: true - ) - delimiterLexingTest(#"re'(\?''"#, ignoreTrailing: true) - delimiterLexingTest(#"re'\(?(''"#, ignoreTrailing: true) + #"/\(?C/"#, concat(zeroOrOne(of: "("), "C")) + + delimiterLexingTest(#"/(\?/"#) + delimiterLexingTest(#"/\(?(/"#) // MARK: Parse not-equal @@ -3322,21 +3308,17 @@ extension RegexTests { // MARK: Printable ASCII - delimiterLexingDiagnosticTest(#"re'\\#n'"#, .unterminated) for i: UInt8 in 0x1 ..< 0x20 where i != 0xA && i != 0xD { // U+A & U+D are \n and \r. - delimiterLexingDiagnosticTest("re'\(UnicodeScalar(i))'", .unprintableASCII) + delimiterLexingDiagnosticTest("/\(UnicodeScalar(i))/", .unprintableASCII) } - delimiterLexingDiagnosticTest("re'\n'", .unterminated) - delimiterLexingDiagnosticTest("re'\r'", .unterminated) - delimiterLexingDiagnosticTest("re'\u{7F}'", .unprintableASCII) - // MARK: Delimiter skipping + // Can only be done if pound signs are used. 
+ delimiterLexingDiagnosticTest("/\n/", .unterminated) + delimiterLexingDiagnosticTest("/\r/", .unterminated) + delimiterLexingDiagnosticTest("/\u{7F}/", .unprintableASCII) - delimiterLexingDiagnosticTest("re'(?''", .unterminated) - delimiterLexingDiagnosticTest("re'(?'abc'", .unterminated) - delimiterLexingDiagnosticTest("re'(?('abc'", .unterminated) - delimiterLexingDiagnosticTest(#"re'\k'ab_c0+-'"#, .unterminated) - delimiterLexingDiagnosticTest(#"re'\g'ab_c0+-'"#, .unterminated) + delimiterLexingDiagnosticTest("/", .unterminated) + delimiterLexingDiagnosticTest("/x", .unterminated) // MARK: Unbalanced extended syntax delimiterLexingDiagnosticTest("#/a/", .unterminated) @@ -3344,9 +3326,6 @@ extension RegexTests { // MARK: Multiline - // Can only be done if pound signs are used. - delimiterLexingDiagnosticTest("/\n/", .unterminated) - // Opening and closing delimiters must be on a newline. delimiterLexingDiagnosticTest("#/a\n/#", .unterminated) delimiterLexingDiagnosticTest("#/\na/#", .multilineClosingNotOnNewline) From e8d273aa49c8938bbfafde5561da88c6f36e9935 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Tue, 2 Aug 2022 17:11:19 -0700 Subject: [PATCH 08/33] Change chart to be a normalized performance graph A bad one because I have no idea how to use Swift Charts --- Sources/RegexBenchmark/BenchmarkChart.swift | 67 +++++++-------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift index aa4cafff3..dc522ef7c 100644 --- a/Sources/RegexBenchmark/BenchmarkChart.swift +++ b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -17,41 +17,30 @@ import SwiftUI struct BenchmarkChart: View { var comparisons: [BenchmarkResult.Comparison] + var sortedComparisons: [BenchmarkResult.Comparison] { + comparisons.sorted { a, b in + a.latest.median.seconds/a.baseline.median.seconds < + b.latest.median.seconds/b.baseline.median.seconds + } + } var body: some View { 
VStack(alignment: .leading) { - ForEach(comparisons) { comparison in - let new = comparison.latest.median.seconds - let old = comparison.baseline.median.seconds - Chart { + Chart { + ForEach(sortedComparisons) { comparison in + let new = comparison.latest.median.seconds + let old = comparison.baseline.median.seconds chartBody( name: comparison.name, new: new, old: old, sampleCount: comparison.latest.samples) } - .chartXAxis { - AxisMarks { value in - AxisTick() - AxisValueLabel { - Text(String(format: "%.5fs", value.as(Double.self)!)) - } - } - } - .chartYAxis { - AxisMarks { value in - AxisGridLine() - AxisValueLabel { - HStack { - Text(value.as(String.self)!) - let delta = (new - old) / old * 100 - Text(String(format: "%+.2f%%", delta)) - .foregroundColor(delta <= 0 ? .green : .yellow) - } - } - } - } - .frame(idealHeight: 60) - } + // Baseline + RuleMark(y: .value("Time", 1.0)) + .foregroundStyle(.red) + .lineStyle(.init(lineWidth: 1, dash: [2])) + + }.frame(idealHeight: 400) } } @@ -62,27 +51,15 @@ struct BenchmarkChart: View { old: TimeInterval, sampleCount: Int ) -> some ChartContent { - // Baseline bar - BarMark( - x: .value("Time", old), - y: .value("Name", "\(name) (\(sampleCount) samples)")) - .position(by: .value("Kind", "Baseline")) - .foregroundStyle(.gray) - - // Latest result bar + // Normalized runtime BarMark( - x: .value("Time", new), - y: .value("Name", "\(name) (\(sampleCount) samples)")) - .position(by: .value("Kind", "Latest")) + x: .value("Name", name), + y: .value("Normalized runtime", new / old)) + .foregroundStyle(LinearGradient( colors: [.accentColor, new - old <= 0 ? 
.green : .yellow], - startPoint: .leading, - endPoint: .trailing)) - - // Comparison - RuleMark(x: .value("Time", new)) - .foregroundStyle(.gray) - .lineStyle(.init(lineWidth: 0.5, dash: [2])) + startPoint: .bottom, + endPoint: .top)) } } From 2c1223628e533ae2ff1e5596b32f86f53fa9229c Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Tue, 2 Aug 2022 17:42:45 -0700 Subject: [PATCH 09/33] Add compile time measurement + cleanup --- Sources/RegexBenchmark/Benchmark.swift | 15 +++- Sources/RegexBenchmark/BenchmarkResults.swift | 80 +++++++++++++------ Sources/RegexBenchmark/BenchmarkRunner.swift | 54 +++++++++---- Sources/RegexBenchmark/CLI.swift | 11 ++- .../RegexBenchmark/Suite/LiteralSearch.swift | 2 +- Sources/RegexBenchmark/Suite/Unicode.swift | 12 +-- Sources/_StringProcessing/Regex/Core.swift | 19 +++++ 7 files changed, 138 insertions(+), 55 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index 159228275..43ce96542 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -1,8 +1,9 @@ -import _StringProcessing +@_spi(RegexBenchmark) import _StringProcessing import Foundation protocol RegexBenchmark { var name: String { get } + func compile() func run() func debug() } @@ -19,6 +20,10 @@ struct Benchmark: RegexBenchmark { case allMatches } + func compile() { + blackHole(regex._compileRegex()) + } + func run() { switch type { case .whole: blackHole(target.wholeMatch(of: regex)) @@ -43,6 +48,8 @@ struct NSBenchmark: RegexBenchmark { case first } + func compile() {} + func run() { switch type { case .allMatches: blackHole(regex.matches(in: target, range: range)) @@ -57,6 +64,10 @@ struct InputListBenchmark: RegexBenchmark { let regex: Regex let targets: [String] + func compile() { + blackHole(regex._compileRegex()) + } + func run() { for target in targets { blackHole(target.wholeMatch(of: regex)) @@ -79,6 +90,8 @@ struct InputListNSBenchmark: RegexBenchmark { 
NSRange(target.startIndex.. 0}) .sorted(by: {(a,b) in a.diff!.seconds > b.diff!.seconds}) let improvements = comparisons.filter({$0.diff!.seconds < 0}) @@ -95,11 +120,16 @@ extension BenchmarkRunner { #endif } - func saveComparisons(_ comparisons: [BenchmarkResult.Comparison], path: String) throws { + func saveComparisons( + _ comparisons: [BenchmarkResult.Comparison], + path: String + ) throws { let url = URL(fileURLWithPath: path, isDirectory: false) let parent = url.deletingLastPathComponent() if !FileManager.default.fileExists(atPath: parent.path) { - try! FileManager.default.createDirectory(atPath: parent.path, withIntermediateDirectories: true) + try! FileManager.default.createDirectory( + atPath: parent.path, + withIntermediateDirectories: true) } var contents = "name,latest,baseline,diff,percentage\n" @@ -112,17 +142,10 @@ extension BenchmarkRunner { } struct BenchmarkResult: Codable { + let compileTime: Time let median: Time - let estimatedCompileTime: Time let stdev: Double let samples: Int - - init(_ initialRunTime: Time, _ median: Time, _ stdev: Double, _ samples: Int) { - self.estimatedCompileTime = initialRunTime - median - self.median = median - self.stdev = stdev - self.samples = samples - } } extension BenchmarkResult { @@ -135,7 +158,7 @@ extension BenchmarkResult { var diff: Time? 
{ if diffCompileTimes { - return latest.estimatedCompileTime - baseline.estimatedCompileTime + return latest.compileTime - baseline.compileTime } if Stats.tTest(baseline, latest) { return latest.median - baseline.median @@ -150,8 +173,8 @@ extension BenchmarkResult { let oldVal: Time let newVal: Time if diffCompileTimes { - oldVal = baseline.estimatedCompileTime - newVal = latest.estimatedCompileTime + oldVal = baseline.compileTime + newVal = latest.compileTime } else { oldVal = baseline.median newVal = latest.median @@ -169,8 +192,8 @@ extension BenchmarkResult { let oldVal: Time let newVal: Time if diffCompileTimes { - oldVal = baseline.estimatedCompileTime - newVal = latest.estimatedCompileTime + oldVal = baseline.compileTime + newVal = latest.compileTime } else { oldVal = baseline.median newVal = latest.median @@ -213,13 +236,18 @@ struct SuiteResult { return comparisons } - /// Compares the estimated compile times - func compareCompileTimes(with other: SuiteResult) -> [BenchmarkResult.Comparison] { + /// Compares the compile times + func compareCompileTimes( + with other: SuiteResult + ) -> [BenchmarkResult.Comparison] { var comparisons: [BenchmarkResult.Comparison] = [] for item in results { if let otherVal = other.results[item.key] { comparisons.append( - .init(name: item.key, baseline: otherVal, latest: item.value, diffCompileTimes: true)) + .init(name: item.key, + baseline: otherVal, + latest: item.value, + diffCompileTimes: true)) } } return comparisons diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index d59184fed..3188f6567 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -18,17 +18,26 @@ struct BenchmarkRunner { suite.append(new) } - mutating func measure(benchmark: some RegexBenchmark, samples: Int) -> BenchmarkResult { - var times: [Time] = [] - - // initial run to make sure the regex has been compiled - // FIXME: this is a very 
poor way of estimating compile time - // we should have some sort of interface directly with the engine to measure this - // This also completely breaks when we rerun measure() for variant results - let initialStart = Tick.now + mutating func measure( + benchmark: some RegexBenchmark, + samples: Int + ) -> BenchmarkResult { + var runtimes: [Time] = [] + var compileTimes: [Time] = [] + // Initial run to make sure the regex has been compiled benchmark.run() - let initialEnd = Tick.now - let initialRunTime = initialEnd.elapsedTime(since: initialStart) + + // Measure compilataion time + for _ in 0.. Time.millisecond { + print("Warning: Abnormally high compilation time, what happened?") + } if !quiet { - print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (estimated compile time: \(result.estimatedCompileTime))") + print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (compile time: \(result.compileTime))") } self.results.add(name: b.name, result: result) } diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index fa0729fd1..0746aeeeb 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -20,8 +20,8 @@ struct Runner: ParsableCommand { @Option(help: "The result file to compare against") var compare: String? - @Option(help: "Experimental compile time comparison") - var experimentalCompareCompileTimes: String? + @Option(help: "Compare compile times with the given results file") + var compareCompileTime: String? 
@Flag(help: "Show comparison chart") var showChart: Bool = false @@ -72,9 +72,12 @@ struct Runner: ParsableCommand { try runner.compareWithNS(showChart: showChart, saveTo: saveComparison) } if let compareFile = compare { - try runner.compare(against: compareFile, showChart: showChart, saveTo: saveComparison) + try runner.compare( + against: compareFile, + showChart: showChart, + saveTo: saveComparison) } - if let compareFile = experimentalCompareCompileTimes { + if let compareFile = compareCompileTime { try runner.compareCompileTimes(against: compareFile, showChart: showChart) } } diff --git a/Sources/RegexBenchmark/Suite/LiteralSearch.swift b/Sources/RegexBenchmark/Suite/LiteralSearch.swift index 1f48f9945..32cf60a7d 100644 --- a/Sources/RegexBenchmark/Suite/LiteralSearch.swift +++ b/Sources/RegexBenchmark/Suite/LiteralSearch.swift @@ -3,7 +3,7 @@ import _StringProcessing extension BenchmarkRunner { mutating func addLiteralSearch() { let searchNotFound = CrossBenchmark(baseName: "LiteralSearchNotFound", regex: "magic_string_to_search_for", input: Inputs.graphemeBreakData) - let search = CrossBenchmark(baseName: "LiteralSearch", regex: "aatcgaagcagtcttctaacacccttagaaaagcaaacactattgaatactgccgccgca", input: Inputs.graphemeBreakData) + let search = CrossBenchmark(baseName: "LiteralSearch", regex: "HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH", input: Inputs.graphemeBreakData) searchNotFound.register(&self) search.register(&self) } diff --git a/Sources/RegexBenchmark/Suite/Unicode.swift b/Sources/RegexBenchmark/Suite/Unicode.swift index 5944ab2ca..46afda712 100644 --- a/Sources/RegexBenchmark/Suite/Unicode.swift +++ b/Sources/RegexBenchmark/Suite/Unicode.swift @@ -4,11 +4,11 @@ extension BenchmarkRunner { mutating func addUnicode() { // tagged unicode: unicode characters surrounded by html tags // use the same html regex, uses backreference + reluctant quantification - let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"# - let taggedEmojis = CrossBenchmark( - 
baseName: "TaggedEmojis", - regex: tags, - input: Inputs.taggedEmojis) +// let tags = #"<(\w*)\b[^>]*>(.*?)<\/\1>"# // disabled due to \b being unusably slow +// let taggedEmojis = CrossBenchmark( +// baseName: "TaggedEmojis", +// regex: tags, +// input: Inputs.taggedEmojis) // Now actually matching emojis let emoji = #"(😃|😀|😳|😲|😦|😊|🙊|😘|😏|😳|😒){2,5}"# @@ -18,7 +18,7 @@ extension BenchmarkRunner { regex: emoji, input: Inputs.taggedEmojis) - // taggedEmojis.register(&self) // disabled due to \b being unusably slow + // taggedEmojis.register(&self) emojiRegex.register(&self) } } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 0afe11c77..27c1f4eff 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -135,6 +135,25 @@ extension Regex { } } +@available(SwiftStdlib 5.7, *) +@_spi(RegexBenchmark) +extension Regex { + /// Compiles the stored DSLTree into bytecode and return if it was successful + /// For measuring compilation times + /// + /// Note: This bypasses the cached program that is normally used + public func _compileRegex() -> Bool { + do { + let _ = try Compiler( + tree: program.tree, + compileOptions: program.compileOptions).emit() + return true + } catch { + return false + } + } +} + @available(SwiftStdlib 5.7, *) extension Regex { internal mutating func _setCompilerOptionsForTesting(_ opts: Compiler.CompileOptions) { From 1f6010c6a2e28a5b29380eeac8dd6b3e31a47b2d Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Tue, 2 Aug 2022 17:49:21 -0700 Subject: [PATCH 10/33] Oops --- Sources/RegexBenchmark/BenchmarkChart.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift index dc522ef7c..f104fc78d 100644 --- a/Sources/RegexBenchmark/BenchmarkChart.swift +++ b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -55,7 +55,6 @@ struct BenchmarkChart: View { BarMark( x: 
.value("Name", name), y: .value("Normalized runtime", new / old)) - .foregroundStyle(LinearGradient( colors: [.accentColor, new - old <= 0 ? .green : .yellow], startPoint: .bottom, From a0af34536d513e186fb45d539939fd52dbd1164d Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Tue, 2 Aug 2022 17:57:53 -0700 Subject: [PATCH 11/33] Add enum for comparison type --- Sources/RegexBenchmark/BenchmarkResults.swift | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index 3fe96d9ff..43a57c8d0 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -154,16 +154,24 @@ extension BenchmarkResult { var name: String var baseline: BenchmarkResult var latest: BenchmarkResult - var diffCompileTimes: Bool = false + var type: ComparisonType = .runtime + enum ComparisonType { + case runtime + case compileTime + } + var diff: Time? 
{ - if diffCompileTimes { + switch type { + case .compileTime: return latest.compileTime - baseline.compileTime + case .runtime: + if Stats.tTest(baseline, latest) { + return latest.median - baseline.median + } + return nil } - if Stats.tTest(baseline, latest) { - return latest.median - baseline.median - } - return nil + } var description: String { @@ -172,10 +180,11 @@ extension BenchmarkResult { } let oldVal: Time let newVal: Time - if diffCompileTimes { + switch type { + case .compileTime: oldVal = baseline.compileTime newVal = latest.compileTime - } else { + case .runtime: oldVal = baseline.median newVal = latest.median } @@ -191,10 +200,11 @@ extension BenchmarkResult { } let oldVal: Time let newVal: Time - if diffCompileTimes { + switch type { + case .compileTime: oldVal = baseline.compileTime newVal = latest.compileTime - } else { + case .runtime: oldVal = baseline.median newVal = latest.median } @@ -211,7 +221,6 @@ struct SuiteResult { results.updateValue(result, forKey: name) } - /// Compares with the given SuiteResult func compare(with other: SuiteResult) -> [BenchmarkResult.Comparison] { var comparisons: [BenchmarkResult.Comparison] = [] for item in results { @@ -236,7 +245,6 @@ struct SuiteResult { return comparisons } - /// Compares the compile times func compareCompileTimes( with other: SuiteResult ) -> [BenchmarkResult.Comparison] { @@ -247,7 +255,7 @@ struct SuiteResult { .init(name: item.key, baseline: otherVal, latest: item.value, - diffCompileTimes: true)) + type: .compileTime)) } } return comparisons From 405fbcbb9088b23e86508bf94f63c38661d60eec Mon Sep 17 00:00:00 2001 From: Lily Date: Wed, 3 Aug 2022 12:58:49 -0700 Subject: [PATCH 12/33] Implement instructions for matching builtin character classes and assertions (#547) - Adds `matchBuiltin` and adjusts `assertBy` to use a switch in processor instead of taking a generic assertion fn - Adds a `CharacterClass` atom --- Sources/RegexBuilder/CharacterClass.swift | 34 +- 
Sources/_StringProcessing/ByteCodeGen.swift | 156 +------ .../_StringProcessing/ConsumerInterface.swift | 26 +- .../Engine/InstPayload.swift | 116 +++++- .../Engine/Instruction.swift | 26 +- .../_StringProcessing/Engine/MEBuilder.swift | 29 +- .../_StringProcessing/Engine/MEBuiltins.swift | 185 ++++++++- .../_StringProcessing/Engine/MEProgram.swift | 9 - .../_StringProcessing/Engine/Processor.swift | 43 +- .../_StringProcessing/Engine/Registers.swift | 9 - .../_StringProcessing/MatchingOptions.swift | 12 - .../_StringProcessing/PrintAsPattern.swift | 39 ++ .../Regex/ASTConversion.swift | 37 +- Sources/_StringProcessing/Regex/DSLTree.swift | 73 ++-- .../Unicode/WordBreaking.swift | 33 ++ .../Utility/RegexFactory.swift | 8 + .../_StringProcessing/Utility/TypedInt.swift | 4 - .../_CharacterClassModel.swift | 385 +++++++----------- Tests/RegexTests/CompileTests.swift | 154 ++++--- Tests/RegexTests/MatchTests.swift | 11 +- 20 files changed, 785 insertions(+), 604 deletions(-) diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift index ea52c28f3..08c7d347e 100644 --- a/Sources/RegexBuilder/CharacterClass.swift +++ b/Sources/RegexBuilder/CharacterClass.swift @@ -15,27 +15,39 @@ @available(SwiftStdlib 5.7, *) public struct CharacterClass { internal var ccc: DSLTree.CustomCharacterClass + /// The builtin character class, if this CharacterClass is representable by one + internal var builtin: DSLTree.Atom.CharacterClass? 
init(_ ccc: DSLTree.CustomCharacterClass) { self.ccc = ccc + self.builtin = nil } - init(unconverted atom: DSLTree._AST.Atom) { - self.ccc = .init(members: [.atom(.unconverted(atom))]) + init(builtin: DSLTree.Atom.CharacterClass) { + self.ccc = .init(members: [.atom(.characterClass(builtin))]) + self.builtin = builtin } } @available(SwiftStdlib 5.7, *) extension CharacterClass: RegexComponent { public var regex: Regex { - _RegexFactory().customCharacterClass(ccc) + if let cc = builtin { + return _RegexFactory().characterClass(cc) + } else { + return _RegexFactory().customCharacterClass(ccc) + } } } @available(SwiftStdlib 5.7, *) extension CharacterClass { public var inverted: CharacterClass { - CharacterClass(ccc.inverted) + if let inv = builtin?.inverted { + return CharacterClass(builtin: inv) + } else { + return CharacterClass(ccc.inverted) + } } } @@ -50,15 +62,15 @@ extension RegexComponent where Self == CharacterClass { } public static var anyGraphemeCluster: CharacterClass { - .init(unconverted: ._anyGrapheme) + .init(builtin: .anyGrapheme) } public static var whitespace: CharacterClass { - .init(unconverted: ._whitespace) + .init(builtin: .whitespace) } public static var digit: CharacterClass { - .init(unconverted: ._digit) + .init(builtin: .digit) } public static var hexDigit: CharacterClass { @@ -70,19 +82,19 @@ extension RegexComponent where Self == CharacterClass { } public static var horizontalWhitespace: CharacterClass { - .init(unconverted: ._horizontalWhitespace) + .init(builtin: .horizontalWhitespace) } public static var newlineSequence: CharacterClass { - .init(unconverted: ._newlineSequence) + .init(builtin: .newlineSequence) } public static var verticalWhitespace: CharacterClass { - .init(unconverted: ._verticalWhitespace) + .init(builtin: .verticalWhitespace) } public static var word: CharacterClass { - .init(unconverted: ._word) + .init(builtin: .word) } } diff --git a/Sources/_StringProcessing/ByteCodeGen.swift 
b/Sources/_StringProcessing/ByteCodeGen.swift index e8c92f2b5..da0888039 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -74,6 +74,9 @@ fileprivate extension Compiler.ByteCodeGen { emitMatchScalar(s) } + case let .characterClass(cc): + emitCharacterClass(cc) + case let .assertion(kind): try emitAssertion(kind) @@ -148,147 +151,24 @@ fileprivate extension Compiler.ByteCodeGen { } } - mutating func emitStartOfLine() { - builder.buildAssert { [semanticLevel = options.semanticLevel] - (_, _, input, pos, subjectBounds) in - if pos == subjectBounds.lowerBound { return true } - switch semanticLevel { - case .graphemeCluster: - return input[input.index(before: pos)].isNewline - case .unicodeScalar: - return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline - } - } - } - - mutating func emitEndOfLine() { - builder.buildAssert { [semanticLevel = options.semanticLevel] - (_, _, input, pos, subjectBounds) in - if pos == subjectBounds.upperBound { return true } - switch semanticLevel { - case .graphemeCluster: - return input[pos].isNewline - case .unicodeScalar: - return input.unicodeScalars[pos].isNewline - } - } - } - mutating func emitAssertion( _ kind: DSLTree.Atom.Assertion ) throws { - // FIXME: Depends on API model we have... We may want to - // think through some of these with API interactions in mind - // - // This might break how we use `bounds` for both slicing - // and things like `firstIndex`, that is `firstIndex` may - // need to supply both a slice bounds and a per-search bounds. 
- switch kind { - case .startOfSubject: - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.lowerBound - } - - case .endOfSubjectBeforeNewline: - builder.buildAssert { [semanticLevel = options.semanticLevel] - (_, _, input, pos, subjectBounds) in - if pos == subjectBounds.upperBound { return true } - switch semanticLevel { - case .graphemeCluster: - return input.index(after: pos) == subjectBounds.upperBound - && input[pos].isNewline - case .unicodeScalar: - return input.unicodeScalars.index(after: pos) == subjectBounds.upperBound - && input.unicodeScalars[pos].isNewline - } - } - - case .endOfSubject: - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.upperBound - } - - case .resetStartOfMatch: - // FIXME: Figure out how to communicate this out + if kind == .resetStartOfMatch { throw Unsupported(#"\K (reset/keep assertion)"#) - - case .firstMatchingPositionInSubject: - // TODO: We can probably build a nice model with API here - - // FIXME: This needs to be based on `searchBounds`, - // not the `subjectBounds` given as an argument here - builder.buildAssert { (_, _, input, pos, subjectBounds) in false } - - case .textSegment: - builder.buildAssert { (_, _, input, pos, _) in - // FIXME: Grapheme or word based on options - input.isOnGraphemeClusterBoundary(pos) - } - - case .notTextSegment: - builder.buildAssert { (_, _, input, pos, _) in - // FIXME: Grapheme or word based on options - !input.isOnGraphemeClusterBoundary(pos) - } - - case .startOfLine: - emitStartOfLine() - - case .endOfLine: - emitEndOfLine() - - case .caretAnchor: - if options.anchorsMatchNewlines { - emitStartOfLine() - } else { - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.lowerBound - } - } - - case .dollarAnchor: - if options.anchorsMatchNewlines { - emitEndOfLine() - } else { - builder.buildAssert { (_, _, input, pos, subjectBounds) in - pos == subjectBounds.upperBound - } - } - - case 
.wordBoundary: - builder.buildAssert { [options] - (cache, maxIndex, input, pos, subjectBounds) in - if options.usesSimpleUnicodeBoundaries { - // TODO: How should we handle bounds? - return _CharacterClassModel.word.isBoundary( - input, - at: pos, - bounds: subjectBounds, - with: options - ) - } else { - return input.isOnWordBoundary(at: pos, using: &cache, &maxIndex) - } - } - - case .notWordBoundary: - builder.buildAssert { [options] - (cache, maxIndex, input, pos, subjectBounds) in - if options.usesSimpleUnicodeBoundaries { - // TODO: How should we handle bounds? - return !_CharacterClassModel.word.isBoundary( - input, - at: pos, - bounds: subjectBounds, - with: options - ) - } else { - return !input.isOnWordBoundary(at: pos, using: &cache, &maxIndex) - } - } } + builder.buildAssert( + by: kind, + options.anchorsMatchNewlines, + options.usesSimpleUnicodeBoundaries, + options.usesASCIIWord, + options.semanticLevel) } - + + mutating func emitCharacterClass(_ cc: DSLTree.Atom.CharacterClass) { + builder.buildMatchBuiltin(model: cc.asRuntimeModel(options)) + } + mutating func emitMatchScalar(_ s: UnicodeScalar) { assert(options.semanticLevel == .unicodeScalar) if options.isCaseInsensitive && s.properties.isCased { @@ -907,10 +787,10 @@ fileprivate extension Compiler.ByteCodeGen { } else { builder.buildMatchAsciiBitset(asciiBitset) } - } else { - let consumer = try ccc.generateConsumer(options) - builder.buildConsume(by: consumer) + return } + let consumer = try ccc.generateConsumer(options) + builder.buildConsume(by: consumer) } mutating func emitConcatenation(_ children: [DSLTree.Node]) throws { diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 083781120..3a2731b0a 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -162,6 +162,8 @@ extension DSLTree.Atom { case .assertion: // TODO: We could handle, should this be total? 
return nil + case .characterClass(let cc): + return cc.generateConsumer(opts) case .backreference: // TODO: Should we handle? @@ -182,6 +184,15 @@ extension DSLTree.Atom { } } +extension DSLTree.Atom.CharacterClass { + func generateConsumer(_ opts: MatchingOptions) -> MEProgram.ConsumeFunction { + let model = asRuntimeModel(opts) + return { input, bounds in + model.matches(in: input, at: bounds.lowerBound) + } + } +} + extension String { /// Compares this string to `other` using the loose matching rule UAX44-LM2, /// which ignores case, whitespace, underscores, and nearly all medial @@ -269,16 +280,6 @@ extension AST.Atom { func generateConsumer( _ opts: MatchingOptions ) throws -> MEProgram.ConsumeFunction? { - // TODO: Wean ourselves off of this type... - if let cc = self.characterClass?.withMatchLevel( - opts.matchLevel - ) { - return { input, bounds in - // FIXME: should we worry about out of bounds? - cc.matches(in: input, at: bounds.lowerBound, with: opts) - } - } - switch kind { case let .scalar(s): assertionFailure( @@ -312,8 +313,11 @@ extension AST.Atom { case .caretAnchor, .dollarAnchor: // handled in emitAssertion return nil + case .escaped: + // handled in emitAssertion and emitCharacterClass + return nil - case .scalarSequence, .escaped, .keyboardControl, .keyboardMeta, + case .scalarSequence, .keyboardControl, .keyboardMeta, .keyboardMetaControl, .backreference, .subpattern, .callout, .backtrackingDirective, .changeMatchingOptions, .invalid: // FIXME: implement diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 42fb86913..d6372c0ba 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -9,7 +9,6 @@ // //===----------------------------------------------------------------------===// - extension Instruction { /// An instruction's payload packs operands and destination /// registers. 
@@ -51,7 +50,6 @@ extension Instruction.Payload { case element(ElementRegister) case consumer(ConsumeFunctionRegister) case bitset(AsciiBitsetRegister) - case assertion(AssertionFunctionRegister) case addr(InstructionAddress) case capture(CaptureRegister) @@ -225,7 +223,7 @@ extension Instruction.Payload { let pair: (UInt64, AsciiBitsetRegister) = interpretPair() return (isScalar: pair.0 == 1, pair.1) } - + init(consumer: ConsumeFunctionRegister) { self.init(consumer) } @@ -233,13 +231,6 @@ extension Instruction.Payload { interpret() } - init(assertion: AssertionFunctionRegister) { - self.init(assertion) - } - var assertion: AssertionFunctionRegister { - interpret() - } - init(addr: InstructionAddress) { self.init(addr) } @@ -339,5 +330,110 @@ extension Instruction.Payload { ) { interpretPair() } + // MARK: Struct payloads + init(_ model: _CharacterClassModel) { + self.init(CharacterClassPayload(model).rawValue) + } + var characterClassPayload: CharacterClassPayload{ + return CharacterClassPayload(rawValue: rawValue & _payloadMask) + } + + init(assertion payload: AssertionPayload) { + self.init(rawValue: payload.rawValue) + } + var assertion: AssertionPayload { + AssertionPayload.init(rawValue: self.rawValue & _payloadMask) + } } +// MARK: Struct definitions +struct CharacterClassPayload: RawRepresentable { + let rawValue: UInt64 + // Layout: + // Top three bits are isInverted, isStrict, isScalar + // Lower 8 bits are _CCM.Representation + static var invertedBit: UInt64 { 1 << 55 } + static var strictASCIIBit: UInt64 { 1 << 54 } + static var scalarBit: UInt64 { 1 << 53 } + static var ccMask: UInt64 { 0xFF } + init(rawValue: UInt64) { + assert(rawValue & _opcodeMask == 0) + self.rawValue = rawValue + } + init(_ model: _CharacterClassModel) { + let invertedBit = model.isInverted ? CharacterClassPayload.invertedBit : 0 + let strictASCIIBit = model.isStrictASCII ? CharacterClassPayload.strictASCIIBit : 0 + let scalarBit = model.matchLevel == .unicodeScalar ? 
CharacterClassPayload.scalarBit : 0 + assert(model.cc.rawValue <= CharacterClassPayload.ccMask) + assert(model.cc.rawValue & invertedBit & strictASCIIBit & scalarBit == 0) // Sanity check + self.init(rawValue: model.cc.rawValue | invertedBit | strictASCIIBit | scalarBit) + } + + var isInverted: Bool { + self.rawValue & CharacterClassPayload.invertedBit != 0 + } + /// Represents if the given character class should strictly only match ascii values based on the options given + /// See Oniguruma options: (?D) (?\P) (?S) (?W) + var isStrictASCII: Bool { + self.rawValue & CharacterClassPayload.strictASCIIBit != 0 + } + var isScalarSemantics: Bool { + self.rawValue & CharacterClassPayload.scalarBit != 0 + } + var cc: _CharacterClassModel.Representation { + _CharacterClassModel.Representation.init( + rawValue: self.rawValue & CharacterClassPayload.ccMask).unsafelyUnwrapped + } +} + +struct AssertionPayload: RawRepresentable { + let rawValue: UInt64 + + init(rawValue: UInt64) { + self.rawValue = rawValue + assert(rawValue & _opcodeMask == 0) + } + + static var anchorBit: UInt64 { 1 << 55 } + static var boundaryBit: UInt64 { 1 << 54 } + static var strictASCIIWordBit: UInt64 { 1 << 53 } + static var isScalarBit: UInt64 { 1 << 52 } + static var assertionKindMask: UInt64 { 0xFF } + + init(_ assertion: DSLTree.Atom.Assertion, + _ anchorsMatchNewlines: Bool, + _ usesSimpleUnicodeBoundaries: Bool, + _ usesASCIIWord: Bool, + _ semanticLevel: MatchingOptions.SemanticLevel + ) { + // 4 bits of options + let anchorBit: UInt64 = anchorsMatchNewlines ? AssertionPayload.anchorBit : 0 + let boundaryBit: UInt64 = usesSimpleUnicodeBoundaries ? AssertionPayload.boundaryBit : 0 + let strictASCIIWordBit: UInt64 = usesASCIIWord ? AssertionPayload.strictASCIIWordBit : 0 + let isScalarBit: UInt64 = semanticLevel == .unicodeScalar ? 
AssertionPayload.isScalarBit : 0 + + // 8 bits for the assertion kind + // Future work: Optimize this layout + let kind = assertion.rawValue + assert(kind <= AssertionPayload.assertionKindMask) + assert(kind & anchorBit & boundaryBit & strictASCIIWordBit & isScalarBit == 0) + self.init(rawValue: kind | anchorBit | boundaryBit | strictASCIIWordBit | isScalarBit) + } + + var kind: DSLTree.Atom.Assertion { + return .init( + rawValue: self.rawValue & AssertionPayload.assertionKindMask).unsafelyUnwrapped + } + var anchorsMatchNewlines: Bool { self.rawValue & AssertionPayload.anchorBit != 0 } + var usesSimpleUnicodeBoundaries: Bool { + self.rawValue & AssertionPayload.boundaryBit != 0 + } + var usesASCIIWord: Bool { self.rawValue & AssertionPayload.strictASCIIWordBit != 0 } + var semanticLevel: MatchingOptions.SemanticLevel { + if self.rawValue & AssertionPayload.isScalarBit != 0 { + return .unicodeScalar + } else { + return .graphemeCluster + } + } +} diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index 8e1a1f294..f2ee88636 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -113,11 +113,15 @@ extension Instruction { /// - Boolean for if we should match by scalar value case matchBitset - /// TODO: builtin assertions and anchors - case builtinAssertion - - /// TODO: builtin character classes - case builtinCharacterClass + /// Match against a built-in character class + /// + /// matchBuiltin(_: CharacterClassPayload) + /// + /// Operand: the payload contains + /// - The character class + /// - If it is inverted + /// - If it strictly matches only ascii values + case matchBuiltin // MARK: Extension points @@ -127,16 +131,12 @@ extension Instruction { /// Operand: Consume function register to call. case consumeBy - /// Custom lookaround assertion operation. - /// Triggers a failure if customFunction returns false. 
+ /// Lookaround assertion operation. Performs a zero width assertion based on + /// the assertion type and options stored in the payload /// - /// assert(_ customFunction: ( - /// input: Input, - /// currentPos: Position, - /// bounds: Range - /// ) -> Bool) + /// assert(_:AssertionPayload) /// - /// Operands: destination bool register, assert hook register + /// Operands: AssertionPayload containing assertion type and options case assertBy /// Custom value-creating consume operation. diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 0b9a91726..3406e9fed 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -20,7 +20,6 @@ extension MEProgram { var asciiBitsets: [DSLTree.CustomCharacterClass.AsciiBitset] = [] var consumeFunctions: [ConsumeFunction] = [] - var assertionFunctions: [AssertionFunction] = [] var transformFunctions: [TransformFunction] = [] var matcherFunctions: [MatcherFunction] = [] @@ -171,6 +170,11 @@ extension MEProgram.Builder { instructions.append(.init( .matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: true))) } + + mutating func buildMatchBuiltin(model: _CharacterClassModel) { + instructions.append(.init( + .matchBuiltin, .init(model))) + } mutating func buildConsume( by p: @escaping MEProgram.ConsumeFunction @@ -180,10 +184,21 @@ extension MEProgram.Builder { } mutating func buildAssert( - by p: @escaping MEProgram.AssertionFunction + by kind: DSLTree.Atom.Assertion, + _ anchorsMatchNewlines: Bool, + _ usesSimpleUnicodeBoundaries: Bool, + _ usesASCIIWord: Bool, + _ semanticLevel: MatchingOptions.SemanticLevel ) { + let payload = AssertionPayload.init( + kind, + anchorsMatchNewlines, + usesSimpleUnicodeBoundaries, + usesASCIIWord, + semanticLevel) instructions.append(.init( - .assertBy, .init(assertion: makeAssertionFunction(p)))) + .assertBy, + .init(assertion: payload))) } mutating func buildAccept() { 
@@ -306,7 +321,6 @@ extension MEProgram.Builder { regInfo.positions = nextPositionRegister.rawValue regInfo.bitsets = asciiBitsets.count regInfo.consumeFunctions = consumeFunctions.count - regInfo.assertionFunctions = assertionFunctions.count regInfo.transformFunctions = transformFunctions.count regInfo.matcherFunctions = matcherFunctions.count regInfo.captures = nextCaptureRegister.rawValue @@ -317,7 +331,6 @@ extension MEProgram.Builder { staticSequences: sequences.stored, staticBitsets: asciiBitsets, staticConsumeFunctions: consumeFunctions, - staticAssertionFunctions: assertionFunctions, staticTransformFunctions: transformFunctions, staticMatcherFunctions: matcherFunctions, registerInfo: regInfo, @@ -466,12 +479,6 @@ extension MEProgram.Builder { defer { consumeFunctions.append(f) } return ConsumeFunctionRegister(consumeFunctions.count) } - mutating func makeAssertionFunction( - _ f: @escaping MEProgram.AssertionFunction - ) -> AssertionFunctionRegister { - defer { assertionFunctions.append(f) } - return AssertionFunctionRegister(assertionFunctions.count) - } mutating func makeTransformFunction( _ f: @escaping MEProgram.TransformFunction ) -> TransformRegister { diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index f791da37e..d05348893 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -1,13 +1,190 @@ +@_implementationOnly import _RegexParser // For AssertionKind +extension Character { + var _isHorizontalWhitespace: Bool { + self.unicodeScalars.first?.isHorizontalWhitespace == true + } + var _isNewline: Bool { + self.unicodeScalars.first?.isNewline == true + } +} extension Processor { + mutating func matchBuiltin( + _ cc: _CharacterClassModel.Representation, + _ isInverted: Bool, + _ isStrictASCII: Bool, + _ isScalarSemantics: Bool + ) -> Bool { + guard let char = load(), let scalar = loadScalar() else { + signalFailure() + 
return false + } + + let asciiCheck = (char.isASCII && !isScalarSemantics) + || (scalar.isASCII && isScalarSemantics) + || !isStrictASCII + + var matched: Bool + var next: Input.Index + switch (isScalarSemantics, cc) { + case (_, .anyGrapheme): + next = input.index(after: currentPosition) + case (_, .anyScalar): + next = input.unicodeScalars.index(after: currentPosition) + case (true, _): + next = input.unicodeScalars.index(after: currentPosition) + case (false, _): + next = input.index(after: currentPosition) + } + switch cc { + case .any, .anyGrapheme: + matched = true + case .anyScalar: + if isScalarSemantics { + matched = true + } else { + matched = input.isOnGraphemeClusterBoundary(next) + } + case .digit: + if isScalarSemantics { + matched = scalar.properties.numericType != nil && asciiCheck + } else { + matched = char.isNumber && asciiCheck + } + case .horizontalWhitespace: + if isScalarSemantics { + matched = scalar.isHorizontalWhitespace && asciiCheck + } else { + matched = char._isHorizontalWhitespace && asciiCheck + } + case .verticalWhitespace: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + } else { + matched = char._isNewline && asciiCheck + } + case .newlineSequence: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + if matched && scalar == "\r" + && next != input.endIndex && input.unicodeScalars[next] == "\n" { + // Match a full CR-LF sequence even in scalar semantics + input.unicodeScalars.formIndex(after: &next) + } + } else { + matched = char._isNewline && asciiCheck + } + case .whitespace: + if isScalarSemantics { + matched = scalar.properties.isWhitespace && asciiCheck + } else { + matched = char.isWhitespace && asciiCheck + } + case .word: + if isScalarSemantics { + matched = scalar.properties.isAlphabetic && asciiCheck + } else { + matched = char.isWordCharacter && asciiCheck + } + } - mutating func builtinAssertion() { - fatalError("TODO: assertions and anchors") + if isInverted { + matched.toggle() + 
} + + guard matched else { + signalFailure() + return false + } + + currentPosition = next + return true + } + + func isAtStartOfLine(_ payload: AssertionPayload) -> Bool { + if currentPosition == subjectBounds.lowerBound { return true } + switch payload.semanticLevel { + case .graphemeCluster: + return input[input.index(before: currentPosition)].isNewline + case .unicodeScalar: + return input.unicodeScalars[input.unicodeScalars.index(before: currentPosition)].isNewline + } } + + func isAtEndOfLine(_ payload: AssertionPayload) -> Bool { + if currentPosition == subjectBounds.upperBound { return true } + switch payload.semanticLevel { + case .graphemeCluster: + return input[currentPosition].isNewline + case .unicodeScalar: + return input.unicodeScalars[currentPosition].isNewline + } + } + + mutating func builtinAssert(by payload: AssertionPayload) throws -> Bool { + // Future work: Optimize layout and dispatch + switch payload.kind { + case .startOfSubject: return currentPosition == subjectBounds.lowerBound + + case .endOfSubjectBeforeNewline: + if currentPosition == subjectBounds.upperBound { return true } + switch payload.semanticLevel { + case .graphemeCluster: + return input.index(after: currentPosition) == subjectBounds.upperBound + && input[currentPosition].isNewline + case .unicodeScalar: + return input.unicodeScalars.index(after: currentPosition) == subjectBounds.upperBound + && input.unicodeScalars[currentPosition].isNewline + } + + case .endOfSubject: return currentPosition == subjectBounds.upperBound + + case .resetStartOfMatch: + fatalError("Unreachable, we should have thrown an error during compilation") + + case .firstMatchingPositionInSubject: + return currentPosition == searchBounds.lowerBound + + case .textSegment: return input.isOnGraphemeClusterBoundary(currentPosition) + + case .notTextSegment: return !input.isOnGraphemeClusterBoundary(currentPosition) + + case .startOfLine: + return isAtStartOfLine(payload) + case .endOfLine: + return 
isAtEndOfLine(payload) + + case .caretAnchor: + if payload.anchorsMatchNewlines { + return isAtStartOfLine(payload) + } else { + return currentPosition == subjectBounds.lowerBound + } + + case .dollarAnchor: + if payload.anchorsMatchNewlines { + return isAtEndOfLine(payload) + } else { + return currentPosition == subjectBounds.upperBound + } + + case .wordBoundary: + if payload.usesSimpleUnicodeBoundaries { + // TODO: How should we handle bounds? + return atSimpleBoundary(payload.usesASCIIWord, payload.semanticLevel) + } else { + return input.isOnWordBoundary(at: currentPosition, using: &wordIndexCache, &wordIndexMaxIndex) + } - mutating func builtinCharacterClass() { - fatalError("TODO: character classes") + case .notWordBoundary: + if payload.usesSimpleUnicodeBoundaries { + // TODO: How should we handle bounds? + return !atSimpleBoundary(payload.usesASCIIWord, payload.semanticLevel) + } else { + return !input.isOnWordBoundary(at: currentPosition, using: &wordIndexCache, &wordIndexMaxIndex) + } + } } } diff --git a/Sources/_StringProcessing/Engine/MEProgram.swift b/Sources/_StringProcessing/Engine/MEProgram.swift index d311b4465..bacefb209 100644 --- a/Sources/_StringProcessing/Engine/MEProgram.swift +++ b/Sources/_StringProcessing/Engine/MEProgram.swift @@ -15,14 +15,6 @@ struct MEProgram { typealias Input = String typealias ConsumeFunction = (Input, Range) -> Input.Index? - typealias AssertionFunction = - ( - inout Set?, - inout String.Index?, - Input, - Input.Index, - Range - ) throws -> Bool typealias TransformFunction = (Input, Processor._StoredCapture) throws -> Any? 
typealias MatcherFunction = @@ -34,7 +26,6 @@ struct MEProgram { var staticSequences: [[Input.Element]] var staticBitsets: [DSLTree.CustomCharacterClass.AsciiBitset] var staticConsumeFunctions: [ConsumeFunction] - var staticAssertionFunctions: [AssertionFunction] var staticTransformFunctions: [TransformFunction] var staticMatcherFunctions: [MatcherFunction] diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 2be918294..55ac49ed9 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -9,6 +9,7 @@ // //===----------------------------------------------------------------------===// + enum MatchMode { case wholeString case partialFromFront @@ -238,7 +239,7 @@ extension Processor { } return true } - + func loadScalar() -> Unicode.Scalar? { currentPosition < end ? input.unicodeScalars[currentPosition] : nil } @@ -476,6 +477,17 @@ extension Processor { } } + case .matchBuiltin: + let payload = payload.characterClassPayload + if matchBuiltin( + payload.cc, + payload.isInverted, + payload.isStrictASCII, + payload.isScalarSemantics + ) { + controller.step() + } + case .consumeBy: let reg = payload.consumer guard currentPosition < searchBounds.upperBound, @@ -489,16 +501,9 @@ extension Processor { controller.step() case .assertBy: - let reg = payload.assertion - let assertion = registers[reg] + let payload = payload.assertion do { - guard try assertion( - &wordIndexCache, - &wordIndexMaxIndex, - input, - currentPosition, - subjectBounds - ) else { + guard try builtinAssert(by: payload) else { signalFailure() return } @@ -547,16 +552,14 @@ extension Processor { case .beginCapture: let capNum = Int( asserting: payload.capture.rawValue) + storedCaptures[capNum].startCapture(currentPosition) + controller.step() - storedCaptures[capNum].startCapture(currentPosition) - controller.step() - - case .endCapture: + case .endCapture: let capNum = Int( 
asserting: payload.capture.rawValue) - - storedCaptures[capNum].endCapture(currentPosition) - controller.step() + storedCaptures[capNum].endCapture(currentPosition) + controller.step() case .transformCapture: let (cap, trans) = payload.pairedCaptureTransform @@ -584,12 +587,6 @@ extension Processor { storedCaptures[capNum].registerValue( value, overwriteInitial: sp) controller.step() - - case .builtinAssertion: - builtinAssertion() - - case .builtinCharacterClass: - builtinCharacterClass() } } } diff --git a/Sources/_StringProcessing/Engine/Registers.swift b/Sources/_StringProcessing/Engine/Registers.swift index e5d33af8b..69cc3e30a 100644 --- a/Sources/_StringProcessing/Engine/Registers.swift +++ b/Sources/_StringProcessing/Engine/Registers.swift @@ -33,8 +33,6 @@ extension Processor { var consumeFunctions: [MEProgram.ConsumeFunction] - var assertionFunctions: [MEProgram.AssertionFunction] - // Captured-value constructors var transformFunctions: [MEProgram.TransformFunction] @@ -85,9 +83,6 @@ extension Processor.Registers { subscript(_ i: ConsumeFunctionRegister) -> MEProgram.ConsumeFunction { consumeFunctions[i.rawValue] } - subscript(_ i: AssertionFunctionRegister) -> MEProgram.AssertionFunction { - assertionFunctions[i.rawValue] - } subscript(_ i: TransformRegister) -> MEProgram.TransformFunction { transformFunctions[i.rawValue] } @@ -117,9 +112,6 @@ extension Processor.Registers { self.consumeFunctions = program.staticConsumeFunctions assert(consumeFunctions.count == info.consumeFunctions) - self.assertionFunctions = program.staticAssertionFunctions - assert(assertionFunctions.count == info.assertionFunctions) - self.transformFunctions = program.staticTransformFunctions assert(transformFunctions.count == info.transformFunctions) @@ -159,7 +151,6 @@ extension MEProgram { var strings = 0 var bitsets = 0 var consumeFunctions = 0 - var assertionFunctions = 0 var transformFunctions = 0 var matcherFunctions = 0 var ints = 0 diff --git 
a/Sources/_StringProcessing/MatchingOptions.swift b/Sources/_StringProcessing/MatchingOptions.swift index e56b8def2..d511c9f7c 100644 --- a/Sources/_StringProcessing/MatchingOptions.swift +++ b/Sources/_StringProcessing/MatchingOptions.swift @@ -122,18 +122,6 @@ extension MatchingOptions { } } -// Deprecated CharacterClass.MatchLevel API -extension MatchingOptions { - var matchLevel: _CharacterClassModel.MatchLevel { - switch semanticLevel { - case .graphemeCluster: - return .graphemeCluster - case .unicodeScalar: - return .unicodeScalar - } - } -} - // MARK: - Implementation extension MatchingOptions { /// An option that changes the behavior of a regular expression. diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index e60a1ce0e..953df6882 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -744,6 +744,41 @@ extension DSLTree.Atom.Assertion { } } +extension DSLTree.Atom.CharacterClass { + var _patternBase: String { + switch self { + case .anyGrapheme: + return ".anyGraphemeCluster" + case .anyUnicodeScalar: + return ".anyUnicodeScalar" + case .digit: + return ".digit" + case .notDigit: + return ".digit.inverted" + case .word: + return ".word" + case .notWord: + return ".word.inverted" + case .horizontalWhitespace: + return ".horizontalWhitespace" + case .notHorizontalWhitespace: + return ".horizontalWhitespace.inverted" + case .newlineSequence: + return ".newlineSequence" + case .notNewline: + return ".newlineSequence.inverted" + case .verticalWhitespace: + return ".verticalWhitespace" + case .notVerticalWhitespace: + return ".vertialWhitespace.inverted" + case .whitespace: + return ".whitespace" + case .notWhitespace: + return ".whitespace.inverted" + } + } +} + extension AST.Atom.CharacterProperty { var isUnprintableProperty: Bool { switch kind { @@ -1212,6 +1247,8 @@ extension DSLTree.Atom { case .assertion(let a): return (a._patternBase, 
false) + case .characterClass(let cc): + return (cc._patternBase, true) case .backreference(_): return ("/* TODO: backreferences */", false) @@ -1256,6 +1293,8 @@ extension DSLTree.Atom { case .assertion: return "/* TODO: assertions */" + case .characterClass: + return "/* TODO: character classes */" case .backreference: return "/* TODO: backreferences */" case .symbolicReference: diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index 4eb7bc42c..f5b08dd6d 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -168,6 +168,25 @@ extension AST.Atom.EscapedBuiltin { default: return nil } } + var dslCharacterClass: DSLTree.Atom.CharacterClass? { + switch self { + case .decimalDigit: return .digit + case .notDecimalDigit: return .notDigit + case .horizontalWhitespace: return .horizontalWhitespace + case .notHorizontalWhitespace: return .notHorizontalWhitespace + case .newlineSequence: return .newlineSequence + case .notNewline: return .notNewline + case .whitespace: return .whitespace + case .notWhitespace: return .notWhitespace + case .verticalTab: return .verticalWhitespace + case .notVerticalTab: return .notVerticalWhitespace + case .wordCharacter: return .word + case .notWordCharacter: return .notWord + case .graphemeCluster: return .anyGrapheme + case .trueAnychar: return .anyUnicodeScalar + default: return nil + } + } } extension AST.Atom { @@ -179,6 +198,12 @@ extension AST.Atom { default: return nil } } + var dslCharacterClass: DSLTree.Atom.CharacterClass? 
{ + switch kind { + case .escaped(let b): return b.dslCharacterClass + default: return nil + } + } } extension AST.Atom { @@ -186,6 +211,10 @@ extension AST.Atom { if let kind = dslAssertionKind { return .assertion(kind) } + + if let cc = dslCharacterClass { + return .characterClass(cc) + } switch self.kind { case let .char(c): return .char(c) @@ -194,9 +223,11 @@ extension AST.Atom { case let .backreference(r): return .backreference(.init(ast: r)) case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq)) - case .escaped(let c) where c.scalarValue != nil: - return .scalar(c.scalarValue!) - + case .escaped(let c): + guard let val = c.scalarValue else { + fatalError("Got a .escaped that was not an assertion, character class, or scalar value \(self)") + } + return .scalar(val) default: return .unconverted(.init(ast: self)) } } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 520f4991a..0a0831706 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -177,6 +177,7 @@ extension DSLTree { /// newlines unless single line mode is enabled. 
case dot + case characterClass(CharacterClass) case assertion(Assertion) case backreference(_AST.Reference) case symbolicReference(ReferenceID) @@ -189,9 +190,9 @@ extension DSLTree { extension DSLTree.Atom { @_spi(RegexBuilder) - public enum Assertion: Hashable { + public enum Assertion: UInt64, Hashable { /// \A - case startOfSubject + case startOfSubject = 0 /// \Z case endOfSubjectBeforeNewline @@ -231,6 +232,46 @@ extension DSLTree.Atom { /// \B case notWordBoundary } + + @_spi(RegexBuilder) + public enum CharacterClass: Hashable { + case digit + case notDigit + case horizontalWhitespace + case notHorizontalWhitespace + case newlineSequence + case notNewline + case whitespace + case notWhitespace + case verticalWhitespace + case notVerticalWhitespace + case word + case notWord + case anyGrapheme + case anyUnicodeScalar + } +} + +extension DSLTree.Atom.CharacterClass { + @_spi(RegexBuilder) + public var inverted: DSLTree.Atom.CharacterClass? { + switch self { + case .anyGrapheme: return nil + case .anyUnicodeScalar: return nil + case .digit: return .notDigit + case .notDigit: return .digit + case .word: return .notWord + case .notWord: return .word + case .horizontalWhitespace: return .notHorizontalWhitespace + case .notHorizontalWhitespace: return .horizontalWhitespace + case .newlineSequence: return .notNewline + case .notNewline: return .newlineSequence + case .verticalWhitespace: return .notVerticalWhitespace + case .notVerticalWhitespace: return .verticalWhitespace + case .whitespace: return .notWhitespace + case .notWhitespace: return .whitespace + } + } } extension Unicode.GeneralCategory { @@ -767,34 +808,8 @@ extension DSLTree { internal var ast: AST.MatchingOptionSequence } - @_spi(RegexBuilder) public struct Atom { internal var ast: AST.Atom - - // FIXME: The below APIs should be removed once the DSL tree has been - // migrated to use proper DSL atoms for them. 
- - public static var _anyGrapheme: Self { - .init(ast: .init(.escaped(.graphemeCluster), .fake)) - } - public static var _whitespace: Self { - .init(ast: .init(.escaped(.whitespace), .fake)) - } - public static var _digit: Self { - .init(ast: .init(.escaped(.decimalDigit), .fake)) - } - public static var _horizontalWhitespace: Self { - .init(ast: .init(.escaped(.horizontalWhitespace), .fake)) - } - public static var _newlineSequence: Self { - .init(ast: .init(.escaped(.newlineSequence), .fake)) - } - public static var _verticalWhitespace: Self { - .init(ast: .init(.escaped(.verticalTab), .fake)) - } - public static var _word: Self { - .init(ast: .init(.escaped(.wordCharacter), .fake)) - } } } } @@ -808,7 +823,7 @@ extension DSLTree.Atom { case .changeMatchingOptions, .assertion: return false case .char, .scalar, .any, .anyNonNewline, .dot, .backreference, - .symbolicReference, .unconverted: + .symbolicReference, .unconverted, .characterClass: return true } } diff --git a/Sources/_StringProcessing/Unicode/WordBreaking.swift b/Sources/_StringProcessing/Unicode/WordBreaking.swift index 94c311e82..50da079f6 100644 --- a/Sources/_StringProcessing/Unicode/WordBreaking.swift +++ b/Sources/_StringProcessing/Unicode/WordBreaking.swift @@ -12,6 +12,39 @@ @_spi(_Unicode) import Swift +extension Processor { + func atSimpleBoundary( + _ usesAsciiWord: Bool, + _ semanticLevel: MatchingOptions.SemanticLevel + ) -> Bool { + func matchesWord(at i: Input.Index) -> Bool { + switch semanticLevel { + case .graphemeCluster: + let c = input[i] + return c.isWordCharacter && (c.isASCII || !usesAsciiWord) + case .unicodeScalar: + let c = input.unicodeScalars[i] + return (c.properties.isAlphabetic || c == "_") && (c.isASCII || !usesAsciiWord) + } + } + + // FIXME: How should we handle bounds? 
+ // We probably need two concepts + if subjectBounds.isEmpty { return false } + if currentPosition == subjectBounds.lowerBound { + return matchesWord(at: currentPosition) + } + let priorIdx = input.index(before: currentPosition) + if currentPosition == subjectBounds.upperBound { + return matchesWord(at: priorIdx) + } + + let prior = matchesWord(at: priorIdx) + let current = matchesWord(at: currentPosition) + return prior != current + } +} + extension String { func isOnWordBoundary( at i: String.Index, diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index 31245c0f7..e0df906fa 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -58,6 +58,14 @@ public struct _RegexFactory { ) -> Regex { .init(node: .atom(.scalar(scalar))) } + + @_spi(RegexBuilder) + @available(SwiftStdlib 5.7, *) + public func characterClass( + _ cc: DSLTree.Atom.CharacterClass + ) -> Regex { + .init(node: .atom(.characterClass(cc))) + } @_spi(RegexBuilder) @available(SwiftStdlib 5.7, *) diff --git a/Sources/_StringProcessing/Utility/TypedInt.swift b/Sources/_StringProcessing/Utility/TypedInt.swift index adc9edf78..e03f2572f 100644 --- a/Sources/_StringProcessing/Utility/TypedInt.swift +++ b/Sources/_StringProcessing/Utility/TypedInt.swift @@ -142,10 +142,6 @@ enum _AsciiBitsetRegister {} typealias ConsumeFunctionRegister = TypedInt<_ConsumeFunctionRegister> enum _ConsumeFunctionRegister {} -/// Used for assertion functions, e.g. 
anchors etc -typealias AssertionFunctionRegister = TypedInt<_AssertionFunctionRegister> -enum _AssertionFunctionRegister {} - /// Used for capture transforms, etc typealias TransformRegister = TypedInt<_TransformRegister> enum _TransformRegister {} diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index 9f515f220..3be26f27f 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -17,27 +17,38 @@ struct _CharacterClassModel: Hashable { /// The actual character class to match. - var cc: Representation + let cc: Representation /// The level (character or Unicode scalar) at which to match. - var matchLevel: MatchLevel + let matchLevel: MatchingOptions.SemanticLevel + + /// If this character class only matches ascii characters + let isStrictASCII: Bool /// Whether this character class matches against an inverse, /// e.g \D, \S, [^abc]. - var isInverted: Bool = false + let isInverted: Bool + + init( + cc: Representation, + options: MatchingOptions, + isInverted: Bool + ) { + self.cc = cc + self.matchLevel = options.semanticLevel + self.isStrictASCII = cc.isStrictAscii(options: options) + self.isInverted = isInverted + } - // TODO: Split out builtin character classes into their own type? - enum Representation: Hashable { + enum Representation: UInt64, Hashable { /// Any character - case any + case any = 0 /// Any grapheme cluster case anyGrapheme /// Any Unicode scalar case anyScalar /// Character.isDigit case digit - /// Character.isHexDigit - case hexDigit /// Horizontal whitespace: `[:blank:]`, i.e /// `[\p{gc=Space_Separator}\N{CHARACTER TABULATION}] case horizontalWhitespace @@ -50,43 +61,6 @@ struct _CharacterClassModel: Hashable { /// Character.isLetter or Character.isDigit or Character == "_" case word } - - enum MatchLevel: Hashable { - /// Match at the extended grapheme cluster level. 
- case graphemeCluster - /// Match at the Unicode scalar level. - case unicodeScalar - } - - var scalarSemantic: Self { - var result = self - result.matchLevel = .unicodeScalar - return result - } - - var graphemeClusterSemantic: Self { - var result = self - result.matchLevel = .graphemeCluster - return result - } - - /// Conditionally inverts a character class. - /// - /// - Parameter inversion: Indicates whether to invert the character class. - /// - Returns: The inverted character class if `inversion` is `true`; - /// otherwise, the same character class. - func withInversion(_ inversion: Bool) -> Self { - var copy = self - if inversion { - copy.isInverted.toggle() - } - return copy - } - - /// Inverts a character class. - var inverted: Self { - return withInversion(true) - } /// Returns the end of the match of this character class in the string. /// @@ -94,111 +68,106 @@ struct _CharacterClassModel: Hashable { /// - Parameter at: The index to start matching. /// - Parameter options: Options for the match operation. /// - Returns: The index of the end of the match, or `nil` if there is no match. - func matches(in str: String, at i: String.Index, with options: MatchingOptions) -> String.Index? 
{ - switch matchLevel { - case .graphemeCluster: - let c = str[i] - var matched: Bool - var next = str.index(after: i) - switch cc { - case .any, .anyGrapheme: matched = true - case .anyScalar: - matched = true - next = str.unicodeScalars.index(after: i) - case .digit: - matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits) - case .hexDigit: - matched = c.isHexDigit && (c.isASCII || !options.usesASCIIDigits) - case .horizontalWhitespace: - matched = c.unicodeScalars.first?.isHorizontalWhitespace == true - && (c.isASCII || !options.usesASCIISpaces) - case .newlineSequence, .verticalWhitespace: - matched = c.unicodeScalars.first?.isNewline == true - && (c.isASCII || !options.usesASCIISpaces) - case .whitespace: - matched = c.isWhitespace && (c.isASCII || !options.usesASCIISpaces) - case .word: - matched = c.isWordCharacter && (c.isASCII || !options.usesASCIIWord) + func matches( + in input: String, + at currentPosition: String.Index + ) -> String.Index? { + // FIXME: This is only called in custom character classes that contain builtin + // character classes as members (ie: [a\w] or set operations), is there + // any way to avoid that? Can we remove this somehow? + guard currentPosition != input.endIndex else { + return nil + } + let char = input[currentPosition] + let scalar = input.unicodeScalars[currentPosition] + let isScalarSemantics = matchLevel == .unicodeScalar + let asciiCheck = (char.isASCII && !isScalarSemantics) + || (scalar.isASCII && isScalarSemantics) + || !isStrictASCII + + var matched: Bool + var next: String.Index + switch (isScalarSemantics, cc) { + case (_, .anyGrapheme): + next = input.index(after: currentPosition) + case (_, .anyScalar): + // FIXME: This allows us to be not-scalar aligned when in grapheme mode + // Should this even be allowed? 
+ next = input.unicodeScalars.index(after: currentPosition) + case (true, _): + next = input.unicodeScalars.index(after: currentPosition) + case (false, _): + next = input.index(after: currentPosition) + } + + switch cc { + case .any, .anyGrapheme, .anyScalar: + matched = true + case .digit: + if isScalarSemantics { + matched = scalar.properties.numericType != nil && asciiCheck + } else { + matched = char.isNumber && asciiCheck + } + case .horizontalWhitespace: + if isScalarSemantics { + matched = scalar.isHorizontalWhitespace && asciiCheck + } else { + matched = char._isHorizontalWhitespace && asciiCheck } - if isInverted { - matched.toggle() + case .verticalWhitespace: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + } else { + matched = char._isNewline && asciiCheck } - return matched ? next : nil - case .unicodeScalar: - let c = str.unicodeScalars[i] - var nextIndex = str.unicodeScalars.index(after: i) - var matched: Bool - switch cc { - case .any: matched = true - case .anyScalar: matched = true - case .anyGrapheme: - matched = true - nextIndex = str.index(after: i) - case .digit: - matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits) - case .hexDigit: - matched = Character(c).isHexDigit && (c.isASCII || !options.usesASCIIDigits) - case .horizontalWhitespace: - matched = c.isHorizontalWhitespace && (c.isASCII || !options.usesASCIISpaces) - case .verticalWhitespace: - matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) - case .newlineSequence: - matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces) - if c == "\r" && nextIndex != str.endIndex && str.unicodeScalars[nextIndex] == "\n" { - str.unicodeScalars.formIndex(after: &nextIndex) + case .newlineSequence: + if isScalarSemantics { + matched = scalar.isNewline && asciiCheck + if matched && scalar == "\r" + && next != input.endIndex && input.unicodeScalars[next] == "\n" { + // Match a full CR-LF sequence even in scalar semantics + 
input.unicodeScalars.formIndex(after: &next) } - case .whitespace: - matched = c.properties.isWhitespace && (c.isASCII || !options.usesASCIISpaces) - case .word: - matched = (c.properties.isAlphabetic || c == "_") && (c.isASCII || !options.usesASCIIWord) + } else { + matched = char._isNewline && asciiCheck + } + case .whitespace: + if isScalarSemantics { + matched = scalar.properties.isWhitespace && asciiCheck + } else { + matched = char.isWhitespace && asciiCheck } - if isInverted { - matched.toggle() + case .word: + if isScalarSemantics { + matched = scalar.properties.isAlphabetic && asciiCheck + } else { + matched = char.isWordCharacter && asciiCheck } - return matched ? nextIndex : nil + } + if isInverted { + matched.toggle() + } + if matched { + return next + } else { + return nil } } } -extension _CharacterClassModel { - static var any: _CharacterClassModel { - .init(cc: .any, matchLevel: .graphemeCluster) - } - - static var anyGrapheme: _CharacterClassModel { - .init(cc: .anyGrapheme, matchLevel: .graphemeCluster) - } - - static var anyUnicodeScalar: _CharacterClassModel { - .init(cc: .any, matchLevel: .unicodeScalar) - } - - static var whitespace: _CharacterClassModel { - .init(cc: .whitespace, matchLevel: .graphemeCluster) - } - - static var digit: _CharacterClassModel { - .init(cc: .digit, matchLevel: .graphemeCluster) - } - - static var hexDigit: _CharacterClassModel { - .init(cc: .hexDigit, matchLevel: .graphemeCluster) - } - - static var horizontalWhitespace: _CharacterClassModel { - .init(cc: .horizontalWhitespace, matchLevel: .graphemeCluster) - } - - static var newlineSequence: _CharacterClassModel { - .init(cc: .newlineSequence, matchLevel: .graphemeCluster) - } - - static var verticalWhitespace: _CharacterClassModel { - .init(cc: .verticalWhitespace, matchLevel: .graphemeCluster) - } - - static var word: _CharacterClassModel { - .init(cc: .word, matchLevel: .graphemeCluster) +extension _CharacterClassModel.Representation { + /// Returns true if 
this CharacterClass should be matched by strict ascii under the given options + func isStrictAscii(options: MatchingOptions) -> Bool { + switch self { + case .digit: return options.usesASCIIDigits + case .horizontalWhitespace: return options.usesASCIISpaces + case .newlineSequence: return options.usesASCIISpaces + case .verticalWhitespace: return options.usesASCIISpaces + case .whitespace: return options.usesASCIISpaces + case .word: return options.usesASCIIWord + default: return false + } } } @@ -209,7 +178,6 @@ extension _CharacterClassModel.Representation: CustomStringConvertible { case .anyGrapheme: return "" case .anyScalar: return "" case .digit: return "" - case .hexDigit: return "" case .horizontalWhitespace: return "" case .newlineSequence: return "" case .verticalWhitespace: return "vertical whitespace" @@ -225,102 +193,57 @@ extension _CharacterClassModel: CustomStringConvertible { } } -extension _CharacterClassModel { - func withMatchLevel( - _ level: _CharacterClassModel.MatchLevel - ) -> _CharacterClassModel { - var cc = self - cc.matchLevel = level - return cc - } -} - -extension AST.Atom { - var characterClass: _CharacterClassModel? { - switch kind { - case let .escaped(b): return b.characterClass - - case .property: - // TODO: Would our model type for character classes include - // this? Or does grapheme-semantic mode complicate that? - return nil - - case .dot: - // `.dot` is handled in the matching engine by Compiler.emitDot() and in - // the legacy compiler by the `.any` instruction, which can provide lower - // level instructions than the CharacterClass-generated consumer closure - // - // FIXME: We shouldn't be returning `nil` here, but instead fixing the call - // site to check for any before trying to construct a character class. - return nil - - default: return nil - - } - } - -} - -extension AST.Atom.EscapedBuiltin { - var characterClass: _CharacterClassModel? 
{ +extension DSLTree.Atom.CharacterClass { + /// Converts this DSLTree CharacterClass into our runtime representation + func asRuntimeModel(_ options: MatchingOptions) -> _CharacterClassModel { + let cc: _CharacterClassModel.Representation + var inverted = false switch self { - case .decimalDigit: return .digit - case .notDecimalDigit: return .digit.inverted - - case .horizontalWhitespace: return .horizontalWhitespace + case .digit: + cc = .digit + case .notDigit: + cc = .digit + inverted = true + + case .horizontalWhitespace: + cc = .horizontalWhitespace case .notHorizontalWhitespace: - return .horizontalWhitespace.inverted + cc = .horizontalWhitespace + inverted = true - case .newlineSequence: return .newlineSequence + case .newlineSequence: + cc = .newlineSequence // FIXME: This is more like '.' than inverted '\R', as it is affected // by e.g (*CR). We should therefore really be emitting it through // emitDot(). For now we treat it as semantically invalid. - case .notNewline: return .newlineSequence.inverted - - case .whitespace: return .whitespace - case .notWhitespace: return .whitespace.inverted - - case .verticalTab: return .verticalWhitespace - case .notVerticalTab: return .verticalWhitespace.inverted - - case .wordCharacter: return .word - case .notWordCharacter: return .word.inverted - - case .graphemeCluster: return .anyGrapheme - case .trueAnychar: return .anyUnicodeScalar - - default: - return nil - } - } -} - -extension _CharacterClassModel { - // FIXME: Calling on inverted sets wont be the same as the - // inverse of a boundary if at the start or end of the - // string. (Think through what we want: do it ourselves or - // give the caller both options). - func isBoundary( - _ input: String, - at pos: String.Index, - bounds: Range, - with options: MatchingOptions - ) -> Bool { - // FIXME: How should we handle bounds? 
- // We probably need two concepts - if bounds.isEmpty { return false } - if pos == bounds.lowerBound { - return self.matches(in: input, at: pos, with: options) != nil - } - let priorIdx = input.index(before: pos) - if pos == bounds.upperBound { - return self.matches(in: input, at: priorIdx, with: options) != nil + case .notNewline: + cc = .newlineSequence + inverted = true + + case .whitespace: + cc = .whitespace + case .notWhitespace: + cc = .whitespace + inverted = true + + case .verticalWhitespace: + cc = .verticalWhitespace + case .notVerticalWhitespace: + cc = .verticalWhitespace + inverted = true + + case .word: + cc = .word + case .notWord: + cc = .word + inverted = true + + case .anyGrapheme: + cc = .anyGrapheme + case .anyUnicodeScalar: + cc = .anyScalar } - - let prior = self.matches(in: input, at: priorIdx, with: options) != nil - let current = self.matches(in: input, at: pos, with: options) != nil - return prior != current + return _CharacterClassModel(cc: cc, options: options, isInverted: inverted) } - } diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index 27f8d79cb..e0702f87f 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -38,6 +38,7 @@ enum DecodedInstr { case matchScalarUnchecked case matchBitsetScalar case matchBitset + case matchBuiltin case consumeBy case assertBy case matchBy @@ -46,8 +47,6 @@ enum DecodedInstr { case endCapture case transformCapture case captureValue - case builtinAssertion - case builtinCharacterClass } extension DecodedInstr { @@ -56,87 +55,84 @@ extension DecodedInstr { /// /// Must stay in sync with Processor.cycle static func decode(_ instruction: Instruction) -> DecodedInstr { - let (opcode, payload) = instruction.destructure - - switch opcode { - case .invalid: - fatalError("Invalid program") - case .moveImmediate: - return .moveImmediate - case .moveCurrentPosition: - return .moveCurrentPosition - case .branch: - return .branch - 
case .condBranchZeroElseDecrement: - return .condBranchZeroElseDecrement - case .condBranchSamePosition: - return .condBranchSamePosition - case .save: - return .save - case .saveAddress: - return .saveAddress - case .splitSaving: - return .splitSaving - case .clear: - return .clear - case .clearThrough: - return .clearThrough - case .accept: - return .accept - case .fail: - return .fail - case .advance: - return .advance - case .match: - let (isCaseInsensitive, _) = payload.elementPayload - if isCaseInsensitive { - return .matchCaseInsensitive - } else { - return .match - } - case .matchScalar: - let (_, caseInsensitive, boundaryCheck) = payload.scalarPayload - if caseInsensitive { - if boundaryCheck { - return .matchScalarCaseInsensitive - } else { - return .matchScalarCaseInsensitiveUnchecked - } + let (opcode, payload) = instruction.destructure + switch opcode { + case .invalid: + fatalError("Invalid program") + case .moveImmediate: + return .moveImmediate + case .moveCurrentPosition: + return .moveCurrentPosition + case .branch: + return .branch + case .condBranchZeroElseDecrement: + return .condBranchZeroElseDecrement + case .condBranchSamePosition: + return .condBranchSamePosition + case .save: + return .save + case .saveAddress: + return .saveAddress + case .splitSaving: + return .splitSaving + case .clear: + return .clear + case .clearThrough: + return .clearThrough + case .accept: + return .accept + case .fail: + return .fail + case .advance: + return .advance + case .match: + let (isCaseInsensitive, _) = payload.elementPayload + if isCaseInsensitive { + return .matchCaseInsensitive + } else { + return .match + } + case .matchScalar: + let (_, caseInsensitive, boundaryCheck) = payload.scalarPayload + if caseInsensitive { + if boundaryCheck { + return .matchScalarCaseInsensitive } else { - if boundaryCheck { - return .matchScalar - } else { - return .matchScalarUnchecked - } + return .matchScalarCaseInsensitiveUnchecked } - case .matchBitset: - let 
(isScalar, _) = payload.bitsetPayload - if isScalar { - return .matchBitsetScalar + } else { + if boundaryCheck { + return .matchScalar } else { - return .matchBitset + return .matchScalarUnchecked } - case .consumeBy: - return consumeBy - case .assertBy: - return .assertBy - case .matchBy: - return .matchBy - case .backreference: - return .backreference - case .beginCapture: - return .beginCapture - case .endCapture: - return .endCapture - case .transformCapture: - return .transformCapture - case .captureValue: - return .captureValue - case .builtinAssertion: - return .builtinAssertion - case .builtinCharacterClass: - return .builtinCharacterClass -} + } + case .matchBitset: + let (isScalar, _) = payload.bitsetPayload + if isScalar { + return .matchBitsetScalar + } else { + return .matchBitset + } + case .consumeBy: + return consumeBy + case .assertBy: + return .assertBy + case .matchBy: + return .matchBy + case .backreference: + return .backreference + case .beginCapture: + return .beginCapture + case .endCapture: + return .endCapture + case .transformCapture: + return .transformCapture + case .captureValue: + return .captureValue + case .matchBuiltin: + return .matchBuiltin + } } } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 5f4c8bb30..ff51088ff 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1517,13 +1517,10 @@ extension RegexTests { (" 123", "23"), ("123 456", "23")) - // TODO: \G and \K - do { - let regex = try Regex(#"\Gab"#, as: Substring.self) - XCTExpectFailure { - XCTAssertEqual("abab".matches(of: regex).map(\.output), ["ab", "ab"]) - } - } + // \G and \K + let regex = try Regex(#"\Gab"#, as: Substring.self) + XCTAssertEqual("abab".matches(of: regex).map(\.output), ["ab", "ab"]) + // TODO: Oniguruma \y and \Y firstMatchTests( From 1acca94663a0dfedd85e7ba10575dfb2c61503c8 Mon Sep 17 00:00:00 2001 From: Lily Date: Wed, 3 Aug 2022 15:48:08 -0700 Subject: [PATCH 13/33] 
[Optimization] Specialized quantification instruction (#577) Implements a specialized quantification instruction for repeated matching of a character, dot, character class, or custom character class --- Sources/_StringProcessing/ByteCodeGen.swift | 78 +++++++++ .../Engine/Backtracking.swift | 53 +++++- .../Engine/InstPayload.swift | 164 +++++++++++++++++- .../Engine/Instruction.swift | 7 + .../_StringProcessing/Engine/MEBuilder.swift | 44 +++++ .../_StringProcessing/Engine/MEBuiltins.swift | 30 +++- .../_StringProcessing/Engine/MECapture.swift | 6 - .../_StringProcessing/Engine/MEQuantify.swift | 125 +++++++++++++ .../_StringProcessing/Engine/Processor.swift | 84 +++++++-- .../_StringProcessing/Engine/Tracing.swift | 6 +- .../_CharacterClassModel.swift | 9 + Tests/RegexTests/CompileTests.swift | 5 +- Tests/RegexTests/MatchTests.swift | 27 +++ 13 files changed, 598 insertions(+), 40 deletions(-) create mode 100644 Sources/_StringProcessing/Engine/MEQuantify.swift diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index da0888039..66fefc49e 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -471,6 +471,10 @@ fileprivate extension Compiler.ByteCodeGen { let minTrips = low assert((extraTrips ?? 1) >= 0) + if tryEmitFastQuant(child, updatedKind, minTrips, extraTrips) { + return + } + // The below is a general algorithm for bounded and unbounded // quantification. It can be specialized when the min // is 0 or 1, or when extra trips is 1 or unbounded. 
@@ -655,6 +659,80 @@ fileprivate extension Compiler.ByteCodeGen { builder.label(exit) } + /// Specialized quantification instruction for repetition of certain nodes in grapheme semantic mode + /// Allowed nodes are: + /// - single ascii scalar .char + /// - ascii .customCharacterClass + /// - single grapheme consuming built in character classes + /// - .any, .anyNonNewline, .dot + mutating func tryEmitFastQuant( + _ child: DSLTree.Node, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) -> Bool { + guard optimizationsEnabled + && minTrips <= QuantifyPayload.maxStorableTrips + && extraTrips ?? 0 <= QuantifyPayload.maxStorableTrips + && options.semanticLevel == .graphemeCluster + && kind != .reluctant else { + return false + } + switch child { + case .customCharacterClass(let ccc): + // ascii only custom character class + guard let bitset = ccc.asAsciiBitset(options) else { + return false + } + builder.buildQuantify(bitset: bitset, kind, minTrips, extraTrips) + + case .atom(let atom): + switch atom { + case .char(let c): + // Single scalar ascii value character + guard let val = c._singleScalarAsciiValue else { + return false + } + builder.buildQuantify(asciiChar: val, kind, minTrips, extraTrips) + + case .any: + builder.buildQuantifyAny( + matchesNewlines: true, kind, minTrips, extraTrips) + case .anyNonNewline: + builder.buildQuantifyAny( + matchesNewlines: false, kind, minTrips, extraTrips) + case .dot: + builder.buildQuantifyAny( + matchesNewlines: options.dotMatchesNewline, kind, minTrips, extraTrips) + + case .characterClass(let cc): + // Custom character class that consumes a single grapheme + let model = cc.asRuntimeModel(options) + guard model.consumesSingleGrapheme else { + return false + } + builder.buildQuantify( + model: model, + kind, + minTrips, + extraTrips) + default: + return false + } + case .convertedRegexLiteral(let node, _): + return tryEmitFastQuant(node, kind, minTrips, extraTrips) + case .nonCapturingGroup(let 
groupKind, let node): + // .nonCapture nonCapturingGroups are ignored during compilation + guard groupKind.ast == .nonCapture else { + return false + } + return tryEmitFastQuant(node, kind, minTrips, extraTrips) + default: + return false + } + return true + } + /// Coalesce any adjacent scalar members in a custom character class together. /// This is required in order to produce correct grapheme matching behavior. func coalescingCustomCharacterClassMembers( diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index 355702ac1..3ebb060c9 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -10,14 +10,12 @@ //===----------------------------------------------------------------------===// extension Processor { - - // TODO: What all do we want to save? Configurable? - // TODO: Do we need to save any registers? - // TODO: Is this the right place to do function stack unwinding? struct SavePoint { var pc: InstructionAddress var pos: Position? - + // Quantifiers may store a range of positions to restore to + var rangeStart: Position? + var rangeEnd: Position? // The end of the call stack, so we can slice it off // when failing inside a call. // @@ -43,7 +41,35 @@ extension Processor { intRegisters: [Int], PositionRegister: [Input.Index] ) { - (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters) + return (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters) + } + + var rangeIsEmpty: Bool { rangeEnd == nil } + + mutating func updateRange(newEnd: Input.Index) { + if rangeStart == nil { + rangeStart = newEnd + } + rangeEnd = newEnd + } + + /// Move the next range position into pos, and removing it from the range + mutating func takePositionFromRange(_ input: Input) { + assert(!rangeIsEmpty) + pos = rangeEnd! 
+ shrinkRange(input) + } + + /// Shrink the range of the save point by one index, essentially dropping the last index + mutating func shrinkRange(_ input: Input) { + assert(!rangeIsEmpty) + if rangeEnd == rangeStart { + // The range is now empty + rangeStart = nil + rangeEnd = nil + } else { + input.formIndex(before: &rangeEnd!) + } } } @@ -54,6 +80,21 @@ extension Processor { SavePoint( pc: pc, pos: addressOnly ? nil : currentPosition, + rangeStart: nil, + rangeEnd: nil, + stackEnd: .init(callStack.count), + captureEnds: storedCaptures, + intRegisters: registers.ints, + posRegisters: registers.positions) + } + + func startQuantifierSavePoint() -> SavePoint { + // Restores to the instruction AFTER the current quantifier instruction + SavePoint( + pc: controller.pc + 1, + pos: nil, + rangeStart: nil, + rangeEnd: nil, stackEnd: .init(callStack.count), captureEnds: storedCaptures, intRegisters: registers.ints, diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index d6372c0ba..1e2ed757b 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -9,6 +9,8 @@ // //===----------------------------------------------------------------------===// +@_implementationOnly import _RegexParser + extension Instruction { /// An instruction's payload packs operands and destination /// registers. 
@@ -330,7 +332,9 @@ extension Instruction.Payload { ) { interpretPair() } + // MARK: Struct payloads + init(_ model: _CharacterClassModel) { self.init(CharacterClassPayload(model).rawValue) } @@ -342,11 +346,169 @@ extension Instruction.Payload { self.init(rawValue: payload.rawValue) } var assertion: AssertionPayload { - AssertionPayload.init(rawValue: self.rawValue & _payloadMask) + AssertionPayload.init(rawValue: rawValue & _payloadMask) + } + init(quantify: QuantifyPayload) { + self.init(quantify.rawValue) + } + var quantify: QuantifyPayload { + return QuantifyPayload(rawValue: rawValue & _payloadMask) } } // MARK: Struct definitions +struct QuantifyPayload: RawRepresentable { + let rawValue: UInt64 + enum PayloadType: UInt64 { + case bitset = 0 + case asciiChar = 1 + case any = 2 + case builtin = 4 + } + + // Future work: optimize this layout -> payload type should be a fast switch + // The top 8 bits are reserved for the opcode so we have 56 bits to work with + // b55-b38 - Unused + // b38-b35 - Payload type (one of 4 types, stored on 3 bits) + // b35-b27 - minTrips (8 bit int) + // b27-b18 - extraTrips (8 bit value, one bit for nil) + // b18-b16 - Quantification type (one of three types) + // b16-b0 - Payload value (depends on payload type) + static var quantKindShift: UInt64 { 16 } + static var extraTripsShift: UInt64 { 18 } + static var minTripsShift: UInt64 { 27 } + static var typeShift: UInt64 { 35 } + static var maxStorableTrips: UInt64 { (1 << 8) - 1 } + + var quantKindMask: UInt64 { 3 } + var extraTripsMask: UInt64 { 0x1FF } + var minTripsMask: UInt64 { 0xFF } + var typeMask: UInt64 { 7 } + var payloadMask: UInt64 { 0xFF_FF } + + static func packInfoValues( + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int?, + _ type: PayloadType + ) -> UInt64 { + let kindVal: UInt64 + switch kind { + case .eager: + kindVal = 0 + case .reluctant: + kindVal = 1 + case .possessive: + kindVal = 2 + } + let extraTripsVal: UInt64 = extraTrips == nil 
? 1 : UInt64(extraTrips!) << 1 + return (kindVal << QuantifyPayload.quantKindShift) + + (extraTripsVal << QuantifyPayload.extraTripsShift) + + (UInt64(minTrips) << QuantifyPayload.minTripsShift) + + (type.rawValue << QuantifyPayload.typeShift) + } + + init(rawValue: UInt64) { + self.rawValue = rawValue + assert(rawValue & _opcodeMask == 0) + } + + init( + bitset: AsciiBitsetRegister, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + assert(bitset.bits <= _payloadMask) + self.rawValue = bitset.bits + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .bitset) + } + + init( + asciiChar: UInt8, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + self.rawValue = UInt64(asciiChar) + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .asciiChar) + } + + init( + matchesNewlines: Bool, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + self.rawValue = (matchesNewlines ? 1 : 0) + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .any) + } + + init( + model: _CharacterClassModel, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + assert(model.cc.rawValue < 0xFF) + assert(model.matchLevel != .unicodeScalar) + let packedModel = model.cc.rawValue + + (model.isInverted ? 1 << 9 : 0) + + (model.isStrictASCII ? 1 << 10 : 0) + self.rawValue = packedModel + + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .builtin) + } + + var type: PayloadType { + PayloadType(rawValue: (self.rawValue >> QuantifyPayload.typeShift) & 7)! + } + + var quantKind: AST.Quantification.Kind { + switch (self.rawValue >> QuantifyPayload.quantKindShift) & quantKindMask { + case 0: return .eager + case 1: return .reluctant + case 2: return .possessive + default: + fatalError("Unreachable") + } + } + + var minTrips: UInt64 { + (self.rawValue >> QuantifyPayload.minTripsShift) & minTripsMask + } + + var extraTrips: UInt64? 
{ + let val = (self.rawValue >> QuantifyPayload.extraTripsShift) & extraTripsMask + if val == 1 { + return nil + } else { + return val >> 1 + } + } + + var bitset: AsciiBitsetRegister { + TypedInt(self.rawValue & payloadMask) + } + + var asciiChar: UInt8 { + UInt8(asserting: self.rawValue & payloadMask) + } + + var anyMatchesNewline: Bool { + (self.rawValue & 1) == 1 + } + + var builtin: _CharacterClassModel.Representation { + _CharacterClassModel.Representation(rawValue: self.rawValue & 0xFF)! + } + var builtinIsInverted: Bool { + (self.rawValue >> 9) & 1 == 1 + } + var builtinIsStrict: Bool { + (self.rawValue >> 10) & 1 == 1 + } +} + struct CharacterClassPayload: RawRepresentable { let rawValue: UInt64 // Layout: diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index f2ee88636..a41d2f4af 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -193,6 +193,13 @@ extension Instruction { /// case splitSaving + /// Fused quantify, execute, save instruction + /// Quantifies the stored instruction in an inner loop instead of looping through instructions in processor + /// Only quantifies specific nodes + /// + /// quantify(_:QuantifyPayload) + /// + case quantify /// Begin the given capture /// /// beginCapture(_:CapReg) diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 3406e9fed..959b1507e 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -201,6 +201,50 @@ extension MEProgram.Builder { .init(assertion: payload))) } + mutating func buildQuantify( + bitset: DSLTree.CustomCharacterClass.AsciiBitset, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? 
+ ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(bitset: makeAsciiBitset(bitset), kind, minTrips, extraTrips)))) + } + + mutating func buildQuantify( + asciiChar: UInt8, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(asciiChar: asciiChar, kind, minTrips, extraTrips)))) + } + + mutating func buildQuantifyAny( + matchesNewlines: Bool, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(matchesNewlines: matchesNewlines, kind, minTrips, extraTrips)))) + } + + mutating func buildQuantify( + model: _CharacterClassModel, + _ kind: AST.Quantification.Kind, + _ minTrips: Int, + _ extraTrips: Int? + ) { + instructions.append(.init( + .quantify, + .init(quantify: .init(model: model,kind, minTrips, extraTrips)))) + } + mutating func buildAccept() { instructions.append(.init(.accept)) } diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index d05348893..36a6043fe 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -1,5 +1,4 @@ @_implementationOnly import _RegexParser // For AssertionKind - extension Character { var _isHorizontalWhitespace: Bool { self.unicodeScalars.first?.isHorizontalWhitespace == true @@ -16,10 +15,28 @@ extension Processor { _ isStrictASCII: Bool, _ isScalarSemantics: Bool ) -> Bool { - guard let char = load(), let scalar = loadScalar() else { + guard let next = _doMatchBuiltin( + cc, + isInverted, + isStrictASCII, + isScalarSemantics + ) else { signalFailure() return false } + currentPosition = next + return true + } + + func _doMatchBuiltin( + _ cc: _CharacterClassModel.Representation, + _ isInverted: Bool, + _ isStrictASCII: Bool, + _ isScalarSemantics: Bool + ) -> Input.Index? 
{ + guard let char = load(), let scalar = loadScalar() else { + return nil + } let asciiCheck = (char.isASCII && !isScalarSemantics) || (scalar.isASCII && isScalarSemantics) @@ -95,12 +112,9 @@ extension Processor { } guard matched else { - signalFailure() - return false + return nil } - - currentPosition = next - return true + return next } func isAtStartOfLine(_ payload: AssertionPayload) -> Bool { @@ -185,6 +199,6 @@ extension Processor { } else { return !input.isOnWordBoundary(at: currentPosition, using: &wordIndexCache, &wordIndexMaxIndex) } - } + } } } diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 53243cd34..4bea21133 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -88,12 +88,6 @@ extension Processor { } } -extension Processor._StoredCapture: CustomStringConvertible { - var description: String { - return String(describing: self) - } -} - struct MECaptureList { var values: Array var referencedCaptureOffsets: [ReferenceID: Int] diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift new file mode 100644 index 000000000..9d17dc9bd --- /dev/null +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -0,0 +1,125 @@ +extension Processor { + func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { + var next: Input.Index? + switch payload.type { + case .bitset: + next = _doMatchBitset(registers[payload.bitset]) + case .asciiChar: + next = _doMatchScalar( + UnicodeScalar.init(_value: UInt32(payload.asciiChar)), true) + case .builtin: + // We only emit .quantify if it consumes a single character + next = _doMatchBuiltin( + payload.builtin, + payload.builtinIsInverted, + payload.builtinIsStrict, + false) + case .any: + let matched = currentPosition != input.endIndex + && (!input[currentPosition].isNewline || payload.anyMatchesNewline) + next = matched ? 
? input.index(after: currentPosition) : nil + } + return next + } + + /// Generic quantify instruction interpreter + /// - Handles .eager and .possessive + /// - Handles arbitrary minTrips and extraTrips + mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + var trips = 0 + var extraTrips = payload.extraTrips + var savePoint = startQuantifierSavePoint() + + while true { + if trips >= payload.minTrips { + if extraTrips == 0 { break } + extraTrips = extraTrips.map({$0 - 1}) + if payload.quantKind == .eager { + savePoint.updateRange(newEnd: currentPosition) + } + } + let next = _doQuantifyMatch(payload) + guard let idx = next else { break } + currentPosition = idx + trips += 1 + } + + if trips < payload.minTrips { + signalFailure() + return false + } + + if payload.quantKind == .eager && !savePoint.rangeIsEmpty { + // Drop the last saved position only when it is the current one (the match attempt there failed); if the loop stopped because extraTrips ran out it is still a valid backtrack target + if savePoint.rangeEnd == currentPosition { savePoint.shrinkRange(input) } + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) + } + } + return true + } + + /// Specialized quantify instruction interpreter for * + mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind == .eager + && payload.minTrips == 0 + && payload.extraTrips == nil) + var savePoint = startQuantifierSavePoint() + + while true { + savePoint.updateRange(newEnd: currentPosition) + let next = _doQuantifyMatch(payload) + guard let idx = next else { break } + currentPosition = idx + } + + // The last save point has saved the current position, so it's unneeded + savePoint.shrinkRange(input) + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) + } + return true + } + + /// Specialized quantify instruction interpreter for + + mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind == .eager + && payload.minTrips == 1 + && payload.extraTrips == nil) + var savePoint = startQuantifierSavePoint() + while true { + let next =
_doQuantifyMatch(payload) + guard let idx = next else { break } + currentPosition = idx + savePoint.updateRange(newEnd: currentPosition) + } + + if savePoint.rangeIsEmpty { + signalFailure() + return false + } + // The last save point has saved the current position, so it's unneeded + savePoint.shrinkRange(input) + if !savePoint.rangeIsEmpty { + savePoints.append(savePoint) + } + return true + } + + /// Specialized quantify instruction interpreter for ? + mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.minTrips == 0 + && payload.extraTrips == 1) + let next = _doQuantifyMatch(payload) + guard let idx = next else { + return true // matched zero times + } + if payload.quantKind != .possessive { + // Save the zero match + let savePoint = makeSavePoint(currentPC + 1) + savePoints.append(savePoint) + } + currentPosition = idx + return true + } +} diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 55ac49ed9..a62c1e070 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -244,18 +244,25 @@ extension Processor { currentPosition < end ? input.unicodeScalars[currentPosition] : nil } + func _doMatchScalar(_ s: Unicode.Scalar, _ boundaryCheck: Bool) -> Input.Index? 
{ + if s == loadScalar(), + let idx = input.unicodeScalars.index( + currentPosition, + offsetBy: 1, + limitedBy: end), + (!boundaryCheck || input.isOnGraphemeClusterBoundary(idx)) { + return idx + } else { + return nil + } + } + mutating func matchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) -> Bool { - guard s == loadScalar(), - let idx = input.unicodeScalars.index( - currentPosition, - offsetBy: 1, - limitedBy: end), - (!boundaryCheck || input.isOnGraphemeClusterBoundary(idx)) - else { + guard let next = _doMatchScalar(s, boundaryCheck) else { signalFailure() return false } - currentPosition = idx + currentPosition = next return true } @@ -278,17 +285,25 @@ extension Processor { return true } + func _doMatchBitset(_ bitset: DSLTree.CustomCharacterClass.AsciiBitset) -> Input.Index? { + if let cur = load(), bitset.matches(char: cur) { + return input.index(after: currentPosition) + } else { + return nil + } + } + // If we have a bitset we know that the CharacterClass only matches against // ascii characters, so check if the current input element is ascii then // check if it is set in the bitset mutating func matchBitset( _ bitset: DSLTree.CustomCharacterClass.AsciiBitset ) -> Bool { - guard let cur = load(), bitset.matches(char: cur) else { + guard let next = _doMatchBitset(bitset) else { signalFailure() return false } - _uncheckedForcedConsumeOne() + currentPosition = next return true } @@ -297,7 +312,7 @@ extension Processor { _ bitset: DSLTree.CustomCharacterClass.AsciiBitset ) -> Bool { guard let curScalar = loadScalar(), - bitset.matches(scalar: curScalar), + bitset.matches(scalar: curScalar), let idx = input.unicodeScalars.index(currentPosition, offsetBy: 1, limitedBy: end) else { signalFailure() return false @@ -307,12 +322,31 @@ extension Processor { } mutating func signalFailure() { - guard let (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = - savePoints.popLast()?.destructure - else { + guard !savePoints.isEmpty else { state = .fail return } 
+ let (pc, pos, stackEnd, capEnds, intRegisters, posRegisters): ( + pc: InstructionAddress, + pos: Position?, + stackEnd: CallStackAddress, + captureEnds: [_StoredCapture], + intRegisters: [Int], + PositionRegister: [Input.Index] + ) + + let idx = savePoints.index(before: savePoints.endIndex) + // If we have a quantifier save point, move the next range position into pos + if !savePoints[idx].rangeIsEmpty { + savePoints[idx].takePositionFromRange(input) + } + // If we have a normal save point or an empty quantifier save point, remove it + if savePoints[idx].rangeIsEmpty { + (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure + } else { + (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints[idx].destructure + } + assert(stackEnd.rawValue <= callStack.count) assert(capEnds.count == storedCaptures.count) @@ -366,7 +400,6 @@ extension Processor { _checkInvariants() } let (opcode, payload) = fetch().destructure - switch opcode { case .invalid: fatalError("Invalid program") @@ -487,6 +520,25 @@ extension Processor { ) { controller.step() } + case .quantify: + let quantPayload = payload.quantify + let matched: Bool + switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.extraTrips) { + case (.reluctant, _, _): + assertionFailure(".reluctant is not supported by .quantify") + return + case (.eager, 0, nil): + matched = runEagerZeroOrMoreQuantify(quantPayload) + case (.eager, 1, nil): + matched = runEagerOneOrMoreQuantify(quantPayload) + case (_, 0, 1): + matched = runZeroOrOneQuantify(quantPayload) + default: + matched = runQuantify(quantPayload) + } + if matched { + controller.step() + } case .consumeBy: let reg = payload.consumer @@ -590,5 +642,3 @@ extension Processor { } } } - - diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index 525beec63..cbb065fc1 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ 
b/Sources/_StringProcessing/Engine/Tracing.swift @@ -59,7 +59,11 @@ extension Processor.SavePoint { if let p = self.pos { posStr = "\(input.distance(from: input.startIndex, to: p))" } else { - posStr = "" + if rangeIsEmpty { + posStr = "" + } else { + posStr = "\(rangeStart!...rangeEnd!)" + } } return """ pc: \(self.pc), pos: \(posStr), stackEnd: \(stackEnd) diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift index 3be26f27f..c5f1f8ecd 100644 --- a/Sources/_StringProcessing/_CharacterClassModel.swift +++ b/Sources/_StringProcessing/_CharacterClassModel.swift @@ -156,6 +156,15 @@ struct _CharacterClassModel: Hashable { } } +extension _CharacterClassModel { + var consumesSingleGrapheme: Bool { + switch self.cc { + case .anyScalar: return false + default: return true + } + } +} + extension _CharacterClassModel.Representation { /// Returns true if this CharacterClass should be matched by strict ascii under the given options func isStrictAscii(options: MatchingOptions) -> Bool { diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index e0702f87f..54fc3b561 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -47,6 +47,7 @@ enum DecodedInstr { case endCapture case transformCapture case captureValue + case quantify } extension DecodedInstr { @@ -120,6 +121,8 @@ extension DecodedInstr { return .assertBy case .matchBy: return .matchBy + case .quantify: + return .quantify case .backreference: return .backreference case .beginCapture: @@ -304,7 +307,7 @@ extension RegexTests { matchingOptions(adding: [.caseInsensitive])) } - private func expectProgram( + func expectProgram( for regex: String, syntax: SyntaxOptions = .traditional, semanticLevel: RegexSemanticLevel? 
= nil, diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index ff51088ff..794e57b16 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2522,4 +2522,31 @@ extension RegexTests { expectCompletion(regex: #"(a{,4})*"#, in: "aa") expectCompletion(regex: #"((|)+)*"#, in: "aa") } + + func testQuantifyOptimization() throws { + // test that the maximum values for minTrips and extraTrips are handled correctly + let maxStorable = Int(QuantifyPayload.maxStorableTrips) + let maxExtraTrips = "a{,\(maxStorable)}" + expectProgram(for: maxExtraTrips, contains: [.quantify]) + firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) + XCTAssertNil(try Regex(maxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) + + let maxMinTrips = "a{\(maxStorable),}" + expectProgram(for: maxMinTrips, contains: [.quantify]) + firstMatchTest(maxMinTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxMinTrips, input: String(repeating: "a", count: maxStorable - 1), match: nil) + + let maxBothTrips = "a{\(maxStorable),\(maxStorable*2)}" + expectProgram(for: maxBothTrips, contains: [.quantify]) + XCTAssertNil(try Regex(maxBothTrips).wholeMatch(in: String(repeating: "a", count: maxStorable*2 + 1))) + firstMatchTest(maxBothTrips, input: String(repeating: "a", count: maxStorable*2), match: String(repeating: "a", count: maxStorable*2)) + firstMatchTest(maxBothTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxBothTrips, input: String(repeating: "a", count: maxStorable - 1), match: nil) + + expectProgram(for: "a{,\(maxStorable+1)}", doesNotContain: 
[.quantify]) + expectProgram(for: "a{\(maxStorable+1),}", doesNotContain: [.quantify]) + expectProgram(for: "a{\(maxStorable-1),\(maxStorable*2)}", doesNotContain: [.quantify]) + expectProgram(for: "a{\(maxStorable),\(maxStorable*2+1)}", doesNotContain: [.quantify]) + } } From b7b23d3cba9720250caeafecc5ad944d34208774 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 3 Aug 2022 16:15:55 -0700 Subject: [PATCH 14/33] Add instructions for consuming non-newlines and advancing in scalar view --- Sources/_StringProcessing/ByteCodeGen.swift | 17 ++---- .../Engine/InstPayload.swift | 16 ++++-- .../Engine/Instruction.swift | 4 ++ .../_StringProcessing/Engine/MEBuilder.swift | 13 +++++ .../_StringProcessing/Engine/Processor.swift | 54 +++++++++++++++++-- Tests/RegexTests/CompileTests.swift | 5 +- 6 files changed, 87 insertions(+), 22 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 66fefc49e..53c91bcca 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -217,27 +217,16 @@ fileprivate extension Compiler.ByteCodeGen { case .graphemeCluster: builder.buildAdvance(1) case .unicodeScalar: - // TODO: builder.buildAdvanceUnicodeScalar(1) - builder.buildConsume { input, bounds in - input.unicodeScalars.index(after: bounds.lowerBound) - } + builder.buildAdvanceUnicodeScalar(1) } } mutating func emitAnyNonNewline() { switch options.semanticLevel { case .graphemeCluster: - builder.buildConsume { input, bounds in - input[bounds.lowerBound].isNewline - ? nil - : input.index(after: bounds.lowerBound) - } + builder.buildConsumeNonNewline() case .unicodeScalar: - builder.buildConsume { input, bounds in - input[bounds.lowerBound].isNewline - ? 
nil - : input.unicodeScalars.index(after: bounds.lowerBound) - } + builder.buildConsumeScalarNonNewline() } } diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 1e2ed757b..9db204250 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -196,11 +196,19 @@ extension Instruction.Payload { interpret() } - init(distance: Distance) { - self.init(distance) + init(distance: Distance, isScalarDistance: Bool = false) { + self.init(isScalarDistance ? 1 : 0, distance) } - var distance: Distance { - interpret() + var distance: (isScalarDistance: Bool, Distance) { + let pair: (UInt64, Distance) = interpretPair() + return (isScalarDistance: pair.0 == 1, pair.1) + } + + init(isScalar: Bool) { + self.init(isScalar ? 1 : 0) + } + var isScalar: Bool { + self.rawValue == 1 } init(bool: BoolRegister) { diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index a41d2f4af..21ab90a03 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -122,6 +122,10 @@ extension Instruction { /// - If it is inverted /// - If it strictly matches only ascii values case matchBuiltin + + /// Matches any non newline character + /// Operand: If we are in scalar mode or not + case matchAnyNonNewline // MARK: Extension points diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 959b1507e..884ed47ab 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -142,6 +142,19 @@ extension MEProgram.Builder { mutating func buildAdvance(_ n: Distance) { instructions.append(.init(.advance, .init(distance: n))) } + + mutating func buildAdvanceUnicodeScalar(_ n: Distance) { + instructions.append( + .init(.advance, 
.init(distance: n, isScalarDistance: true))) + } + + mutating func buildConsumeNonNewline() { + instructions.append(.init(.matchAnyNonNewline, .init(isScalar: false))) + } + + mutating func buildConsumeScalarNonNewline() { + instructions.append(.init(.matchAnyNonNewline, .init(isScalar: true))) + } mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) { instructions.append(.init( diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index a62c1e070..a5a59b863 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -181,6 +181,18 @@ extension Processor { currentPosition = idx return true } + + // Advances in unicode scalar view + mutating func consumeScalar(_ n: Distance) -> Bool { + guard let idx = input.unicodeScalars.index( + currentPosition, offsetBy: n.rawValue, limitedBy: end + ) else { + signalFailure() + return false + } + currentPosition = idx + return true + } /// Continue matching at the specified index. 
/// @@ -321,6 +333,26 @@ extension Processor { return true } + // Matches the next character if it is not a newline + mutating func matchAnyNonNewline() -> Bool { + guard let c = load(), !c.isNewline else { + signalFailure() + return false + } + _uncheckedForcedConsumeOne() + return true + } + + // Matches the next scalar if it is not a newline + mutating func matchAnyNonNewlineScalar() -> Bool { + guard let s = loadScalar(), !s.isNewline else { + signalFailure() + return false + } + input.unicodeScalars.formIndex(after: &currentPosition) + return true + } + mutating func signalFailure() { guard !savePoints.isEmpty else { state = .fail @@ -469,10 +501,26 @@ extension Processor { signalFailure() case .advance: - if consume(payload.distance) { - controller.step() + let (isScalar, distance) = payload.distance + if isScalar { + if consumeScalar(distance) { + controller.step() + } + } else { + if consume(distance) { + controller.step() + } + } + case .matchAnyNonNewline: + if payload.isScalar { + if matchAnyNonNewlineScalar() { + controller.step() + } + } else { + if matchAnyNonNewline() { + controller.step() + } } - case .match: let (isCaseInsensitive, reg) = payload.elementPayload if isCaseInsensitive { diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index 54fc3b561..752921e19 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -37,6 +37,7 @@ enum DecodedInstr { case matchScalarCaseInsensitive case matchScalarUnchecked case matchBitsetScalar + case matchAnyNonNewline case matchBitset case matchBuiltin case consumeBy @@ -116,7 +117,9 @@ extension DecodedInstr { return .matchBitset } case .consumeBy: - return consumeBy + return .consumeBy + case .matchAnyNonNewline: + return .matchAnyNonNewline case .assertBy: return .assertBy case .matchBy: From 1f76eb98121c44949497b5639ea9bfe0818684a7 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug 2022 12:57:36 -0700 Subject: [PATCH 15/33] Fix
matching of backreferences in scalar mode --- Sources/_StringProcessing/ByteCodeGen.swift | 9 ++++++--- .../Engine/InstPayload.swift | 8 +++++++- .../_StringProcessing/Engine/MEBuilder.swift | 17 ++++++++++------- .../_StringProcessing/Engine/Processor.swift | 19 ++++++++++++++----- Tests/RegexTests/MatchTests.swift | 5 +++++ 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 66fefc49e..ce965de68 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -84,7 +84,8 @@ fileprivate extension Compiler.ByteCodeGen { try emitBackreference(ref.ast) case let .symbolicReference(id): - builder.buildUnresolvedReference(id: id) + builder.buildUnresolvedReference( + id: id, isScalarMode: options.semanticLevel == .unicodeScalar) case let .changeMatchingOptions(optionSequence): if !hasEmittedFirstMatchableAtom { @@ -143,9 +144,11 @@ fileprivate extension Compiler.ByteCodeGen { guard let i = n.value else { throw Unreachable("Expected a value") } - builder.buildBackreference(.init(i)) + builder.buildBackreference( + .init(i), isScalarMode: options.semanticLevel == .unicodeScalar) case .named(let name): - try builder.buildNamedReference(name) + try builder.buildNamedReference( + name, isScalarMode: options.semanticLevel == .unicodeScalar) case .relative: throw Unsupported("Backreference kind: \(ref)") } diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index 1e2ed757b..3d618e416 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -240,6 +240,13 @@ extension Instruction.Payload { interpret() } + init(capture: CaptureRegister, isScalarMode: Bool) { + self.init(isScalarMode ? 
1 : 0, capture) + } + var captureAndMode: (isScalarMode: Bool, CaptureRegister) { + let pair: (UInt64, CaptureRegister) = interpretPair() + return (pair.0 == 1, pair.1) + } init(capture: CaptureRegister) { self.init(capture) } @@ -247,7 +254,6 @@ extension Instruction.Payload { interpret() } - // MARK: Packed operand payloads init(immediate: UInt64, int: IntRegister) { diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 959b1507e..7ecfb5b70 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -292,22 +292,23 @@ extension MEProgram.Builder { } mutating func buildBackreference( - _ cap: CaptureRegister + _ cap: CaptureRegister, + isScalarMode: Bool ) { instructions.append( - .init(.backreference, .init(capture: cap))) + .init(.backreference, .init(capture: cap, isScalarMode: isScalarMode))) } - mutating func buildUnresolvedReference(id: ReferenceID) { - buildBackreference(.init(0)) + mutating func buildUnresolvedReference(id: ReferenceID, isScalarMode: Bool) { + buildBackreference(.init(0), isScalarMode: isScalarMode) unresolvedReferences[id, default: []].append(lastInstructionAddress) } - mutating func buildNamedReference(_ name: String) throws { + mutating func buildNamedReference(_ name: String, isScalarMode: Bool) throws { guard let index = captureList.indexOfCapture(named: name) else { throw RegexCompilationError.uncapturedReference } - buildBackreference(.init(index)) + buildBackreference(.init(index), isScalarMode: isScalarMode) } // TODO: Mutating because of fail address fixup, drop when @@ -456,8 +457,10 @@ fileprivate extension MEProgram.Builder { throw RegexCompilationError.uncapturedReference } for use in uses { + let (isScalarMode, _) = instructions[use.rawValue].payload.captureAndMode instructions[use.rawValue] = - Instruction(.backreference, .init(capture: .init(offset))) + Instruction(.backreference, + .init(capture: 
.init(offset), isScalarMode: isScalarMode)) } } } diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index a62c1e070..9a234e356 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -231,9 +231,17 @@ extension Processor { // Match against the current input prefix. Returns whether // it succeeded vs signaling an error. - mutating func matchSeq<C: Collection>( - _ seq: C - ) -> Bool where C.Element == Input.Element { + mutating func matchSeq( + _ seq: Substring, + isScalarMode: Bool + ) -> Bool { + if isScalarMode { + for s in seq.unicodeScalars { + guard matchScalar(s, boundaryCheck: false) else { return false } + } + return true + } + for e in seq { guard match(e) else { return false } } @@ -584,8 +592,9 @@ extension Processor { } case .backreference: + let (isScalarMode, capture) = payload.captureAndMode let capNum = Int( - asserting: payload.capture.rawValue) + asserting: capture.rawValue) guard capNum < storedCaptures.count else { fatalError("Should this be an assert?") } @@ -597,7 +606,7 @@ extension Processor { signalFailure() return } - if matchSeq(input[range]) { + if matchSeq(input[range], isScalarMode: isScalarMode) { controller.step() } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 794e57b16..e86352285 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -1643,6 +1643,11 @@ extension RegexTests { (input: "123x23", match: "23x23"), xfail: true) + // Backreferences in scalar mode + // In scalar mode the backreference should not match + firstMatchTest(#"(.+)\1"#, input: "ée\u{301}", match: "ée\u{301}") + firstMatchTest(#"(.+)\1"#, input: "ée\u{301}", match: nil, semanticLevel: .unicodeScalar) + // Backreferences in lookaheads firstMatchTests( #"^(?=.*(.)(.)\2\1).+$"#, From aa810377698c7b70c3aa9780502d99c9ed991d3a Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug
2022 13:14:41 -0700 Subject: [PATCH 16/33] Add canImport for CI --- Sources/RegexBenchmark/BenchmarkChart.swift | 2 +- Sources/RegexBenchmark/BenchmarkResults.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift index f104fc78d..3de89736c 100644 --- a/Sources/RegexBenchmark/BenchmarkChart.swift +++ b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -9,7 +9,7 @@ // //===----------------------------------------------------------------------===// -#if os(macOS) +#if os(macOS) && canImport(Charts) import Charts import SwiftUI diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index 43a57c8d0..8fdc5e1dd 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -100,7 +100,7 @@ extension BenchmarkRunner { print(item) } - #if os(macOS) + #if os(macOS) && canImport(Charts) if showChart { print(""" === Comparison chart ================================================================= From 8cdc76e91b3b74d33fa7d27caaa20a795bd03e93 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug 2022 16:23:26 -0700 Subject: [PATCH 17/33] Add compiler options for tracing and metrics --- Sources/_StringProcessing/ByteCodeGen.swift | 2 + Sources/_StringProcessing/Compiler.swift | 15 ++++--- Sources/_StringProcessing/Engine/Engine.swift | 4 ++ .../_StringProcessing/Engine/MEBuilder.swift | 6 +++ .../_StringProcessing/Engine/MEProgram.swift | 5 ++- Sources/_StringProcessing/Regex/Core.swift | 45 ++++++++++++++----- 6 files changed, 59 insertions(+), 18 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 66fefc49e..31102de1f 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -33,6 +33,8 @@ extension Compiler { self.options = options 
self.compileOptions = compileOptions self.builder.captureList = captureList + self.builder.enableTracing = compileOptions.contains(.enableTracing) + self.builder.enableMetrics = compileOptions.contains(.enableMetrics) } } } diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index b8daa8b21..f506c76d7 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -107,10 +107,15 @@ func _compileRegex( return Executor(program: program) } -extension Compiler { - struct CompileOptions: OptionSet { - let rawValue: Int - static let disableOptimizations = CompileOptions(rawValue: 1) - static let `default`: CompileOptions = [] +@_spi(RegexBenchmark) +public struct CompileOptions: OptionSet { + public let rawValue: Int + public init(rawValue: Int) { + self.rawValue = rawValue } + + public static let disableOptimizations = CompileOptions(rawValue: 1 << 0) + public static let enableTracing = CompileOptions(rawValue: 1 << 1) + public static let enableMetrics = CompileOptions(rawValue: 1 << 2) + public static let `default`: CompileOptions = [] } diff --git a/Sources/_StringProcessing/Engine/Engine.swift b/Sources/_StringProcessing/Engine/Engine.swift index 9e67e4639..2efaacddb 100644 --- a/Sources/_StringProcessing/Engine/Engine.swift +++ b/Sources/_StringProcessing/Engine/Engine.swift @@ -23,6 +23,10 @@ struct Engine { get { program.enableTracing } set { program.enableTracing = newValue } } + var enableMetrics: Bool { + get { program.enableTracing } + set { program.enableMetrics = newValue } + } init( _ program: MEProgram, diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index 959b1507e..20885b8c8 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -14,6 +14,10 @@ extension MEProgram { struct Builder { var instructions: [Instruction] = [] + + // Tracing + var 
enableTracing = false + var enableMetrics = false var elements = TypedSetVector() var sequences = TypedSetVector<[Input.Element], _SequenceRegister>() @@ -378,6 +382,8 @@ extension MEProgram.Builder { staticTransformFunctions: transformFunctions, staticMatcherFunctions: matcherFunctions, registerInfo: regInfo, + enableTracing: enableTracing, + enableMetrics: enableMetrics, captureList: captureList, referencedCaptureOffsets: referencedCaptureOffsets, initialOptions: initialOptions) diff --git a/Sources/_StringProcessing/Engine/MEProgram.swift b/Sources/_StringProcessing/Engine/MEProgram.swift index bacefb209..67f5a8bc9 100644 --- a/Sources/_StringProcessing/Engine/MEProgram.swift +++ b/Sources/_StringProcessing/Engine/MEProgram.swift @@ -31,8 +31,9 @@ struct MEProgram { var registerInfo: RegisterInfo - var enableTracing: Bool = false - + var enableTracing: Bool + var enableMetrics: Bool + let captureList: CaptureList let referencedCaptureOffsets: [ReferenceID: Int] diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 27c1f4eff..8235b33af 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -82,7 +82,7 @@ extension Regex { let tree: DSLTree /// OptionSet of compiler options for testing purposes - fileprivate var compileOptions: Compiler.CompileOptions = .default + fileprivate var compileOptions: CompileOptions = .default private final class ProgramBox { let value: MEProgram @@ -138,16 +138,37 @@ extension Regex { @available(SwiftStdlib 5.7, *) @_spi(RegexBenchmark) extension Regex { - /// Compiles the stored DSLTree into bytecode and return if it was successful - /// For measuring compilation times - /// - /// Note: This bypasses the cached program that is normally used - public func _compileRegex() -> Bool { + public struct QueryResult { + + } + + public func _queryRegex() -> QueryResult { + QueryResult() + } + + public enum _RegexInternalAction { + case 
parse(String) + case recompile + case setOptions(CompileOptions) + } + + /// Internal API for RegexBenchmark + /// Forces the regex to perform the given action, returning if it was successful + public mutating func _forceAction(_ action: _RegexInternalAction) -> Bool { do { - let _ = try Compiler( - tree: program.tree, - compileOptions: program.compileOptions).emit() - return true + switch action { + case .setOptions(let opts): + _setCompilerOptionsForTesting(opts) + return true + case .parse(let pattern): + let _ = try parse(pattern, .traditional) + return true + case .recompile: + let _ = try Compiler( + tree: program.tree, + compileOptions: program.compileOptions).emit() + return true + } } catch { return false } @@ -156,7 +177,9 @@ extension Regex { @available(SwiftStdlib 5.7, *) extension Regex { - internal mutating func _setCompilerOptionsForTesting(_ opts: Compiler.CompileOptions) { + internal mutating func _setCompilerOptionsForTesting( + _ opts: CompileOptions + ) { program.compileOptions = opts program._loweredProgramStorage = nil } From 8101f5549f3edf76a2ad79a3346bc05bbfa1923d Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 20 Jul 2022 13:33:02 -0700 Subject: [PATCH 18/33] Add metrics + update tracing --- .../Engine/Instruction.swift | 2 +- .../_StringProcessing/Engine/Metrics.swift | 24 +++++ .../_StringProcessing/Engine/Processor.swift | 3 + .../_StringProcessing/Engine/Tracing.swift | 100 ++++++++++++++---- Sources/_StringProcessing/Executor.swift | 3 +- .../_StringProcessing/Utility/Traced.swift | 2 +- 6 files changed, 109 insertions(+), 25 deletions(-) create mode 100644 Sources/_StringProcessing/Engine/Metrics.swift diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index a41d2f4af..8d8a08a15 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -22,7 +22,7 @@ struct Instruction: RawRepresentable, Hashable { } 
extension Instruction { - enum OpCode: UInt64 { + enum OpCode: UInt64, CaseIterable { case invalid = 0 // MARK: - General Purpose diff --git a/Sources/_StringProcessing/Engine/Metrics.swift b/Sources/_StringProcessing/Engine/Metrics.swift new file mode 100644 index 000000000..77a1fed09 --- /dev/null +++ b/Sources/_StringProcessing/Engine/Metrics.swift @@ -0,0 +1,24 @@ +extension Processor { + struct ProcessorMetrics { + var instructionCounts: [Int] = .init(repeating: 0, count: Instruction.OpCode.allCases.count) + var caseInsensitiveInstrs: Bool = false + } + + func printMetrics() { + // print("Total cycle count: \(cycleCount)") + // print("Instructions:") + let sorted = metrics.instructionCounts.enumerated() + .filter({$0.1 != 0}) + .sorted(by: { (a,b) in a.1 > b.1 }) + for (opcode, count) in sorted { + print("\(Instruction.OpCode.init(rawValue: UInt64(opcode))!),\(count)") + } + } + + mutating func measureMetrics() { + if shouldMeasureMetrics { + let (opcode, _) = fetch().destructure + metrics.instructionCounts[Int(opcode.rawValue)] += 1 + } + } +} diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index a62c1e070..2583d13ab 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -89,6 +89,8 @@ struct Processor { // MARK: Metrics, debugging, etc. 
var cycleCount = 0 var isTracingEnabled: Bool + let shouldMeasureMetrics: Bool = true + var metrics: ProcessorMetrics = ProcessorMetrics() } extension Processor { @@ -397,6 +399,7 @@ extension Processor { defer { cycleCount += 1 trace() + measureMetrics() _checkInvariants() } let (opcode, payload) = fetch().destructure diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index cbb065fc1..a297bc353 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -29,27 +29,81 @@ extension Processor: TracedProcessor { extension Instruction: CustomStringConvertible { var description: String { - // TODO: opcode specific rendering - "\(opcode) \(payload)" - } -} - -extension Instruction.Payload: CustomStringConvertible { - var description: String { -// var result = "" -// if hasCondition { -// result += "\(condition) " -// } -// if hasPayload { -// let payload: TypedInt<_Boo> = payload() -// result += payload.description -// } -// return result - - // TODO: Without bit packing our representation, what - // should we do? I'd say a payload cannot be printed - // in isolation of the instruction... 
- return "\(rawValue)" + switch opcode { + case .advance: + return "\(opcode) \(payload.distance)" + case .assertBy: + return "\(opcode) \(payload.assertion)" + case .backreference: + return "\(opcode) \(payload.capture.rawValue)" + case .beginCapture: + return "\(opcode) \(payload.capture.rawValue)" + case .branch: + return "\(opcode) \(payload.addr)" + case .captureValue: + let (val, cap) = payload.pairedValueCapture + return "\(opcode) vals[\(val)] -> captures[\(cap)]" + case .condBranchSamePosition: + let (addr, pos) = payload.pairedAddrPos + return "\(opcode) \(addr) pos[\(pos)]" + case .condBranchZeroElseDecrement: + let (addr, int) = payload.pairedAddrInt + return "\(opcode) \(addr) int[\(int)]" + case .consumeBy: + return "\(opcode) consumer[\(payload.consumer)]" + case .endCapture: + return "\(opcode) \(payload.capture.rawValue)" + case .match: + let (isCaseInsensitive, reg) = payload.elementPayload + if isCaseInsensitive { + return "matchCaseInsensitive char[\(reg)]" + } else { + return "match char[\(reg)]" + } + case .matchBitset: + let (isScalar, reg) = payload.bitsetPayload + if isScalar { + return "matchBitsetScalar bitset[\(reg)]" + } else { + return "matchBitset bitset[\(reg)]" + } + case .matchBuiltin: + let payload = payload.characterClassPayload + return "matchBuiltin \(payload.cc) (\(payload.isInverted))" + case .matchBy: + let (matcherReg, valReg) = payload.pairedMatcherValue + return "\(opcode) match[\(matcherReg)] -> val[\(valReg)]" + case .matchScalar: + let (scalar, caseInsensitive, boundaryCheck) = payload.scalarPayload + if caseInsensitive { + return "matchScalarCaseInsensitive \(scalar) boundaryCheck: \(boundaryCheck)" + } else { + return "matchScalar \(scalar) boundaryCheck: \(boundaryCheck)" + } + case .moveCurrentPosition: + let reg = payload.position + return "\(opcode) -> pos[\(reg)]" + case .moveImmediate: + let (imm, reg) = payload.pairedImmediateInt + return "\(opcode) \(imm) -> int[\(reg)]" + case .quantify: + let payload = 
payload.quantify + return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.extraTrips?.description ?? "unbounded" )" + case .save: + let resumeAddr = payload.addr + return "\(opcode) \(resumeAddr)" + case .saveAddress: + let resumeAddr = payload.addr + return "\(opcode) \(resumeAddr)" + case .splitSaving: + let (nextPC, resumeAddr) = payload.pairedAddrAddr + return "\(opcode) saving: \(resumeAddr) jumpingTo: \(nextPC)" + case .transformCapture: + let (cap, trans) = payload.pairedCaptureTransform + return "\(opcode) trans[\(trans)](\(cap))" + default: + return "\(opcode)" + } } } @@ -62,7 +116,9 @@ extension Processor.SavePoint { if rangeIsEmpty { posStr = "" } else { - posStr = "\(rangeStart!...rangeEnd!)" + let startStr = "\(input.distance(from: input.startIndex, to: rangeStart!))" + let endStr = "\(input.distance(from: input.startIndex, to: rangeEnd!))" + posStr = "\(startStr)...\(endStr)" } } return """ diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 718d37026..f9f16e153 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -30,7 +30,7 @@ struct Executor { input: input, subjectBounds: subjectBounds, searchBounds: searchBounds) - + defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } } var low = searchBounds.lowerBound let high = searchBounds.upperBound while true { @@ -57,6 +57,7 @@ struct Executor { ) throws -> Regex.Match? 
{ var cpu = engine.makeProcessor( input: input, bounds: subjectBounds, matchMode: mode) + defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } } return try _match(input, from: subjectBounds.lowerBound, using: &cpu) } diff --git a/Sources/_StringProcessing/Utility/Traced.swift b/Sources/_StringProcessing/Utility/Traced.swift index 5ae7cd245..ae7849dbe 100644 --- a/Sources/_StringProcessing/Utility/Traced.swift +++ b/Sources/_StringProcessing/Utility/Traced.swift @@ -144,7 +144,7 @@ extension TracedProcessor { result += formatCallStack() result += formatSavePoints() result += formatRegisters() - result += formatInput() + // result += formatInput() result += "\n" result += formatInstructionWindow() return result From b9f68c888df733eec99af1ebb5a9654f497cd7a7 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug 2022 17:26:53 -0700 Subject: [PATCH 19/33] Enable metrics and tracing on the benchmarker --- Sources/RegexBenchmark/Benchmark.swift | 34 ++++++++++++----- .../BenchmarkRegistration.swift | 35 ++++++++---------- Sources/RegexBenchmark/BenchmarkRunner.swift | 37 +++++++++++++++---- Sources/RegexBenchmark/CLI.swift | 19 +++++++++- Sources/RegexBenchmark/Debug.swift | 14 +++---- .../_StringProcessing/Engine/Consume.swift | 6 ++- Sources/_StringProcessing/Engine/Engine.swift | 2 +- .../Engine/Instruction.swift | 2 +- .../_StringProcessing/Engine/Metrics.swift | 29 +++++++++++---- .../_StringProcessing/Engine/Processor.swift | 17 +++++++-- .../_StringProcessing/Utility/Traced.swift | 13 ++++++- Tests/RegexTests/MatchTests.swift | 2 +- Utils/createBenchmark.py | 2 +- 13 files changed, 148 insertions(+), 64 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index 43ce96542..978d376db 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -3,14 +3,14 @@ import Foundation protocol RegexBenchmark { var name: String { get } - func compile() + mutating func compile() 
func run() func debug() } struct Benchmark: RegexBenchmark { let name: String - let regex: Regex + var regex: Regex let type: MatchType let target: String @@ -20,8 +20,15 @@ struct Benchmark: RegexBenchmark { case allMatches } - func compile() { - blackHole(regex._compileRegex()) + mutating func compile() { + let _ = regex._forceAction(.recompile) + } + + mutating func enableTracing() { + let _ = regex._forceAction(.setOptions(.enableTracing)) + } + mutating func enableMetrics() { + let _ = regex._forceAction(.setOptions(.enableMetrics)) } func run() { @@ -48,7 +55,8 @@ struct NSBenchmark: RegexBenchmark { case first } - func compile() {} + // Not measured for NSRegularExpression + mutating func compile() {} func run() { switch type { @@ -61,11 +69,17 @@ struct NSBenchmark: RegexBenchmark { /// A benchmark running a regex on strings in input set struct InputListBenchmark: RegexBenchmark { let name: String - let regex: Regex + var regex: Regex let targets: [String] - func compile() { - blackHole(regex._compileRegex()) + mutating func compile() { + blackHole(regex._forceAction(.recompile)) + } + mutating func enableTracing() { + let _ = regex._forceAction(.setOptions(.enableTracing)) + } + mutating func enableMetrics() { + let _ = regex._forceAction(.setOptions(.enableMetrics)) } func run() { @@ -90,7 +104,7 @@ struct InputListNSBenchmark: RegexBenchmark { NSRange(target.startIndex.. 
BenchmarkRunner { - var benchmark = BenchmarkRunner("RegexBench", samples, quiet) + mutating func registerDefault() { // -- start of registrations -- - benchmark.addReluctantQuant() - benchmark.addCSS() - benchmark.addNotFound() - benchmark.addGraphemeBreak() - benchmark.addHangulSyllable() - // benchmark.addHTML() // Disabled due to \b being unusably slow - benchmark.addEmail() - benchmark.addCustomCharacterClasses() - benchmark.addBuiltinCC() - benchmark.addUnicode() - benchmark.addLiteralSearch() - benchmark.addDiceNotation() - benchmark.addErrorMessages() - benchmark.addIpAddress() + self.addReluctantQuant() + self.addCSS() + self.addNotFound() + self.addGraphemeBreak() + self.addHangulSyllable() + // self.addHTML() // Disabled due to \b being unusably slow + self.addEmail() + self.addCustomCharacterClasses() + self.addBuiltinCC() + self.addUnicode() + self.addLiteralSearch() + self.addDiceNotation() + self.addErrorMessages() + self.addIpAddress() // -- end of registrations -- - return benchmark } } diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index 3188f6567..e84facf4a 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -1,4 +1,5 @@ import Foundation +@_spi(RegexBenchmark) import _StringProcessing struct BenchmarkRunner { let suiteName: String @@ -7,21 +8,43 @@ struct BenchmarkRunner { let samples: Int var results: SuiteResult = SuiteResult() let quiet: Bool - - init(_ suiteName: String, _ n: Int, _ quiet: Bool) { - self.suiteName = suiteName - self.samples = n - self.quiet = quiet + let enableTracing: Bool + let enableMetrics: Bool + + // Forcibly include firstMatch benchmarks for all CrossBenchmarks + let includeFirstOverride: Bool + + mutating func register(_ benchmark: some RegexBenchmark) { + suite.append(benchmark) } - mutating func register(_ new: some RegexBenchmark) { - suite.append(new) + mutating func register(_ benchmark: 
Benchmark) { + var benchmark = benchmark + if enableTracing { + benchmark.enableTracing() + } + if enableMetrics { + benchmark.enableMetrics() + } + suite.append(benchmark) + } + + mutating func register(_ benchmark: InputListBenchmark) { + var benchmark = benchmark + if enableTracing { + benchmark.enableTracing() + } + if enableMetrics { + benchmark.enableMetrics() + } + suite.append(benchmark) } mutating func measure( benchmark: some RegexBenchmark, samples: Int ) -> BenchmarkResult { + var benchmark = benchmark var runtimes: [Time] = [] var compileTimes: [Time] = [] // Initial run to make sure the regex has been compiled diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index 0746aeeeb..db345264c 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -37,9 +37,26 @@ struct Runner: ParsableCommand { @Flag(help: "Exclude running NSRegex benchmarks") var excludeNs = false + + @Flag(help: "Enable tracing of the engine (warning: lots of output)") + var enableTracing: Bool = false + + @Flag(help: "Enable engine metrics (warning: lots of output)") + var enableMetrics: Bool = false + + @Flag(help: "Include firstMatch benchmarks in CrossBenchmark (off by default") + var includeFirst: Bool = false mutating func run() throws { - var runner = BenchmarkRunner.makeRunner(samples, quiet) + var runner = BenchmarkRunner( + suiteName: "DefaultRegexSuite", + samples: samples, + quiet: quiet, + enableTracing: enableTracing, + enableMetrics: enableMetrics, + includeFirstOverride: includeFirst) + + runner.registerDefault() if !self.specificBenchmarks.isEmpty { runner.suite = runner.suite.filter { b in diff --git a/Sources/RegexBenchmark/Debug.swift b/Sources/RegexBenchmark/Debug.swift index fcd11f7ca..da6667d1b 100644 --- a/Sources/RegexBenchmark/Debug.swift +++ b/Sources/RegexBenchmark/Debug.swift @@ -6,7 +6,7 @@ extension Benchmark { case .whole: let result = target.wholeMatch(of: regex) if let match = result { - if 
match.0.count > 100 { + if match.0.count > 1000 { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -22,7 +22,7 @@ extension Benchmark { } print("- Total matches: \(results.count)") - if results.count > 10 { + if results.count > 100 { print("# Too many matches, not printing") let avgLen = results.map({result in String(target[result.range]).count}) .reduce(0.0, {$0 + Double($1)}) / Double(results.count) @@ -32,7 +32,7 @@ extension Benchmark { } for match in results { - if match.0.count > 100 { + if match.0.count > 1000 { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -42,7 +42,7 @@ extension Benchmark { case .first: let result = target.firstMatch(of: regex) if let match = result { - if match.0.count > 100 { + if match.0.count > 1000 { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -66,13 +66,13 @@ extension NSBenchmark { } print("- Total matches: \(results.count)") - if results.count > 10 { + if results.count > 100 { print("# Too many matches, not printing") return } for m in results { - if m.range.length > 100 { + if m.range.length > 1000 { print("- Match: len = \(m.range.length)") } else { print("- Match: \(target[Range(m.range, in: target)!])") @@ -81,7 +81,7 @@ extension NSBenchmark { case .first: let result = regex.firstMatch(in: target, range: range) if let match = result { - if match.range.length > 100 { + if match.range.length > 1000 { print("- Match: len = \(match.range.length)") } else { print("- Match: \(target[Range(match.range, in: target)!])") diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift index 12f65a777..6af973919 100644 --- a/Sources/_StringProcessing/Engine/Consume.swift +++ b/Sources/_StringProcessing/Engine/Consume.swift @@ -21,7 +21,8 @@ extension Engine { subjectBounds: bounds, searchBounds: bounds, matchMode: matchMode, - isTracingEnabled: enableTracing) + isTracingEnabled: 
enableTracing, + shouldMeasureMetrics: enableMetrics) } func makeFirstMatchProcessor( @@ -35,7 +36,8 @@ extension Engine { subjectBounds: subjectBounds, searchBounds: searchBounds, matchMode: .partialFromFront, - isTracingEnabled: enableTracing) + isTracingEnabled: enableTracing, + shouldMeasureMetrics: enableMetrics) } } diff --git a/Sources/_StringProcessing/Engine/Engine.swift b/Sources/_StringProcessing/Engine/Engine.swift index 2efaacddb..db9ecfa6c 100644 --- a/Sources/_StringProcessing/Engine/Engine.swift +++ b/Sources/_StringProcessing/Engine/Engine.swift @@ -24,7 +24,7 @@ struct Engine { set { program.enableTracing = newValue } } var enableMetrics: Bool { - get { program.enableTracing } + get { program.enableMetrics } set { program.enableMetrics = newValue } } diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift index 8d8a08a15..a41d2f4af 100644 --- a/Sources/_StringProcessing/Engine/Instruction.swift +++ b/Sources/_StringProcessing/Engine/Instruction.swift @@ -22,7 +22,7 @@ struct Instruction: RawRepresentable, Hashable { } extension Instruction { - enum OpCode: UInt64, CaseIterable { + enum OpCode: UInt64 { case invalid = 0 // MARK: - General Purpose diff --git a/Sources/_StringProcessing/Engine/Metrics.swift b/Sources/_StringProcessing/Engine/Metrics.swift index 77a1fed09..753c3c3d1 100644 --- a/Sources/_StringProcessing/Engine/Metrics.swift +++ b/Sources/_StringProcessing/Engine/Metrics.swift @@ -1,24 +1,37 @@ extension Processor { struct ProcessorMetrics { - var instructionCounts: [Int] = .init(repeating: 0, count: Instruction.OpCode.allCases.count) - var caseInsensitiveInstrs: Bool = false + var instructionCounts: [Instruction.OpCode: Int] = [:] + var backtracks: Int = 0 + var resets: Int = 0 } func printMetrics() { - // print("Total cycle count: \(cycleCount)") - // print("Instructions:") - let sorted = metrics.instructionCounts.enumerated() + print("===") + print("Total cycle count: 
\(cycleCount)") + print("Backtracks: \(metrics.backtracks)") + print("Resets: \(metrics.resets)") + print("Instructions:") + let sorted = metrics.instructionCounts .filter({$0.1 != 0}) .sorted(by: { (a,b) in a.1 > b.1 }) for (opcode, count) in sorted { - print("\(Instruction.OpCode.init(rawValue: UInt64(opcode))!),\(count)") + print("> \(opcode): \(count)") + } + print("===") + } + + mutating func measure() { + let (opcode, _) = fetch().destructure + if metrics.instructionCounts.keys.contains(opcode) { + metrics.instructionCounts[opcode]! += 1 + } else { + metrics.instructionCounts.updateValue(1, forKey: opcode) } } mutating func measureMetrics() { if shouldMeasureMetrics { - let (opcode, _) = fetch().destructure - metrics.instructionCounts[Int(opcode.rawValue)] += 1 + measure() } } } diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 2583d13ab..a0acaaec7 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -89,7 +89,7 @@ struct Processor { // MARK: Metrics, debugging, etc. var cycleCount = 0 var isTracingEnabled: Bool - let shouldMeasureMetrics: Bool = true + let shouldMeasureMetrics: Bool var metrics: ProcessorMetrics = ProcessorMetrics() } @@ -107,8 +107,10 @@ extension Processor { subjectBounds: Range, searchBounds: Range, matchMode: MatchMode, - isTracingEnabled: Bool + isTracingEnabled: Bool, + shouldMeasureMetrics: Bool ) { + // print("metrics? \(shouldMeasureMetrics) tracing? 
\(isTracingEnabled)") self.controller = Controller(pc: 0) self.instructions = program.instructions self.input = input @@ -116,6 +118,7 @@ extension Processor { self.searchBounds = searchBounds self.matchMode = matchMode self.isTracingEnabled = isTracingEnabled + self.shouldMeasureMetrics = shouldMeasureMetrics self.currentPosition = searchBounds.lowerBound // Initialize registers with end of search bounds @@ -142,7 +145,8 @@ extension Processor { self.state = .inProgress self.failureReason = nil - + + if shouldMeasureMetrics { metrics.resets += 1 } _checkInvariants() } @@ -358,6 +362,8 @@ extension Processor { storedCaptures = capEnds registers.ints = intRegisters registers.positions = posRegisters + + if shouldMeasureMetrics { metrics.backtracks += 1 } } mutating func abort(_ e: Error? = nil) { @@ -395,7 +401,10 @@ extension Processor { mutating func cycle() { _checkInvariants() assert(state == .inProgress) - if cycleCount == 0 { trace() } + if cycleCount == 0 { + trace() + measureMetrics() + } defer { cycleCount += 1 trace() diff --git a/Sources/_StringProcessing/Utility/Traced.swift b/Sources/_StringProcessing/Utility/Traced.swift index ae7849dbe..3f345f3dc 100644 --- a/Sources/_StringProcessing/Utility/Traced.swift +++ b/Sources/_StringProcessing/Utility/Traced.swift @@ -80,6 +80,17 @@ extension TracedProcessor { } func formatInput() -> String { + // Cut a reasonably sized substring from the input to print + let start = input.index( + currentPosition, + offsetBy: -10, + limitedBy: input.startIndex) ?? input.startIndex + let end = input.index( + currentPosition, + offsetBy: 10, + limitedBy: input.endIndex) ?? input.endIndex + let input = input[start...end] + // String override for printing sub-character information. 
if !input.indices.contains(currentPosition) { // Format unicode scalars as: @@ -144,7 +155,7 @@ extension TracedProcessor { result += formatCallStack() result += formatSavePoints() result += formatRegisters() - // result += formatInput() + result += formatInput() result += "\n" result += formatInstructionWindow() return result diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 794e57b16..0e8ec044e 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -11,7 +11,7 @@ import XCTest @testable import _RegexParser -@testable import _StringProcessing +@testable @_spi(RegexBenchmark) import _StringProcessing import TestSupport struct MatchError: Error { diff --git a/Utils/createBenchmark.py b/Utils/createBenchmark.py index 7950ea522..6fce1bc69 100644 --- a/Utils/createBenchmark.py +++ b/Utils/createBenchmark.py @@ -46,7 +46,7 @@ def register_benchmark(name): new_file_contents.append(line) else: # add the newest benchmark - new_file_contents.append(f" benchmark.add{name}()\n") + new_file_contents.append(f" self.add{name}()\n") new_file_contents.append(line) # write the new contents From 4b107a873f004d04be98b351484174a1f9418ba0 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug 2022 17:59:34 -0700 Subject: [PATCH 20/33] Fix tracing --- Sources/RegexBenchmark/Benchmark.swift | 8 +++---- .../_StringProcessing/Engine/Consume.swift | 2 +- Sources/_StringProcessing/Engine/Engine.swift | 21 ++++--------------- .../_StringProcessing/Engine/Processor.swift | 1 - .../_StringProcessing/Engine/Tracing.swift | 4 ++-- Sources/_StringProcessing/Executor.swift | 4 ++-- Sources/_StringProcessing/Regex/Core.swift | 17 ++++----------- .../_StringProcessing/Utility/Traced.swift | 16 +++++++++----- Tests/RegexTests/MatchTests.swift | 2 +- 9 files changed, 29 insertions(+), 46 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index 978d376db..d905bc88c 100644 
--- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -25,10 +25,10 @@ struct Benchmark: RegexBenchmark { } mutating func enableTracing() { - let _ = regex._forceAction(.setOptions(.enableTracing)) + let _ = regex._forceAction(.addOptions(.enableTracing)) } mutating func enableMetrics() { - let _ = regex._forceAction(.setOptions(.enableMetrics)) + let _ = regex._forceAction(.addOptions([.enableMetrics, .disableOptimizations])) } func run() { @@ -76,10 +76,10 @@ struct InputListBenchmark: RegexBenchmark { blackHole(regex._forceAction(.recompile)) } mutating func enableTracing() { - let _ = regex._forceAction(.setOptions(.enableTracing)) + let _ = regex._forceAction(.addOptions(.enableTracing)) } mutating func enableMetrics() { - let _ = regex._forceAction(.setOptions(.enableMetrics)) + let _ = regex._forceAction(.addOptions(.enableMetrics)) } func run() { diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift index 6af973919..6b4049283 100644 --- a/Sources/_StringProcessing/Engine/Consume.swift +++ b/Sources/_StringProcessing/Engine/Consume.swift @@ -30,7 +30,7 @@ extension Engine { subjectBounds: Range, searchBounds: Range ) -> Processor { - Processor( + return Processor( program: program, input: input, subjectBounds: subjectBounds, diff --git a/Sources/_StringProcessing/Engine/Engine.swift b/Sources/_StringProcessing/Engine/Engine.swift index db9ecfa6c..a5cb11bd6 100644 --- a/Sources/_StringProcessing/Engine/Engine.swift +++ b/Sources/_StringProcessing/Engine/Engine.swift @@ -13,29 +13,16 @@ // But, we can play around with this. 
struct Engine { - var program: MEProgram + let program: MEProgram // TODO: Pre-allocated register banks var instructions: InstructionList { program.instructions } - var enableTracing: Bool { - get { program.enableTracing } - set { program.enableTracing = newValue } - } - var enableMetrics: Bool { - get { program.enableMetrics } - set { program.enableMetrics = newValue } - } + var enableTracing: Bool { program.enableTracing } + var enableMetrics: Bool { program.enableMetrics } - init( - _ program: MEProgram, - enableTracing: Bool? = nil - ) { - var program = program - if let t = enableTracing { - program.enableTracing = t - } + init(_ program: MEProgram) { self.program = program } } diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index a0acaaec7..bef2d3495 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -110,7 +110,6 @@ extension Processor { isTracingEnabled: Bool, shouldMeasureMetrics: Bool ) { - // print("metrics? \(shouldMeasureMetrics) tracing? 
\(isTracingEnabled)") self.controller = Controller(pc: 0) self.instructions = program.instructions self.input = input diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index a297bc353..725319b00 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -76,9 +76,9 @@ extension Instruction: CustomStringConvertible { case .matchScalar: let (scalar, caseInsensitive, boundaryCheck) = payload.scalarPayload if caseInsensitive { - return "matchScalarCaseInsensitive \(scalar) boundaryCheck: \(boundaryCheck)" + return "matchScalarCaseInsensitive '\(scalar)' boundaryCheck: \(boundaryCheck)" } else { - return "matchScalar \(scalar) boundaryCheck: \(boundaryCheck)" + return "matchScalar '\(scalar)' boundaryCheck: \(boundaryCheck)" } case .moveCurrentPosition: let reg = payload.position diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index f9f16e153..85fb72acf 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -15,8 +15,8 @@ struct Executor { // TODO: consider let, for now lets us toggle tracing var engine: Engine - init(program: MEProgram, enablesTracing: Bool = false) { - self.engine = Engine(program, enableTracing: enablesTracing) + init(program: MEProgram) { + self.engine = Engine(program) } @available(SwiftStdlib 5.7, *) diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 8235b33af..26741b746 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -149,7 +149,7 @@ extension Regex { public enum _RegexInternalAction { case parse(String) case recompile - case setOptions(CompileOptions) + case addOptions(CompileOptions) } /// Internal API for RegexBenchmark @@ -157,8 +157,9 @@ extension Regex { public mutating func _forceAction(_ action: 
_RegexInternalAction) -> Bool { do { switch action { - case .setOptions(let opts): - _setCompilerOptionsForTesting(opts) + case .addOptions(let opts): + program.compileOptions.insert(opts) + program._loweredProgramStorage = nil return true case .parse(let pattern): let _ = try parse(pattern, .traditional) @@ -174,13 +175,3 @@ extension Regex { } } } - -@available(SwiftStdlib 5.7, *) -extension Regex { - internal mutating func _setCompilerOptionsForTesting( - _ opts: CompileOptions - ) { - program.compileOptions = opts - program._loweredProgramStorage = nil - } -} diff --git a/Sources/_StringProcessing/Utility/Traced.swift b/Sources/_StringProcessing/Utility/Traced.swift index 3f345f3dc..112a601b1 100644 --- a/Sources/_StringProcessing/Utility/Traced.swift +++ b/Sources/_StringProcessing/Utility/Traced.swift @@ -80,17 +80,21 @@ extension TracedProcessor { } func formatInput() -> String { + let distanceFromStart = input.distance( + from: input.startIndex, + to: currentPosition) + // Cut a reasonably sized substring from the input to print let start = input.index( currentPosition, - offsetBy: -10, + offsetBy: -30, limitedBy: input.startIndex) ?? input.startIndex let end = input.index( currentPosition, - offsetBy: 10, + offsetBy: 30, limitedBy: input.endIndex) ?? input.endIndex - let input = input[start...end] - + let input = input[start.. 
Date: Thu, 4 Aug 2022 18:43:31 -0700 Subject: [PATCH 21/33] Cleanup benchmark protocols --- Sources/RegexBenchmark/Benchmark.swift | 50 ++++++++------------ Sources/RegexBenchmark/BenchmarkRunner.swift | 41 +++++++--------- 2 files changed, 38 insertions(+), 53 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index d905bc88c..99b75b1af 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -3,33 +3,40 @@ import Foundation protocol RegexBenchmark { var name: String { get } - mutating func compile() func run() func debug() } -struct Benchmark: RegexBenchmark { - let name: String - var regex: Regex - let type: MatchType - let target: String +protocol SwiftRegexBenchmark: RegexBenchmark { + var regex: Regex { get set } + mutating func compile() + mutating func enableTracing() + mutating func enableMetrics() +} - enum MatchType { - case whole - case first - case allMatches - } - +extension SwiftRegexBenchmark { mutating func compile() { let _ = regex._forceAction(.recompile) } - mutating func enableTracing() { let _ = regex._forceAction(.addOptions(.enableTracing)) } mutating func enableMetrics() { let _ = regex._forceAction(.addOptions([.enableMetrics, .disableOptimizations])) } +} + +struct Benchmark: SwiftRegexBenchmark { + let name: String + var regex: Regex + let type: MatchType + let target: String + + enum MatchType { + case whole + case first + case allMatches + } func run() { switch type { @@ -55,9 +62,6 @@ struct NSBenchmark: RegexBenchmark { case first } - // Not measured for NSRegularExpression - mutating func compile() {} - func run() { switch type { case .allMatches: blackHole(regex.matches(in: target, range: range)) @@ -67,20 +71,10 @@ struct NSBenchmark: RegexBenchmark { } /// A benchmark running a regex on strings in input set -struct InputListBenchmark: RegexBenchmark { +struct InputListBenchmark: SwiftRegexBenchmark { let name: String var regex: Regex 
let targets: [String] - - mutating func compile() { - blackHole(regex._forceAction(.recompile)) - } - mutating func enableTracing() { - let _ = regex._forceAction(.addOptions(.enableTracing)) - } - mutating func enableMetrics() { - let _ = regex._forceAction(.addOptions(.enableMetrics)) - } func run() { for target in targets { @@ -103,8 +97,6 @@ struct InputListNSBenchmark: RegexBenchmark { func range(in target: String) -> NSRange { NSRange(target.startIndex.. BenchmarkResult { - var benchmark = benchmark var runtimes: [Time] = [] - var compileTimes: [Time] = [] // Initial run to make sure the regex has been compiled benchmark.run() - // Measure compilataion time - for _ in 0.. Date: Thu, 4 Aug 2022 19:32:35 -0700 Subject: [PATCH 22/33] Add parse time measurement + cleanup benchmarker --- Sources/RegexBenchmark/Benchmark.swift | 16 +++ Sources/RegexBenchmark/BenchmarkChart.swift | 36 ++---- Sources/RegexBenchmark/BenchmarkResults.swift | 103 +++++++++++------- Sources/RegexBenchmark/BenchmarkRunner.swift | 84 +++++++------- .../Suite/CustomCharacterClasses.swift | 7 ++ Sources/RegexBenchmark/Utils/Stats.swift | 2 +- .../_StringProcessing/Engine/Consume.swift | 2 +- Sources/_StringProcessing/Regex/Core.swift | 8 -- 8 files changed, 141 insertions(+), 117 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index 99b75b1af..386129b2b 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -9,7 +9,9 @@ protocol RegexBenchmark { protocol SwiftRegexBenchmark: RegexBenchmark { var regex: Regex { get set } + var pattern: String? 
{ get } mutating func compile() + mutating func parse() -> Bool mutating func enableTracing() mutating func enableMetrics() } @@ -18,6 +20,14 @@ extension SwiftRegexBenchmark { mutating func compile() { let _ = regex._forceAction(.recompile) } + mutating func parse() -> Bool { + if let s = pattern { + let _ = regex._forceAction(.parse(s)) + return true + } else { + return false + } + } mutating func enableTracing() { let _ = regex._forceAction(.addOptions(.enableTracing)) } @@ -29,6 +39,7 @@ extension SwiftRegexBenchmark { struct Benchmark: SwiftRegexBenchmark { let name: String var regex: Regex + let pattern: String? let type: MatchType let target: String @@ -74,6 +85,7 @@ struct NSBenchmark: RegexBenchmark { struct InputListBenchmark: SwiftRegexBenchmark { let name: String var regex: Regex + let pattern: String? let targets: [String] func run() { @@ -145,6 +157,7 @@ struct CrossBenchmark { Benchmark( name: baseName + "Whole", regex: swiftRegex, + pattern: regex, type: .whole, target: input)) runner.register( @@ -158,6 +171,7 @@ struct CrossBenchmark { Benchmark( name: baseName + "All", regex: swiftRegex, + pattern: regex, type: .allMatches, target: input)) runner.register( @@ -171,6 +185,7 @@ struct CrossBenchmark { Benchmark( name: baseName + "First", regex: swiftRegex, + pattern: regex, type: .first, target: input)) runner.register( @@ -200,6 +215,7 @@ struct CrossInputListBenchmark { runner.register(InputListBenchmark( name: baseName, regex: swiftRegex, + pattern: regex, targets: inputs )) runner.register(InputListNSBenchmark( diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift index 3de89736c..2e33eb384 100644 --- a/Sources/RegexBenchmark/BenchmarkChart.swift +++ b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -17,23 +17,24 @@ import SwiftUI struct BenchmarkChart: View { var comparisons: [BenchmarkResult.Comparison] + // Sort by normalized difference var sortedComparisons: [BenchmarkResult.Comparison] { 
comparisons.sorted { a, b in - a.latest.median.seconds/a.baseline.median.seconds < - b.latest.median.seconds/b.baseline.median.seconds + a.normalizedDiff < b.normalizedDiff } } var body: some View { VStack(alignment: .leading) { Chart { ForEach(sortedComparisons) { comparison in - let new = comparison.latest.median.seconds - let old = comparison.baseline.median.seconds - chartBody( - name: comparison.name, - new: new, - old: old, - sampleCount: comparison.latest.samples) + // Normalized runtime + BarMark( + x: .value("Name", comparison.name), + y: .value("Normalized runtime", comparison.normalizedDiff)) + .foregroundStyle(LinearGradient( + colors: [.accentColor, comparison.diff?.seconds ?? 0 <= 0 ? .green : .yellow], + startPoint: .bottom, + endPoint: .top)) } // Baseline RuleMark(y: .value("Time", 1.0)) @@ -43,23 +44,6 @@ struct BenchmarkChart: View { }.frame(idealHeight: 400) } } - - @ChartContentBuilder - func chartBody( - name: String, - new: TimeInterval, - old: TimeInterval, - sampleCount: Int - ) -> some ChartContent { - // Normalized runtime - BarMark( - x: .value("Name", name), - y: .value("Normalized runtime", new / old)) - .foregroundStyle(LinearGradient( - colors: [.accentColor, new - old <= 0 ? .green : .yellow], - startPoint: .bottom, - endPoint: .top)) - } } struct BenchmarkResultApp: App { diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index 8fdc5e1dd..cdc60c91e 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -106,15 +106,7 @@ extension BenchmarkRunner { === Comparison chart ================================================================= Press Control-C to close... 
""") - BenchmarkResultApp.comparisons = { - return comparisons.sorted { - let delta0 = Float($0.latest.median.seconds - $0.baseline.median.seconds) - / Float($0.baseline.median.seconds) - let delta1 = Float($1.latest.median.seconds - $1.baseline.median.seconds) - / Float($1.baseline.median.seconds) - return delta0 > delta1 - } - }() + BenchmarkResultApp.comparisons = comparisons BenchmarkResultApp.main() } #endif @@ -141,11 +133,43 @@ extension BenchmarkRunner { } } -struct BenchmarkResult: Codable { - let compileTime: Time +struct Measurement: Codable, CustomStringConvertible { let median: Time let stdev: Double let samples: Int + + init(results: [Time]) { + let sorted = results.sorted() + self.samples = sorted.count + self.median = sorted[samples/2] + let sum = results.reduce(0.0) {acc, next in acc + next.seconds} + let mean = sum / Double(samples) + let squareDiffs = results.reduce(0.0) { acc, next in + acc + pow(next.seconds - mean, 2) + } + self.stdev = (squareDiffs / Double(samples)).squareRoot() + } + + var description: String { + return "\(median) (stdev: \(Time(stdev)), N = \(samples))" + } +} + +struct BenchmarkResult: Codable, CustomStringConvertible { + let runtime: Measurement + let compileTime: Measurement? + let parseTime: Measurement? + + var description: String { + var base = "> run time: \(runtime.description)" + if let compileTime = compileTime { + base += "\n> compile time: \(compileTime)" + } + if let parseTime = parseTime { + base += "\n> parse time: \(parseTime)" + } + return base + } } extension BenchmarkResult { @@ -160,56 +184,57 @@ extension BenchmarkResult { case runtime case compileTime } + + var latestTime: Time { + switch type { + case .compileTime: + return latest.compileTime?.median ?? .zero + case .runtime: + return latest.runtime.median + } + } + + var baselineTime: Time { + switch type { + case .compileTime: + return baseline.compileTime?.median ?? .zero + case .runtime: + return baseline.runtime.median + } + } var diff: Time? 
{ switch type { case .compileTime: - return latest.compileTime - baseline.compileTime + return latestTime - baselineTime case .runtime: - if Stats.tTest(baseline, latest) { - return latest.median - baseline.median + if Stats.tTest(baseline.runtime, latest.runtime) { + return latestTime - baselineTime } return nil } - + } + + var normalizedDiff: Double { + latestTime.seconds/baselineTime.seconds } var description: String { guard let diff = diff else { return "- \(name) N/A" } - let oldVal: Time - let newVal: Time - switch type { - case .compileTime: - oldVal = baseline.compileTime - newVal = latest.compileTime - case .runtime: - oldVal = baseline.median - newVal = latest.median - } - let percentage = (1000 * diff.seconds / oldVal.seconds).rounded()/10 + let percentage = (1000 * diff.seconds / baselineTime.seconds).rounded()/10 let len = max(40 - name.count, 1) let nameSpacing = String(repeating: " ", count: len) - return "- \(name)\(nameSpacing)\(newVal)\t\(oldVal)\t\(diff)\t\t\(percentage)%" + return "- \(name)\(nameSpacing)\(latestTime)\t\(baselineTime)\t\(diff)\t\t\(percentage)%" } var asCsv: String { guard let diff = diff else { return "\(name),N/A" } - let oldVal: Time - let newVal: Time - switch type { - case .compileTime: - oldVal = baseline.compileTime - newVal = latest.compileTime - case .runtime: - oldVal = baseline.median - newVal = latest.median - } - let percentage = (1000 * diff.seconds / oldVal.seconds).rounded()/10 - return "\"\(name)\",\(newVal.seconds),\(oldVal.seconds),\(diff.seconds),\(percentage)%" + let percentage = (1000 * diff.seconds / baselineTime.seconds).rounded()/10 + return "\"\(name)\",\(latestTime.seconds),\(baselineTime.seconds),\(diff.seconds),\(percentage)%" } } } diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index 7ac76c17c..1e516c1c7 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -29,75 +29,77 @@ struct 
BenchmarkRunner { suite.append(benchmark) } - mutating func measure( + func medianMeasure( + samples: Int, + closure: () -> Void + ) -> Measurement { + // FIXME: use suspendingclock? + var times: [Time] = [] + for _ in 0.. BenchmarkResult { - var runtimes: [Time] = [] // Initial run to make sure the regex has been compiled benchmark.run() // Measure compilataion time for Swift regex - let compileTime: Time + let compileTime: Measurement? + let parseTime: Measurement? if benchmark is SwiftRegexBenchmark { var benchmark = benchmark as! SwiftRegexBenchmark - var compileTimes: [Time] = [] - for _ in 0.. Stats.maxAllowedStdev { + if result.runtime.stdev > Stats.maxAllowedStdev { print("Warning: Standard deviation > \(Time(Stats.maxAllowedStdev)) for \(b.name)") - print("N = \(samples), median: \(result.median), stdev: \(Time(result.stdev))") + print(result.runtime) print("Rerunning \(b.name)") - result = measure(benchmark: b, samples: result.samples*2) - print("N = \(result.samples), median: \(result.median), stdev: \(Time(result.stdev))") - if result.stdev > Stats.maxAllowedStdev { + result = measure(benchmark: b, samples: result.runtime.samples*2) + print(result.runtime) + if result.runtime.stdev > Stats.maxAllowedStdev { fatalError("Benchmark \(b.name) is too variant") } } - if result.compileTime > Time.millisecond { + if result.compileTime?.median ?? .zero > Time.millisecond { print("Warning: Abnormally high compilation time, what happened?") } + + if result.parseTime?.median ?? 
.zero > Time.millisecond { + print("Warning: Abnormally high parse time, what happened?") + } if !quiet { - print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev))) (compile time: \(result.compileTime))") + print("- \(b.name)\n\(result)") } self.results.add(name: b.name, result: result) } @@ -107,8 +109,6 @@ struct BenchmarkRunner { print("Debugging") print("========================") for b in suite { - let result = measure(benchmark: b, samples: samples) - print("- \(b.name) \(result.median) (stdev: \(Time(result.stdev)))") b.debug() print("========================") } diff --git a/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift b/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift index 2f971b4e6..61d7b197f 100644 --- a/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift +++ b/Sources/RegexBenchmark/Suite/CustomCharacterClasses.swift @@ -15,42 +15,49 @@ extension BenchmarkRunner { register(Benchmark( name: "BasicCCC", regex: try! Regex(basic), + pattern: basic, type: .allMatches, target: input)) register(Benchmark( name: "BasicRangeCCC", regex: try! Regex(basicRange), + pattern: basicRange, type: .allMatches, target: input)) register(Benchmark( name: "CaseInsensitiveCCC", regex: try! Regex(caseInsensitive), + pattern: caseInsensitive, type: .allMatches, target: input)) register(Benchmark( name: "InvertedCCC", regex: try! Regex(inverted), + pattern: inverted, type: .allMatches, target: input)) register(Benchmark( name: "SubtractionCCC", regex: try! Regex(subtraction), + pattern: subtraction, type: .allMatches, target: input)) register(Benchmark( name: "IntersectionCCC", regex: try! Regex(intersection), + pattern: intersection, type: .allMatches, target: input)) register(Benchmark( name: "symDiffCCC", regex: try! 
Regex(symmetricDifference), + pattern: symmetricDifference, type: .allMatches, target: input)) } diff --git a/Sources/RegexBenchmark/Utils/Stats.swift b/Sources/RegexBenchmark/Utils/Stats.swift index c5c46eef9..7c97cc6ff 100644 --- a/Sources/RegexBenchmark/Utils/Stats.swift +++ b/Sources/RegexBenchmark/Utils/Stats.swift @@ -6,7 +6,7 @@ extension Stats { // 500µs, maybe this should be a % of the runtime for each benchmark? static let maxAllowedStdev = 500e-6 - static func tTest(_ a: BenchmarkResult, _ b: BenchmarkResult) -> Bool { + static func tTest(_ a: Measurement, _ b: Measurement) -> Bool { // Student's t-test // Since we should generally have similar variances across runs let n1 = Double(a.samples) diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift index 6b4049283..6af973919 100644 --- a/Sources/_StringProcessing/Engine/Consume.swift +++ b/Sources/_StringProcessing/Engine/Consume.swift @@ -30,7 +30,7 @@ extension Engine { subjectBounds: Range, searchBounds: Range ) -> Processor { - return Processor( + Processor( program: program, input: input, subjectBounds: subjectBounds, diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 26741b746..56a1ce5b6 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -138,14 +138,6 @@ extension Regex { @available(SwiftStdlib 5.7, *) @_spi(RegexBenchmark) extension Regex { - public struct QueryResult { - - } - - public func _queryRegex() -> QueryResult { - QueryResult() - } - public enum _RegexInternalAction { case parse(String) case recompile From 1de8128dd16f9e0f2f6bfa6b9c749f6d221d5d29 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug 2022 21:09:23 -0700 Subject: [PATCH 23/33] Call parse directly --- Sources/RegexBenchmark/Benchmark.swift | 11 ++++++++--- Sources/_StringProcessing/Regex/Core.swift | 4 ---- 2 files changed, 8 insertions(+), 7 deletions(-) 
diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index 386129b2b..f9f83199c 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -1,4 +1,5 @@ @_spi(RegexBenchmark) import _StringProcessing +@_implementationOnly import _RegexParser import Foundation protocol RegexBenchmark { @@ -21,10 +22,14 @@ extension SwiftRegexBenchmark { let _ = regex._forceAction(.recompile) } mutating func parse() -> Bool { - if let s = pattern { - let _ = regex._forceAction(.parse(s)) + guard let s = pattern else { + return false + } + + do { + let _ = try _RegexParser.parse(s, .traditional) return true - } else { + } catch { return false } } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 56a1ce5b6..2266d628f 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -139,7 +139,6 @@ extension Regex { @_spi(RegexBenchmark) extension Regex { public enum _RegexInternalAction { - case parse(String) case recompile case addOptions(CompileOptions) } @@ -153,9 +152,6 @@ extension Regex { program.compileOptions.insert(opts) program._loweredProgramStorage = nil return true - case .parse(let pattern): - let _ = try parse(pattern, .traditional) - return true case .recompile: let _ = try Compiler( tree: program.tree, From 205e4d365683fc4b66d34fd2e5dd81a5a73a0837 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 4 Aug 2022 21:12:20 -0700 Subject: [PATCH 24/33] Make comparison generic + Adjust stdev requirement to be a % --- Sources/RegexBenchmark/Benchmark.swift | 4 - Sources/RegexBenchmark/BenchmarkResults.swift | 73 ++++++------------- Sources/RegexBenchmark/BenchmarkRunner.swift | 4 +- Sources/RegexBenchmark/Utils/Stats.swift | 4 +- 4 files changed, 28 insertions(+), 57 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index f9f83199c..cf5c54445 
100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -11,10 +11,6 @@ protocol RegexBenchmark { protocol SwiftRegexBenchmark: RegexBenchmark { var regex: Regex { get set } var pattern: String? { get } - mutating func compile() - mutating func parse() -> Bool - mutating func enableTracing() - mutating func enableMetrics() } extension SwiftRegexBenchmark { diff --git a/Sources/RegexBenchmark/BenchmarkResults.swift b/Sources/RegexBenchmark/BenchmarkResults.swift index cdc60c91e..ae9c5ded2 100644 --- a/Sources/RegexBenchmark/BenchmarkResults.swift +++ b/Sources/RegexBenchmark/BenchmarkResults.swift @@ -161,12 +161,12 @@ struct BenchmarkResult: Codable, CustomStringConvertible { let parseTime: Measurement? var description: String { - var base = "> run time: \(runtime.description)" + var base = " > run time: \(runtime.description)" if let compileTime = compileTime { - base += "\n> compile time: \(compileTime)" + base += "\n > compile time: \(compileTime)" } if let parseTime = parseTime { - base += "\n> parse time: \(parseTime)" + base += "\n > parse time: \(parseTime)" } return base } @@ -176,45 +176,17 @@ extension BenchmarkResult { struct Comparison: Identifiable, CustomStringConvertible { var id = UUID() var name: String - var baseline: BenchmarkResult - var latest: BenchmarkResult - var type: ComparisonType = .runtime + var baseline: Measurement + var latest: Measurement - enum ComparisonType { - case runtime - case compileTime - } - - var latestTime: Time { - switch type { - case .compileTime: - return latest.compileTime?.median ?? .zero - case .runtime: - return latest.runtime.median - } - } - - var baselineTime: Time { - switch type { - case .compileTime: - return baseline.compileTime?.median ?? .zero - case .runtime: - return baseline.runtime.median - } - } - + var latestTime: Time { latest.median } + var baselineTime: Time { baseline.median } var diff: Time? 
{ - switch type { - case .compileTime: + if Stats.tTest(baseline, latest) { return latestTime - baselineTime - case .runtime: - if Stats.tTest(baseline.runtime, latest.runtime) { - return latestTime - baselineTime - } - return nil } + return nil } - var normalizedDiff: Double { latestTime.seconds/baselineTime.seconds } @@ -248,10 +220,11 @@ struct SuiteResult { func compare(with other: SuiteResult) -> [BenchmarkResult.Comparison] { var comparisons: [BenchmarkResult.Comparison] = [] - for item in results { - if let otherVal = other.results[item.key] { + for latest in results { + if let otherVal = other.results[latest.key] { comparisons.append( - .init(name: item.key, baseline: otherVal, latest: item.value)) + .init(name: latest.key, + baseline: otherVal.runtime, latest: latest.value.runtime)) } } return comparisons @@ -260,11 +233,12 @@ struct SuiteResult { /// Compares with the NSRegularExpression benchmarks generated by CrossBenchmark func compareWithNS() -> [BenchmarkResult.Comparison] { var comparisons: [BenchmarkResult.Comparison] = [] - for item in results { - let key = item.key + CrossBenchmark.nsSuffix + for latest in results { + let key = latest.key + CrossBenchmark.nsSuffix if let nsResult = results[key] { comparisons.append( - .init(name: item.key, baseline: nsResult, latest: item.value)) + .init(name: latest.key, + baseline: nsResult.runtime, latest: latest.value.runtime)) } } return comparisons @@ -274,13 +248,14 @@ struct SuiteResult { with other: SuiteResult ) -> [BenchmarkResult.Comparison] { var comparisons: [BenchmarkResult.Comparison] = [] - for item in results { - if let otherVal = other.results[item.key] { + for latest in results { + if let baseline = other.results[latest.key], + let baselineTime = baseline.compileTime, + let latestTime = latest.value.compileTime { comparisons.append( - .init(name: item.key, - baseline: otherVal, - latest: item.value, - type: .compileTime)) + .init(name: latest.key, + baseline: baselineTime, + latest: 
latestTime)) } } return comparisons diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index 1e516c1c7..e8a7b6e9e 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -81,8 +81,8 @@ struct BenchmarkRunner { print("Running") for b in suite { var result = measure(benchmark: b, samples: samples) - if result.runtime.stdev > Stats.maxAllowedStdev { - print("Warning: Standard deviation > \(Time(Stats.maxAllowedStdev)) for \(b.name)") + if result.runtime.stdev > Stats.maxAllowedStdev * result.runtime.median.seconds { + print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)") print(result.runtime) print("Rerunning \(b.name)") result = measure(benchmark: b, samples: result.runtime.samples*2) diff --git a/Sources/RegexBenchmark/Utils/Stats.swift b/Sources/RegexBenchmark/Utils/Stats.swift index 7c97cc6ff..9736f1954 100644 --- a/Sources/RegexBenchmark/Utils/Stats.swift +++ b/Sources/RegexBenchmark/Utils/Stats.swift @@ -3,8 +3,8 @@ import Foundation enum Stats {} extension Stats { - // 500µs, maybe this should be a % of the runtime for each benchmark? 
- static let maxAllowedStdev = 500e-6 + // Maximum allowed standard deviation is 5% of the median runtime + static let maxAllowedStdev = 0.05 static func tTest(_ a: Measurement, _ b: Measurement) -> Bool { // Student's t-test From 82f71ddd1c50fcf37c57198cb104aeb2302338e1 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Wed, 10 Aug 2022 15:35:50 -0700 Subject: [PATCH 25/33] Cleanup --- Sources/RegexBenchmark/Benchmark.swift | 2 +- Sources/RegexBenchmark/BenchmarkRunner.swift | 6 +++--- Sources/RegexBenchmark/Utils/Stats.swift | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index cf5c54445..3035b78aa 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -33,7 +33,7 @@ extension SwiftRegexBenchmark { let _ = regex._forceAction(.addOptions(.enableTracing)) } mutating func enableMetrics() { - let _ = regex._forceAction(.addOptions([.enableMetrics, .disableOptimizations])) + let _ = regex._forceAction(.addOptions([.enableMetrics])) } } diff --git a/Sources/RegexBenchmark/BenchmarkRunner.swift b/Sources/RegexBenchmark/BenchmarkRunner.swift index e8a7b6e9e..1a62858c1 100644 --- a/Sources/RegexBenchmark/BenchmarkRunner.swift +++ b/Sources/RegexBenchmark/BenchmarkRunner.swift @@ -59,7 +59,7 @@ struct BenchmarkRunner { var benchmark = benchmark as! 
SwiftRegexBenchmark compileTime = medianMeasure(samples: samples) { benchmark.compile() } // Can't parse if we don't have an input string (ie a builder regex) - if benchmark.parse() { + if benchmark.pattern != nil { parseTime = medianMeasure(samples: samples) { let _ = benchmark.parse() } } else { parseTime = nil @@ -81,13 +81,13 @@ struct BenchmarkRunner { print("Running") for b in suite { var result = measure(benchmark: b, samples: samples) - if result.runtime.stdev > Stats.maxAllowedStdev * result.runtime.median.seconds { + if result.runtimeIsTooVariant { print("Warning: Standard deviation > \(Stats.maxAllowedStdev*100)% for \(b.name)") print(result.runtime) print("Rerunning \(b.name)") result = measure(benchmark: b, samples: result.runtime.samples*2) print(result.runtime) - if result.runtime.stdev > Stats.maxAllowedStdev { + if result.runtimeIsTooVariant { fatalError("Benchmark \(b.name) is too variant") } } diff --git a/Sources/RegexBenchmark/Utils/Stats.swift b/Sources/RegexBenchmark/Utils/Stats.swift index 9736f1954..0cc9156a4 100644 --- a/Sources/RegexBenchmark/Utils/Stats.swift +++ b/Sources/RegexBenchmark/Utils/Stats.swift @@ -18,3 +18,9 @@ extension Stats { return abs(tVal) > 2 } } + +extension BenchmarkResult { + var runtimeIsTooVariant: Bool { + runtime.stdev > Stats.maxAllowedStdev * runtime.median.seconds + } +} From a99b33314fc22d50f23f5eafe64161bcb7d023f2 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Thu, 11 Aug 2022 11:02:19 -0700 Subject: [PATCH 26/33] Backreferences do not guarantee forward progress --- Sources/_StringProcessing/ByteCodeGen.swift | 2 ++ Tests/RegexTests/MatchTests.swift | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 66fefc49e..44eb75402 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -1019,6 +1019,8 @@ extension DSLTree.Node { case .atom(let atom): switch atom { 
case .changeMatchingOptions, .assertion: return false + // Captures may be nil so backreferences may be zero length matches + case .backreference: return false default: return true } case .trivia, .empty: diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 794e57b16..66e7a3791 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2549,4 +2549,8 @@ extension RegexTests { expectProgram(for: "a{\(maxStorable-1),\(maxStorable*2)}", doesNotContain: [.quantify]) expectProgram(for: "a{\(maxStorable),\(maxStorable*2+1)}", doesNotContain: [.quantify]) } + + func testFuzzerArtifacts() throws { + expectCompletion(regex: #"(b?)\1*"#, in: "a") + } } From 3d0789f7b8d68cef8f245f0e5f913d46d876eb32 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Mon, 15 Aug 2022 12:49:37 -0700 Subject: [PATCH 27/33] Cleanup - make charts prettier - underscore compileroptions - cleanup debug --- Sources/RegexBenchmark/Benchmark.swift | 3 +-- Sources/RegexBenchmark/BenchmarkChart.swift | 17 ++++++++++++++- Sources/RegexBenchmark/Debug.swift | 24 +++++++++++++++------ Sources/_StringProcessing/ByteCodeGen.swift | 4 ++-- Sources/_StringProcessing/Compiler.swift | 14 ++++++------ Sources/_StringProcessing/Regex/Core.swift | 4 ++-- 6 files changed, 45 insertions(+), 21 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index 3035b78aa..b72128073 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -2,10 +2,9 @@ @_implementationOnly import _RegexParser import Foundation -protocol RegexBenchmark { +protocol RegexBenchmark: Debug { var name: String { get } func run() - func debug() } protocol SwiftRegexBenchmark: RegexBenchmark { diff --git a/Sources/RegexBenchmark/BenchmarkChart.swift b/Sources/RegexBenchmark/BenchmarkChart.swift index 2e33eb384..862565c6e 100644 --- a/Sources/RegexBenchmark/BenchmarkChart.swift +++ 
b/Sources/RegexBenchmark/BenchmarkChart.swift @@ -40,8 +40,23 @@ struct BenchmarkChart: View { RuleMark(y: .value("Time", 1.0)) .foregroundStyle(.red) .lineStyle(.init(lineWidth: 1, dash: [2])) + .annotation(position: .top, alignment: .leading) { + Text("Baseline").foregroundStyle(.red) + } - }.frame(idealHeight: 400) + } + .frame(idealWidth: 800, idealHeight: 800) + .chartYScale(domain: 0...2.0) + .chartYAxis { + AxisMarks(values: .stride(by: 0.1)) + } + .chartXAxis { + AxisMarks { value in + AxisGridLine() + AxisTick() + AxisValueLabel(value.as(String.self)!, orientation: .vertical) + } + } } } } diff --git a/Sources/RegexBenchmark/Debug.swift b/Sources/RegexBenchmark/Debug.swift index da6667d1b..1171247e4 100644 --- a/Sources/RegexBenchmark/Debug.swift +++ b/Sources/RegexBenchmark/Debug.swift @@ -1,12 +1,21 @@ import Foundation +protocol Debug { + func debug() +} + +extension Debug { + var maxStringLengthForPrint: Int { 1000 } + var maxMatchCountForPrint: Int { 100 } +} + extension Benchmark { func debug() { switch type { case .whole: let result = target.wholeMatch(of: regex) if let match = result { - if match.0.count > 1000 { + if match.0.count > maxStringLengthForPrint { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -22,7 +31,7 @@ extension Benchmark { } print("- Total matches: \(results.count)") - if results.count > 100 { + if results.count > maxMatchCountForPrint { print("# Too many matches, not printing") let avgLen = results.map({result in String(target[result.range]).count}) .reduce(0.0, {$0 + Double($1)}) / Double(results.count) @@ -32,7 +41,7 @@ extension Benchmark { } for match in results { - if match.0.count > 1000 { + if match.0.count > maxStringLengthForPrint { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -42,7 +51,7 @@ extension Benchmark { case .first: let result = target.firstMatch(of: regex) if let match = result { - if match.0.count > 1000 { + if match.0.count > 
maxStringLengthForPrint { print("- Match: len = \(match.0.count)") } else { print("- Match: \(match.0)") @@ -56,6 +65,7 @@ extension Benchmark { } extension NSBenchmark { + func debug() { switch type { case .allMatches: @@ -66,13 +76,13 @@ extension NSBenchmark { } print("- Total matches: \(results.count)") - if results.count > 100 { + if results.count > maxMatchCountForPrint { print("# Too many matches, not printing") return } for m in results { - if m.range.length > 1000 { + if m.range.length > maxStringLengthForPrint { print("- Match: len = \(m.range.length)") } else { print("- Match: \(target[Range(m.range, in: target)!])") @@ -81,7 +91,7 @@ extension NSBenchmark { case .first: let result = regex.firstMatch(in: target, range: range) if let match = result { - if match.range.length > 1000 { + if match.range.length > maxStringLengthForPrint { print("- Match: len = \(match.range.length)") } else { print("- Match: \(target[Range(match.range, in: target)!])") diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 31102de1f..a0f3b2a44 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -22,12 +22,12 @@ extension Compiler { /// This is used to determine whether to apply initial options. var hasEmittedFirstMatchableAtom = false - private let compileOptions: CompileOptions + private let compileOptions: _CompileOptions fileprivate var optimizationsEnabled: Bool { !compileOptions.contains(.disableOptimizations) } init( options: MatchingOptions, - compileOptions: CompileOptions, + compileOptions: _CompileOptions, captureList: CaptureList ) { self.options = options diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index f506c76d7..34b0962d8 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -16,7 +16,7 @@ class Compiler { // TODO: Or are these stored on the tree? 
var options = MatchingOptions() - private var compileOptions: CompileOptions = .default + private var compileOptions: _CompileOptions = .default init(ast: AST) { self.tree = ast.dslTree @@ -26,7 +26,7 @@ class Compiler { self.tree = tree } - init(tree: DSLTree, compileOptions: CompileOptions) { + init(tree: DSLTree, compileOptions: _CompileOptions) { self.tree = tree self.compileOptions = compileOptions } @@ -108,14 +108,14 @@ func _compileRegex( } @_spi(RegexBenchmark) -public struct CompileOptions: OptionSet { +public struct _CompileOptions: OptionSet { public let rawValue: Int public init(rawValue: Int) { self.rawValue = rawValue } - public static let disableOptimizations = CompileOptions(rawValue: 1 << 0) - public static let enableTracing = CompileOptions(rawValue: 1 << 1) - public static let enableMetrics = CompileOptions(rawValue: 1 << 2) - public static let `default`: CompileOptions = [] + public static let disableOptimizations = _CompileOptions(rawValue: 1 << 0) + public static let enableTracing = _CompileOptions(rawValue: 1 << 1) + public static let enableMetrics = _CompileOptions(rawValue: 1 << 2) + public static let `default`: _CompileOptions = [] } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 2266d628f..28e64a6e2 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -82,7 +82,7 @@ extension Regex { let tree: DSLTree /// OptionSet of compiler options for testing purposes - fileprivate var compileOptions: CompileOptions = .default + fileprivate var compileOptions: _CompileOptions = .default private final class ProgramBox { let value: MEProgram @@ -140,7 +140,7 @@ extension Regex { extension Regex { public enum _RegexInternalAction { case recompile - case addOptions(CompileOptions) + case addOptions(_CompileOptions) } /// Internal API for RegexBenchmark From 9c8e5678461e2a2e4b361c986f0fb76001ec8448 Mon Sep 17 00:00:00 2001 From: Lily Lin 
Date: Mon, 15 Aug 2022 13:27:28 -0700 Subject: [PATCH 28/33] Add conditional compilation for metric measuring Ideally this would be done in Package.swift instead of having to add a flag to swift build but alas it appears that it is not possible - https://forums.swift.org/t/swiftpm-and-conditional-compilation/36874 - https://forums.swift.org/t/compilation-conditions-and-swift-packages/34627/4 --- Sources/RegexBenchmark/CLI.swift | 16 ++++++++++++++-- Sources/_StringProcessing/Engine/Processor.swift | 4 ++++ Sources/_StringProcessing/Executor.swift | 4 ++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index db345264c..b93fc712a 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -38,10 +38,22 @@ struct Runner: ParsableCommand { @Flag(help: "Exclude running NSRegex benchmarks") var excludeNs = false - @Flag(help: "Enable tracing of the engine (warning: lots of output)") + @Flag(help: """ +Enable tracing of the engine (warning: lots of output). Prints out processor state each cycle + +Note: swift-experimental-string-processing must be built with processor measurements enabled + +swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED +""") var enableTracing: Bool = false - @Flag(help: "Enable engine metrics (warning: lots of output)") + @Flag(help: """ +Enable engine metrics (warning: lots of output). 
Prints out cycle count, instruction counts, number of backtracks + +Note: swift-experimental-string-processing must be built with processor measurements enabled + +swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED +""") var enableMetrics: Bool = false @Flag(help: "Include firstMatch benchmarks in CrossBenchmark (off by default") diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index bef2d3495..66dcb9dbb 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -400,6 +400,8 @@ extension Processor { mutating func cycle() { _checkInvariants() assert(state == .inProgress) + +#if PROCESSOR_MEASUREMENTS_ENABLED if cycleCount == 0 { trace() measureMetrics() @@ -410,6 +412,8 @@ extension Processor { measureMetrics() _checkInvariants() } +#endif + let (opcode, payload) = fetch().destructure switch opcode { case .invalid: diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 85fb72acf..253858d1f 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -30,7 +30,9 @@ struct Executor { input: input, subjectBounds: subjectBounds, searchBounds: searchBounds) +#if PROCESSOR_MEASUREMENTS_ENABLED defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } } +#endif var low = searchBounds.lowerBound let high = searchBounds.upperBound while true { @@ -57,7 +59,9 @@ struct Executor { ) throws -> Regex.Match? 
{ var cpu = engine.makeProcessor( input: input, bounds: subjectBounds, matchMode: mode) +#if PROCESSOR_MEASUREMENTS_ENABLED defer { if cpu.shouldMeasureMetrics { cpu.printMetrics() } } +#endif return try _match(input, from: subjectBounds.lowerBound, using: &cpu) } From dbcb7fdf59fffd7d441331c489681bd0d19f11fd Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Mon, 15 Aug 2022 13:34:35 -0700 Subject: [PATCH 29/33] Fix spacing --- Sources/RegexBenchmark/CLI.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift index b93fc712a..77ebff47b 100644 --- a/Sources/RegexBenchmark/CLI.swift +++ b/Sources/RegexBenchmark/CLI.swift @@ -42,8 +42,8 @@ struct Runner: ParsableCommand { Enable tracing of the engine (warning: lots of output). Prints out processor state each cycle Note: swift-experimental-string-processing must be built with processor measurements enabled - swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED + """) var enableTracing: Bool = false @@ -51,12 +51,12 @@ swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED Enable engine metrics (warning: lots of output). 
Prints out cycle count, instruction counts, number of backtracks Note: swift-experimental-string-processing must be built with processor measurements enabled - swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED + """) var enableMetrics: Bool = false - @Flag(help: "Include firstMatch benchmarks in CrossBenchmark (off by default") + @Flag(help: "Include firstMatch benchmarks in CrossBenchmark (off by default)") var includeFirst: Bool = false mutating func run() throws { From c2b672f87cb9b5a944a09830458d63e3eef18bb0 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Mon, 15 Aug 2022 13:38:34 -0700 Subject: [PATCH 30/33] Fix ns suffix mismatch --- Sources/RegexBenchmark/Benchmark.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift index b72128073..2622639fb 100644 --- a/Sources/RegexBenchmark/Benchmark.swift +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -121,7 +121,7 @@ struct InputListNSBenchmark: RegexBenchmark { /// A benchmark meant to be ran across multiple engines struct CrossBenchmark { /// Suffix added onto NSRegularExpression benchmarks - static let nsSuffix = "_NS" + static var nsSuffix: String { "_NS" } /// The base name of the benchmark var baseName: String @@ -219,7 +219,7 @@ struct CrossInputListBenchmark { targets: inputs )) runner.register(InputListNSBenchmark( - name: baseName + "NS", + name: baseName + CrossBenchmark.nsSuffix, regex: regex, targets: inputs )) From fc368285681b6c7a07fe69d8ce0b73b82a771da9 Mon Sep 17 00:00:00 2001 From: Lily Lin Date: Mon, 15 Aug 2022 15:25:03 -0700 Subject: [PATCH 31/33] Don't assume quoted literals are non-empty during bytecodegen --- Sources/_StringProcessing/ByteCodeGen.swift | 2 +- Tests/RegexTests/MatchTests.swift | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 5111979b1..e0a6c7465 
100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -115,7 +115,7 @@ fileprivate extension Compiler.ByteCodeGen { } // Fast path for eliding boundary checks for an all ascii quoted literal - if optimizationsEnabled && s.allSatisfy(\.isASCII) { + if optimizationsEnabled && s.allSatisfy(\.isASCII) && !s.isEmpty { let lastIdx = s.unicodeScalars.indices.last! for idx in s.unicodeScalars.indices { let boundaryCheck = idx == lastIdx diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 5025c8c47..2fa9b9381 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2458,6 +2458,9 @@ extension RegexTests { // case insensitive tests firstMatchTest(#"(?i)abc\u{301}d"#, input: "AbC\u{301}d", match: "AbC\u{301}d", semanticLevel: .unicodeScalar) + + // check that we don't crash on empty strings + firstMatchTest(#"\Q\E"#, input: "", match: "") } func testCase() { From d1cfc4eb8a0b62d417489e453167fd2e72aef260 Mon Sep 17 00:00:00 2001 From: Hamish Knight Date: Tue, 16 Aug 2022 18:41:25 +0100 Subject: [PATCH 32/33] Deprecate `Regex.init(quoting:)` This was renamed to `init(verbatim:)`, but we never removed the old overload. Deprecate it, and suggest using `init(verbatim:)` instead. 
rdar://98737122 --- Sources/_StringProcessing/Regex/Core.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 28e64a6e2..f58f63de7 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -63,6 +63,7 @@ public struct Regex: RegexComponent { @available(SwiftStdlib 5.7, *) extension Regex { + @available(*, deprecated, renamed: "init(verbatim:)") public init(quoting string: String) { self.init(node: .quotedLiteral(string)) } From a220befe8cd2df0a70c9157d0990bb32df408cd2 Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Wed, 24 Aug 2022 09:44:38 -0700 Subject: [PATCH 33/33] Update cmake file list --- Sources/CMakeLists.txt | 1 - Sources/RegexBuilder/CMakeLists.txt | 3 ++- Sources/_RegexParser/CMakeLists.txt | 4 +++- Sources/_StringProcessing/CMakeLists.txt | 18 ++++++++++++------ 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Sources/CMakeLists.txt b/Sources/CMakeLists.txt index 7b4e5e3ed..0cd79e24c 100644 --- a/Sources/CMakeLists.txt +++ b/Sources/CMakeLists.txt @@ -2,5 +2,4 @@ add_subdirectory(RegexBuilder) add_subdirectory(_RegexParser) add_subdirectory(_StringProcessing) -add_subdirectory(Prototypes) add_subdirectory(VariadicsGenerator) diff --git a/Sources/RegexBuilder/CMakeLists.txt b/Sources/RegexBuilder/CMakeLists.txt index c2a0d9738..77a8340ce 100644 --- a/Sources/RegexBuilder/CMakeLists.txt +++ b/Sources/RegexBuilder/CMakeLists.txt @@ -1,9 +1,10 @@ add_library(RegexBuilder + Algorithms.swift Anchor.swift Builder.swift + CharacterClass.swift DSL.swift - Match.swift Variadics.swift) target_compile_options(RegexBuilder PRIVATE -enable-library-evolution diff --git a/Sources/_RegexParser/CMakeLists.txt b/Sources/_RegexParser/CMakeLists.txt index 48856b453..1df1b767c 100644 --- a/Sources/_RegexParser/CMakeLists.txt +++ b/Sources/_RegexParser/CMakeLists.txt @@ -9,13 +9,15 @@ add_library(_RegexParser 
AST/Group.swift AST/MatchingOptions.swift AST/Quantification.swift + Parse/CaptureList.swift Parse/CaptureStructure.swift Parse/CharacterPropertyClassification.swift + Parse/CompilerInterface.swift Parse/DelimiterLexing.swift Parse/Diagnostics.swift Parse/LexicalAnalysis.swift - Parse/Mocking.swift Parse/Parse.swift + Parse/Sema.swift Parse/Source.swift Parse/SourceLocation.swift Parse/SyntaxOptions.swift diff --git a/Sources/_StringProcessing/CMakeLists.txt b/Sources/_StringProcessing/CMakeLists.txt index 963b6074c..ba3e2e03c 100644 --- a/Sources/_StringProcessing/CMakeLists.txt +++ b/Sources/_StringProcessing/CMakeLists.txt @@ -31,42 +31,48 @@ add_library(_StringProcessing Engine/InstPayload.swift Engine/Instruction.swift Engine/MEBuilder.swift + Engine/MEBuiltins.swift Engine/MECapture.swift Engine/MEProgram.swift + Engine/MEQuantify.swift + Engine/Metrics.swift Engine/Processor.swift - Engine/Register.swift + Engine/Registers.swift Engine/Structuralize.swift Engine/Tracing.swift Regex/AnyRegexOutput.swift Regex/ASTConversion.swift Regex/Core.swift - Regex/DSLConsumers.swift + Regex/CustomComponents.swift Regex/DSLTree.swift Regex/Match.swift Regex/Options.swift Unicode/CaseConversion.swift Unicode/CharacterProps.swift Unicode/Comparison.swift - Unicode/Data.swift Unicode/Decoding.swift Unicode/Encodings.swift Unicode/Formatting.swift - Unicode/Graphemes.swift Unicode/NecessaryEvils.swift - Unicode/Normalization.swift + Unicode/NFC.swift Unicode/NumberParsing.swift Unicode/ScalarProps.swift Unicode/Transcoding.swift Unicode/UCD.swift Unicode/Validation.swift + Unicode/WordBreaking.swift + Utility/AsciiBitset.swift Utility/ASTBuilder.swift + Utility/Misc.swift Utility/Protocols.swift + Utility/RegexFactory.swift Utility/Traced.swift Utility/TypedIndex.swift Utility/TypedInt.swift + Utility/TypeVerification.swift + _CharacterClassModel.swift ByteCodeGen.swift Capture.swift - CharacterClass.swift Compiler.swift ConsumerInterface.swift Executor.swift