diff --git a/Package.swift b/Package.swift index effbdb54e..abc895813 100644 --- a/Package.swift +++ b/Package.swift @@ -41,7 +41,10 @@ let package = Package( targets: ["_RegexParser"]), .executable( name: "VariadicsGenerator", - targets: ["VariadicsGenerator"]) + targets: ["VariadicsGenerator"]), + .executable( + name: "RegexBenchmark", + targets: ["RegexBenchmark"]) ], dependencies: [ .package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"), @@ -105,6 +108,17 @@ let package = Package( "_RegexParser", "_StringProcessing" ]), + .executableTarget( + name: "RegexBenchmark", + dependencies: [ + .product(name: "ArgumentParser", package: "swift-argument-parser"), + "_RegexParser", + "_StringProcessing", + "RegexBuilder" + ], + swiftSettings: [ + .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]), + ]), // MARK: Exercises .target( diff --git a/Sources/RegexBenchmark/Benchmark.swift b/Sources/RegexBenchmark/Benchmark.swift new file mode 100644 index 000000000..93a825a93 --- /dev/null +++ b/Sources/RegexBenchmark/Benchmark.swift @@ -0,0 +1,116 @@ +import _StringProcessing +import Foundation + +public protocol RegexBenchmark { + var name: String { get } + func run() +} + +public struct Benchmark: RegexBenchmark { + public let name: String + let regex: Regex + let ty: MatchType + let target: String + + public enum MatchType { + case whole + case first + case allMatches + } + + public func run() { + switch ty { + case .whole: blackHole(target.wholeMatch(of: regex)) + case .allMatches: blackHole(target.matches(of: regex)) + case .first: blackHole(target.firstMatch(of: regex)) + } + } +} + +public struct NSBenchmark: RegexBenchmark { + public let name: String + let regex: NSRegularExpression + let ty: NSMatchType + let target: String + + var range: NSRange { + NSRange(target.startIndex.. Time { + var times: [Time] = [] + + // initial run to make sure the regex has been compiled + benchmark.run() + + // fixme: use suspendingclock? 
+ for _ in 0..(_ x: T) { +} diff --git a/Sources/RegexBenchmark/CLI.swift b/Sources/RegexBenchmark/CLI.swift new file mode 100644 index 000000000..004d1f681 --- /dev/null +++ b/Sources/RegexBenchmark/CLI.swift @@ -0,0 +1,33 @@ +import ArgumentParser + +@main +struct Runner: ParsableCommand { + @Argument(help: "Names of benchmarks to run") + var specificBenchmarks: [String] = [] + + @Option(help: "Run only once for profiling purposes") + var profile = false + + @Option(help: "How many samples to collect for each benchmark") + var samples = 20 + + func makeRunner() -> BenchmarkRunner { + var benchmark = BenchmarkRunner("RegexBench", samples) + benchmark.addReluctantQuant() + benchmark.addBacktracking() + benchmark.addCSS() + benchmark.addFirstMatch() + return benchmark + } + mutating func run() throws { + var runner = makeRunner() + if !self.specificBenchmarks.isEmpty { + runner.suite = runner.suite.filter { b in specificBenchmarks.contains(b.name) } + } + if profile { + runner.profile() + } else { + runner.run() + } + } +} diff --git a/Sources/RegexBenchmark/Inputs/CSS.swift b/Sources/RegexBenchmark/Inputs/CSS.swift new file mode 100644 index 000000000..ad8cf89b2 --- /dev/null +++ b/Sources/RegexBenchmark/Inputs/CSS.swift @@ -0,0 +1,1652 @@ +enum Inputs {} + +extension Inputs { + static let swiftOrgCSS = """ +html { + font-size: 100%; + -ms-text-size-adjust: 100%; + -webkit-text-size-adjust:100% +} + +body { + margin: 0; + padding: 0; + background-color: var(--color-fill); + color:var(--color-text) +} + +ul, ol, li, dl, dt, dd, h1, h2, h3, h4, h5, h6, hgroup, p, blockquote, figure, form, fieldset, input, legend, pre, abbr { + margin: 0; + padding:0 +} + +pre, code, address, caption, th, figcaption { + font-size: 1em; + font-weight: normal; + font-style:normal +} + +fieldset, iframe, img { + width: 100%; + border:none +} + +caption, th { + text-align:left +} + +table { + border-collapse: collapse; + border-spacing:0 +} + +article, aside, footer, header, nav, main, 
section, summary, details, hgroup, figure, figcaption { + display:block +} + +audio, canvas, video, progress { + display: inline-block; + vertical-align:baseline +} + +button { + font: inherit; + vertical-align:middle +} + +nav a:link, nav a:visited, nav a:hover, nav a:active { + text-decoration:none +} + +:root { + --border-radius: 4px; + --content-margin-bottom: 1em +} + +body { + color-scheme: light dark; + --logo-reference: url('/assets/images/swift.svg'); + --menu-icon: url('/assets/images/icon-menu.svg'); + --menu-icon-close: url('/assets/images/icon-close.svg'); + --color-nav-background: var(--color-fill-secondary); + --color-nav-rule: rgb(230, 230, 230); + --color-active-menu-group: #2a2a2a; + --color-fill: #fff; + --color-fill-secondary: #f7f7f7; + --color-fill-tertiary: #f0f0f0; + --color-fill-quaternary: #282828; + --color-fill-blue: blue; + --color-fill-gray: #ccc; + --color-fill-gray-secondary: #f5f5f5; + --color-fill-green-secondary: #f0fff0; + --color-fill-orange-secondary: #fffaf6; + --color-fill-red-secondary: #fff0f5; + --color-figure-blue: #36f; + --color-figure-gray: #000; + --color-figure-gray-secondary: #666; + --color-figure-gray-secondary-alt: #666; + --color-figure-gray-tertiary: #666; + --color-figure-green: green; + --color-figure-light-gray: #666; + --color-figure-orange: #c30; + --color-figure-red: red; + --color-tutorials-teal: #000; + --color-article-background: var(--color-fill-tertiary); + --color-article-body-background: var(--color-fill); + --color-aside-deprecated: var(--color-figure-gray); + --color-aside-deprecated-background: var(--color-fill-orange-secondary); + --color-aside-deprecated-border: var(--color-figure-orange); + --color-aside-experiment: var(--color-figure-gray); + --color-aside-experiment-background: var(--color-fill-gray-secondary); + --color-aside-experiment-border: var(--color-figure-light-gray); + --color-aside-important: var(--color-figure-gray); + --color-aside-important-background: 
var(--color-fill-gray-secondary); + --color-aside-important-border: var(--color-figure-light-gray); + --color-aside-note: var(--color-figure-gray); + --color-aside-note-background: var(--color-fill-gray-secondary); + --color-aside-note-border: var(--color-figure-light-gray); + --color-aside-tip: var(--color-figure-gray); + --color-aside-tip-background: var(--color-fill-gray-secondary); + --color-aside-tip-border: var(--color-figure-light-gray); + --color-aside-warning: var(--color-figure-gray); + --color-aside-warning-background: var(--color-fill-red-secondary); + --color-aside-warning-border: var(--color-figure-red); + --color-badge-default: var(--color-figure-light-gray); + --color-badge-beta: var(--color-figure-gray-tertiary); + --color-badge-deprecated: var(--color-figure-orange); + --color-badge-dark-default: #b0b0b0; + --color-badge-dark-beta: #b0b0b0; + --color-badge-dark-deprecated: #f60; + --color-button-background: var(--color-fill-blue); + --color-button-background-active: #36f; + --color-button-background-hover: var(--color-figure-blue); + --color-button-text: #fff; + --color-call-to-action-background: var(--color-fill-secondary); + --color-changes-added: var(--color-figure-light-gray); + --color-changes-added-hover: var(--color-figure-light-gray); + --color-changes-deprecated: var(--color-figure-light-gray); + --color-changes-deprecated-hover: var(--color-figure-light-gray); + --color-changes-modified: var(--color-figure-light-gray); + --color-changes-modified-hover: var(--color-figure-light-gray); + --color-changes-modified-previous-background: var(--color-fill-gray-secondary); + --color-code-background: var(--color-fill-secondary); + --color-code-collapsible-background: var(--color-fill-tertiary); + --color-code-collapsible-text: var(--color-figure-gray-secondary-alt); + --color-code-line-highlight: rgba(51, 102, 255, 0.08); + --color-code-line-highlight-border: var(--color-figure-blue); + --color-code-plain: var(--color-figure-gray); + 
--color-content-table-content-color: var(--color-fill-secondary); + --color-dropdown-background: rgba(255, 255, 255, 0.8); + --color-dropdown-border: #ccc; + --color-dropdown-option-text: #666; + --color-dropdown-text: #000; + --color-dropdown-dark-background: rgba(255, 255, 255, 0.1); + --color-dropdown-dark-border: rgba(240, 240, 240, 0.2); + --color-dropdown-dark-option-text: #ccc; + --color-dropdown-dark-text: #fff; + --color-eyebrow: var(--color-figure-gray-secondary); + --color-focus-border-color: var(--color-fill-blue); + --color-focus-color: rgba(0, 125, 250, 0.6); + --color-form-error: var(--color-figure-red); + --color-form-error-background: var(--color-fill-red-secondary); + --color-form-valid: var(--color-figure-green); + --color-form-valid-background: var(--color-fill-green-secondary); + --color-generic-modal-background: var(--color-fill); + --color-grid: var(--color-fill-gray); + --color-header-text: var(--color-figure-gray); + --color-hero-eyebrow: #ccc; + --color-link: var(--color-figure-blue); + --color-loading-placeholder-background: var(--color-fill); + --color-nav-color: #666; + --color-nav-current-link: rgba(0, 0, 0, 0.6); + --color-nav-expanded: #fff; + --color-nav-hierarchy-collapse-background: #f0f0f0; + --color-nav-hierarchy-collapse-borders: #ccc; + --color-nav-hierarchy-item-borders: #ccc; + --color-nav-keyline: rgba(0, 0, 0, 0.2); + --color-nav-link-color: #000; + --color-nav-link-color-hover: #36f; + --color-nav-outlines: #ccc; + --color-nav-solid-background: #fff; + --color-nav-sticking-expanded-keyline: rgba(0, 0, 0, 0.1); + --color-nav-stuck: rgba(255, 255, 255, 0.9); + --color-nav-uiblur-expanded: rgba(255, 255, 255, 0.9); + --color-nav-uiblur-stuck: rgba(255, 255, 255, 0.7); + --color-nav-root-subhead: var(--color-tutorials-teal); + --color-nav-dark-border-top-color: rgba(255, 255, 255, 0.4); + --color-nav-dark-color: #b0b0b0; + --color-nav-dark-current-link: rgba(255, 255, 255, 0.6); + --color-nav-dark-expanded: #2a2a2a; + 
--color-nav-dark-hierarchy-collapse-background: #424242; + --color-nav-dark-hierarchy-collapse-borders: #666; + --color-nav-dark-hierarchy-item-borders: #424242; + --color-nav-dark-keyline: rgba(66, 66, 66, 0.95); + --color-nav-dark-link-color: #fff; + --color-nav-dark-link-color-hover: #09f; + --color-nav-dark-outlines: #575757; + --color-nav-dark-rule: #575757; + --color-nav-dark-solid-background: #000; + --color-nav-dark-sticking-expanded-keyline: rgba(66, 66, 66, 0.7); + --color-nav-dark-stuck: rgba(42, 42, 42, 0.9); + --color-nav-dark-uiblur-expanded: rgba(42, 42, 42, 0.9); + --color-nav-dark-uiblur-stuck: rgba(42, 42, 42, 0.7); + --color-nav-dark-root-subhead: #fff; + --color-runtime-preview-background: var(--color-fill-tertiary); + --color-runtime-preview-disabled-text: rgba(102, 102, 102, 0.6); + --color-runtime-preview-text: var(--color-figure-gray-secondary); + --color-secondary-label: var(--color-figure-gray-secondary); + --color-step-background: var(--color-fill-secondary); + --color-step-caption: var(--color-figure-gray-secondary); + --color-step-focused: var(--color-figure-light-gray); + --color-step-text: var(--color-figure-gray-secondary); + --color-svg-icon: #666; + --color-syntax-attributes: rgb(148, 113, 0); + --color-syntax-characters: rgb(39, 42, 216); + --color-syntax-comments: rgb(112, 127, 140); + --color-syntax-documentation-markup: rgb(80, 99, 117); + --color-syntax-documentation-markup-keywords: rgb(80, 99, 117); + --color-syntax-heading: rgb(186, 45, 162); + --color-syntax-keywords: rgb(173, 61, 164); + --color-syntax-marks: rgb(0, 0, 0); + --color-syntax-numbers: rgb(39, 42, 216); + --color-syntax-other-class-names: rgb(112, 61, 170); + --color-syntax-other-constants: rgb(75, 33, 176); + --color-syntax-other-declarations: rgb(4, 124, 176); + --color-syntax-other-function-and-method-names: rgb(75, 33, 176); + --color-syntax-other-instance-variables-and-globals: rgb(112, 61, 170); + --color-syntax-other-preprocessor-macros: rgb(120, 73, 
42); + --color-syntax-other-type-names: rgb(112, 61, 170); + --color-syntax-param-internal-name: rgb(64, 64, 64); + --color-syntax-plain-text: rgb(0, 0, 0); + --color-syntax-preprocessor-statements: rgb(120, 73, 42); + --color-syntax-project-class-names: rgb(62, 128, 135); + --color-syntax-project-constants: rgb(45, 100, 105); + --color-syntax-project-function-and-method-names: rgb(45, 100, 105); + --color-syntax-project-instance-variables-and-globals: rgb(62, 128, 135); + --color-syntax-project-preprocessor-macros: rgb(120, 73, 42); + --color-syntax-project-type-names: rgb(62, 128, 135); + --color-syntax-strings: rgb(209, 47, 27); + --color-syntax-type-declarations: rgb(3, 99, 140); + --color-syntax-urls: rgb(19, 55, 255); + --color-tabnav-item-border-color: var(--color-fill-gray); + --color-text: var(--color-figure-gray); + --color-text-background: var(--color-fill); + --color-tutorial-assessments-background: var(--color-fill-secondary); + --color-tutorial-background: var(--color-fill); + --color-tutorial-navbar-dropdown-background: var(--color-fill); + --color-tutorial-navbar-dropdown-border: var(--color-fill-gray); + --color-tutorial-quiz-border-active: var(--color-figure-blue); + --color-tutorials-overview-background: #161616; + --color-tutorials-overview-content: #fff; + --color-tutorials-overview-content-alt: #fff; + --color-tutorials-overview-eyebrow: #ccc; + --color-tutorials-overview-icon: #b0b0b0; + --color-tutorials-overview-link: #09f; + --color-tutorials-overview-navigation-link: #ccc; + --color-tutorials-overview-navigation-link-active: #fff; + --color-tutorials-overview-navigation-link-hover: #fff; + --color-tutorial-hero-text: #fff; + --color-tutorial-hero-background: #000 +} + +body[data-color-scheme="light"] { + color-scheme: light +} + +body[data-color-scheme="dark"] { + color-scheme:dark +} + +@media screen { + body[data-color-scheme="dark"] { + --logo-reference: url('/assets/images/swift~dark.svg'); + --menu-icon: 
url('/assets/images/icon-menu~dark.svg'); + --menu-icon-close: url('/assets/images/icon-close~dark.svg'); + --color-nav-background: var(--color-fill-tertiary); + --color-nav-rule: #424242; + --color-active-menu-group: #f0f0f0; + --color-fill: #000; + --color-fill-secondary: #161616; + --color-fill-tertiary: #2a2a2a; + --color-fill-blue: #06f; + --color-fill-gray: #575757; + --color-fill-gray-secondary: #222; + --color-fill-green-secondary: #030; + --color-fill-orange-secondary: #472400; + --color-fill-red-secondary: #300; + --color-figure-blue: #09f; + --color-figure-gray: #fff; + --color-figure-gray-secondary: #ccc; + --color-figure-gray-secondary-alt: #b0b0b0; + --color-figure-gray-tertiary: #b0b0b0; + --color-figure-green: #090; + --color-figure-light-gray: #b0b0b0; + --color-figure-orange: #f60; + --color-figure-red: #f33; + --color-tutorials-teal: #fff; + --color-article-body-background: rgb(17, 17, 17); + --color-button-background-active: #06f; + --color-code-line-highlight: rgba(0, 153, 255, 0.08); + --color-dropdown-background: var(--color-dropdown-dark-background); + --color-dropdown-border: var(--color-dropdown-dark-border); + --color-dropdown-option-text: var(--color-dropdown-dark-option-text); + --color-dropdown-text: var(--color-dropdown-dark-text); + --color-nav-color: var(--color-nav-dark-color); + --color-nav-current-link: var(--color-nav-dark-current-link); + --color-nav-expanded: var(--color-nav-dark-expanded); + --color-nav-hierarchy-collapse-background: var(--color-nav-dark-hierarchy-collapse-background); + --color-nav-hierarchy-collapse-borders: var(--color-nav-dark-hierarchy-collapse-borders); + --color-nav-hierarchy-item-borders: var(--color-nav-dark-hierarchy-item-borders); + --color-nav-keyline: var(--color-nav-dark-keyline); + --color-nav-link-color: var(--color-nav-dark-link-color); + --color-nav-link-color-hover: var(--color-nav-dark-link-color-hover); + --color-nav-outlines: var(--color-nav-dark-outlines); + 
--color-nav-solid-background: var(--color-nav-dark-solid-background); + --color-nav-sticking-expanded-keyline: var(--color-nav-dark-sticking-expanded-keyline); + --color-nav-stuck: var(--color-nav-dark-stuck); + --color-nav-uiblur-expanded: var(--color-nav-dark-uiblur-expanded); + --color-nav-uiblur-stuck: var(--color-nav-dark-uiblur-stuck); + --color-runtime-preview-disabled-text: rgba(204, 204, 204, 0.6); + --color-syntax-attributes: rgb(204, 151, 104); + --color-syntax-characters: rgb(217, 201, 124); + --color-syntax-comments: rgb(127, 140, 152); + --color-syntax-documentation-markup: rgb(127, 140, 152); + --color-syntax-documentation-markup-keywords: rgb(163, 177, 191); + --color-syntax-keywords: rgb(255, 122, 178); + --color-syntax-marks: rgb(255, 255, 255); + --color-syntax-numbers: rgb(217, 201, 124); + --color-syntax-other-class-names: rgb(218, 186, 255); + --color-syntax-other-constants: rgb(167, 235, 221); + --color-syntax-other-declarations: rgb(78, 176, 204); + --color-syntax-other-function-and-method-names: rgb(178, 129, 235); + --color-syntax-other-instance-variables-and-globals: rgb(178, 129, 235); + --color-syntax-other-preprocessor-macros: rgb(255, 161, 79); + --color-syntax-other-type-names: rgb(218, 186, 255); + --color-syntax-param-internal-name: rgb(191, 191, 191); + --color-syntax-plain-text: rgb(255, 255, 255); + --color-syntax-preprocessor-statements: rgb(255, 161, 79); + --color-syntax-project-class-names: rgb(172, 242, 228); + --color-syntax-project-constants: rgb(120, 194, 179); + --color-syntax-project-function-and-method-names: rgb(120, 194, 179); + --color-syntax-project-instance-variables-and-globals: rgb(120, 194, 179); + --color-syntax-project-preprocessor-macros: rgb(255, 161, 79); + --color-syntax-project-type-names: rgb(172, 242, 228); + --color-syntax-strings: rgb(255, 129, 112); + --color-syntax-type-declarations: rgb(107, 223, 255); + --color-syntax-urls: rgb(102, 153, 255); + --color-tutorial-background: 
var(--color-fill-tertiary) + } +} + +.highlight { + background:var(--color-code-background) +} + +.highlight .c, .highlight .cm, .highlight .cp, .highlight .c1, .highlight .cs { + color:var(--color-syntax-comments) +} + +.highlight .k, .highlight .kc, .highlight .kd, .highlight .kp, .highlight .kr, .highlight .kt .nb { + color:var(--color-syntax-keywords) +} + +.highlight .nv, .highlight .nf { + color:color(--color-syntax-project-constants) +} + +.highlight .s, .highlight .sb, .highlight .sc, .highlight .sd, .highlight .s2, .highlight .se, .highlight .sh, .highlight .si, .highlight .s1, .highlight .sx { + color:var(--color-syntax-strings) +} + +.highlight .na { + color:var(--color-syntax-attributes) +} + +.highlight .nc, .highlight .ni, .highlight .no, .highlight .vc, .highlight .vg, .highlight .vi { + color:var(--color-syntax-other-type-names) +} + +.highlight .err, .highlight .gr, .highlight .gt, .highlight .ne { + color:var(--color-syntax-strings) +} + +.highlight .m, .highlight .mf, .highlight .mh, .highlight .mi, .highlight .il, .highlight .mo { + color:var(--color-syntax-numbers) +} + +.highlight .o, .highlight .ow, .highlight .gs { + font-weight:bold +} + +.highlight .ge { + font-style:italic +} + +.highlight .nt { + color:var(--color-syntax-characters) +} + +.highlight .gd, .highlight .gd .x { + color: var(--color-syntax-plain-text); + background-color:var(--color-fill-red-secondary) +} + +.highlight .gi, .highlight .gi .x { + color: var(--color-syntax-plain-text); + background-color:color(--color-fill-green-secondary) +} + +.highlight .gh, .highlight .bp, .highlight .go, .highlight .gp, .highlight .gu, .highlight .w { + color:var(--color-syntax-comments) +} + +.highlight .nn { + color:var(--color-syntax-other-declarations) +} + +.highlight .sr { + color:var(--color-figure-green) +} + +.highlight .ss { + color:var(--color-syntax-heading) +} +.language-console { + color:var(--color-syntax-plain-text) +} + +*, * :before, * :after { + -moz-box-sizing: 
border-box; + -webkit-box-sizing: border-box; + box-sizing:border-box +} + +html, body { + height:100% +} + +body { + font-family: -apple-system, BlinkMacSystemFont, "SF Hello", "Helvetica Neue", Helvetica, Arial, Verdana, sans-serif; + font-size: 18px; + line-height: 1.5; + background-color: var(--color-fill); + color: var(--color-text); + font-weight:300 +} + +body pre, body code { + font-family: "SF Mono", Menlo, Consolas, Monaco, "Courier New", monospace, serif +} + +a:link { + color: var(--color-link); + text-decoration:none +} + +a:visited { + color:var(--color-link) +} + +a:active { + color:var(--color-link) +} + +a:hover { + color: var(--color-link); + text-decoration:underline +} + +p { + margin-bottom:1em +} + +h1 { + margin-bottom: 0.5em; + font-size: 3em; + font-weight: 300; + line-height:1 +} + +h1.active + .main-nav { + border-top:1px solid var(--color-active-menu-group) +} + +h2 { + margin-bottom: 0.5em; + font-size: 2.5em; + font-weight: 300; + line-height:1 +} + +h3 { + margin-bottom: 0.5em; + font-size: 1.5em; + font-weight: 300; + line-height:1 +} + +h4 { + margin-bottom: 0.5em; + font-size: 1.25em; + font-weight: 300; + line-height:1.2 +} + +h5 { + margin-bottom: 0.5em; + font-size: 1.175em; + font-weight: 500; + line-height:1.4 +} + +h6 { + margin-bottom: 0.5em; + font-size: 1em; + font-weight: 700; + line-height:1.5 +} + +h1, h2, h3, h4, h5, h6 { + color:var(--color-header-text) +} + +div.highlighter-rouge { + margin-left:13px +} + +pre { + font-size: 14px; + line-height: 1.6em; + border-left: 5px solid var(--color-code-line-highlight-border); + margin: 0.5em 0 1.5em 10px; + padding: 4px 0 2px 10px; + overflow:scroll +} + +a > code, p > code, li > code, dd > code, blockquote > code, td > code { + padding: 0; + margin: 0; + font-size: 16px; + white-space: nowrap; + background-color:transparent +} + +p > code, li > code, dd > code, blockquote > code, td > code { + color:var(--color-code-plain) +} + +p > code { + white-space: pre-wrap; + 
word-break:break-word +} + +hr { + border: none; + border-top: 1px var(--color-dropdown-border) solid; + margin:2em 0 +} + +hr:last-child { + display:none +} + +details { + margin-bottom:2em +} + +details :first-child { + margin-top:1.5em +} + +cite { + display:block +} + +cite:before { + content: "— " +} + +#logo { + text-indent: -9999px; + height: 48px; + width: 100%; + margin-top: 20px; + margin-bottom: 0.5em; + padding-bottom:10px +} + +#logo a { + display: block; + width: 190px; + height: 48px; + background-image: var(--logo-reference); + background-repeat: no-repeat; + background-size: 190px 48px; + background-position-x: -8px +} + +nav[role="navigation"] { + width: 250px; + position: fixed; + overflow: scroll; + left: 0; + top: 0; + bottom: 0; + background: var(--color-nav-background); + color: var(--color-nav-color); + border-right: 1px solid var(--color-nav-rule); + padding: 20px 30px +} + +nav[role="navigation"] ul { + border-top: 1px solid var(--color-nav-rule); + font-weight: 400; + margin-bottom: 30px; + list-style: none +} + +nav[role="navigation"] ul ul { + list-style: none +} + +nav[role="navigation"] ul li { + border-bottom: 1px solid var(--color-nav-rule) +} + +nav[role="navigation"] ul li.active { + border-bottom: 1px solid var(--color-active-menu-group) +} + +nav[role="navigation"] ul li.active a { + font-weight: 700 +} + +nav[role="navigation"] ul li a:link { + color: var(--color-nav-link-color); + text-decoration: none; + text-transform: uppercase; + letter-spacing: 1px; + font-size: 12px; + display: block; + padding: 10px +} + +nav[role="navigation"] ul li a:visited { + color: var(--color-nav-link-color) +} + +nav[role="navigation"] ul li a:active { + font-weight: 700 +} + +nav[role="navigation"] ul li a:hover { + color: var(--color-link) +} + +nav[role="navigation"] ul li ul { + margin-bottom: 10px; + border-top: none +} + +nav[role="navigation"] ul li ul li { + border-bottom: none; + padding: 0.1em +} + +nav[role="navigation"] ul li ul 
li.active { + border-bottom: none +} + +nav[role="navigation"] ul li ul li.active a { + font-weight: 700 +} + +nav[role="navigation"] ul li ul a:link { + color: var(--color-nav-link-color); + text-decoration: none; + text-transform: none; + letter-spacing: 0; + font-size: 12px; + display: block; + margin-left: 15px; + padding: 0 0 3px; + border-bottom: none; + font-weight: 300 +} + +nav[role="navigation"] ul li ul a:hover { + color: var(--color-link) +} + +nav[role="navigation"] h2 { + font-size: 0.75em; + font-weight: 700; + text-transform: lowercase; + font-variant: small-caps; + color: var(--color-figure-gray-secondary-alt); + padding-bottom:0.5em +} + +main { + max-width: 798px; + min-width: 320px; + margin-left: 250px; + padding: 35px 30px 0; + min-height: 100%; + height: auto !important; + height: 100% +} + +footer[role="contentinfo"] { + background: var(--color-nav-background); + border-top: 1px solid var(--color-nav-rule); + color: var(--color-nav-color); + padding: 20px 30px; + margin-left: 250px; + min-height: 74px +} + +footer[role="contentinfo"] p { + font-size: 0.625em; + color: var(--color-nav-link-color); + line-height: 1em; + margin-bottom: 1em; + margin-bottom: var(--content-margin-bottom) +} + +footer[role="contentinfo"] p.privacy a { + color: var(--color-nav-link-color); + border-right: 1px solid var(--color-nav-rule); + margin-right: 6px; + padding-right: 8px +} + +footer[role="contentinfo"] p.privacy a:last-child { + border: none; + margin: 0; + padding: 0 +} + +footer[role="contentinfo"] p:last-child { + margin-bottom: 0 +} + +footer[role="contentinfo"] aside { + position: relative; + width: 100%; + max-width: 700px +} + +footer[role="contentinfo"] aside i { + width: 16px; + height: 16px; + background-repeat: no-repeat; + background-size: 16px; + display: block; + margin-left: 1em; + float: right +} + +footer[role="contentinfo"] aside i.twitter { + background-image: url("/assets/images/icon-twitter.svg") +} + +footer[role="contentinfo"] aside 
i.feed { + background-image: url("/assets/images/icon-feed.svg") +} + +article:first-of-type { + padding-bottom:36px +} + +article h2 { + padding-top:1.1em +} + +article h3 { + padding-top:1em +} + +article h4 { + padding-top: 1em; + border-bottom: 1px var(--color-dropdown-border) solid; + padding-bottom:0.5em +} + +article h5 { + margin-top:1em +} + +article header { + width: 100%; + display: inline-block; + padding-bottom:2.5em +} + +article header h1 { + padding-bottom:0.125em +} + +article header .byline { + float: left; + font-size: 14px; + margin-bottom:1em +} + +article header .byline img { + width: 32px; + height: 32px; + border-radius: 50%; + border: 1px var(--color-fill-gray) solid; + position: absolute; + margin-right: 0.25em; + margin-top:-6px +} + +article header .byline span { + padding-left:42px +} + +article header .about { + float: none; + clear: both; + font-size: 14px; + font-weight: 400; + color: var(--color-figure-gray-tertiary); + border-left: 1px var(--color-figure-gray-tertiary) solid; + margin: 23px 3em 23px 0; + padding:4px 0 4px 10px +} + +article header time { + float: left; + text-transform: uppercase; + font-size: 14px; + font-weight: 400; + color: var(--color-figure-gray-tertiary); + margin-right: 3em; + margin-bottom:1em +} + +article header .tags { + display: block; + font-size: 12px; + font-weight: 400; + margin-top:0 +} + +article:not(:first-of-type) { + border-top: 1px solid var(--color-figure-gray-tertiary); + padding:36px 0 +} + +article blockquote { + border-left: 5px var(--color-fill-gray) solid; + margin: 0.5em 0 23px 1em; + padding: 4px 0 2px 10px; + color: var(--color-aside-note); + overflow-x:auto +} + +article blockquote p:last-child { + margin-bottom:0 +} + +article ul, article ol { + padding-left: 40px; + margin:1em 0 +} + +article ul ul, article ul ol, article ol ul, article ol ol { + margin:0 +} + +article ul { + list-style:disc +} + +article ul ul { + list-style:circle +} + +article ul ul ul { + list-style:square +} 
+ +article ol { + list-style:decimal +} + +article dl { + margin:2em 0 1em 0 +} + +article dl:after { + content: ""; + display: table; + clear:both +} + +article dl dt { + float: left; + clear: right; + margin-right: 1em; + display: block; + width: 28%; + text-align:right +} + +article dl dd { + float: right; + width: 65%; + margin-bottom: 1em; + overflow:scroll +} + +article dl dd { + padding-bottom: 1em; + border-bottom:1px var(--color-dropdown-border) solid +} + +article table { + display: block; + overflow-x: auto; + width: max-content; + min-width: 68%; + max-width: 100%; + margin: 2em auto 3em auto; + border-collapse: separate; + border:1px var(--color-dropdown-border) solid +} + +article table th { + font-weight: 700; + text-align:center +} + +article table th, article table td { + width: 50%; + padding: 0.5em 1.5em; + border-bottom:1px var(--color-dropdown-border) solid +} + +article table th:not(:first-child), article table td:not(:first-child) { + border-left:1px var(--color-dropdown-border) solid +} + +article table tr:last-child td { + border-bottom:none +} + +article details { + margin-top: 0; + cursor:pointer +} + +article details summary { + display: list-item; + padding-bottom: 0.5em; + outline: none; + margin-top:0 +} + +article details summary:after { + content: "Expand"; + text-transform: lowercase; + font-variant: small-caps; + border-bottom:1px var(--color-fill-gray) dashed +} + +article details[open] summary:after { + content: "Collapse" +} + +article details[open] * :not(summary) { + cursor:auto +} + +article details.download { + margin-top: 0; + cursor:pointer +} + +article details.download table { + display:inline-table +} + +article details.download summary { + padding-bottom: 0.5em; + outline: none; + margin-top:0 +} + +article details.download summary:after { + content: none; + text-transform: lowercase; + font-variant: small-caps; + border-bottom:1px var(--color-fill-gray) dashed +} + +article details.download[open] summary:after { + 
content:none +} + +article details.download[open] * :not(summary) { + cursor:auto +} + +article > details { + margin-left:40px +} + +article .good pre, article pre.good { + background: var(--color-fill-green-secondary); + border-color:var(--color-figure-green) +} + +article .good pre:before, article pre.good:before { + content: "✅"; + float:right +} + +article .bad pre, article pre.bad { + background: var(--color-fill-red-secondary); + border-color:var(--color-figure-red) +} + +article .bad pre:before, article pre.bad:before { + content: "⛔️"; + float:right +} + +article .links ul { + list-style:none +} + +article .links ul ul { + list-style: disc; + margin-top:5px +} + +article .links a:after { + content: " ›" +} + +article .links .link-external:after, article .links-external a:after, article .link-external:after { + content: " ↗" +} + +article .links-download a:after { + content: " ⬇" +} + +article .links-list-nostyle ul { + padding-left:0 +} + +article .links-list-nostyle ul ul { + list-style:none +} + +article .links-sublevel p { + margin-bottom:0 +} + +article .links-sublevel ul { + margin-top: 0; + padding-left:40px +} + +article footer { + margin: 4em 0 0 0; + padding: 1.5em 0 1em 0; + border-top:1px var(--color-dropdown-border) solid +} + +article footer:after { + content: ""; + display: table; + clear: both +} + +article footer nav [rel="prev"] { + width: 45%; + float: left; + text-align: left +} + +article footer nav [rel="prev"]:before { + content: "← " +} + +article footer nav [rel="next"] { + width: 45%; + float: right; + text-align: right +} + +article footer nav [rel="next"]:after { + content: " →" +} + +.title a:link, .title a:visited { + color:var(--color-header-text) +} + +.alert, .danger, .warning, .info, .success { + border-width: 1px; + border-style: solid; + padding: 0.5em; + margin:0.5em 0 1.5em 0 +} + +.alert a, .danger a, .warning a, .info a, .success a { + word-break:break-word +} + +.alert p:first-child, .danger p:first-child, .warning 
p:first-child, .info p:first-child, .success p:first-child { + margin-top:0 +} + +.alert p:last-child, .danger p:last-child, .warning p:last-child, .info p:last-child, .success p:last-child { + margin-bottom:0 +} + +.alert code, .danger code, .warning code, .info code, .success code { + border: none; + background: transparent; + padding:0 +} + +code { + white-space:pre-line +} + +pre code { + white-space:inherit +} + +pre code .graphic { + font-size: 19px; + line-height:0 +} + +pre code .commentary, pre code .graphic { + font-family: "SF Hello", "Helvetica Neue", Helvetica, Arial, Verdana, sans-serif +} + +@supports (overflow: -webkit-marquee) and(justify-content: inherit) { + .alert:before, .danger:before, .warning:before, .info:before, .success:before { + font-size: 1em; + float: left; + clear: left; + padding-left: 0.125em; + width:2em + } + + .alert p, .danger p, .warning p, .info p, .success p { + padding-left:2em + } + + .success:before { + content: "✅" + } + + .info:before { + content: "ℹ️" + } + + .warning:before { + content: "⚠️" + } + + .danger:before { + content: "❗️" + } +} + +.success { + color: var(--color-aside-note); + border-color: var(--color-form-valid); + background-color:var(--color-form-valid-background) +} + +.info { + color: var(--color-aside-note); + border-color: var(--color-aside-note-border); + background-color:var(--color-aside-note-background) +} + +.warning { + color: var(--color-aside-deprecated); + border-color: var(--color-aside-deprecated-border); + background-color:var(--color-aside-deprecated-background) +} + +.danger { + color: var(--color-aside-warning); + border-color: var(--color-aside-warning-border); + background-color:var(--color-aside-warning-background) +} + +table.downloads { + width: 100%; + table-layout:fixed +} + +table.downloads th { + font-size:0.75em +} + +table.downloads .platform { + width:40% +} + +table.downloads .download { + width:60% +} + +table.downloads .download a.debug, table.downloads .download 
a.signature { + font-size: 0.7em; + display:block +} + +table.downloads .download a { + font-weight: 700; + font-size:1em +} + +table.downloads .download a:not([download]) { + font-weight:400 +} + +table.downloads .download a:not([download]):before { + content: "(" +} + +table.downloads .download a:not([download]):after { + content: ")" +} + +table.downloads .arch-tag { + width:60% +} + +table.downloads .arch-tag a.debug, table.downloads .arch-tag a.signature { + font-size: 0.7em; + display:block +} + +table.downloads .arch-tag a { + font-weight: 700; + font-size:1em +} + +table.downloads .arch-tag a:not([arch-tag]) { + font-weight:400 +} + +article input.detail[type=checkbox] { + visibility: hidden; + cursor: pointer; + height: 0; + width: 100%; + margin-bottom: 2em; + display: block; + font-size: inherit; + font-style: inherit; + font-weight: inherit; + font-family: inherit; + position: relative; + top:-.85rem +} + +article p + input.detail[type=checkbox] { + margin-top:auto +} + +article .screenonly { + display:none +} + +@media screen { + article .screenonly { + display:inherit + } + + article input.detail[type=checkbox]:before { + content: "▶ "; + visibility: visible; + font-size:80% + } + + article input.detail[type=checkbox]:after { + text-transform: lowercase; + font-variant: small-caps; + border-bottom: 1px var(--color-fill-gray) dashed; + color: var(--color-figure-gray-secondary); + content: "More detail"; + visibility:visible + } + + article input.detail[type=checkbox]:checked:before { + content: "▼ " + } + + article input.detail[type=checkbox]:checked:after { + content: "Less detail" + } + + article input.detail[type=checkbox] + .more { + transition:0.5s opacity ease, 0.5s max-height ease + } + + article input.detail[type=checkbox]:checked + .more { + visibility: visible; + max-height:1000rem + } + + article input.detail[type=checkbox]:not(:checked) + .more { + overflow: hidden; + max-height: 0px; + opacity:0 + } +} + +article .more > p:first-of-type { 
+ margin-top:0 +} + +.color-scheme-toggle { + display: block; + outline: none; + --toggle-color-fill: var(--color-button-background); + font-size: 12px; + border: 1px solid var(--color-nav-link-color); + border-radius: var(--border-radius); + display: inline-flex; + padding: 1px; + margin-bottom:var(--content-margin-bottom) +} + +.color-scheme-toggle input { + position: absolute; + clip: rect(1px, 1px, 1px, 1px); + clip-path: inset(0px 0px 99.9% 99.9%); + overflow: hidden; + height: 1px; + width: 1px; + padding: 0; + border: 0; + appearance:none +} + +.color-scheme-toggle-label { + border: 1px solid transparent; + border-radius: var(--toggle-border-radius-inner, 2px); + color: var(--color-nav-link-color); + display: inline-block; + text-align: center; + padding: 1px 6px; + min-width: 42px; + box-sizing:border-box +} + +.color-scheme-toggle-label:hover { + cursor:pointer +} + +input:checked + .color-scheme-toggle-label { + background: var(--color-nav-link-color); + color: var(--color-nav-stuck) +} + +[role="contentinfo"] { + display: flex; + justify-content:space-between +} + +.visuallyhidden { + position: absolute; + clip: rect(1px, 1px, 1px, 1px); + clip-path: inset(0px 0px 99.9% 99.9%); + overflow: hidden; + height: 1px; + width: 1px; + padding: 0; + border:0 +} + +@media only screen and (max-width: 767px) { + nav[role="navigation"] { + width: 100%; + position: relative; + border-bottom: 1px solid var(--color-nav-rule); + border-right: none; + padding: 20px 30px; + overflow: hidden + } + + nav.open[role="navigation"] .list-items { + display: block + } + + nav[role="navigation"] .list-items { + padding-top: var(--content-margin-bottom); + display:none + } + + .menu-toggle { + content: ' '; + height: 20px; + width: 20px; + background-image: var(--menu-icon-close); + background-repeat: no-repeat; + background-position: center center; + cursor:pointer + } + + .menu-toggle.open { + background-image:var(--menu-icon) + } + + #logo { + margin: 0; + padding:0 + } + + 
#logo a { + margin:0 auto + } + + main { + max-width: 100%; + min-width: 320px; + margin-left: 0; + padding: 30px 30px 0 + } + + footer[role="contentinfo"] { + margin-left: 0; + flex-direction:column + } + + .footer-other { + display: flex; + justify-content: space-between; + margin-top:var(--content-margin-bottom) + } + + h1 { + font-size: 48px; + font-weight: 300; + line-height:1 + } + + h2 { + font-size: 40px; + font-weight: 300; + line-height:1.1 + } + + h3 { + font-size: 38px; + font-weight: 300; + line-height:1.1 + } + + h4 { + font-size: 36px; + font-weight: 300; + line-height:1.2 + } + + h5 { + font-size: 24px; + font-weight: 500; + line-height:1.4 + } + + h6 { + font-size: 18px; + font-weight: 700; + line-height:1.5 + } + + div.highlighter-rouge { + margin-left:0 + } + + article blockquote { + margin-left:0.5em + } + + table.downloads { + border:1px var(--color-dropdown-border) solid + } + + table.downloads, table.downloads thead, table.downloads tbody, table.downloads th, table.downloads td, table.downloads tr { + display:block !important + } + + table.downloads thead tr { + position: absolute; + top: -9999px; + left:-9999px + } + + table.downloads tr { + border:1px solid var(--color-dropdown-border) + } + + table.downloads td { + border-left: none !important; + border-right: none !important; + border-bottom: 1px solid var(--color-dropdown-border) !important; + position: relative; + padding-left: 35%; + width:100% !important + } + + table.downloads td:before { + position: absolute; + top: 0.5em; + left: 0.5em; + width: 27.5%; + padding-right: 10px; + white-space: nowrap; + text-align:right + } + + table.downloads td.platform:before { + content: "Platform" + } + + table.downloads td.download:before { + content: "Download"; + top:1em + } + + table.downloads td.date:before { + content: "Date" + } + + table.downloads td.toolchain:before { + content: "Toolchain"; + top:1em + } + + table.downloads td.github-tag:before { + content: "GitHub Tag" + } + + 
table.downloads td.docker-tag:before { + content: "Docker Tag" + } + + table.downloads td.arch-tag:before { + content: "Architecture" + } +} + +.nav-menu-container { + display: grid; + grid-template-columns: 1fr; + grid-template-rows: 1fr; + align-items:center +} + +.menu-item { + grid-area:1 / 1 / 1 / 1 +} + +.logo-container { + justify-self:center +} + +.menu-toggle { + justify-self:right +} + +@media only print { + html body { + background: white; + font-size: 12pt; + padding:0.5in + } + + html body * { + -webkit-print-color-adjust:exact + } + + a { + color: black !important; + text-decoration: underline !important + } + + a[href^="http:"]:after { + content: " (" attr(href) ") "; + color:#444 + } + + h1, h2, h3, h4, h5, h6, p, article > div, pre, table { + page-break-inside:avoid + } + + details:not([open]) { + visibility:visible + } + + details:not([open]) summary { + display:none !important + } + + details:not([open]) > *, details:not([open]) { + display:block + } + + .alert, .success, .info, .warning, .danger { + margin:1.5em 0 + } + + main { + width: auto; + padding: 0; + border: 0; + float: none !important; + color: black; + background: transparent; + margin: 0; + max-width: 100%; + min-height: 1in + } + + nav[role="navigation"] { + background: transparent; + border: none; + width: auto; + position: static; + padding: 0 + } + + nav[role="navigation"] h2, nav[role="navigation"] ul { + display: none + } + + nav[role="navigation"] #logo { + position: static; + margin-bottom: 1.5em + } + + nav[role="navigation"] #logo a { + background-position: -15px + } + + footer[role="contentinfo"] { + display: none + } +} + +/*# sourceMappingURL=application.css.map */ +""" +} diff --git a/Sources/RegexBenchmark/Suite/Backtracking.swift b/Sources/RegexBenchmark/Suite/Backtracking.swift new file mode 100644 index 000000000..2dfe5c2db --- /dev/null +++ b/Sources/RegexBenchmark/Suite/Backtracking.swift @@ -0,0 +1,45 @@ +import _StringProcessing +import RegexBuilder +import 
Foundation + +// Tests that involve heavy backtracking + +extension BenchmarkRunner { + mutating func addBacktracking() { + let r = "^ +A" + let s = String(repeating: " ", count: 10000) + + let basicBacktrack = Benchmark( + name: "BasicBacktrack", + regex: try! Regex(r), + ty: .allMatches, + target: s + ) + + let basicBacktrackNS = NSBenchmark( + name: "BasicBacktrackNS", + regex: try! NSRegularExpression(pattern: r), + ty: .all, + target: s + ) + + let basicBacktrackFirstMatch = Benchmark( + name: "BasicBacktrackFirstMatch", + regex: try! Regex(r), + ty: .first, + target: s + ) + + let basicBacktrackNSFirstMatch = NSBenchmark( + name: "BasicBacktrackNSFirstMatch", + regex: try! NSRegularExpression(pattern: r), + ty: .first, + target: s + ) + + register(basicBacktrack) + register(basicBacktrackNS) + register(basicBacktrackFirstMatch) + register(basicBacktrackNSFirstMatch) + } +} diff --git a/Sources/RegexBenchmark/Suite/CssRegex.swift b/Sources/RegexBenchmark/Suite/CssRegex.swift new file mode 100644 index 000000000..bc607154f --- /dev/null +++ b/Sources/RegexBenchmark/Suite/CssRegex.swift @@ -0,0 +1,24 @@ +import Foundation +import _StringProcessing + +extension BenchmarkRunner { + mutating func addCSS() { + let r = "--([a-zA-Z0-9_-]+)\\s*:\\s*(.*?):" + + let cssRegex = Benchmark( + name: "cssRegex", + regex: try! Regex(r), + ty: .allMatches, + target: Inputs.swiftOrgCSS + ) + + let cssRegexNS = NSBenchmark( + name: "cssRegexNS", + regex: try! 
NSRegularExpression(pattern: r), + ty: .all, + target: Inputs.swiftOrgCSS + ) + register(cssRegex) + register(cssRegexNS) + } +} diff --git a/Sources/RegexBenchmark/Suite/FirstMatch.swift b/Sources/RegexBenchmark/Suite/FirstMatch.swift new file mode 100644 index 000000000..bdc7ca8da --- /dev/null +++ b/Sources/RegexBenchmark/Suite/FirstMatch.swift @@ -0,0 +1,49 @@ +import _StringProcessing +import Foundation + +extension BenchmarkRunner { + mutating func addFirstMatch() { + let r = "a" + let s = String(repeating: " ", count: 100000) + + // this does nothing but loop through the loop in + // Match.swift (Regex._firstMatch) since the engine should fail right away, + let firstMatch = Benchmark( + name: "FirstMatch", + regex: try! Regex(r), + ty: .first, + target: s + ) + + // a comparison with now NSRegularExpression handles this situation + let firstMatchNS = NSBenchmark( + name: "FirstMatchNS", + regex: try! NSRegularExpression(pattern: r), + ty: .first, + target: s + ) + + let s2 = String(repeating: "a", count: 10000) + + // matches calls into firstMatch, so really they're the same + // this also stress tests the captures + let allMatches = Benchmark( + name: "AllMatches", + regex: try! Regex(r), + ty: .allMatches, + target: s2 + ) + + let allMatchesNS = NSBenchmark( + name: "AllMatchesNS", + regex: try! 
NSRegularExpression(pattern: r), + ty: .all, + target: s2 + ) + + register(firstMatch) + register(firstMatchNS) + register(allMatches) + register(allMatchesNS) + } +} diff --git a/Sources/RegexBenchmark/Suite/ReluctantQuant.swift b/Sources/RegexBenchmark/Suite/ReluctantQuant.swift new file mode 100644 index 000000000..8669a649e --- /dev/null +++ b/Sources/RegexBenchmark/Suite/ReluctantQuant.swift @@ -0,0 +1,42 @@ +import _StringProcessing +import RegexBuilder + +extension BenchmarkRunner { + mutating func addReluctantQuant() { + let size = 500000 + let s = String(repeating: "a", count: size) + + let reluctantQuant = Benchmark( + name: "ReluctantQuant", + regex: Regex { + OneOrMore(.any, .reluctant) + }, + ty: .whole, + target: s + ) + + let eagarQuantWithTerminal = Benchmark( + name: "EagarQuantWithTerminal", + regex: Regex { + OneOrMore(.any, .eager) + ";" + }, + ty: .whole, + target: s + ";" + ) + + let reluctantQuantWithTerminal = Benchmark( + name: "ReluctantQuantWithTerminal", + regex: Regex { + OneOrMore(.any, .reluctant) + ";" + }, + ty: .whole, + target: s + ";" + ) + + register(reluctantQuant) + register(reluctantQuantWithTerminal) + register(eagarQuantWithTerminal) + } +} diff --git a/Sources/RegexBenchmark/Utils/Size.swift b/Sources/RegexBenchmark/Utils/Size.swift new file mode 100644 index 000000000..00802fe59 --- /dev/null +++ b/Sources/RegexBenchmark/Utils/Size.swift @@ -0,0 +1,155 @@ +// Taken from swift-collections-benchmark +import ArgumentParser + +public struct Size: RawRepresentable { + public typealias RawValue = Int + + public let rawValue: RawValue + + public init(_ value: RawValue) { + self.rawValue = value + } + + public init(rawValue value: RawValue) { + self.rawValue = value + } +} + +extension Size: CustomStringConvertible { + public var description: String { + let v = Double(rawValue) + return + rawValue >= 1 << 40 ? String(format: "%.3gT", v * 0x1p-40) + : rawValue >= 1 << 30 ? 
String(format: "%.3gG", v * 0x1p-30) + : rawValue >= 1 << 20 ? String(format: "%.3gM", v * 0x1p-20) + : rawValue >= 1024 ? String(format: "%.3gk", v * 0x1p-10) + : "\(rawValue)" + } +} + +extension Size: ExpressibleByIntegerLiteral { + public init(integerLiteral value: RawValue) { + self.init(value) + } +} + +extension Size: CodingKey { + public init?(intValue: Int) { + self.init(intValue) + } + + public init?(stringValue: String) { + guard let size = Size(stringValue) else { return nil } + self = size + } + + public var intValue: Int? { rawValue } + public var stringValue: String { "\(rawValue)" } +} + +extension Size: Codable { + public init(from decoder: Decoder) throws { + let container = try decoder.singleValueContainer() + let string = try container.decode(String.self) + guard let value = Int(string, radix: 10) else { + throw DecodingError.dataCorruptedError( + in: container, + debugDescription: "Not an integer: '\(string)'") + } + self.rawValue = value + } + + public func encode(to encoder: Encoder) throws { + var container = encoder.singleValueContainer() + try container.encode("\(rawValue)") + } +} + +extension Size { + public init?(_ string: String) { + var position = string.startIndex + + // Parse digits + loop: while position != string.endIndex { + switch string[position] { + case "0", "1", "2", "3", "4", "5", "6", "7", "8", "9": + string.formIndex(after: &position) + default: + break loop + } + } + let digits = string.prefix(upTo: position) + guard let value = RawValue(digits, radix: 10) else { return nil } + + // Parse optional suffix + let suffix = string.suffix(from: position) + switch suffix { + case "": self.rawValue = value + case "k", "K": self.rawValue = value << 10 + case "m", "M": self.rawValue = value << 20 + case "g", "G": self.rawValue = value << 30 + case "t", "T": self.rawValue = value << 40 + default: return nil + } + } +} + +extension Size: Equatable {} +extension Size: Hashable {} +extension Size: Comparable { + public static func < 
(left: Self, right: Self) -> Bool { + left.rawValue < right.rawValue + } +} + +extension Size: ExpressibleByArgument { + public init?(argument: String) { + self.init(argument) + } +} + +extension FixedWidthInteger { + var _minimumBitWidth: Int { + Self.bitWidth - self.leadingZeroBitCount + } +} + +extension Size { + private static func _checkSignificantDigits(_ digits: Int) { + precondition(digits >= 1 && digits <= RawValue.bitWidth) + } + + public func roundedDown(significantDigits digits: Int) -> Size { + Self._checkSignificantDigits(digits) + let mask: RawValue = (0 &- 1) << (rawValue._minimumBitWidth - digits) + return Size(rawValue & mask) + } + + public func nextUp(significantDigits digits: Int) -> Size { + Self._checkSignificantDigits(digits) + + let shift = rawValue._minimumBitWidth - digits + let mask: RawValue = (0 &- 1) << shift + guard shift >= 0 else { + return Size(rawValue + 1) + } + return Size((rawValue + (1 << shift)) & mask) + } + + public static func sizes( + for range: ClosedRange, + significantDigits digits: Int + ) -> [Size] { + _checkSignificantDigits(digits) + var result: [Size] = [] + var value = range.lowerBound.roundedDown(significantDigits: digits) + while value < range.lowerBound { + value = value.nextUp(significantDigits: digits) + } + while value <= range.upperBound { + result.append(value) + value = value.nextUp(significantDigits: digits) + } + return result + } +} diff --git a/Sources/RegexBenchmark/Utils/Tick.swift b/Sources/RegexBenchmark/Utils/Tick.swift new file mode 100644 index 000000000..5209fab18 --- /dev/null +++ b/Sources/RegexBenchmark/Utils/Tick.swift @@ -0,0 +1,62 @@ +// Taken from swift-collections-benchmark +#if !USE_FOUNDATION_DATE && (os(macOS) || os(iOS) || os(watchOS) || os(tvOS)) +import Foundation // For the side effect of reexporting Darwin/Glibc + +public struct Tick { + internal let _value: timespec + + internal init(_value: timespec) { + self._value = _value + } + + public static var now: Tick { + guard 
#available(macOS 10.12, iOS 10.0, tvOS 10.0, watchOS 3.0, *) else { + fatalError("Please enable USE_FOUNDATION_DATE") + } + var now = timespec() + let r = clock_gettime(CLOCK_MONOTONIC_RAW, &now) + precondition(r == 0, "clock_gettime failure") + return Tick(_value: now) + } + + public static var resolution: Time { + guard #available(macOS 10.12, iOS 10.0, tvOS 10.0, watchOS 3.0, *) else { + fatalError("Please enable USE_FOUNDATION_DATE") + } + var res = timespec() + let r = clock_getres(CLOCK_MONOTONIC_RAW, &res) + precondition(r == 0, "clock_getres failure") + return Tick(_value: res).elapsedTime(since: Tick(_value: timespec(tv_sec: 0, tv_nsec: 0))) + } + + public func elapsedTime(since start: Tick) -> Time { + let s = Double(_value.tv_sec - start._value.tv_sec) + let ns = Double(_value.tv_nsec - start._value.tv_nsec) + return Time(s + ns / 1e9) + } +} + +#else + +import Foundation + +public struct Tick { + internal let _value: Date + + internal init(_value: Date) { + self._value = _value + } + + public static var now: Tick { + Tick(_value: Date()) + } + + public func elapsedTime(since start: Tick) -> Time { + Time(Double(_value.timeIntervalSince(start._value))) + } + + public static var resolution: Time { + .nanosecond + } +} +#endif diff --git a/Sources/RegexBenchmark/Utils/Time.swift b/Sources/RegexBenchmark/Utils/Time.swift new file mode 100644 index 000000000..9fa54c1aa --- /dev/null +++ b/Sources/RegexBenchmark/Utils/Time.swift @@ -0,0 +1,143 @@ +// Taken from swift-collections-benchmark + +import Foundation +import ArgumentParser + +public struct Time { + public let seconds: TimeInterval + + public init(_ seconds: TimeInterval) { + precondition(!seconds.isNaN) + self.seconds = seconds + } +} + +extension Time { + public static let second = Time(1) + public static let millisecond = Time(1e-3) + public static let microsecond = Time(1e-6) + public static let nanosecond = Time(1e-9) + public static let picosecond = Time(1e-12) + public static let femtosecond = 
Time(1e-15) + public static let attosecond = Time(1e-18) + public static let zero = Time(0) +} + +extension Time { + public static func since(_ start: Tick) -> Time { + Tick.now.elapsedTime(since: start) + } +} + +extension Time: RawRepresentable { + public var rawValue: TimeInterval { seconds } + + public init(rawValue: TimeInterval) { + self.seconds = rawValue + } +} + +extension Time: Equatable { + public static func == (left: Self, right: Self) -> Bool { + return left.seconds == right.seconds + } +} + +extension Time: Hashable { + public func hash(into hasher: inout Hasher) { + hasher.combine(seconds) + } +} + +extension Time: Comparable { + public static func < (left: Self, right: Self) -> Bool { + return left.seconds < right.seconds + } +} + +extension Time: CustomStringConvertible { + public var description: String { + if self.seconds == 0 { return "0" } + if self < .attosecond { return String(format: "%.3gas", seconds * 1e18) } + if self < .picosecond { return String(format: "%.3gfs", seconds * 1e15) } + if self < .nanosecond { return String(format: "%.3gps", seconds * 1e12) } + if self < .microsecond { return String(format: "%.3gns", seconds * 1e9) } + if self < .millisecond { return String(format: "%.3gµs", seconds * 1e6) } + if self < .second { return String(format: "%.3gms", seconds * 1e3) } + if self.seconds < 1000 { return String(format: "%.3gs", seconds) } + return String(format: "%gs", seconds.rounded()) + } + + public var typesetDescription: String { + let spc = "\u{200A}" + if self.seconds == 0 { return "0\(spc)s" } + if self < .femtosecond { return String(format: "%.3g\(spc)as", seconds * 1e18) } + if self < .picosecond { return String(format: "%.3g\(spc)fs", seconds * 1e15) } + if self < .nanosecond { return String(format: "%.3g\(spc)ps", seconds * 1e12) } + if self < .microsecond { return String(format: "%.3g\(spc)ns", seconds * 1e9) } + if self < .millisecond { return String(format: "%.3g\(spc)µs", seconds * 1e6) } + if self < .second { return 
String(format: "%.3g\(spc)ms", seconds * 1e3) } + if self.seconds < 1000 { return String(format: "%.3g\(spc)s", seconds) } + return String(format: "%g\(spc)s", seconds.rounded()) + } +} + +extension Time: Codable { + public init(from decoder: Decoder) throws { + self.seconds = try TimeInterval(from: decoder) + } + + public func encode(to encoder: Encoder) throws { + try self.seconds.encode(to: encoder) + } +} + +extension Time: ExpressibleByArgument { + public init?(argument: String) { + self.init(argument) + } +} + +extension Time { + private static let _scaleFromSuffix: [String: Time] = [ + "": .second, + "s": .second, + "ms": .millisecond, + "µs": .microsecond, + "us": .microsecond, + "ns": .nanosecond, + "ps": .picosecond, + "fs": .femtosecond, + "as": .attosecond, + ] + + private static let _floatingPointCharacterSet = CharacterSet(charactersIn: "+-0123456789.e") + + public init?(_ description: String) { + var description = description.trimmingCharacters(in: .whitespacesAndNewlines) + description = description.lowercased() + if let i = description.rangeOfCharacter(from: Time._floatingPointCharacterSet.inverted) { + let number = description.prefix(upTo: i.lowerBound) + let suffix = description.suffix(from: i.lowerBound) + guard let value = Double(number) else { return nil } + guard let scale = Time._scaleFromSuffix[String(suffix)] else { return nil } + self = Time(value * scale.seconds) + } + else { + guard let value = Double(description) else { return nil } + self = Time(value) + } + } +} + +extension Time { + public func amortized(over size: Size) -> Time { + return Time(seconds / TimeInterval(size.rawValue)) + } +} + +extension Time { + internal func _orIfZero(_ time: Time) -> Time { + self > .zero ? 
self : time + } +} diff --git a/Sources/RegexBenchmark/Utils/Timer.swift b/Sources/RegexBenchmark/Utils/Timer.swift new file mode 100644 index 000000000..960169fed --- /dev/null +++ b/Sources/RegexBenchmark/Utils/Timer.swift @@ -0,0 +1,65 @@ +// Taken from swift-collections-benchmark + +public struct Timer { + internal var _expectNested: Bool? = nil + public var elapsedTime: Time? = nil { + didSet { + precondition(_expectNested != false, + "Inconsistent timer use: Unexpected call to Timer.elapsedTime setter") + } + } + + public init() {} + + internal init(_expectNested: Bool?) { + self._expectNested = _expectNested + } + + internal static func _measureFirst( + _ body: (inout Timer) -> Void + ) -> (elapsedTime: Time, hasNestedMeasurement: Bool) { + var timer = Timer(_expectNested: nil) + let start = Tick.now + body(&timer) + let end = Tick.now + let elapsed = timer.elapsedTime ?? end.elapsedTime(since: start) + return (elapsedTime: elapsed._orIfZero(Tick.resolution), + hasNestedMeasurement: timer.elapsedTime != nil) + } + + internal static func _nestedMeasure(_ body: (inout Timer) -> Void) -> Time { + var timer = Timer(_expectNested: true) + body(&timer) + guard let elapsed = timer.elapsedTime else { + fatalError("Inconsistent timer use: Expected call to Timer.measure") + } + return elapsed._orIfZero(Tick.resolution) + } + + internal static func _iteratingMeasure( + iterations: Int, + _ body: (inout Timer) -> Void + ) -> Time { + precondition(iterations > 0) + var timer = Timer(_expectNested: false) + let start = Tick.now + for _ in 0 ..< iterations { + body(&timer) + } + let end = Tick.now + let elapsed = end.elapsedTime(since: start)._orIfZero(Tick.resolution) + return Time(elapsed.seconds / Double(iterations)) + } + + @inline(never) + public mutating func measure(_ body: () -> Void) { + precondition(_expectNested != false, + "Inconsistent timer use: Unexpected call to Timer.measure") + let start = Tick.now + body() + let end = Tick.now + elapsedTime = 
end.elapsedTime(since: start) + _expectNested = false + } +} + diff --git a/Sources/RegexBuilder/Builder.swift b/Sources/RegexBuilder/Builder.swift index be9a48e36..a50f069ec 100644 --- a/Sources/RegexBuilder/Builder.swift +++ b/Sources/RegexBuilder/Builder.swift @@ -25,12 +25,4 @@ public enum RegexComponentBuilder { public static func buildExpression(_ regex: R) -> R { regex } - - public static func buildEither(first component: R) -> R { - component - } - - public static func buildEither(second component: R) -> R { - component - } } diff --git a/Sources/RegexBuilder/DSL.swift b/Sources/RegexBuilder/DSL.swift index f3b0fd702..ecd01c07c 100644 --- a/Sources/RegexBuilder/DSL.swift +++ b/Sources/RegexBuilder/DSL.swift @@ -211,14 +211,6 @@ public struct AlternationBuilder { public static func buildExpression(_ regex: R) -> R { regex } - - public static func buildEither(first component: R) -> R { - component - } - - public static func buildEither(second component: R) -> R { - component - } } @available(SwiftStdlib 5.7, *) diff --git a/Sources/RegexBuilder/Variadics.swift b/Sources/RegexBuilder/Variadics.swift index f06978c8b..f4adddd93 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -2608,28 +2608,23 @@ extension Capture { @_disfavoredOverload public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } @_disfavoredOverload public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { 
self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -2638,28 +2633,23 @@ extension TryCapture { @_disfavoredOverload public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } @_disfavoredOverload public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -2687,28 +2677,23 @@ extension Capture { @_disfavoredOverload public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } @_disfavoredOverload public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -2717,28 +2702,23 @@ extension TryCapture { @_disfavoredOverload public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } @_disfavoredOverload public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture), R.RegexOutput == W { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -2760,27 +2740,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -2788,27 +2763,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -2833,27 +2803,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -2861,27 +2826,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1), R.RegexOutput == (W, C1) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -2903,27 +2863,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -2931,27 +2886,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -2976,27 +2926,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3004,27 +2949,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping 
(Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2), R.RegexOutput == (W, C1, C2) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3046,27 +2986,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3074,27 +3009,22 @@ extension Capture { extension TryCapture { public init( _ 
component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3119,27 +3049,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ 
-3147,27 +3072,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3), R.RegexOutput == (W, C1, C2, C3) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3189,27 +3109,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any 
- }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3217,27 +3132,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3262,27 +3172,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3290,27 +3195,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4), R.RegexOutput == (W, C1, C2, C3, C4) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3332,27 +3232,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3360,27 +3255,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3405,27 +3295,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3433,27 +3318,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5), R.RegexOutput == (W, C1, C2, C3, C4, C5) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3475,27 +3355,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3503,27 +3378,22 @@ extension Capture { extension TryCapture { public 
init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3548,27 +3418,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as 
Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3576,27 +3441,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3618,27 +3478,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3646,27 +3501,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3691,27 +3541,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3719,27 +3564,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3761,27 +3601,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3789,27 +3624,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3834,27 +3664,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, 
+ CaptureTransform(transform))) } } @@ -3862,27 +3687,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -3904,27 +3724,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3932,27 +3747,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -3977,27 +3787,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -4005,27 +3810,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? 
) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -4047,27 +3847,22 @@ extension Capture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + 
component.regex.root, + CaptureTransform(transform))) } } @@ -4075,27 +3870,22 @@ extension Capture { extension TryCapture { public init( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } public init( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -4120,27 +3910,22 @@ extension Capture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -4148,27 +3933,22 @@ extension Capture { extension TryCapture { public init( @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? 
- }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } public init( as reference: Reference, @RegexComponentBuilder _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) where RegexOutput == (Substring, NewCapture, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10), R.RegexOutput == (W, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index 56e2f3790..b3132c5cd 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -646,28 +646,23 @@ struct VariadicsGenerator: ParsableCommand { \(disfavored)\ public init<\(genericParams), NewCapture>( _ component: R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } \(disfavored)\ public init<\(genericParams), NewCapture>( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -676,28 +671,23 @@ struct VariadicsGenerator: 
ParsableCommand { \(disfavored)\ public init<\(genericParams), NewCapture>( _ component: R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) \(whereClauseTransformed) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + self.init(node: .capture( + component.regex.root, + CaptureTransform(transform))) } \(disfavored)\ public init<\(genericParams), NewCapture>( _ component: R, as reference: Reference, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) \(whereClauseTransformed) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component.regex.root))) + component.regex.root, + CaptureTransform(transform))) } } @@ -725,28 +715,23 @@ struct VariadicsGenerator: ParsableCommand { \(disfavored)\ public init<\(genericParams), NewCapture>( @\(concatBuilderName) _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } \(disfavored)\ public init<\(genericParams), NewCapture>( as reference: Reference, @\(concatBuilderName) _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture + transform: @escaping (W) throws -> NewCapture ) \(whereClauseTransformed) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } @@ -755,28 +740,23 @@ struct 
VariadicsGenerator: ParsableCommand { \(disfavored)\ public init<\(genericParams), NewCapture>( @\(concatBuilderName) _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) \(whereClauseTransformed) { - self.init(node: .capture(.transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + self.init(node: .capture( + component().regex.root, + CaptureTransform(transform))) } \(disfavored)\ public init<\(genericParams), NewCapture>( as reference: Reference, @\(concatBuilderName) _ component: () -> R, - transform: @escaping (Substring) throws -> NewCapture? + transform: @escaping (W) throws -> NewCapture? ) \(whereClauseTransformed) { self.init(node: .capture( reference: reference.id, - .transform( - CaptureTransform(resultType: NewCapture.self) { - try transform($0) as Any? - }, - component().regex.root))) + component().regex.root, + CaptureTransform(transform))) } } diff --git a/Sources/_RegexParser/Regex/Parse/CaptureList.swift b/Sources/_RegexParser/Regex/Parse/CaptureList.swift index 2a5a47395..509fbf9bc 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureList.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureList.swift @@ -24,13 +24,13 @@ public struct CaptureList { extension CaptureList { public struct Capture { public var name: String? - public var type: Any.Type? + public var type: Any.Type public var optionalDepth: Int public var location: SourceLocation public init( name: String? = nil, - type: Any.Type? = nil, + type: Any.Type = Substring.self, optionalDepth: Int, _ location: SourceLocation ) { @@ -122,18 +122,15 @@ extension AST.Node { break } } - - public var _captureList: CaptureList { - var caps = CaptureList() - self._addCaptures(to: &caps, optionalNesting: 0) - return caps - } } extension AST { - /// Get the capture list for this AST + /// The capture list (including the whole match) of this AST. 
public var captureList: CaptureList { - root._captureList + var caps = CaptureList() + caps.append(.init(optionalDepth: 0, .fake)) + root._addCaptures(to: &caps, optionalNesting: 0) + return caps } } @@ -151,12 +148,7 @@ extension CaptureList: Equatable {} extension CaptureList.Capture: CustomStringConvertible { public var description: String { - let typeStr: String - if let ty = type { - typeStr = "\(ty)" - } else { - typeStr = "Substring" - } + let typeStr = String(describing: type) let suffix = String(repeating: "?", count: optionalDepth) return typeStr + suffix } diff --git a/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift b/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift index 6cd8001ba..3212699fb 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureStructure.swift @@ -225,7 +225,7 @@ extension CaptureStructure: CustomStringConvertible { extension AST { /// The capture structure of this AST for compiler communication. var captureStructure: CaptureStructure { - root._captureList._captureStructure(nestOptionals: true) + captureList._captureStructure(nestOptionals: true) } } @@ -246,10 +246,7 @@ extension CaptureList { extension CaptureList.Capture { func _captureStructure(nestOptionals: Bool) -> CaptureStructure { if optionalDepth == 0 { - if let ty = type { - return .atom(name: name, type: .init(ty)) - } - return .atom(name: name) + return .atom(name: name, type: type == Substring.self ? 
nil : .init(type)) } var copy = self copy.optionalDepth = 0 diff --git a/Sources/_RegexParser/Regex/Parse/Sema.swift b/Sources/_RegexParser/Regex/Parse/Sema.swift index 83c014d2a..b7d2bfd6f 100644 --- a/Sources/_RegexParser/Regex/Parse/Sema.swift +++ b/Sources/_RegexParser/Regex/Parse/Sema.swift @@ -77,7 +77,7 @@ extension RegexValidator { } switch ref.kind { case .absolute(let i): - guard i <= captures.captures.count else { + guard i < captures.captures.count else { throw error(.invalidReference(i), at: ref.innerLoc) } case .named(let name): diff --git a/Sources/_RegexParser/Utility/TypeConstruction.swift b/Sources/_RegexParser/Utility/TypeConstruction.swift index e368d3513..4d1765e34 100644 --- a/Sources/_RegexParser/Utility/TypeConstruction.swift +++ b/Sources/_RegexParser/Utility/TypeConstruction.swift @@ -139,3 +139,62 @@ public enum TypeConstruction { return _openExistential(childType, do: helper) } } + +extension TypeConstruction { + public static func optionalType( + of base: Base.Type, depth: Int = 1 + ) -> Any.Type { + switch depth { + case 0: return base + case 1: return Base?.self + case 2: return Base??.self + case 3: return Base???.self + case 4: return Base????.self + default: + return optionalType(of: Base????.self, depth: depth - 4) + } + } +} + +extension MemoryLayout { + /// Returns the element index that corresponnds to the given tuple element key + /// path. + /// - Parameters: + /// - keyPath: The key path from a tuple to one of its elements. + /// - elementTypes: The element type of the tuple type. + // TODO: It possible to get element types from the type metadata, but it's + // more efficient to pass them in since we already know them in the matching + // engine. + public static func tupleElementIndex( + of keyPath: PartialKeyPath, + elementTypes: ElementTypes + ) -> Int? 
where ElementTypes.Element == Any.Type { + guard let byteOffset = offset(of: keyPath) else { + return nil + } + if byteOffset == 0 { return 0 } + var currentOffset = 0 + for (index, type) in elementTypes.enumerated() { + func sizeAndAlignMask(_: T.Type) -> (Int, Int) { + (MemoryLayout.size, MemoryLayout.alignment - 1) + } + // The ABI of an offset-based key path only stores the byte offset, so + // this doesn't work if there's a 0-sized element, e.g. `Void`, + // `(Void, Void)`. (rdar://63819465) + if size == 0 { + return nil + } + let (size, alignMask) = _openExistential(type, do: sizeAndAlignMask) + // Align up the offset for this type. + currentOffset = (currentOffset + alignMask) & ~alignMask + // If it matches the offset we are looking for, `index` is the tuple + // element index. + if currentOffset == byteOffset { + return index + } + // Advance to the past-the-end offset for this element. + currentOffset += size + } + return nil + } +} diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 3a91b6c67..c64c99b6f 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -4,32 +4,44 @@ extension Compiler { struct ByteCodeGen { var options: MatchingOptions var builder = Program.Builder() + /// A Boolean indicating whether the first matchable atom has been emitted. + /// This is used to determine whether to apply initial options. + var hasEmittedFirstMatchableAtom = false init(options: MatchingOptions, captureList: CaptureList) { self.options = options self.builder.captureList = captureList } - - mutating func finish( - ) throws -> Program { - builder.buildAccept() - return try builder.assemble() - } } } extension Compiler.ByteCodeGen { + mutating func emitRoot(_ root: DSLTree.Node) throws -> Program { + // The whole match (`.0` element of output) is equivalent to an implicit + // capture over the entire regex. 
+ try emitNode(.capture(name: nil, reference: nil, root)) + builder.buildAccept() + return try builder.assemble() + } +} + +fileprivate extension Compiler.ByteCodeGen { mutating func emitAtom(_ a: DSLTree.Atom) throws { + defer { + if a.isMatchable { + hasEmittedFirstMatchableAtom = true + } + } switch a { case .any: emitAny() case let .char(c): try emitCharacter(c) - + case let .scalar(s): try emitScalar(s) - + case let .assertion(kind): try emitAssertion(kind.ast) @@ -40,7 +52,7 @@ extension Compiler.ByteCodeGen { builder.buildUnresolvedReference(id: id) case let .changeMatchingOptions(optionSequence): - if !builder.hasReceivedInstructions { + if !hasEmittedFirstMatchableAtom { builder.initialOptions.apply(optionSequence.ast) } options.apply(optionSequence.ast) @@ -65,8 +77,7 @@ extension Compiler.ByteCodeGen { switch ref.kind { case .absolute(let i): - // Backreferences number starting at 1 - builder.buildBackreference(.init(i-1)) + builder.buildBackreference(.init(i)) case .named(let name): try builder.buildNamedReference(name) case .relative: @@ -329,9 +340,8 @@ extension Compiler.ByteCodeGen { } mutating func emitMatcher( - _ matcher: @escaping _MatcherInterface, - into capture: CaptureRegister? = nil - ) { + _ matcher: @escaping _MatcherInterface + ) -> ValueRegister { // TODO: Consider emitting consumer interface if // not captured. 
This may mean we should store @@ -343,26 +353,7 @@ extension Compiler.ByteCodeGen { let valReg = builder.makeValueRegister() builder.buildMatcher(matcher, into: valReg) - - // TODO: Instruction to store directly - if let cap = capture { - builder.buildMove(valReg, into: cap) - } - } - - mutating func emitTransform( - _ t: CaptureTransform, - _ child: DSLTree.Node, - into cap: CaptureRegister - ) throws { - let transform = builder.makeTransformFunction { - input, range in - try t(input[range]) - } - builder.buildBeginCapture(cap) - try emitNode(child) - builder.buildEndCapture(cap) - builder.buildTransformCapture(cap, transform) + return valReg } mutating func emitNoncapturingGroup( @@ -388,7 +379,7 @@ extension Compiler.ByteCodeGen { throw Unreachable("These should produce a capture node") case .changeMatchingOptions(let optionSequence): - if !builder.hasReceivedInstructions { + if !hasEmittedFirstMatchableAtom { builder.initialOptions.apply(optionSequence) } options.apply(optionSequence) @@ -612,7 +603,8 @@ extension Compiler.ByteCodeGen { builder.buildConsume(by: consumer) } - mutating func emitNode(_ node: DSLTree.Node) throws { + @discardableResult + mutating func emitNode(_ node: DSLTree.Node) throws -> ValueRegister? { switch node { case let .orderedChoice(children): @@ -623,20 +615,38 @@ extension Compiler.ByteCodeGen { try emitConcatenationComponent(child) } - case let .capture(name, refId, child): + case let .capture(name, refId, child, transform): options.beginScope() defer { options.endScope() } let cap = builder.makeCapture(id: refId, name: name) - switch child { - case let .matcher(_, m): - emitMatcher(m, into: cap) - case let .transform(t, child): - try emitTransform(t, child, into: cap) - default: - builder.buildBeginCapture(cap) - try emitNode(child) - builder.buildEndCapture(cap) + builder.buildBeginCapture(cap) + let value = try emitNode(child) + builder.buildEndCapture(cap) + // If the child node produced a custom capture value, e.g. 
the result of + // a matcher, this should override the captured substring. + if let value { + builder.buildMove(value, into: cap) + } + // If there's a capture transform, apply it now. + if let transform = transform { + let fn = builder.makeTransformFunction { input, cap in + // If it's a substring capture with no custom value, apply the + // transform directly to the substring to avoid existential traffic. + // + // FIXME: separate out this code path. This is fragile, + // slow, and these are clearly different constructs + if let range = cap.range, cap.value == nil { + return try transform(input[range]) + } + + let value = constructExistentialOutputComponent( + from: input, + component: cap.deconstructed, + optionalCount: 0) + return try transform(value) + } + builder.buildTransformCapture(cap, fn) } case let .nonCapturingGroup(kind, child): @@ -704,10 +714,10 @@ extension Compiler.ByteCodeGen { } case let .regexLiteral(l): - try emitNode(l.ast.dslTreeNode) + return try emitNode(l.ast.dslTreeNode) case let .convertedRegexLiteral(n, _): - try emitNode(n) + return try emitNode(n) case .absentFunction: throw Unsupported("absent function") @@ -715,18 +725,14 @@ extension Compiler.ByteCodeGen { throw Unsupported("consumer") case let .matcher(_, f): - emitMatcher(f) - - case .transform: - throw Unreachable( - "Transforms only directly inside captures") + return emitMatcher(f) case .characterPredicate: throw Unsupported("character predicates") case .trivia, .empty: - return + return nil } + return nil } } - diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index 6f87fa625..a8d663651 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -13,18 +13,14 @@ // TODO: Where should this live? Inside TypeConstruction? 
func constructExistentialOutputComponent( - from input: Substring, - in range: Range?, - value: Any?, + from input: String, + component: (range: Range, value: Any?)?, optionalCount: Int ) -> Any { let someCount: Int var underlying: Any - if let v = value { - underlying = v - someCount = optionalCount - } else if let r = range { - underlying = input[r] + if let component = component { + underlying = component.value ?? input[component.range] someCount = optionalCount } else { // Ok since we Any-box every step up the ladder @@ -43,12 +39,11 @@ func constructExistentialOutputComponent( @available(SwiftStdlib 5.7, *) extension AnyRegexOutput.Element { func existentialOutputComponent( - from input: Substring + from input: String ) -> Any { constructExistentialOutputComponent( from: input, - in: range, - value: value, + component: representation.content, optionalCount: representation.optionalDepth ) } @@ -64,15 +59,13 @@ extension Sequence where Element == AnyRegexOutput.Element { // FIXME: This is a stop gap where we still slice the input // and traffic through existentials @available(SwiftStdlib 5.7, *) - func existentialOutput( - from input: Substring - ) -> Any { - var caps = Array() - caps.append(input) - caps.append(contentsOf: self.map { + func existentialOutput(from input: String) -> Any { + let elements = map { $0.existentialOutputComponent(from: input) - }) - return TypeConstruction.tuple(of: caps) + } + return elements.count == 1 + ? elements[0] + : TypeConstruction.tuple(of: elements) } func slices(from input: String) -> [Substring?] 
{ diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index 4d97c5758..601cd52a4 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -28,11 +28,9 @@ class Compiler { __consuming func emit() throws -> Program { // TODO: Handle global options var codegen = ByteCodeGen( - options: options, captureList: tree.root._captureList + options: options, captureList: tree.captureList ) - try codegen.emitNode(tree.root) - let program = try codegen.finish() - return program + return try codegen.emitRoot(tree.root) } } diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 640fe3c93..a912fd136 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -45,8 +45,6 @@ extension DSLTree.Node { fatalError("FIXME: Is this where we handle them?") case .matcher: fatalError("FIXME: Is this where we handle them?") - case .transform: - fatalError("FIXME: Is this where we handle them?") case .characterPredicate: fatalError("FIXME: Is this where we handle them?") } diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index f706c0471..917e010f6 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -78,11 +78,6 @@ extension MEProgram.Builder { var lastInstructionAddress: InstructionAddress { .init(instructions.endIndex - 1) } - - /// `true` if the builder has received any instructions. - var hasReceivedInstructions: Bool { - !instructions.isEmpty - } mutating func buildNop(_ r: StringRegister? 
= nil) { instructions.append(.init(.nop, .init(optionalString: r))) diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 7003c0261..ec7c3668a 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -32,80 +32,48 @@ extension Processor { struct _StoredCapture { - // Set whenever we push the very first capture, allows us - // to theoretically re-compute anything we want to later. - fileprivate var startState: SavePoint? = nil - - // Save the entire history as we go, so that backtracking - // can just lop-off aborted runs. - // - // Backtracking entries can specify a per-capture stack - // index so that we can abort anything that came after. - // - // By remembering the entire history, we waste space, but - // we get flexibility for now. - // - fileprivate var stack: Array> = [] - - // Also save entire history of captured values -_- - // - // We will need to really zoom in on performance here... - fileprivate var valueStack: Array = [] + var range: Range? = nil + + var value: Any? = nil // An in-progress capture start fileprivate var currentCaptureBegin: Position? = nil fileprivate func _invariantCheck() { - if startState == nil { - assert(stack.isEmpty) - assert(valueStack.isEmpty) - assert(currentCaptureBegin == nil) - } else if currentCaptureBegin == nil { - assert(!stack.isEmpty || !valueStack.isEmpty) - } - if hasValues { - // FIXME: how? - // assert(valueStack.count == stack.count) + if range == nil { + assert(value == nil) } } // MARK: - IPI - var isEmpty: Bool { stack.isEmpty } - - var hasValues: Bool { !valueStack.isEmpty } - - var history: Array> { - stack - } - var valueHistory: Array { - valueStack + var deconstructed: (range: Range, value: Any?)? { + guard let r = range else { return nil } + return (r, value) } - var latest: Range? { stack.last } - - var latestValue: Any? { valueStack.last } - /// Start a new capture. 
If the previously started one was un-ended, - /// will clear it and restart. If this is the first start, will save `initial`. + /// will clear it and restart. mutating func startCapture( - _ idx: Position, initial: SavePoint + _ idx: Position ) { _invariantCheck() defer { _invariantCheck() } - if self.startState == nil { - self.startState = initial - } currentCaptureBegin = idx } mutating func endCapture(_ idx: Position) { _invariantCheck() - assert(currentCaptureBegin != nil) defer { _invariantCheck() } - stack.append(currentCaptureBegin! ..< idx) + guard let low = currentCaptureBegin else { + fatalError("Invariant violated: ending unstarted capture") + } + + range = low..._StoredCapture> var referencedCaptureOffsets: [ReferenceID: Int] -// func extract(from s: String) -> Array> { -// caps.map { $0.map { s[$0] } } -// } -// - func latestUntyped(from s: String) -> Array { + func latestUntyped(from input: String) -> Array { values.map { - guard let last = $0.latest else { + guard let range = $0.range else { return nil } - return s[last] + return input[range] } } } diff --git a/Sources/_StringProcessing/Engine/MEProgram.swift b/Sources/_StringProcessing/Engine/MEProgram.swift index 8b4737e7a..52aef1511 100644 --- a/Sources/_StringProcessing/Engine/MEProgram.swift +++ b/Sources/_StringProcessing/Engine/MEProgram.swift @@ -11,12 +11,12 @@ @_implementationOnly import _RegexParser -struct MEProgram where Input.Element: Equatable { +struct MEProgram where Input.Element: Equatable { typealias ConsumeFunction = (Input, Range) -> Input.Index? typealias AssertionFunction = (Input, Input.Index, Range) throws -> Bool typealias TransformFunction = - (Input, Range) throws -> Any? + (Input, Processor._StoredCapture) throws -> Any? typealias MatcherFunction = (Input, Input.Index, Range) throws -> (Input.Index, Any)? 
diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 8f777ad33..4b65528ba 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -32,30 +32,38 @@ struct Processor< typealias Element = Input.Element let input: Input - let bounds: Range let matchMode: MatchMode + let instructions: InstructionList + + // MARK: Resettable state + + // The subject bounds. + // + // FIXME: This also conflates search bounds too! + var bounds: Range + + // The current position in the subject var currentPosition: Position - let instructions: InstructionList var controller: Controller - var cycleCount = 0 - - /// Our register file var registers: Registers - // Used for back tracking var savePoints: [SavePoint] = [] var callStack: [InstructionAddress] = [] + var storedCaptures: Array<_StoredCapture> + var state: State = .inProgress var failureReason: Error? = nil + + // MARK: Metrics, debugging, etc. 
+ var cycleCount = 0 var isTracingEnabled: Bool - var storedCaptures: Array<_StoredCapture> } extension Processor { @@ -88,6 +96,30 @@ extension Processor { _checkInvariants() } + + mutating func reset(searchBounds: Range) { + // FIXME: We currently conflate both subject bounds and search bounds + // This should just reset search bounds + self.bounds = searchBounds + self.currentPosition = self.bounds.lowerBound + + self.controller = Controller(pc: 0) + + self.registers.reset(sentinel: bounds.upperBound) + + self.savePoints.removeAll(keepingCapacity: true) + self.callStack.removeAll(keepingCapacity: true) + + for idx in storedCaptures.indices { + storedCaptures[idx] = .init() + } + + self.state = .inProgress + self.failureReason = nil + + _checkInvariants() + } + func _checkInvariants() { assert(end <= input.endIndex) assert(start >= input.startIndex) @@ -103,17 +135,24 @@ extension Processor { input[bounds] } + // Advance in our input, without any checks or failure signalling + mutating func _uncheckedForcedConsumeOne() { + assert(currentPosition != end) + input.formIndex(after: ¤tPosition) + } + // Advance in our input // // Returns whether the advance succeeded. On failure, our // save point was restored mutating func consume(_ n: Distance) -> Bool { - // Want Collection to provide this behavior... - if input.distance(from: currentPosition, to: end) < n.rawValue { + guard let idx = input.index( + currentPosition, offsetBy: n.rawValue, limitedBy: end + ) else { signalFailure() return false } - currentPosition = input.index(currentPosition, offsetBy: n.rawValue) + currentPosition = idx return true } @@ -140,30 +179,26 @@ extension Processor { return slice } - mutating func match(_ e: Element) { + // Match against the current input element. Returns whether + // it succeeded vs signaling an error. 
+ mutating func match(_ e: Element) -> Bool { guard let cur = load(), cur == e else { signalFailure() - return - } - if consume(1) { - controller.step() + return false } + _uncheckedForcedConsumeOne() + return true } + + // Match against the current input prefix. Returns whether + // it succeeded vs signaling an error. mutating func matchSeq( _ seq: C - ) where C.Element == Input.Element { - let count = seq.count - - guard let inputSlice = load(count: count), - seq.elementsEqual(inputSlice) - else { - signalFailure() - return - } - guard consume(.init(count)) else { - fatalError("unreachable") + ) -> Bool where C.Element == Input.Element { + for e in seq { + guard match(e) else { return false } } - controller.step() + return true } mutating func signalFailure() { @@ -336,18 +371,24 @@ extension Processor { case .match: let reg = payload.element - match(registers[reg]) + if match(registers[reg]) { + controller.step() + } case .matchSequence: let reg = payload.sequence let seq = registers[reg] - matchSeq(seq) + if matchSeq(seq) { + controller.step() + } case .matchSlice: let (lower, upper) = payload.pairedPosPos let range = registers[lower]...ConsumeFunction] - // currently, these are static readonly var assertionFunctions: [MEProgram.AssertionFunction] // Captured-value constructors @@ -44,69 +42,61 @@ extension Processor { // currently, these are for comments and abort messages var strings: [String] + // MARK: writeable, resettable + + // currently, hold output of assertions + var bools: [Bool] // TODO: bitset + // currently, useful for range-based quantification var ints: [Int] - // unused - var floats: [Double] = [] - // Currently, used for `movePosition` and `matchSlice` var positions: [Position] = [] var values: [Any] + } +} - // unused - var instructionAddresses: [InstructionAddress] = [] - - // unused, any application? - var classStackAddresses: [CallStackAddress] = [] - - // unused, any application? 
- var positionStackAddresses: [PositionStackAddress] = [] - - // unused, any application? - var savePointAddresses: [SavePointStackAddress] = [] - - subscript(_ i: StringRegister) -> String { - strings[i.rawValue] - } - subscript(_ i: SequenceRegister) -> [Element] { - sequences[i.rawValue] - } - subscript(_ i: IntRegister) -> Int { - get { ints[i.rawValue] } - set { ints[i.rawValue] = newValue } - } - subscript(_ i: BoolRegister) -> Bool { - get { bools[i.rawValue] } - set { bools[i.rawValue] = newValue } - } - subscript(_ i: PositionRegister) -> Position { - get { positions[i.rawValue] } - set { positions[i.rawValue] = newValue } - } - subscript(_ i: ValueRegister) -> Any { - get { values[i.rawValue] } - set { - values[i.rawValue] = newValue - } - } - subscript(_ i: ElementRegister) -> Element { - elements[i.rawValue] - } - subscript(_ i: ConsumeFunctionRegister) -> MEProgram.ConsumeFunction { - consumeFunctions[i.rawValue] - } - subscript(_ i: AssertionFunctionRegister) -> MEProgram.AssertionFunction { - assertionFunctions[i.rawValue] - } - subscript(_ i: TransformRegister) -> MEProgram.TransformFunction { - transformFunctions[i.rawValue] - } - subscript(_ i: MatcherRegister) -> MEProgram.MatcherFunction { - matcherFunctions[i.rawValue] +extension Processor.Registers { + subscript(_ i: StringRegister) -> String { + strings[i.rawValue] + } + subscript(_ i: SequenceRegister) -> [Input.Element] { + sequences[i.rawValue] + } + subscript(_ i: IntRegister) -> Int { + get { ints[i.rawValue] } + set { ints[i.rawValue] = newValue } + } + subscript(_ i: BoolRegister) -> Bool { + get { bools[i.rawValue] } + set { bools[i.rawValue] = newValue } + } + subscript(_ i: PositionRegister) -> Input.Index { + get { positions[i.rawValue] } + set { positions[i.rawValue] = newValue } + } + subscript(_ i: ValueRegister) -> Any { + get { values[i.rawValue] } + set { + values[i.rawValue] = newValue } } + subscript(_ i: ElementRegister) -> Input.Element { + elements[i.rawValue] + } + 
subscript(_ i: ConsumeFunctionRegister) -> MEProgram.ConsumeFunction { + consumeFunctions[i.rawValue] + } + subscript(_ i: AssertionFunctionRegister) -> MEProgram.AssertionFunction { + assertionFunctions[i.rawValue] + } + subscript(_ i: TransformRegister) -> MEProgram.TransformFunction { + transformFunctions[i.rawValue] + } + subscript(_ i: MatcherRegister) -> MEProgram.MatcherFunction { + matcherFunctions[i.rawValue] + } } extension Processor.Registers { @@ -141,20 +131,26 @@ extension Processor.Registers { self.ints = Array(repeating: 0, count: info.ints) - self.floats = Array(repeating: 0, count: info.floats) - self.positions = Array(repeating: sentinel, count: info.positions) self.values = Array( repeating: SentinelValue(), count: info.values) + } - self.instructionAddresses = Array(repeating: 0, count: info.instructionAddresses) - - self.classStackAddresses = Array(repeating: 0, count: info.classStackAddresses) - - self.positionStackAddresses = Array(repeating: 0, count: info.positionStackAddresses) + mutating func reset(sentinel: Input.Index) { + self.bools._setAll(to: false) + self.ints._setAll(to: 0) + self.positions._setAll(to: sentinel) + self.values._setAll(to: SentinelValue()) + } +} - self.savePointAddresses = Array(repeating: 0, count: info.savePointAddresses) +// TODO: Productize into general algorithm +extension MutableCollection { + mutating func _setAll(to e: Element) { + for idx in self.indices { + self[idx] = e + } } } @@ -177,7 +173,6 @@ extension MEProgram { var positionStackAddresses = 0 var savePointAddresses = 0 var captures = 0 - } } @@ -197,12 +192,7 @@ extension Processor.Registers: CustomStringConvertible { \(formatRegisters("bools", bools))\ \(formatRegisters("strings", strings))\ \(formatRegisters("ints", ints))\ - \(formatRegisters("floats", floats))\ \(formatRegisters("positions", positions))\ - \(formatRegisters("instructionAddresses", instructionAddresses))\ - \(formatRegisters("classStackAddresses", classStackAddresses))\ - 
\(formatRegisters("positionStackAddresses", positionStackAddresses))\ - \(formatRegisters("savePointAddresses", savePointAddresses))\ """ } diff --git a/Sources/_StringProcessing/Engine/Structuralize.swift b/Sources/_StringProcessing/Engine/Structuralize.swift index 129ac1677..bc3adf701 100644 --- a/Sources/_StringProcessing/Engine/Structuralize.swift +++ b/Sources/_StringProcessing/Engine/Structuralize.swift @@ -3,8 +3,7 @@ extension CaptureList { @available(SwiftStdlib 5.7, *) func createElements( - _ list: MECaptureList, - _ input: String + _ list: MECaptureList ) -> [AnyRegexOutput.ElementRepresentation] { assert(list.values.count == captures.count) @@ -13,10 +12,9 @@ extension CaptureList { for (i, (cap, meStored)) in zip(captures, list.values).enumerated() { let element = AnyRegexOutput.ElementRepresentation( optionalDepth: cap.optionalDepth, - bounds: meStored.latest, + content: meStored.deconstructed, name: cap.name, - referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key, - value: meStored.latestValue + referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key ) result.append(element) diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 4f428cf06..295a732de 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -19,6 +19,33 @@ struct Executor { self.engine = Engine(program, enableTracing: enablesTracing) } + @available(SwiftStdlib 5.7, *) + func firstMatch( + _ input: String, + in inputRange: Range, + graphemeSemantic: Bool + ) throws -> Regex.Match? 
{ + var cpu = engine.makeProcessor( + input: input, bounds: inputRange, matchMode: .partialFromFront) + + var low = inputRange.lowerBound + let high = inputRange.upperBound + while true { + if let m: Regex.Match = try _match( + input, in: low..= high { return nil } + if graphemeSemantic { + input.formIndex(after: &low) + } else { + input.unicodeScalars.formIndex(after: &low) + } + cpu.reset(searchBounds: low..( _ input: String, @@ -27,7 +54,15 @@ struct Executor { ) throws -> Regex.Match? { var cpu = engine.makeProcessor( input: input, bounds: inputRange, matchMode: mode) + return try _match(input, in: inputRange, using: &cpu) + } + @available(SwiftStdlib 5.7, *) + func _match( + _ input: String, + in inputRange: Range, + using cpu: inout Processor + ) throws -> Regex.Match? { guard let endIdx = cpu.consume() else { if let e = cpu.failureReason { throw e @@ -40,31 +75,10 @@ struct Executor { referencedCaptureOffsets: engine.program.referencedCaptureOffsets) let range = inputRange.lowerBound.. Output? { let elements = map { - $0.existentialOutputComponent(from: input[...]) + $0.existentialOutputComponent(from: input) } return TypeConstruction.tuple(of: elements) as? Output } @@ -52,7 +52,7 @@ extension AnyRegexOutput: RandomAccessCollection { /// The range over which a value was captured. `nil` for no-capture. public var range: Range? { - representation.bounds + representation.content?.range } /// The slice of the input over which a value was captured. `nil` for no-capture. @@ -60,11 +60,13 @@ extension AnyRegexOutput: RandomAccessCollection { range.map { input[$0] } } - /// The captured value, `nil` for no-capture + /// The captured value, `nil` for no-capture. public var value: Any? { - // FIXME: Should this return the substring for default-typed - // values? - representation.value + representation.value(forInput: input) + } + + public var type: Any.Type { + representation.type } /// The name of this capture, if it has one, otherwise `nil`. 
@@ -166,7 +168,7 @@ extension Regex { /// Returns whether a named-capture with `name` exists public func contains(captureNamed name: String) -> Bool { - program.tree.root._captureList.captures.contains(where: { + program.tree.captureList.captures.contains(where: { $0.name == name }) } @@ -194,8 +196,7 @@ extension Regex.Match where Output == AnyRegexOutput { public init(_ match: Regex.Match) { self.init( anyRegexOutput: match.anyRegexOutput, - range: match.range, - value: match.value + range: match.range ) } } @@ -229,17 +230,15 @@ extension AnyRegexOutput { /// `Substring` has optional depth `0`, and `Int??` has optional depth `2`. let optionalDepth: Int - /// The bounds of the output element. - let bounds: Range? + /// The capture content representation, i.e. the element bounds and the + /// value (if available). + let content: (range: Range, value: Any?)? /// The name of the capture. var name: String? = nil /// The capture reference this element refers to. var referenceID: ReferenceID? = nil - - /// If the output vaule is strongly typed, then this will be set. - var value: Any? = nil } internal init(input: String, elements: [ElementRepresentation]) { @@ -250,17 +249,15 @@ extension AnyRegexOutput { @available(SwiftStdlib 5.7, *) extension AnyRegexOutput.ElementRepresentation { fileprivate func value(forInput input: String) -> Any { - // Ok for now because `existentialMatchComponent` - // wont slice the input if there's no range to slice with - // - // FIXME: This is ugly :-/ - let input = bounds.map { input[$0] } ?? "" - - return constructExistentialOutputComponent( + constructExistentialOutputComponent( from: input, - in: bounds, - value: nil, + component: content, optionalCount: optionalDepth ) } + + var type: Any.Type { + content?.value.map { Swift.type(of: $0) } + ?? 
TypeConstruction.optionalType(of: Substring.self, depth: optionalDepth) + } } diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 1f0fc93b3..72c5f1526 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -37,7 +37,8 @@ extension DSLTree { /// /// (...), (?...) case capture( - name: String? = nil, reference: ReferenceID? = nil, Node) + name: String? = nil, reference: ReferenceID? = nil, Node, + CaptureTransform? = nil) /// Matches a noncapturing subpattern. case nonCapturingGroup(_AST.GroupKind, Node) @@ -88,9 +89,6 @@ extension DSLTree { // MARK: - Extensibility points - /// Transform a range into a value, most often used inside captures - case transform(CaptureTransform, Node) - case consumer(_ConsumerInterface) case matcher(Any.Type, _MatcherInterface) @@ -254,7 +252,7 @@ public typealias _CharacterPredicateInterface = ( extension DSLTree.Node { @_spi(RegexBuilder) - public var children: [DSLTree.Node]? { + public var children: [DSLTree.Node] { switch self { case let .orderedChoice(v): return v @@ -264,9 +262,8 @@ extension DSLTree.Node { // Treat this transparently return n.children - case let .capture(_, _, n): return [n] + case let .capture(_, _, n, _): return [n] case let .nonCapturingGroup(_, n): return [n] - case let .transform(_, n): return [n] case let .quantification(_, _, n): return [n] case let .conditional(_, t, f): return [t,f] @@ -336,20 +333,7 @@ extension DSLTree.Node { return n.hasCapture default: - return self.children?.any(\.hasCapture) ?? false - } - } -} - -extension DSLTree.Node { - /// For typed capture-producing nodes, the type produced. - var valueCaptureType: AnyType? 
{ - switch self { - case let .matcher(t, _): - return AnyType(t) - case let .transform(t, _): - return AnyType(t.resultType) - default: return nil + return self.children.any(\.hasCapture) } } } @@ -387,40 +371,80 @@ public struct ReferenceID: Hashable { @_spi(RegexBuilder) public struct CaptureTransform: Hashable, CustomStringConvertible { - public enum Closure { - case failable((Substring) throws -> Any?) - case nonfailable((Substring) throws -> Any) + enum Closure { + /// A failable transform. + case failable((Any) throws -> Any?) + /// Specialized case of `failable` for performance. + case substringFailable((Substring) throws -> Any?) + /// A non-failable transform. + case nonfailable((Any) throws -> Any) + /// Specialized case of `nonfailable` for performance. + case substringNonfailable((Substring) throws -> Any?) } - public let resultType: Any.Type - public let closure: Closure + let argumentType: Any.Type + let resultType: Any.Type + let closure: Closure - public init(resultType: Any.Type, closure: Closure) { + init(argumentType: Any.Type, resultType: Any.Type, closure: Closure) { + self.argumentType = argumentType self.resultType = resultType self.closure = closure } - public init( - resultType: Any.Type, - _ closure: @escaping (Substring) throws -> Any + public init( + _ userSpecifiedTransform: @escaping (Argument) throws -> Result ) { - self.init(resultType: resultType, closure: .nonfailable(closure)) + let closure: Closure + if let substringTransform = userSpecifiedTransform + as? (Substring) throws -> Result { + closure = .substringNonfailable(substringTransform) + } else { + closure = .nonfailable { + try userSpecifiedTransform($0 as! Argument) as Any + } + } + self.init( + argumentType: Argument.self, + resultType: Result.self, + closure: closure) } - public init( - resultType: Any.Type, - _ closure: @escaping (Substring) throws -> Any? + public init( + _ userSpecifiedTransform: @escaping (Argument) throws -> Result? 
) { - self.init(resultType: resultType, closure: .failable(closure)) + let closure: Closure + if let substringTransform = userSpecifiedTransform + as? (Substring) throws -> Result? { + closure = .substringFailable(substringTransform) + } else { + closure = .failable { + try userSpecifiedTransform($0 as! Argument) as Any? + } + } + self.init( + argumentType: Argument.self, + resultType: Result.self, + closure: closure) } - public func callAsFunction(_ input: Substring) throws -> Any? { + func callAsFunction(_ input: Any) throws -> Any? { switch closure { - case .nonfailable(let closure): - let result = try closure(input) + case .nonfailable(let transform): + let result = try transform(input) assert(type(of: result) == resultType) return result - case .failable(let closure): - guard let result = try closure(input) else { + case .substringNonfailable(let transform): + let result = try transform(input as! Substring) + assert(type(of: result) == resultType) + return result + case .failable(let transform): + guard let result = try transform(input) else { + return nil + } + assert(type(of: result) == resultType) + return result + case .substringFailable(let transform): + guard let result = try transform(input as! Substring) else { return nil } assert(type(of: result) == resultType) @@ -428,6 +452,19 @@ public struct CaptureTransform: Hashable, CustomStringConvertible { } } + func callAsFunction(_ input: Substring) throws -> Any? 
{ + switch closure { + case .substringFailable(let transform): + return try transform(input) + case .substringNonfailable(let transform): + return try transform(input) + case .failable(let transform): + return try transform(input) + case .nonfailable(let transform): + return try transform(input) + } + } + public static func == (lhs: CaptureTransform, rhs: CaptureTransform) -> Bool { unsafeBitCast(lhs.closure, to: (Int, Int).self) == unsafeBitCast(rhs.closure, to: (Int, Int).self) @@ -440,7 +477,7 @@ public struct CaptureTransform: Hashable, CustomStringConvertible { } public var description: String { - "" + "" } } @@ -466,10 +503,10 @@ extension DSLTree.Node { child._addCaptures(to: &list, optionalNesting: nesting) } - case let .capture(name, _, child): + case let .capture(name, _, child, transform): list.append(.init( name: name, - type: child.valueCaptureType?.base, + type: transform?.resultType ?? child.wholeMatchType, optionalDepth: nesting, .fake)) child._addCaptures(to: &list, optionalNesting: nesting) @@ -513,23 +550,54 @@ extension DSLTree.Node { case .matcher: break - case .transform(_, let child): - child._addCaptures(to: &list, optionalNesting: nesting) - case .customCharacterClass, .atom, .trivia, .empty, .quotedLiteral, .consumer, .characterPredicate: break } } - var _captureList: CaptureList { - var list = CaptureList() - self._addCaptures(to: &list, optionalNesting: 0) - return list + /// Returns true if the node is output-forwarding, i.e. not defining its own + /// output but forwarding its only child's output. 
+ var isOutputForwarding: Bool { + switch self { + case .nonCapturingGroup: + return true + case .orderedChoice, .concatenation, .capture, + .conditional, .quantification, .customCharacterClass, .atom, + .trivia, .empty, .quotedLiteral, .regexLiteral, .absentFunction, + .convertedRegexLiteral, .consumer, + .characterPredicate, .matcher: + return false + } + } + + /// Returns the output-defining node, peering through any output-forwarding + /// nodes. + var outputDefiningNode: Self { + if isOutputForwarding { + assert(children.count == 1) + return children[0].outputDefiningNode + } + return self + } + + /// Returns the type of the whole match, i.e. `.0` element type of the output. + var wholeMatchType: Any.Type { + if case .matcher(let type, _) = outputDefiningNode { + return type + } + return Substring.self } } extension DSLTree { + var captureList: CaptureList { + var list = CaptureList() + list.append(.init(type: root.wholeMatchType, optionalDepth: 0, .fake)) + root._addCaptures(to: &list, optionalNesting: 0) + return list + } + /// Presents a wrapped version of `DSLTree.Node` that can provide an internal /// `_TreeNode` conformance. struct _Tree: _TreeNode { @@ -549,9 +617,8 @@ extension DSLTree { // Treat this transparently return _Tree(n).children - case let .capture(_, _, n): return [_Tree(n)] + case let .capture(_, _, n, _): return [_Tree(n)] case let .nonCapturingGroup(_, n): return [_Tree(n)] - case let .transform(_, n): return [_Tree(n)] case let .quantification(_, _, n): return [_Tree(n)] case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] @@ -691,3 +758,17 @@ extension DSLTree { } } } + +extension DSLTree.Atom { + /// Returns a Boolean indicating whether the atom represents a pattern that's + /// matchable, e.g. a character or a scalar, not representing a change of + /// matching options or an assertion. 
+ var isMatchable: Bool { + switch self { + case .changeMatchingOptions, .assertion: + return false + case .char, .scalar, .any, .backreference, .symbolicReference, .unconverted: + return true + } + } +} diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 98f2e66a6..8020d2e9b 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -21,49 +21,36 @@ extension Regex { /// The range of the overall match. public let range: Range - - let value: Any? } } @available(SwiftStdlib 5.7, *) extension Regex.Match { + var input: String { + anyRegexOutput.input + } + /// The output produced from the match operation. public var output: Output { if Output.self == AnyRegexOutput.self { - let wholeMatchCapture = AnyRegexOutput.ElementRepresentation( - optionalDepth: 0, - bounds: range - ) - - let output = AnyRegexOutput( - input: anyRegexOutput.input, - elements: [wholeMatchCapture] + anyRegexOutput._elements - ) - - return output as! Output - } else if Output.self == Substring.self { - // FIXME: Plumb whole match (`.0`) through the matching engine. - return anyRegexOutput.input[range] as! Output - } else if anyRegexOutput.isEmpty, value != nil { - // FIXME: This is a workaround for whole-match values not - // being modeled as part of captures. We might want to - // switch to a model where results are alongside captures - return value! as! Output - } else { - guard value == nil else { - fatalError("FIXME: what would this mean?") - } - let typeErasedMatch = anyRegexOutput.existentialOutput( - from: anyRegexOutput.input[range] - ) - return typeErasedMatch as! Output + return anyRegexOutput as! Output } + let typeErasedMatch = anyRegexOutput.existentialOutput( + from: anyRegexOutput.input + ) + return typeErasedMatch as! Output } /// Accesses a capture by its name or number. 
public subscript(dynamicMember keyPath: KeyPath) -> T { - output[keyPath: keyPath] + // Note: We should be able to get the element offset from the key path + // itself even at compile time. We need a better way of doing this. + guard let outputTupleOffset = MemoryLayout.tupleElementIndex( + of: keyPath, elementTypes: anyRegexOutput.map(\.type) + ) else { + return output[keyPath: keyPath] + } + return anyRegexOutput[outputTupleOffset].value as! T } /// Accesses a capture using the `.0` syntax, even when the match isn't a tuple. @@ -81,9 +68,8 @@ extension Regex.Match { ) else { preconditionFailure("Reference did not capture any match in the regex") } - return element.existentialOutputComponent( - from: anyRegexOutput.input[...] + from: input ) as! Capture } } @@ -151,22 +137,10 @@ extension Regex { _ input: String, in inputRange: Range ) throws -> Regex.Match? { - // FIXME: Something more efficient, likely an engine interface, and we - // should scrap the RegexConsumer crap and call this - - var low = inputRange.lowerBound - let high = inputRange.upperBound - while true { - if let m = try _match(input, in: low..= high { return nil } - if regex.initialOptions.semanticLevel == .graphemeCluster { - input.formIndex(after: &low) - } else { - input.unicodeScalars.formIndex(after: &low) - } - } + let executor = Executor(program: regex.program.loweredProgram) + let graphemeSemantic = regex.initialOptions.semanticLevel == .graphemeCluster + return try executor.firstMatch( + input, in: inputRange, graphemeSemantic: graphemeSemantic) } } diff --git a/Sources/_StringProcessing/Utility/TypeVerification.swift b/Sources/_StringProcessing/Utility/TypeVerification.swift index 0ad8aa325..c3aa53c7a 100644 --- a/Sources/_StringProcessing/Utility/TypeVerification.swift +++ b/Sources/_StringProcessing/Utility/TypeVerification.swift @@ -18,10 +18,10 @@ extension Regex { return true } - var tupleElements: [Any.Type] = [Substring.self] - var labels = " " + var tupleElements: [Any.Type] = [] 
+ var labels = "" - for capture in program.tree.root._captureList.captures { + for capture in program.tree.captureList.captures { var captureType: Any.Type = capture.type ?? Substring.self var i = capture.optionalDepth @@ -41,7 +41,7 @@ extension Regex { // If we have no captures, then our Regex must be Regex. if tupleElements.count == 1 { - return Output.self == Substring.self + return Output.self == program.tree.root.wholeMatchType } let createdType = TypeConstruction.tupleType( diff --git a/Sources/_StringProcessing/Utility/TypedIndex.swift b/Sources/_StringProcessing/Utility/TypedIndex.swift index adde06a3e..c95e7f93b 100644 --- a/Sources/_StringProcessing/Utility/TypedIndex.swift +++ b/Sources/_StringProcessing/Utility/TypedIndex.swift @@ -78,11 +78,11 @@ extension TypedIndex: BidirectionalCollection where C: BidirectionalCollection { // failure in the Swift repo. #if false extension TypedIndex: RangeReplaceableCollection where C: RangeReplaceableCollection { - init() { rawValue = C() } + init() { content = C() } mutating func replaceSubrange(_ subrange: Range, with newElements: C) where C : Collection, C.Element == Element { - let rawRange = subrange.lowerBound.rawValue ..< subrange.upperBound.rawValue - rawValue.replaceSubrange(rawRange, with: newElements) + let rawRange = subrange.lowerBound.content ..< subrange.upperBound.content + content.replaceSubrange(rawRange, with: newElements) } // TODO: append, and all the other customization hooks... 
diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index f325b579f..7971b3a49 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -427,7 +427,19 @@ class RegexDSLTests: XCTestCase { CharacterClass.digit } } - + + try _testDSLCaptures( + ("abcdef2", ("abcdef2", "f")), + matchType: (Substring, Substring??).self, ==) + { + Optionally { + ZeroOrMore { + Capture(CharacterClass.word) + } + CharacterClass.digit + } + } + try _testDSLCaptures( ("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"), ("aaaabbbcccdddeeefff", nil), @@ -513,6 +525,29 @@ class RegexDSLTests: XCTestCase { */ } + func testCaptureTransform() throws { + try _testDSLCaptures( + ("aaaa1", ("aaaa1", "aaa")), + matchType: (Substring, Substring).self, ==) + { + Capture { + OneOrMore("a") + } transform: { + $0.dropFirst() + } + One(.digit) + } + try _testDSLCaptures( + ("aaaa1", ("aaaa1", "a")), + matchType: (Substring, Substring??).self, ==) + { + ZeroOrMore { + Capture("a", transform: { Optional.some($0) }) + } + One(.digit) + } + } + func testCapturelessQuantification() throws { // This test is to make sure that a captureless quantification, when used // straight out of the quantifier (without being wrapped in a builder), is @@ -586,10 +621,10 @@ class RegexDSLTests: XCTestCase { let regex3 = Regex { OneOrMore("a") Capture { - TryCapture("b") { Int($0) } - ZeroOrMore { - TryCapture("c") { Double($0) } - } + TryCapture("b", transform: { Int($0) }) + ZeroOrMore( + TryCapture("c", transform: { Double($0) }) + ) Optionally("e") } } @@ -897,57 +932,64 @@ class RegexDSLTests: XCTestCase { } } } - - func testSemanticVersionExample() { - struct SemanticVersion: Equatable { - var major: Int - var minor: Int - var patch: Int - var dev: String? 
- } - struct SemanticVersionParser: CustomConsumingRegexComponent { - typealias RegexOutput = SemanticVersion - func consuming( - _ input: String, - startingAt index: String.Index, - in bounds: Range - ) throws -> (upperBound: String.Index, output: SemanticVersion)? { - let regex = Regex { - TryCapture(OneOrMore(.digit)) { Int($0) } + + struct SemanticVersion: Equatable { + var major: Int + var minor: Int + var patch: Int + var dev: String? + } + struct SemanticVersionParser: CustomConsumingRegexComponent { + typealias RegexOutput = SemanticVersion + func consuming( + _ input: String, + startingAt index: String.Index, + in bounds: Range + ) throws -> (upperBound: String.Index, output: SemanticVersion)? { + let regex = Regex { + TryCapture(OneOrMore(.digit)) { Int($0) } + "." + TryCapture(OneOrMore(.digit)) { Int($0) } + Optionally { "." TryCapture(OneOrMore(.digit)) { Int($0) } - Optionally { - "." - TryCapture(OneOrMore(.digit)) { Int($0) } - } - Optionally { - "-" - Capture(OneOrMore(.word)) - } } + Optionally { + "-" + Capture(OneOrMore(.word)) + } + } - guard let match = input[index..