diff --git a/Sources/Exercises/Participants/PEGParticipant.swift b/Sources/Exercises/Participants/PEGParticipant.swift
index 670f286a8..2b8a597f5 100644
--- a/Sources/Exercises/Participants/PEGParticipant.swift
+++ b/Sources/Exercises/Participants/PEGParticipant.swift
@@ -40,7 +40,7 @@ private func graphemeBreakPropertyData(forLine line: String) -> GraphemeBreakEnt
let program = PEG.Program(start: "Entry", environment: ["Entry": entry])
let vm = program.compile(for: String.self)
- let engine = program.transpile(for: String.self)
+ let engine = try! program.transpile(for: String.self)
_ = (vm, engine)
fatalError("Unsupported")
diff --git a/Sources/Prototypes/PEG/PEGTranspile.swift b/Sources/Prototypes/PEG/PEGTranspile.swift
index 440310b08..a6f724a03 100644
--- a/Sources/Prototypes/PEG/PEGTranspile.swift
+++ b/Sources/Prototypes/PEG/PEGTranspile.swift
@@ -13,7 +13,7 @@ import _MatchingEngine
extension PEG.VM {
typealias MEProgram = _MatchingEngine.Program
- func transpile() -> MEProgram {
+ func transpile() throws -> MEProgram {
typealias Builder = MEProgram.Builder
var builder = MEProgram.Builder()
@@ -106,14 +106,14 @@ extension PEG.VM {
}
}
- return builder.assemble()
+ return try builder.assemble()
}
}
extension PEG.Program {
public func transpile(
for input: Input.Type = Input.self
- ) -> Engine where Input.Element == Element {
- Engine(compile(for: input).vm.transpile())
+ ) throws -> Engine where Input.Element == Element {
+ try Engine(compile(for: input).vm.transpile())
}
}
diff --git a/Sources/_MatchingEngine/Engine/Builder.swift b/Sources/_MatchingEngine/Engine/Builder.swift
index d75593a0d..f6917b142 100644
--- a/Sources/_MatchingEngine/Engine/Builder.swift
+++ b/Sources/_MatchingEngine/Engine/Builder.swift
@@ -230,7 +230,7 @@ extension Program.Builder {
// TODO: Mutating because of fail address fixup, drop when
// that's removed
- public mutating func assemble() -> Program {
+ public mutating func assemble() throws -> Program {
// TODO: This will add a fail instruction at the end every
// time it's assembled. Better to do to the local instruction
// list copy, but that complicates logic. It's possible we
@@ -262,12 +262,12 @@ extension Program.Builder {
case .splitSaving:
guard let fix2 = tok.second else {
- fatalError("unreachable")
+ throw Unreachable("TODO: reason")
}
let saving = addressTokens[fix2.rawValue]!
payload = .init(addr: addr, addr2: saving)
- default: fatalError("unreachable")
+ default: throw Unreachable("TODO: reason")
}
diff --git a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift
index f3a892d70..b6b882ad2 100644
--- a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift
+++ b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift
@@ -1785,7 +1785,7 @@ extension Source {
if customCC {
return .char(char)
}
- fatalError("unreachable")
+ throw Unreachable("TODO: reason")
// (sometimes) special metacharacters
case ".": return customCC ? .char(".") : .any
diff --git a/Sources/_MatchingEngine/Regex/Parse/Parse.swift b/Sources/_MatchingEngine/Regex/Parse/Parse.swift
index 84c703068..21012d29c 100644
--- a/Sources/_MatchingEngine/Regex/Parse/Parse.swift
+++ b/Sources/_MatchingEngine/Regex/Parse/Parse.swift
@@ -223,7 +223,7 @@ extension Parser {
continue
}
- fatalError("unreachable?")
+ throw Unreachable("TODO: reason")
}
guard !result.isEmpty else {
return .empty(.init(loc(_start)))
diff --git a/Sources/_MatchingEngine/Utility/Errors.swift b/Sources/_MatchingEngine/Utility/Errors.swift
new file mode 100644
index 000000000..8dbe67fa0
--- /dev/null
+++ b/Sources/_MatchingEngine/Utility/Errors.swift
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+/// Error for a construct that is not supported; records a message plus the file and line given to `init`.
+public struct Unsupported: Error, CustomStringConvertible {
+ var message: String // Text passed as the first `init` argument.
+ var file: String // `description` of the `file` argument passed to `init`.
+ var line: Int // The `line` argument passed to `init`, converted to `Int`.
+ /// Two-line text: "Unsupported: '<message>'" then "<file>:<line>" (leading whitespace follows the literal's indentation).
+ public var description: String { """
+ Unsupported: '\(message)'
+ \(file):\(line)
+ """
+ }
+ /// Creates the error. `file` and `line` default to `#file` and `#line`, so by default they identify the call site.
+ public init(
+ _ s: String,
+ file: StaticString = #file,
+ line: UInt = #line
+ ) {
+ self.message = s
+ self.file = file.description
+ self.line = Int(asserting: line) // NOTE(review): `Int(asserting:)` is declared outside this file — presumably a checked conversion; confirm
+ }
+}
+/// Error for reaching code that was expected to be unreachable; records a message plus the file and line given to `init`.
+public struct Unreachable: Error, CustomStringConvertible {
+ var message: String // Text passed as the first `init` argument.
+ var file: String // `description` of the `file` argument passed to `init`.
+ var line: Int // The `line` argument passed to `init`, converted to `Int`.
+ /// Two-line text: "Unreachable: '<message>'" then "<file>:<line>" (leading whitespace follows the literal's indentation).
+ public var description: String { """
+ Unreachable: '\(message)'
+ \(file):\(line)
+ """
+ }
+ /// Creates the error. `file` and `line` default to `#file` and `#line`, so by default they identify the call site.
+ public init(
+ _ s: String,
+ file: StaticString = #file,
+ line: UInt = #line
+ ) {
+ self.message = s
+ self.file = file.description
+ self.line = Int(asserting: line) // NOTE(review): `Int(asserting:)` is declared outside this file — presumably a checked conversion; confirm
+ }
+}
diff --git a/Sources/_MatchingEngine/Utility/Misc.swift b/Sources/_MatchingEngine/Utility/Misc.swift
index 3a53eda68..bd1e395b5 100644
--- a/Sources/_MatchingEngine/Utility/Misc.swift
+++ b/Sources/_MatchingEngine/Utility/Misc.swift
@@ -15,16 +15,6 @@ extension FixedWidthInteger {
}
}
-// TODO: Replace all fatal error unreachables with these calls.
-// We will likely want to convert them to unhandleable throws
-// or something similar.
-func unreachable(_ s: @autoclosure () -> String) -> Never {
- fatalError("unreachable \(s())")
-}
-func unreachable() -> Never {
- fatalError("unreachable")
-}
-
extension Substring {
var string: String { String(self) }
}
diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index 9edee5de7..8dbcb9026 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -5,9 +5,10 @@ extension Compiler {
var options: MatchingOptions
var builder = _MatchingEngine.Program.Builder()
- mutating func finish() -> _MatchingEngine.Program {
+ mutating func finish(
+ ) throws -> _MatchingEngine.Program {
builder.buildAccept()
- return builder.assemble()
+ return try builder.assemble()
}
}
}
@@ -90,7 +91,7 @@ extension Compiler.ByteCodeGen {
case .resetStartOfMatch:
// FIXME: Figure out how to communicate this out
- throw unsupported(#"\K (reset/keep assertion)"#)
+ throw Unsupported(#"\K (reset/keep assertion)"#)
case .firstMatchingPositionInSubject:
// TODO: We can probably build a nice model with API here
@@ -100,11 +101,11 @@ extension Compiler.ByteCodeGen {
case .textSegment:
// This we should be able to do!
- throw unsupported(#"\y (text segment)"#)
+ throw Unsupported(#"\y (text segment)"#)
case .notTextSegment:
// This we should be able to do!
- throw unsupported(#"\Y (not text segment)"#)
+ throw Unsupported(#"\Y (not text segment)"#)
case .startOfLine:
builder.buildAssert { (input, pos, bounds) in
@@ -204,7 +205,7 @@ extension Compiler.ByteCodeGen {
_ child: DSLTree.Node
) throws {
guard kind.forwards else {
- throw unsupported("backwards assertions")
+ throw Unsupported("backwards assertions")
}
let positive = kind.positive
@@ -259,7 +260,7 @@ extension Compiler.ByteCodeGen {
switch kind {
case .lookahead, .negativeLookahead,
.lookbehind, .negativeLookbehind:
- fatalError("unreachable")
+ throw Unreachable("TODO: reason")
case .capture, .namedCapture:
let cap = builder.makeCapture()
@@ -299,7 +300,7 @@ extension Compiler.ByteCodeGen {
// Ok
break
default:
- fatalError("unreachable?")
+ throw Unreachable("TODO: reason")
}
// Compiler and/or parser should enforce these invariants
diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift
index 881a8a898..64491b8e2 100644
--- a/Sources/_StringProcessing/Compiler.swift
+++ b/Sources/_StringProcessing/Compiler.swift
@@ -34,7 +34,7 @@ class Compiler {
// TODO: Handle global options
var codegen = ByteCodeGen(options: options)
try codegen.emitNode(tree.root)
- let program = codegen.finish()
+ let program = try codegen.finish()
return RegexProgram(program: program)
}
}
diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift
index 1feb83410..3979dab34 100644
--- a/Sources/_StringProcessing/ConsumerInterface.swift
+++ b/Sources/_StringProcessing/ConsumerInterface.swift
@@ -11,58 +11,6 @@
import _MatchingEngine
-struct Unsupported: Error, CustomStringConvertible {
- var message: String
- var file: String
- var line: Int
-
- var description: String { """
- Unsupported: '\(message)'
- \(file):\(line)
- """
- }
-
- init(
- _ s: String,
- file: StaticString = #file,
- line: UInt = #line
- ) {
- self.message = s
- self.file = file.description
- self.line = Int(asserting: line)
- }
-}
-
-// TODO: Remove
-func unsupported(
- _ s: String,
- file: StaticString = #file,
- line: UInt = #line
-) -> Unsupported {
- return Unsupported(s, file: file, line: line)
-}
-
-struct Unreachable: Error, CustomStringConvertible {
- var message: String
- var file: String
- var line: Int
-
- var description: String { """
- Unreachable: '\(message)'
- \(file):\(line)
- """
- }
-}
-
-func unreachable(
- _ s: String,
- file: StaticString = #file,
- line: Int = #line
-) -> Unreachable {
- return Unreachable(
- message: s, file: String(describing: file), line: line)
-}
-
extension DSLTree.Node {
/// Attempt to generate a consumer from this AST node
///
@@ -233,10 +181,10 @@ extension DSLTree.CustomCharacterClass.Member {
case let .range(low, high):
// TODO:
guard let lhs = low.literalCharacterValue else {
- throw unsupported("\(low) in range")
+ throw Unsupported("\(low) in range")
}
guard let rhs = high.literalCharacterValue else {
- throw unsupported("\(high) in range")
+ throw Unsupported("\(high) in range")
}
return { input, bounds in
@@ -315,10 +263,10 @@ extension AST.CustomCharacterClass.Member {
case .range(let r):
guard let lhs = r.lhs.literalCharacterValue else {
- throw unsupported("\(r.lhs) in range")
+ throw Unsupported("\(r.lhs) in range")
}
guard let rhs = r.rhs.literalCharacterValue else {
- throw unsupported("\(r.rhs) in range")
+ throw Unsupported("\(r.rhs) in range")
}
return { input, bounds in
@@ -333,7 +281,7 @@ extension AST.CustomCharacterClass.Member {
case .atom(let atom):
guard let gen = try atom.generateConsumer(opts) else {
- throw unsupported("TODO")
+ throw Unsupported("TODO")
}
return gen
@@ -352,7 +300,8 @@ extension AST.CustomCharacterClass.Member {
}
case .trivia:
- throw unreachable("Should have been stripped by caller")
+ throw Unreachable(
+ "Should have been stripped by caller")
case .setOperation(let lhs, let op, let rhs):
// TODO: We should probably have a component type
@@ -520,22 +469,23 @@ extension AST.Atom.CharacterProperty {
return value ? cons : invert(cons)
case .script(let s):
- throw unsupported("TODO: Map script: \(s)")
+ throw Unsupported("TODO: Map script: \(s)")
case .scriptExtension(let s):
- throw unsupported("TODO: Map script: \(s)")
+ throw Unsupported("TODO: Map script: \(s)")
case .posix(let p):
return p.generateConsumer(opts)
case .pcreSpecial(let s):
- throw unsupported("TODO: map PCRE special: \(s)")
+ throw Unsupported("TODO: map PCRE special: \(s)")
case .onigurumaSpecial(let s):
- throw unsupported("TODO: map Oniguruma special: \(s)")
+ throw Unsupported("TODO: map Oniguruma special: \(s)")
case let .other(key, value):
- throw unsupported("TODO: map other \(key ?? "")=\(value)")
+ throw Unsupported(
+ "TODO: map other \(key ?? "")=\(value)")
}
}()
@@ -593,7 +543,8 @@ extension Unicode.BinaryProperty {
if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) {
return consumeScalarProp(\.isEmojiModifierBase)
} else {
- throw unsupported("isEmojiModifierBase on old OSes")
+ throw Unsupported(
+ "isEmojiModifierBase on old OSes")
}
case .emojiComponent:
break
@@ -601,19 +552,20 @@ extension Unicode.BinaryProperty {
if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) {
return consumeScalarProp(\.isEmojiModifier)
} else {
- throw unsupported("isEmojiModifier on old OSes")
+ throw Unsupported("isEmojiModifier on old OSes")
}
case .emoji:
if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) {
return consumeScalarProp(\.isEmoji)
} else {
- throw unsupported("isEmoji on old OSes")
+ throw Unsupported("isEmoji on old OSes")
}
case .emojiPresentation:
if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) {
return consumeScalarProp(\.isEmojiPresentation)
} else {
- throw unsupported("isEmojiPresentation on old OSes")
+ throw Unsupported(
+ "isEmojiPresentation on old OSes")
}
case .extender:
return consumeScalarProp(\.isExtender)
@@ -701,10 +653,10 @@ extension Unicode.BinaryProperty {
return consumeScalarProp(\.isXIDStart)
case .expandsOnNFC, .expandsOnNFD, .expandsOnNFKD,
.expandsOnNFKC:
- throw unsupported("Unicode-deprecated: \(self)")
+ throw Unsupported("Unicode-deprecated: \(self)")
}
- throw unsupported("TODO: map prop \(self)")
+ throw Unsupported("TODO: map prop \(self)")
}
}
@@ -802,7 +754,8 @@ extension Unicode.ExtendedGeneralCategory {
])
case .casedLetter:
- throw unsupported("TODO: cased letter? not the property?")
+ throw Unsupported(
+ "TODO: cased letter? not the property?")
case .control:
return consumeScalarGC(.control)
diff --git a/Sources/_StringProcessing/Legacy/LegacyCompile.swift b/Sources/_StringProcessing/Legacy/LegacyCompile.swift
index ac0ec6a45..c3bed55c8 100644
--- a/Sources/_StringProcessing/Legacy/LegacyCompile.swift
+++ b/Sources/_StringProcessing/Legacy/LegacyCompile.swift
@@ -51,7 +51,7 @@ func compile(
instructions.append(.any)
return
default:
- throw unsupported("Unsupported: \(a)")
+ throw Unsupported("Unsupported: \(a)")
}
case let .group(kind, child):
@@ -68,7 +68,7 @@ func compile(
return
default:
- throw unsupported("Unsupported group \(kind)")
+ throw Unsupported("Unsupported group \(kind)")
}
case let .groupTransform(kind, child, transform) where kind == .capture:
@@ -78,7 +78,8 @@ func compile(
return
case let .groupTransform(kind, _, _):
- throw unsupported("Unsupported group transform \(kind)")
+ throw Unsupported(
+ "Unsupported group transform \(kind)")
case let .concatenation(children):
let childrenHaveCaptures = children.any(\.hasCapture)
@@ -224,7 +225,8 @@ func compile(
}
return
default:
- throw unsupported("Unsupported: \((amount, kind))")
+ throw Unsupported(
+ "Unsupported: \((amount, kind))")
}
case let .alternation(children):
@@ -292,22 +294,22 @@ func compile(
return
case .conditional:
- throw unsupported("Conditionals")
+ throw Unsupported("Conditionals")
case .absentFunction:
- throw unsupported("Absent functions")
+ throw Unsupported("Absent functions")
case .customCharacterClass:
- fatalError("unreachable")
+ throw Unreachable("TODO: reason")
case let .atom(a) where a.characterClass != nil:
- fatalError("unreachable")
+ throw Unreachable("TODO: reason")
case let .convertedRegexLiteral(node, _):
try compileNode(node)
case .characterPredicate, .consumer, .consumerValidator:
- throw unsupported("DSL extensions")
+ throw Unsupported("DSL extensions")
case let .regexLiteral(re):
try compileNode(re.dslTreeNode)
diff --git a/Sources/_StringProcessing/RegexDSL/ASTConversion.swift b/Sources/_StringProcessing/RegexDSL/ASTConversion.swift
new file mode 100644
index 000000000..7364f8048
--- /dev/null
+++ b/Sources/_StringProcessing/RegexDSL/ASTConversion.swift
@@ -0,0 +1,212 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+import _MatchingEngine
+
+extension AST {
+  /// The DSL tree for this AST: the converted root plus the global options' DSL form (nil when absent).
+  var dslTree: DSLTree {
+    DSLTree(root.dslTreeNode, options: globalOptions?.dslTreeOptions)
+  }
+}
+
+extension AST.GlobalMatchingOptionSequence {
+  /// DSL-tree options for this sequence. TODO: map options; for now always default-initialized.
+  var dslTreeOptions: DSLTree.Options {
+    DSLTree.Options()
+  }
+}
+
+extension AST.Node {
+  /// Converts an AST node to a `convertedRegexLiteral` node pairing its DSL-tree form with this AST node.
+  var dslTreeNode: DSLTree.Node {
+    func wrap(_ node: DSLTree.Node) -> DSLTree.Node {
+      switch node {
+      case .convertedRegexLiteral:
+        // Already wrapped (a single-child concatenation returns its child's wrapped node). FIXME: DSL can have one item concats
+//        assertionFailure("Double wrapping?")
+        return node
+      default:
+        break
+      }
+      // TODO: Should we do this for the
+      // single-concatenation child too, or should
+      // we wrap _that_?
+      return .convertedRegexLiteral(node, self)
+    }
+
+    // Convert the top-level node without wrapping
+    func convert() -> DSLTree.Node {
+      switch self {
+      case let .alternation(v):
+        let children = v.children.map(\.dslTreeNode)
+        return .alternation(children)
+
+      case let .concatenation(v):
+        // Coalesce adjacent children who can produce a
+        // string literal representation
+        let astChildren = v.children
+        func coalesce(
+          _ idx: Array<AST.Node>.Index
+        ) -> (Array<AST.Node>.Index, String)? {
+          var result = ""
+          var idx = idx
+          while idx < astChildren.endIndex {
+            let atom: AST.Atom? = astChildren[idx].as()
+
+            // TODO: For printing, nice to coalesce
+            // scalars literals too. We likely need a different
+            // approach even before we have a better IR.
+            guard let char = atom?.singleCharacter else {
+              break
+            }
+            result.append(char)
+            astChildren.formIndex(after: &idx)
+          }
+          return result.count <= 1 ? nil : (idx, result) // nil unless at least two characters were gathered
+        }
+
+        // No need to nest single children concatenations
+        if astChildren.count == 1 {
+          return astChildren.first!.dslTreeNode
+        }
+
+        // Check for a single child post-coalescing
+        if let (idx, str) = coalesce(astChildren.startIndex),
+           idx == astChildren.endIndex
+        {
+          return .quotedLiteral(str)
+        }
+
+        // Coalesce adjacent string children
+        var curIdx = astChildren.startIndex
+        var children: [DSLTree.Node] = []
+        while curIdx < astChildren.endIndex {
+          if let (nextIdx, str) = coalesce(curIdx) {
+            // TODO: Track source info...
+            children.append(.quotedLiteral(str))
+            curIdx = nextIdx
+          } else {
+            children.append(astChildren[curIdx].dslTreeNode)
+            astChildren.formIndex(after: &curIdx) // curIdx indexes astChildren, so advance it through that collection
+          }
+        }
+        return .concatenation(children)
+
+      case let .group(v):
+        let child = v.child.dslTreeNode
+        return .group(v.kind.value, child)
+
+      case let .conditional(v):
+        let trueBranch = v.trueBranch.dslTreeNode
+        let falseBranch = v.falseBranch.dslTreeNode
+        return .conditional(
+          v.condition.kind, trueBranch, falseBranch)
+
+      case let .quantification(v):
+        let child = v.child.dslTreeNode
+        return .quantification(
+          v.amount.value, v.kind.value, child)
+
+      case let .quote(v):
+        return .quotedLiteral(v.literal)
+
+      case let .trivia(v):
+        return .trivia(v.contents)
+
+      case let .atom(v):
+        return .atom(v.dslTreeAtom)
+
+      case let .customCharacterClass(ccc):
+        return .customCharacterClass(ccc.dslTreeClass)
+
+      case .empty(_):
+        return .empty
+
+      case let .groupTransform(v, transform):
+        let child = v.child.dslTreeNode
+        return .groupTransform(
+          v.kind.value, child, transform)
+
+      case let .absentFunction(a):
+        // TODO: What should this map to?
+        return .absentFunction(a)
+      }
+    }
+
+    let converted = convert()
+    return wrap(converted)
+  }
+}
+
+extension AST.CustomCharacterClass {
+  /// The DSL-tree form of this custom character class: each member is
+  /// lowered in order and the class's inversion flag is carried over.
+  var dslTreeClass: DSLTree.CustomCharacterClass {
+    typealias Lowered = DSLTree.CustomCharacterClass
+
+    // TODO: Not quite 1-1
+    func lower(_ member: Member) -> Lowered.Member {
+      switch member {
+      case .custom(let nested):
+        return .custom(nested.dslTreeClass)
+
+      case .range(let bounds):
+        let low = bounds.lhs.dslTreeAtom
+        let high = bounds.rhs.dslTreeAtom
+        return .range(low, high)
+
+      case .atom(let atom):
+        return .atom(atom.dslTreeAtom)
+
+      case .quote(let quote):
+        return .quotedLiteral(quote.literal)
+
+      case .trivia(let trivia):
+        return .trivia(trivia.contents)
+
+      case .setOperation(let left, let op, let right):
+        // Operands are member sequences; wrap each in a non-inverted class.
+        let lhsClass = Lowered(members: left.map(lower), isInverted: false)
+        let rhsClass = Lowered(members: right.map(lower), isInverted: false)
+
+        switch op.value {
+        case .subtraction:
+          return .subtraction(lhsClass, rhsClass)
+        case .intersection:
+          return .intersection(lhsClass, rhsClass)
+        case .symmetricDifference:
+          return .symmetricDifference(lhsClass, rhsClass)
+        }
+      }
+    }
+
+    let loweredMembers = members.map(lower)
+    return Lowered(members: loweredMembers, isInverted: isInverted)
+  }
+}
+
+extension AST.Atom {
+  /// The DSL-tree form of this atom. Assertions are checked first, then the
+  /// directly mapped kinds; any other kind is carried along as `.unconverted`.
+  var dslTreeAtom: DSLTree.Atom {
+    if let assertion = assertionKind { return .assertion(assertion) }
+
+    switch kind {
+    case .any: return .any
+    case .char(let character): return .char(character)
+    case .scalar(let scalar): return .scalar(scalar)
+    case .backreference(let reference): return .backreference(reference)
+
+    default: return .unconverted(self)
+    }
+  }
+}
diff --git a/Sources/_StringProcessing/RegexDSL/Core.swift b/Sources/_StringProcessing/RegexDSL/Core.swift
index b426d1565..136182be3 100644
--- a/Sources/_StringProcessing/RegexDSL/Core.swift
+++ b/Sources/_StringProcessing/RegexDSL/Core.swift
@@ -11,15 +11,6 @@
import _MatchingEngine
-@dynamicMemberLookup
-public struct RegexMatch {
- public let range: Range
- public let match: Match
-
- public subscript(dynamicMember keyPath: KeyPath) -> T {
- match[keyPath: keyPath]
- }
-}
/// A type that represents a regular expression.
public protocol RegexProtocol {
@@ -112,83 +103,7 @@ public struct Regex: RegexProtocol {
}
}
-extension RegexProtocol {
- public func match(in input: String) -> RegexMatch? {
- _match(
- input, in: input.startIndex.. RegexMatch? {
- _match(
- input.base, in: input.startIndex..,
- mode: MatchMode = .wholeString
- ) -> RegexMatch? {
- // TODO: Remove this branch when the matching engine supports captures.
- if regex.hasCapture {
- let vm = HareVM(program: regex.program.legacyLoweredProgram)
- guard let (range, captures) = vm.execute(
- input: input, in: inputRange, mode: mode
- )?.destructure else {
- return nil
- }
- let convertedMatch: Match
- if Match.self == (Substring, DynamicCaptures).self {
- convertedMatch = (input[range], DynamicCaptures(captures)) as! Match
- } else {
- let typeErasedMatch = captures.matchValue(
- withWholeMatch: input[range]
- )
- convertedMatch = typeErasedMatch as! Match
- }
- return RegexMatch(range: range, match: convertedMatch)
- }
-
- let executor = Executor(program: regex.program.loweredProgram)
- guard let result = executor.execute(
- input: input, in: inputRange, mode: mode
- ) else {
- return nil
- }
- let convertedMatch: Match
- if Match.self == (Substring, DynamicCaptures).self {
- convertedMatch = (input[result.range], DynamicCaptures.empty) as! Match
- } else {
- assert(Match.self == Substring.self)
- convertedMatch = input[result.range] as! Match
- }
- return RegexMatch(range: result.range, match: convertedMatch)
- }
-}
-
-extension String {
- public func match(_ regex: R) -> RegexMatch? {
- regex.match(in: self)
- }
- public func match(
- @RegexBuilder _ content: () -> R
- ) -> RegexMatch? {
- match(content())
- }
-}
-extension Substring {
- public func match(_ regex: R) -> RegexMatch? {
- regex.match(in: self)
- }
-
- public func match(
- @RegexBuilder _ content: () -> R
- ) -> RegexMatch? {
- match(content())
- }
-}
public struct MockRegexLiteral: RegexProtocol {
public typealias MatchValue = Substring
public let regex: Regex
diff --git a/Sources/_StringProcessing/RegexDSL/DSLTree.swift b/Sources/_StringProcessing/RegexDSL/DSLTree.swift
index 5e79575bb..b3b14b1d1 100644
--- a/Sources/_StringProcessing/RegexDSL/DSLTree.swift
+++ b/Sources/_StringProcessing/RegexDSL/DSLTree.swift
@@ -185,192 +185,6 @@ extension DSLTree.Node {
}
}
-extension AST.Node {
- /// Converts an AST node to a `convertedRegexLiteral` node.
- var dslTreeNode: DSLTree.Node {
- func wrap(_ node: DSLTree.Node) -> DSLTree.Node {
- switch node {
- case .convertedRegexLiteral:
- // FIXME: how can this happen?
-// assertionFailure("Double wrapping?")
- return node
- default:
- break
- }
- // TODO: Should we do this for the
- // single-concatenation child too, or should?
- // we wrap _that_?
- return .convertedRegexLiteral(node, self)
- }
-
- // Convert the top-level node without wrapping
- func convert() -> DSLTree.Node {
- switch self {
- case let .alternation(v):
- let children = v.children.map(\.dslTreeNode)
- return .alternation(children)
-
- case let .concatenation(v):
- // Coalesce adjacent children who can produce a
- // string literal representation
- let astChildren = v.children
- func coalesce(
- _ idx: Array.Index
- ) -> (Array.Index, String)? {
- var result = ""
- var idx = idx
- while idx < astChildren.endIndex {
- let atom: AST.Atom? = astChildren[idx].as()
-
- // TODO: For printing, nice to coalesce
- // scalars literals too. We likely need a different
- // approach even before we have a better IR.
- guard let char = atom?.singleCharacter else {
- break
- }
- result.append(char)
- astChildren.formIndex(after: &idx)
- }
- return result.count <= 1 ? nil : (idx, result)
- }
-
- // No need to nest single children concatenations
- if astChildren.count == 1 {
- return astChildren.first!.dslTreeNode
- }
-
- // Check for a single child post-coalescing
- if let (idx, str) = coalesce(astChildren.startIndex),
- idx == astChildren.endIndex
- {
- return .quotedLiteral(str)
- }
-
- // Coalesce adjacent string children
- var curIdx = astChildren.startIndex
- var children = Array()
- while curIdx < astChildren.endIndex {
- if let (nextIdx, str) = coalesce(curIdx) {
- // TODO: Track source info...
- children.append(.quotedLiteral(str))
- curIdx = nextIdx
- } else {
- children.append(astChildren[curIdx].dslTreeNode)
- children.formIndex(after: &curIdx)
- }
- }
- return .concatenation(children)
-
- case let .group(v):
- let child = v.child.dslTreeNode
- return .group(v.kind.value, child)
-
- case let .conditional(v):
- let trueBranch = v.trueBranch.dslTreeNode
- let falseBranch = v.falseBranch.dslTreeNode
- return .conditional(
- v.condition.kind, trueBranch, falseBranch)
-
- case let .quantification(v):
- let child = v.child.dslTreeNode
- return .quantification(
- v.amount.value, v.kind.value, child)
-
- case let .quote(v):
- return .quotedLiteral(v.literal)
-
- case let .trivia(v):
- return .trivia(v.contents)
-
- case let .atom(v):
- return .atom(v.dslTreeAtom)
-
- case let .customCharacterClass(ccc):
- return .customCharacterClass(ccc.dslTreeClass)
-
- case .empty(_):
- return .empty
-
- case let .groupTransform(v, transform):
- let child = v.child.dslTreeNode
- return .groupTransform(
- v.kind.value, child, transform)
-
- case let .absentFunction(a):
- // TODO: What should this map to?
- return .absentFunction(a)
- }
- }
-
- let converted = convert()
- return wrap(converted)
- }
-}
-
-extension AST.CustomCharacterClass {
- var dslTreeClass: DSLTree.CustomCharacterClass {
- // TODO: Not quite 1-1
- func convert(
- _ member: Member
- ) -> DSLTree.CustomCharacterClass.Member {
- switch member {
- case let .custom(ccc):
- return .custom(ccc.dslTreeClass)
-
- case let .range(r):
- return .range(
- r.lhs.dslTreeAtom, r.rhs.dslTreeAtom)
-
- case let .atom(a):
- return .atom(a.dslTreeAtom)
-
- case let .quote(q):
- return .quotedLiteral(q.literal)
-
- case let .setOperation(lhs, op, rhs):
- let lhs = DSLTree.CustomCharacterClass(
- members: lhs.map(convert),
- isInverted: false)
- let rhs = DSLTree.CustomCharacterClass(
- members: rhs.map(convert),
- isInverted: false)
-
- switch op.value {
- case .subtraction:
- return .subtraction(lhs, rhs)
- case .intersection:
- return .intersection(lhs, rhs)
- case .symmetricDifference:
- return .symmetricDifference(lhs, rhs)
- }
- case let .trivia(t):
- return .trivia(t.contents)
- }
- }
-
- return .init(
- members: members.map(convert),
- isInverted: self.isInverted)
- }
-}
-
-extension AST.Atom {
- var dslTreeAtom: DSLTree.Atom {
- if let kind = assertionKind {
- return .assertion(kind)
- }
-
- switch self.kind {
- case let .char(c): return .char(c)
- case let .scalar(s): return .scalar(s)
- case .any: return .any
- case let .backreference(r): return .backreference(r)
-
- default: return .unconverted(self)
- }
- }
-}
-
extension DSLTree.Atom {
// Return the Character or promote a scalar to a Character
var literalCharacterValue: Character? {
@@ -398,20 +212,6 @@ extension DSLTree {
}
}
-extension AST.GlobalMatchingOptionSequence {
- var dslTreeOptions: DSLTree.Options {
- // TODO: map options
- return .init()
- }
-}
-
-extension AST {
- var dslTree: DSLTree {
- return DSLTree(
- root.dslTreeNode, options: globalOptions?.dslTreeOptions)
- }
-}
-
extension DSLTree {
var hasCapture: Bool {
root.hasCapture
diff --git a/Sources/_StringProcessing/RegexDSL/Match.swift b/Sources/_StringProcessing/RegexDSL/Match.swift
new file mode 100644
index 000000000..a2e30c3a3
--- /dev/null
+++ b/Sources/_StringProcessing/RegexDSL/Match.swift
@@ -0,0 +1,98 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+/// The result of a successful match: the matched range and the typed match value.
+@dynamicMemberLookup
+public struct RegexMatch<Match> {
+  /// The range of the match within the searched string.
+  public let range: Range<String.Index>
+  /// The match value; its concrete type is chosen by the regex that produced it.
+  public let match: Match
+
+  /// Forwards key-path member lookups to `match`, so `result.x` reads `result.match.x`.
+  public subscript<T>(dynamicMember keyPath: KeyPath<Match, T>) -> T {
+    match[keyPath: keyPath]
+  }
+}
+
+extension RegexProtocol {
+  /// Matches this regex against the full extent of `input`.
+  public func match(in input: String) -> RegexMatch<Match>? {
+    _match(
+      input, in: input.startIndex..<input.endIndex)
+  }
+  /// Matches this regex against `input`'s range within its base string.
+  public func match(in input: Substring) -> RegexMatch<Match>? {
+    _match(
+      input.base, in: input.startIndex..<input.endIndex)
+  }
+
+  /// Runs the match over `inputRange` of `input`; returns nil when execution produces no result.
+  func _match(
+    _ input: String,
+    in inputRange: Range<String.Index>,
+    mode: MatchMode = .wholeString
+  ) -> RegexMatch<Match>? {
+    // TODO: Remove this branch when the matching engine supports captures.
+    if regex.hasCapture {
+      let vm = HareVM(program: regex.program.legacyLoweredProgram)
+      guard let (range, captures) = vm.execute(
+        input: input, in: inputRange, mode: mode
+      )?.destructure else {
+        return nil
+      }
+      let convertedMatch: Match
+      if Match.self == (Substring, DynamicCaptures).self { // dynamic shape: whole match paired with the captures
+        convertedMatch = (input[range], DynamicCaptures(captures)) as! Match
+      } else {
+        let typeErasedMatch = captures.matchValue(
+          withWholeMatch: input[range]
+        )
+        convertedMatch = typeErasedMatch as! Match
+      }
+      return RegexMatch(range: range, match: convertedMatch)
+    }
+
+    let executor = Executor(program: regex.program.loweredProgram)
+    guard let result = executor.execute(
+      input: input, in: inputRange, mode: mode
+    ) else {
+      return nil
+    }
+    let convertedMatch: Match
+    if Match.self == (Substring, DynamicCaptures).self { // no captures on this path, so the dynamic shape gets `.empty`
+      convertedMatch = (input[result.range], DynamicCaptures.empty) as! Match
+    } else {
+      assert(Match.self == Substring.self)
+      convertedMatch = input[result.range] as! Match
+    }
+    return RegexMatch(range: result.range, match: convertedMatch)
+  }
+}
+
+extension String {
+  /// Matches `regex` against this string by forwarding to `regex.match(in:)`.
+  public func match<R: RegexProtocol>(_ regex: R) -> RegexMatch<R.Match>? {
+    regex.match(in: self)
+  }
+
+  /// Builds a regex from `content` and matches it against this string.
+  public func match<R: RegexProtocol>(
+    @RegexBuilder _ content: () -> R
+  ) -> RegexMatch<R.Match>? {
+    match(content())
+  }
+}
+extension Substring {
+  /// Matches `regex` against this substring by forwarding to `regex.match(in:)`.
+  public func match<R: RegexProtocol>(_ regex: R) -> RegexMatch<R.Match>? {
+    regex.match(in: self)
+  }
+
+  /// Builds a regex from `content` and matches it against this substring.
+  public func match<R: RegexProtocol>(
+    @RegexBuilder _ content: () -> R
+  ) -> RegexMatch<R.Match>? {
+    match(content())
+  }
+}
diff --git a/Tests/MatchingEngineTests/MatchingEngineTests.swift b/Tests/MatchingEngineTests/MatchingEngineTests.swift
index d69eb1cf9..aac65733d 100644
--- a/Tests/MatchingEngineTests/MatchingEngineTests.swift
+++ b/Tests/MatchingEngineTests/MatchingEngineTests.swift
@@ -144,7 +144,7 @@ func makeEngine(
) -> Engine {
var builder = Program.Builder()
constructor(&builder)
- let program = builder.assemble()
+ let program = try! builder.assemble()
let engine = Engine(program)
show(engine)
return engine