diff --git a/Sources/Exercises/Participants/PEGParticipant.swift b/Sources/Exercises/Participants/PEGParticipant.swift index 670f286a8..2b8a597f5 100644 --- a/Sources/Exercises/Participants/PEGParticipant.swift +++ b/Sources/Exercises/Participants/PEGParticipant.swift @@ -40,7 +40,7 @@ private func graphemeBreakPropertyData(forLine line: String) -> GraphemeBreakEnt let program = PEG.Program(start: "Entry", environment: ["Entry": entry]) let vm = program.compile(for: String.self) - let engine = program.transpile(for: String.self) + let engine = try! program.transpile(for: String.self) _ = (vm, engine) fatalError("Unsupported") diff --git a/Sources/Prototypes/PEG/PEGTranspile.swift b/Sources/Prototypes/PEG/PEGTranspile.swift index 440310b08..a6f724a03 100644 --- a/Sources/Prototypes/PEG/PEGTranspile.swift +++ b/Sources/Prototypes/PEG/PEGTranspile.swift @@ -13,7 +13,7 @@ import _MatchingEngine extension PEG.VM { typealias MEProgram = _MatchingEngine.Program - func transpile() -> MEProgram { + func transpile() throws -> MEProgram { typealias Builder = MEProgram.Builder var builder = MEProgram.Builder() @@ -106,14 +106,14 @@ extension PEG.VM { } } - return builder.assemble() + return try builder.assemble() } } extension PEG.Program { public func transpile( for input: Input.Type = Input.self - ) -> Engine where Input.Element == Element { - Engine(compile(for: input).vm.transpile()) + ) throws -> Engine where Input.Element == Element { + try Engine(compile(for: input).vm.transpile()) } } diff --git a/Sources/_MatchingEngine/Engine/Builder.swift b/Sources/_MatchingEngine/Engine/Builder.swift index d75593a0d..f6917b142 100644 --- a/Sources/_MatchingEngine/Engine/Builder.swift +++ b/Sources/_MatchingEngine/Engine/Builder.swift @@ -230,7 +230,7 @@ extension Program.Builder { // TODO: Mutating because of fail address fixup, drop when // that's removed - public mutating func assemble() -> Program { + public mutating func assemble() throws -> Program { // TODO: This will add a fail instruction at the end every // time it's assembled. Better to do to the local instruction // list copy, but that complicates logic. It's possible we @@ -262,12 +262,12 @@ extension Program.Builder { case .splitSaving: guard let fix2 = tok.second else { - fatalError("unreachable") + throw Unreachable("TODO: reason") } let saving = addressTokens[fix2.rawValue]! payload = .init(addr: addr, addr2: saving) - default: fatalError("unreachable") + default: throw Unreachable("TODO: reason") } diff --git a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift index f3a892d70..b6b882ad2 100644 --- a/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift +++ b/Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift @@ -1785,7 +1785,7 @@ extension Source { if customCC { return .char(char) } - fatalError("unreachable") + throw Unreachable("TODO: reason") // (sometimes) special metacharacters case ".": return customCC ? .char(".") : .any diff --git a/Sources/_MatchingEngine/Regex/Parse/Parse.swift b/Sources/_MatchingEngine/Regex/Parse/Parse.swift index 84c703068..21012d29c 100644 --- a/Sources/_MatchingEngine/Regex/Parse/Parse.swift +++ b/Sources/_MatchingEngine/Regex/Parse/Parse.swift @@ -223,7 +223,7 @@ extension Parser { continue } - fatalError("unreachable?") + throw Unreachable("TODO: reason") } guard !result.isEmpty else { return .empty(.init(loc(_start))) diff --git a/Sources/_MatchingEngine/Utility/Errors.swift b/Sources/_MatchingEngine/Utility/Errors.swift new file mode 100644 index 000000000..8dbe67fa0 --- /dev/null +++ b/Sources/_MatchingEngine/Utility/Errors.swift @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +public struct Unsupported: Error, CustomStringConvertible { + var message: String + var file: String + var line: Int + + public var description: String { """ + Unsupported: '\(message)' + \(file):\(line) + """ + } + + public init( + _ s: String, + file: StaticString = #file, + line: UInt = #line + ) { + self.message = s + self.file = file.description + self.line = Int(asserting: line) + } +} + +public struct Unreachable: Error, CustomStringConvertible { + var message: String + var file: String + var line: Int + + public var description: String { """ + Unreachable: '\(message)' + \(file):\(line) + """ + } + + public init( + _ s: String, + file: StaticString = #file, + line: UInt = #line + ) { + self.message = s + self.file = file.description + self.line = Int(asserting: line) + } +} diff --git a/Sources/_MatchingEngine/Utility/Misc.swift b/Sources/_MatchingEngine/Utility/Misc.swift index 3a53eda68..bd1e395b5 100644 --- a/Sources/_MatchingEngine/Utility/Misc.swift +++ b/Sources/_MatchingEngine/Utility/Misc.swift @@ -15,16 +15,6 @@ extension FixedWidthInteger { } } -// TODO: Replace all fatal error unreachables with these calls. -// We will likely want to convert them to unhandleable throws -// or something similar. -func unreachable(_ s: @autoclosure () -> String) -> Never { - fatalError("unreachable \(s())") -} -func unreachable() -> Never { - fatalError("unreachable") -} - extension Substring { var string: String { String(self) } } diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 9edee5de7..8dbcb9026 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -5,9 +5,10 @@ extension Compiler { var options: MatchingOptions var builder = _MatchingEngine.Program.Builder() - mutating func finish() -> _MatchingEngine.Program { + mutating func finish( + ) throws -> _MatchingEngine.Program { builder.buildAccept() - return builder.assemble() + return try builder.assemble() } } } @@ -90,7 +91,7 @@ extension Compiler.ByteCodeGen { case .resetStartOfMatch: // FIXME: Figure out how to communicate this out - throw unsupported(#"\K (reset/keep assertion)"#) + throw Unsupported(#"\K (reset/keep assertion)"#) case .firstMatchingPositionInSubject: // TODO: We can probably build a nice model with API here @@ -100,11 +101,11 @@ extension Compiler.ByteCodeGen { case .textSegment: // This we should be able to do! - throw unsupported(#"\y (text segment)"#) + throw Unsupported(#"\y (text segment)"#) case .notTextSegment: // This we should be able to do! - throw unsupported(#"\Y (not text segment)"#) + throw Unsupported(#"\Y (not text segment)"#) case .startOfLine: builder.buildAssert { (input, pos, bounds) in @@ -204,7 +205,7 @@ extension Compiler.ByteCodeGen { _ child: DSLTree.Node ) throws { guard kind.forwards else { - throw unsupported("backwards assertions") + throw Unsupported("backwards assertions") } let positive = kind.positive @@ -259,7 +260,7 @@ extension Compiler.ByteCodeGen { switch kind { case .lookahead, .negativeLookahead, .lookbehind, .negativeLookbehind: - fatalError("unreachable") + throw Unreachable("TODO: reason") case .capture, .namedCapture: let cap = builder.makeCapture() @@ -299,7 +300,7 @@ extension Compiler.ByteCodeGen { // Ok break default: - fatalError("unreachable?") + throw Unreachable("TODO: reason") } // Compiler and/or parser should enforce these invariants diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index 881a8a898..64491b8e2 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -34,7 +34,7 @@ class Compiler { // TODO: Handle global options var codegen = ByteCodeGen(options: options) try codegen.emitNode(tree.root) - let program = codegen.finish() + let program = try codegen.finish() return RegexProgram(program: program) } } diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 1feb83410..3979dab34 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -11,58 +11,6 @@ import _MatchingEngine -struct Unsupported: Error, CustomStringConvertible { - var message: String - var file: String - var line: Int - - var description: String { """ - Unsupported: '\(message)' - \(file):\(line) - """ - } - - init( - _ s: String, - file: StaticString = #file, - line: UInt = #line - ) { - self.message = s - self.file = file.description - self.line = Int(asserting: line) - } -} - -// TODO: Remove -func unsupported( - _ s: String, - file: StaticString = #file, - line: UInt = #line -) -> Unsupported { - return Unsupported(s, file: file, line: line) -} - -struct Unreachable: Error, CustomStringConvertible { - var message: String - var file: String - var line: Int - - var description: String { """ - Unreachable: '\(message)' - \(file):\(line) - """ - } -} - -func unreachable( - _ s: String, - file: StaticString = #file, - line: Int = #line -) -> Unreachable { - return Unreachable( - message: s, file: String(describing: file), line: line) -} - extension DSLTree.Node { /// Attempt to generate a consumer from this AST node /// @@ -233,10 +181,10 @@ extension DSLTree.CustomCharacterClass.Member { case let .range(low, high): // TODO: guard let lhs = low.literalCharacterValue else { - throw unsupported("\(low) in range") + throw Unsupported("\(low) in range") } guard let rhs = high.literalCharacterValue else { - throw unsupported("\(high) in range") + throw Unsupported("\(high) in range") } return { input, bounds in @@ -315,10 +263,10 @@ extension AST.CustomCharacterClass.Member { case .range(let r): guard let lhs = r.lhs.literalCharacterValue else { - throw unsupported("\(r.lhs) in range") + throw Unsupported("\(r.lhs) in range") } guard let rhs = r.rhs.literalCharacterValue else { - throw unsupported("\(r.rhs) in range") + throw Unsupported("\(r.rhs) in range") } return { input, bounds in @@ -333,7 +281,7 @@ extension AST.CustomCharacterClass.Member { case .atom(let atom): guard let gen = try atom.generateConsumer(opts) else { - throw unsupported("TODO") + throw Unsupported("TODO") } return gen @@ -352,7 +300,8 @@ extension AST.CustomCharacterClass.Member { } case .trivia: - throw unreachable("Should have been stripped by caller") + throw Unreachable( + "Should have been stripped by caller") case .setOperation(let lhs, let op, let rhs): // TODO: We should probably have a component type @@ -520,22 +469,23 @@ extension AST.Atom.CharacterProperty { return value ? cons : invert(cons) case .script(let s): - throw unsupported("TODO: Map script: \(s)") + throw Unsupported("TODO: Map script: \(s)") case .scriptExtension(let s): - throw unsupported("TODO: Map script: \(s)") + throw Unsupported("TODO: Map script: \(s)") case .posix(let p): return p.generateConsumer(opts) case .pcreSpecial(let s): - throw unsupported("TODO: map PCRE special: \(s)") + throw Unsupported("TODO: map PCRE special: \(s)") case .onigurumaSpecial(let s): - throw unsupported("TODO: map Oniguruma special: \(s)") + throw Unsupported("TODO: map Oniguruma special: \(s)") case let .other(key, value): - throw unsupported("TODO: map other \(key ?? "")=\(value)") + throw Unsupported( + "TODO: map other \(key ?? "")=\(value)") } }() @@ -593,7 +543,8 @@ extension Unicode.BinaryProperty { if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { return consumeScalarProp(\.isEmojiModifierBase) } else { - throw unsupported("isEmojiModifierBase on old OSes") + throw Unsupported( + "isEmojiModifierBase on old OSes") } case .emojiComponent: break @@ -601,19 +552,20 @@ extension Unicode.BinaryProperty { if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { return consumeScalarProp(\.isEmojiModifier) } else { - throw unsupported("isEmojiModifier on old OSes") + throw Unsupported("isEmojiModifier on old OSes") } case .emoji: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { return consumeScalarProp(\.isEmoji) } else { - throw unsupported("isEmoji on old OSes") + throw Unsupported("isEmoji on old OSes") } case .emojiPresentation: if #available(macOS 10.12.2, iOS 10.2, tvOS 10.1, watchOS 3.1.1, *) { return consumeScalarProp(\.isEmojiPresentation) } else { - throw unsupported("isEmojiPresentation on old OSes") + throw Unsupported( + "isEmojiPresentation on old OSes") } case .extender: return consumeScalarProp(\.isExtender) @@ -701,10 +653,10 @@ extension Unicode.BinaryProperty { return consumeScalarProp(\.isXIDStart) case .expandsOnNFC, .expandsOnNFD, .expandsOnNFKD, .expandsOnNFKC: - throw unsupported("Unicode-deprecated: \(self)") + throw Unsupported("Unicode-deprecated: \(self)") } - throw unsupported("TODO: map prop \(self)") + throw Unsupported("TODO: map prop \(self)") } } @@ -802,7 +754,8 @@ extension Unicode.ExtendedGeneralCategory { ]) case .casedLetter: - throw unsupported("TODO: cased letter? not the property?") + throw Unsupported( + "TODO: cased letter? not the property?") case .control: return consumeScalarGC(.control) diff --git a/Sources/_StringProcessing/Legacy/LegacyCompile.swift b/Sources/_StringProcessing/Legacy/LegacyCompile.swift index ac0ec6a45..c3bed55c8 100644 --- a/Sources/_StringProcessing/Legacy/LegacyCompile.swift +++ b/Sources/_StringProcessing/Legacy/LegacyCompile.swift @@ -51,7 +51,7 @@ func compile( instructions.append(.any) return default: - throw unsupported("Unsupported: \(a)") + throw Unsupported("Unsupported: \(a)") } case let .group(kind, child): @@ -68,7 +68,7 @@ func compile( return default: - throw unsupported("Unsupported group \(kind)") + throw Unsupported("Unsupported group \(kind)") } case let .groupTransform(kind, child, transform) where kind == .capture: @@ -78,7 +78,8 @@ func compile( return case let .groupTransform(kind, _, _): - throw unsupported("Unsupported group transform \(kind)") + throw Unsupported( + "Unsupported group transform \(kind)") case let .concatenation(children): let childrenHaveCaptures = children.any(\.hasCapture) @@ -224,7 +225,8 @@ func compile( } return default: - throw unsupported("Unsupported: \((amount, kind))") + throw Unsupported( + "Unsupported: \((amount, kind))") } case let .alternation(children): @@ -292,22 +294,22 @@ func compile( return case .conditional: - throw unsupported("Conditionals") + throw Unsupported("Conditionals") case .absentFunction: - throw unsupported("Absent functions") + throw Unsupported("Absent functions") case .customCharacterClass: - fatalError("unreachable") + throw Unreachable("TODO: reason") case let .atom(a) where a.characterClass != nil: - fatalError("unreachable") + throw Unreachable("TODO: reason") case let .convertedRegexLiteral(node, _): try compileNode(node) case .characterPredicate, .consumer, .consumerValidator: - throw unsupported("DSL extensions") + throw Unsupported("DSL extensions") case let .regexLiteral(re): try compileNode(re.dslTreeNode) diff --git a/Sources/_StringProcessing/RegexDSL/ASTConversion.swift b/Sources/_StringProcessing/RegexDSL/ASTConversion.swift new file mode 100644 index 000000000..7364f8048 --- /dev/null +++ b/Sources/_StringProcessing/RegexDSL/ASTConversion.swift @@ -0,0 +1,212 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import _MatchingEngine + +extension AST { + var dslTree: DSLTree { + return DSLTree( + root.dslTreeNode, options: globalOptions?.dslTreeOptions) + } +} + +extension AST.GlobalMatchingOptionSequence { + var dslTreeOptions: DSLTree.Options { + // TODO: map options + return .init() + } +} + +extension AST.Node { + /// Converts an AST node to a `convertedRegexLiteral` node. + var dslTreeNode: DSLTree.Node { + func wrap(_ node: DSLTree.Node) -> DSLTree.Node { + switch node { + case .convertedRegexLiteral: + // FIXME: DSL can have one item concats +// assertionFailure("Double wrapping?") + return node + default: + break + } + // TODO: Should we do this for the + // single-concatenation child too, or should? + // we wrap _that_? + return .convertedRegexLiteral(node, self) + } + + // Convert the top-level node without wrapping + func convert() -> DSLTree.Node { + switch self { + case let .alternation(v): + let children = v.children.map(\.dslTreeNode) + return .alternation(children) + + case let .concatenation(v): + // Coalesce adjacent children who can produce a + // string literal representation + let astChildren = v.children + func coalesce( + _ idx: Array.Index + ) -> (Array.Index, String)? { + var result = "" + var idx = idx + while idx < astChildren.endIndex { + let atom: AST.Atom? = astChildren[idx].as() + + // TODO: For printing, nice to coalesce + // scalars literals too. We likely need a different + // approach even before we have a better IR. + guard let char = atom?.singleCharacter else { + break + } + result.append(char) + astChildren.formIndex(after: &idx) + } + return result.count <= 1 ? nil : (idx, result) + } + + // No need to nest single children concatenations + if astChildren.count == 1 { + return astChildren.first!.dslTreeNode + } + + // Check for a single child post-coalescing + if let (idx, str) = coalesce(astChildren.startIndex), + idx == astChildren.endIndex + { + return .quotedLiteral(str) + } + + // Coalesce adjacent string children + var curIdx = astChildren.startIndex + var children = Array() + while curIdx < astChildren.endIndex { + if let (nextIdx, str) = coalesce(curIdx) { + // TODO: Track source info... + children.append(.quotedLiteral(str)) + curIdx = nextIdx + } else { + children.append(astChildren[curIdx].dslTreeNode) + children.formIndex(after: &curIdx) + } + } + return .concatenation(children) + + case let .group(v): + let child = v.child.dslTreeNode + return .group(v.kind.value, child) + + case let .conditional(v): + let trueBranch = v.trueBranch.dslTreeNode + let falseBranch = v.falseBranch.dslTreeNode + return .conditional( + v.condition.kind, trueBranch, falseBranch) + + case let .quantification(v): + let child = v.child.dslTreeNode + return .quantification( + v.amount.value, v.kind.value, child) + + case let .quote(v): + return .quotedLiteral(v.literal) + + case let .trivia(v): + return .trivia(v.contents) + + case let .atom(v): + return .atom(v.dslTreeAtom) + + case let .customCharacterClass(ccc): + return .customCharacterClass(ccc.dslTreeClass) + + case .empty(_): + return .empty + + case let .groupTransform(v, transform): + let child = v.child.dslTreeNode + return .groupTransform( + v.kind.value, child, transform) + + case let .absentFunction(a): + // TODO: What should this map to? + return .absentFunction(a) + } + } + + let converted = convert() + return wrap(converted) + } +} + +extension AST.CustomCharacterClass { + var dslTreeClass: DSLTree.CustomCharacterClass { + // TODO: Not quite 1-1 + func convert( + _ member: Member + ) -> DSLTree.CustomCharacterClass.Member { + switch member { + case let .custom(ccc): + return .custom(ccc.dslTreeClass) + + case let .range(r): + return .range( + r.lhs.dslTreeAtom, r.rhs.dslTreeAtom) + + case let .atom(a): + return .atom(a.dslTreeAtom) + + case let .quote(q): + return .quotedLiteral(q.literal) + + case let .setOperation(lhs, op, rhs): + let lhs = DSLTree.CustomCharacterClass( + members: lhs.map(convert), + isInverted: false) + let rhs = DSLTree.CustomCharacterClass( + members: rhs.map(convert), + isInverted: false) + + switch op.value { + case .subtraction: + return .subtraction(lhs, rhs) + case .intersection: + return .intersection(lhs, rhs) + case .symmetricDifference: + return .symmetricDifference(lhs, rhs) + } + case let .trivia(t): + return .trivia(t.contents) + } + } + + return .init( + members: members.map(convert), + isInverted: self.isInverted) + } +} + +extension AST.Atom { + var dslTreeAtom: DSLTree.Atom { + if let kind = assertionKind { + return .assertion(kind) + } + + switch self.kind { + case let .char(c): return .char(c) + case let .scalar(s): return .scalar(s) + case .any: return .any + case let .backreference(r): return .backreference(r) + + default: return .unconverted(self) + } + } +} diff --git a/Sources/_StringProcessing/RegexDSL/Core.swift b/Sources/_StringProcessing/RegexDSL/Core.swift index b426d1565..136182be3 100644 --- a/Sources/_StringProcessing/RegexDSL/Core.swift +++ b/Sources/_StringProcessing/RegexDSL/Core.swift @@ -11,15 +11,6 @@ import _MatchingEngine -@dynamicMemberLookup -public struct RegexMatch { - public let range: Range - public let match: Match - - public subscript(dynamicMember keyPath: KeyPath) -> T { - match[keyPath: keyPath] - } -} /// A type that represents a regular expression. public protocol RegexProtocol { @@ -112,83 +103,7 @@ public struct Regex: RegexProtocol { } } -extension RegexProtocol { - public func match(in input: String) -> RegexMatch? { - _match( - input, in: input.startIndex.. RegexMatch? { - _match( - input.base, in: input.startIndex.., - mode: MatchMode = .wholeString - ) -> RegexMatch? { - // TODO: Remove this branch when the matching engine supports captures. - if regex.hasCapture { - let vm = HareVM(program: regex.program.legacyLoweredProgram) - guard let (range, captures) = vm.execute( - input: input, in: inputRange, mode: mode - )?.destructure else { - return nil - } - let convertedMatch: Match - if Match.self == (Substring, DynamicCaptures).self { - convertedMatch = (input[range], DynamicCaptures(captures)) as! Match - } else { - let typeErasedMatch = captures.matchValue( - withWholeMatch: input[range] - ) - convertedMatch = typeErasedMatch as! Match - } - return RegexMatch(range: range, match: convertedMatch) - } - - let executor = Executor(program: regex.program.loweredProgram) - guard let result = executor.execute( - input: input, in: inputRange, mode: mode - ) else { - return nil - } - let convertedMatch: Match - if Match.self == (Substring, DynamicCaptures).self { - convertedMatch = (input[result.range], DynamicCaptures.empty) as! Match - } else { - assert(Match.self == Substring.self) - convertedMatch = input[result.range] as! Match - } - return RegexMatch(range: result.range, match: convertedMatch) - } -} - -extension String { - public func match(_ regex: R) -> RegexMatch? { - regex.match(in: self) - } - public func match( - @RegexBuilder _ content: () -> R - ) -> RegexMatch? { - match(content()) - } -} -extension Substring { - public func match(_ regex: R) -> RegexMatch? { - regex.match(in: self) - } - - public func match( - @RegexBuilder _ content: () -> R - ) -> RegexMatch? { - match(content()) - } -} public struct MockRegexLiteral: RegexProtocol { public typealias MatchValue = Substring public let regex: Regex diff --git a/Sources/_StringProcessing/RegexDSL/DSLTree.swift b/Sources/_StringProcessing/RegexDSL/DSLTree.swift index 5e79575bb..b3b14b1d1 100644 --- a/Sources/_StringProcessing/RegexDSL/DSLTree.swift +++ b/Sources/_StringProcessing/RegexDSL/DSLTree.swift @@ -185,192 +185,6 @@ extension DSLTree.Node { } } -extension AST.Node { - /// Converts an AST node to a `convertedRegexLiteral` node. - var dslTreeNode: DSLTree.Node { - func wrap(_ node: DSLTree.Node) -> DSLTree.Node { - switch node { - case .convertedRegexLiteral: - // FIXME: how can this happen? -// assertionFailure("Double wrapping?") - return node - default: - break - } - // TODO: Should we do this for the - // single-concatenation child too, or should? - // we wrap _that_? - return .convertedRegexLiteral(node, self) - } - - // Convert the top-level node without wrapping - func convert() -> DSLTree.Node { - switch self { - case let .alternation(v): - let children = v.children.map(\.dslTreeNode) - return .alternation(children) - - case let .concatenation(v): - // Coalesce adjacent children who can produce a - // string literal representation - let astChildren = v.children - func coalesce( - _ idx: Array.Index - ) -> (Array.Index, String)? { - var result = "" - var idx = idx - while idx < astChildren.endIndex { - let atom: AST.Atom? = astChildren[idx].as() - - // TODO: For printing, nice to coalesce - // scalars literals too. We likely need a different - // approach even before we have a better IR. - guard let char = atom?.singleCharacter else { - break - } - result.append(char) - astChildren.formIndex(after: &idx) - } - return result.count <= 1 ? nil : (idx, result) - } - - // No need to nest single children concatenations - if astChildren.count == 1 { - return astChildren.first!.dslTreeNode - } - - // Check for a single child post-coalescing - if let (idx, str) = coalesce(astChildren.startIndex), - idx == astChildren.endIndex - { - return .quotedLiteral(str) - } - - // Coalesce adjacent string children - var curIdx = astChildren.startIndex - var children = Array() - while curIdx < astChildren.endIndex { - if let (nextIdx, str) = coalesce(curIdx) { - // TODO: Track source info... - children.append(.quotedLiteral(str)) - curIdx = nextIdx - } else { - children.append(astChildren[curIdx].dslTreeNode) - children.formIndex(after: &curIdx) - } - } - return .concatenation(children) - - case let .group(v): - let child = v.child.dslTreeNode - return .group(v.kind.value, child) - - case let .conditional(v): - let trueBranch = v.trueBranch.dslTreeNode - let falseBranch = v.falseBranch.dslTreeNode - return .conditional( - v.condition.kind, trueBranch, falseBranch) - - case let .quantification(v): - let child = v.child.dslTreeNode - return .quantification( - v.amount.value, v.kind.value, child) - - case let .quote(v): - return .quotedLiteral(v.literal) - - case let .trivia(v): - return .trivia(v.contents) - - case let .atom(v): - return .atom(v.dslTreeAtom) - - case let .customCharacterClass(ccc): - return .customCharacterClass(ccc.dslTreeClass) - - case .empty(_): - return .empty - - case let .groupTransform(v, transform): - let child = v.child.dslTreeNode - return .groupTransform( - v.kind.value, child, transform) - - case let .absentFunction(a): - // TODO: What should this map to? - return .absentFunction(a) - } - } - - let converted = convert() - return wrap(converted) - } -} - -extension AST.CustomCharacterClass { - var dslTreeClass: DSLTree.CustomCharacterClass { - // TODO: Not quite 1-1 - func convert( - _ member: Member - ) -> DSLTree.CustomCharacterClass.Member { - switch member { - case let .custom(ccc): - return .custom(ccc.dslTreeClass) - - case let .range(r): - return .range( - r.lhs.dslTreeAtom, r.rhs.dslTreeAtom) - - case let .atom(a): - return .atom(a.dslTreeAtom) - - case let .quote(q): - return .quotedLiteral(q.literal) - - case let .setOperation(lhs, op, rhs): - let lhs = DSLTree.CustomCharacterClass( - members: lhs.map(convert), - isInverted: false) - let rhs = DSLTree.CustomCharacterClass( - members: rhs.map(convert), - isInverted: false) - - switch op.value { - case .subtraction: - return .subtraction(lhs, rhs) - case .intersection: - return .intersection(lhs, rhs) - case .symmetricDifference: - return .symmetricDifference(lhs, rhs) - } - case let .trivia(t): - return .trivia(t.contents) - } - } - - return .init( - members: members.map(convert), - isInverted: self.isInverted) - } -} - -extension AST.Atom { - var dslTreeAtom: DSLTree.Atom { - if let kind = assertionKind { - return .assertion(kind) - } - - switch self.kind { - case let .char(c): return .char(c) - case let .scalar(s): return .scalar(s) - case .any: return .any - case let .backreference(r): return .backreference(r) - - default: return .unconverted(self) - } - } -} - extension DSLTree.Atom { // Return the Character or promote a scalar to a Character var literalCharacterValue: Character? { @@ -398,20 +212,6 @@ extension DSLTree { } } -extension AST.GlobalMatchingOptionSequence { - var dslTreeOptions: DSLTree.Options { - // TODO: map options - return .init() - } -} - -extension AST { - var dslTree: DSLTree { - return DSLTree( - root.dslTreeNode, options: globalOptions?.dslTreeOptions) - } -} - extension DSLTree { var hasCapture: Bool { root.hasCapture diff --git a/Sources/_StringProcessing/RegexDSL/Match.swift b/Sources/_StringProcessing/RegexDSL/Match.swift new file mode 100644 index 000000000..a2e30c3a3 --- /dev/null +++ b/Sources/_StringProcessing/RegexDSL/Match.swift @@ -0,0 +1,98 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +@dynamicMemberLookup +public struct RegexMatch { + public let range: Range + public let match: Match + + public subscript(dynamicMember keyPath: KeyPath) -> T { + match[keyPath: keyPath] + } +} + +extension RegexProtocol { + public func match(in input: String) -> RegexMatch? { + _match( + input, in: input.startIndex.. RegexMatch? { + _match( + input.base, in: input.startIndex.., + mode: MatchMode = .wholeString + ) -> RegexMatch? { + // TODO: Remove this branch when the matching engine supports captures. + if regex.hasCapture { + let vm = HareVM(program: regex.program.legacyLoweredProgram) + guard let (range, captures) = vm.execute( + input: input, in: inputRange, mode: mode + )?.destructure else { + return nil + } + let convertedMatch: Match + if Match.self == (Substring, DynamicCaptures).self { + convertedMatch = (input[range], DynamicCaptures(captures)) as! Match + } else { + let typeErasedMatch = captures.matchValue( + withWholeMatch: input[range] + ) + convertedMatch = typeErasedMatch as! Match + } + return RegexMatch(range: range, match: convertedMatch) + } + + let executor = Executor(program: regex.program.loweredProgram) + guard let result = executor.execute( + input: input, in: inputRange, mode: mode + ) else { + return nil + } + let convertedMatch: Match + if Match.self == (Substring, DynamicCaptures).self { + convertedMatch = (input[result.range], DynamicCaptures.empty) as! Match + } else { + assert(Match.self == Substring.self) + convertedMatch = input[result.range] as! Match + } + return RegexMatch(range: result.range, match: convertedMatch) + } +} + +extension String { + public func match(_ regex: R) -> RegexMatch? { + regex.match(in: self) + } + + public func match( + @RegexBuilder _ content: () -> R + ) -> RegexMatch? { + match(content()) + } +} +extension Substring { + public func match(_ regex: R) -> RegexMatch? { + regex.match(in: self) + } + + public func match( + @RegexBuilder _ content: () -> R + ) -> RegexMatch? { + match(content()) + } +} diff --git a/Tests/MatchingEngineTests/MatchingEngineTests.swift b/Tests/MatchingEngineTests/MatchingEngineTests.swift index d69eb1cf9..aac65733d 100644 --- a/Tests/MatchingEngineTests/MatchingEngineTests.swift +++ b/Tests/MatchingEngineTests/MatchingEngineTests.swift @@ -144,7 +144,7 @@ func makeEngine( ) -> Engine { var builder = Program.Builder() constructor(&builder) - let program = builder.assemble() + let program = try! builder.assemble() let engine = Engine(program) show(engine) return engine