diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index 915c4c5d7..ecfc558fe 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -71,6 +71,11 @@ extension StructuredCapture { value: storedCapture?.value, optionalCount: optionalCount) } + + func slice(from input: String) -> Substring? { + guard let r = storedCapture?.range else { return nil } + return input[r] + } } extension Sequence where Element == StructuredCapture { @@ -86,5 +91,10 @@ extension Sequence where Element == StructuredCapture { }) return TypeConstruction.tuple(of: caps) } + + func slices(from input: String) -> [Substring?] { + self.map { $0.slice(from: input) } + } } + diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift index cfb803de8..a4a3bf26c 100644 --- a/Sources/_StringProcessing/Engine/Consume.swift +++ b/Sources/_StringProcessing/Engine/Consume.swift @@ -25,16 +25,10 @@ extension Engine { } extension Engine where Input == String { - func consume( - _ input: Input - ) -> (Input.Index, CaptureList)? { - consume(input, in: input.startIndex ..< input.endIndex) - } - func consume( _ input: Input, in range: Range, - matchMode: MatchMode = .partialFromFront + matchMode: MatchMode ) -> (Input.Index, CaptureList)? { if enableTracing { print("Consume: \(input)") diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 5c098c5c5..9de2b0b3d 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -19,69 +19,33 @@ struct Executor { self.engine = Engine(program, enableTracing: enablesTracing) } - struct Result { - var range: Range - var captures: [StructuredCapture] - var referencedCaptureOffsets: [ReferenceID: Int] - - var destructure: ( - matched: Range, - captures: [StructuredCapture], - referencedCaptureOffsets: [ReferenceID: Int] - ) { - (range, captures, referencedCaptureOffsets) - } - - init( - _ matched: Range, _ captures: [StructuredCapture], - _ referencedCaptureOffsets: [ReferenceID: Int] - ) { - self.range = matched - self.captures = captures - self.referencedCaptureOffsets = referencedCaptureOffsets - } - } - - func execute( - input: String, - in range: Range, - mode: MatchMode = .wholeString - ) -> Result? { + func match( + _ input: String, + in inputRange: Range, + _ mode: MatchMode + ) throws -> RegexMatch? { guard let (endIdx, capList) = engine.consume( - input, in: range, matchMode: mode + input, in: inputRange, matchMode: mode ) else { return nil } let capStruct = engine.program.captureStructure - do { - let range = range.lowerBound.. Result? { - self.execute( - input: input.base, - in: input.startIndex.., - mode: MatchMode = .wholeString - ) -> (Range, CaptureList)? { - engine.consume( - input, in: range, matchMode: mode - ).map { endIndex, capture in - (range.lowerBound.., + _ mode: MatchMode + ) throws -> RegexMatch<(Substring, DynamicCaptures)>? { + try match(input, in: inputRange, mode) } } diff --git a/Sources/_StringProcessing/RegexDSL/Match.swift b/Sources/_StringProcessing/RegexDSL/Match.swift index 2dd31c379..c2593b22a 100644 --- a/Sources/_StringProcessing/RegexDSL/Match.swift +++ b/Sources/_StringProcessing/RegexDSL/Match.swift @@ -69,16 +69,11 @@ extension RegexProtocol { mode: MatchMode = .wholeString ) -> RegexMatch? { let executor = Executor(program: regex.program.loweredProgram) - guard let (range, captures, captureOffsets) = executor.execute( - input: input, in: inputRange, mode: mode - )?.destructure else { - return nil + do { + return try executor.match(input, in: inputRange, mode) + } catch { + fatalError(String(describing: error)) } - return RegexMatch( - input: input, - range: range, - rawCaptures: captures, - referencedCaptureOffsets: captureOffsets) } } diff --git a/Tests/MatchingEngineTests/MatchingEngineTests.swift b/Tests/MatchingEngineTests/MatchingEngineTests.swift index b7c89661d..ccfe85ec7 100644 --- a/Tests/MatchingEngineTests/MatchingEngineTests.swift +++ b/Tests/MatchingEngineTests/MatchingEngineTests.swift @@ -13,289 +13,5 @@ import XCTest @testable import _StringProcessing -/// Hold context and run variety of ad-hoc tests -/// -/// TODO: Use these to demonstrate first-order approximation of what -/// overhead such an engine imposes -fileprivate struct Test: ExpressibleByStringLiteral { - var input: String - var aEater: String - var manyAEater: String - var eatUntilA: String - var eatThroughA: String - - // TODO: Have tests explicitly show each step of type binding, - // input binding, etc. - var enableTracing: Bool? = nil - - /* - - until first A - through first A - until / through last A - etc - - */ - - var file: String - var line: UInt - - init( - _ s: String, - enableTracing: Bool? = nil, - file: String = #file, - line: UInt = #line - ) { - self.input = s - self.aEater = s.first == "A" ? String(s.dropFirst()) : s - self.manyAEater = String(s.drop(while: { $0 == "A" })) - - if let firstIdx = s.firstIndex(of: "A") { - self.eatUntilA = String(s[firstIdx...]) - self.eatThroughA = String(eatUntilA.dropFirst()) - } else { - self.eatUntilA = s - self.eatThroughA = s - } - - self.enableTracing = enableTracing - -// self.untilFirstAEater = String( -// s[(s.firstIndex(where: { $0 == "A" }) ?? s.startIndex)...]) - - - self.file = file - self.line = line - } - init( - stringLiteral: String, - file: String = #file, - line: UInt = #line - ) { - self.init(stringLiteral, file: file, line: line) - } - init(stringLiteral: String) { - // NOTE: Can't get source location of a literal... - self.init(stringLiteral) - } - - var slicedInput: (String, Range) { - let prefix = "aAa prefix ⚠️" - let suffix = "⚠️ aAa suffix" - let outer = prefix + input + suffix - let range = outer.mapOffsets( - (lower: prefix.count, upper: -suffix.count)) - return (outer, range) - } - - func check(_ engine: Engine, expected: String) { - var engine = engine - if let t = enableTracing { - engine.enableTracing = t - } - let output: String - let outputFromSlice: String - - if let (idx, _) = engine.consume(input) { - output = String(input[idx...]) - } else { - output = input - } - - let (outerInput, range) = slicedInput - if let (idx, _) = engine.consume(outerInput, in: range) { - outputFromSlice = String(outerInput[idx..? = nil, - manyAEater: Engine? = nil, - eatUntilA: Engine? = nil, - eatThroughA: Engine? = nil - ) { - if let engine = aEater { - check(engine, expected: self.aEater) - } - if let engine = manyAEater { - check(engine, expected: self.manyAEater) - } - if let engine = eatUntilA { - check(engine, expected: self.eatUntilA) - } - if let engine = eatThroughA { - check(engine, expected: self.eatThroughA) - } - } -} - -var doPrint = false -func show(_ s: CustomStringConvertible) { - if doPrint { print(s) } -} - -func makeEngine( - _ constructor: (inout Program.Builder) -> () -) -> Engine { - var builder = Program.Builder() - constructor(&builder) - let program = try! builder.assemble() - let engine = Engine(program) - show(engine) - return engine -} - -// Eat an A off the front -// -// [0] match "A" -// [1] accept -// -let aEater: Engine = { - makeEngine { builder in - builder.buildMatch("A") - builder.buildAccept() - } -}() - -// Eat many "A"s off the input -// -// [0] saveAddress [3] // .accept -// [1] match "A" -// [2] goto [1] // match "A" -// [3] accept -// -// NOTE: a save would restore input position, which we -// actually don't want to do. -// -// NOTE: We should compare with a more sophisticated match -// instruction that can take at least or at most, etc. -// -let manyAEater: Engine = { - makeEngine { builder in - let accTok = builder.makeAddress() - let matchTok = builder.makeAddress() - - builder.buildSaveAddress(accTok) - builder.buildMatch("A") - builder.resolve(matchTok) - builder.buildBranch(to: matchTok) - builder.buildAccept() - builder.resolve(accTok) - } -}() - -// Eat until you find an A (FAIL if no A) -// -// [0] assert #0 #0 -// [1] condBranch #0 [x] // accept -// [2] advance(1) -// [3] goto 0 -// [4] accept -// -// NOTE: This check-consume-else-branch pattern -// could be pretty common and might be worth a dedicated -// instruction. -let eatUntilA: Engine = { - makeEngine { builder in - let reg = builder.makeBoolRegister() - let accTok = builder.makeAddress() - let assertTok = builder.makeAddress() - builder.buildAssert("A", into: reg) - builder.resolve(assertTok) - builder.buildCondBranch(reg, to: accTok) - builder.buildAdvance(1) - builder.buildBranch(to: assertTok) - builder.buildAccept() - builder.resolve(accTok) - } -}() - -// Eat through the first A (FAIL if no A) -// -// [0] assert #0 #0 -// [1] advance(1) -// [2] condBranch #0 [x] // accept -// [3] goto 0 -// [4] accept -let eatThroughA: Engine = { - makeEngine { builder in - let reg = builder.makeBoolRegister() - let accTok = builder.makeAddress() - let assertTok = builder.makeAddress() - builder.buildAssert("A", into: reg) - builder.resolve(assertTok) - builder.buildAdvance(1) - builder.buildCondBranch(reg, to: accTok) - builder.buildBranch(to: assertTok) - builder.buildAccept() - builder.resolve(accTok) - } -}() - - - -class MatchingEngineTests: XCTestCase { - - func testAEaters() { - let tests: Array = [ - Test("abc"), - Test("Abc"), - Test("AAbc"), - Test(""), - Test("A"), - Test("b"), - Test("bbbA"), - Test("bbAbA"), - ] - - for test in tests { - test.check(aEater: aEater) - test.check(manyAEater: manyAEater) - test.check(eatUntilA: eatUntilA) - test.check(eatThroughA: eatThroughA) - } - } - - func testThreeLetterRepeat() { - // Check for a repeated 3-letter sequence, such as in - // `(...)\1` - // - // [0] movePosition(into: %low) - // [1] advance(3) - // [2] movePosition(into: %high) - // [3] matchSlice(%low, %high) - // [4] accept - let threeLetterRepeat: Engine = { - makeEngine { builder in - let low = builder.makePositionRegister( - initializingWithCurrentPosition: ()) - builder.buildAdvance(3) - let high = builder.makePositionRegister( - initializingWithCurrentPosition: ()) - builder.buildMatchSlice(lower: low, upper: high) - builder.buildAccept() - } - }() - - let tests: Array<(String, Bool)> = [ - ("abcabc", true), - ("abcabc_____", true), - ("dddddd_____", true), - ("🥳🧟‍♀️c🥳🧟‍♀️c", true), - ("abccba", false), - ("abcabb", false), - ("abcbac", false), - ("🥳🧟‍♀️c🥳🧟‍♂️c", false), - ] - - for (test, expect) in tests { - let match = threeLetterRepeat.consume(test) != nil - XCTAssertEqual(expect, match) - } - } -} +// TODO: Unit tests for the engine itself. Functional testing +// is handled by regex tests. diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 9f3cc313b..cc3568c1d 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -142,13 +142,15 @@ func captureTest( for (input, output) in tests { let inputRange = input.startIndex.. searcher algorithm var start = input.startIndex while true { - if let (range, caps) = self.executeFlat( - input: input, + if let result = try! self.dynamicMatch( + input, in: start..