Skip to content

Quick bug fix / workaround for whole-match values #191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Sources/_StringProcessing/Capture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,3 @@ extension Sequence where Element == StructuredCapture {
self.map { $0.slice(from: input) }
}
}


42 changes: 9 additions & 33 deletions Sources/_StringProcessing/Engine/Consume.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,41 +24,17 @@ extension Engine {
}
}

extension Engine where Input == String {
func consume(
_ input: Input,
in range: Range<Input.Index>,
matchMode: MatchMode
) -> (Input.Index, CaptureList)? {
if enableTracing {
print("Consume: \(input)")
}

var cpu = makeProcessor(input: input, bounds: range, matchMode: matchMode)
let result: Input.Index? = {
while true {
switch cpu.state {
case .accept:
return cpu.currentPosition
case .fail:
return nil
case .inProgress: cpu.cycle()
}
}
}()

if enableTracing {
if let idx = result {
print("Result: \(input[..<idx]) | \(input[idx...])")
} else {
print("Result: nil")
extension Processor where Input == String {
mutating func consume() -> Input.Index? {
while true {
switch self.state {
case .accept:
return self.currentPosition
case .fail:
return nil
case .inProgress: self.cycle()
}
}
guard let result = result else { return nil }

let capList = cpu.storedCaptures
return (result, CaptureList(
values: capList, referencedCaptureOffsets: program.referencedCaptureOffsets))
}
}

28 changes: 24 additions & 4 deletions Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,41 @@ struct Executor {
in inputRange: Range<String.Index>,
_ mode: MatchMode
) throws -> RegexMatch<Match>? {
guard let (endIdx, capList) = engine.consume(
input, in: inputRange, matchMode: mode
) else {
var cpu = engine.makeProcessor(
input: input, bounds: inputRange, matchMode: mode)

guard let endIdx = cpu.consume() else {
return nil
}

let capList = CaptureList(
values: cpu.storedCaptures,
referencedCaptureOffsets: engine.program.referencedCaptureOffsets)

let capStruct = engine.program.captureStructure
let range = inputRange.lowerBound..<endIdx
let caps = try capStruct.structuralize(
capList, input)

// FIXME: This is a workaround for not tracking (or
// specially compiling) whole-match values.
let value: Any?
if Match.self != Substring.self,
Match.self != (Substring, DynamicCaptures).self,
caps.isEmpty
{
value = cpu.registers.values.first
assert(value != nil, "hmm, what would this mean?")
} else {
value = nil
}

return RegexMatch(
input: input,
range: range,
rawCaptures: caps,
referencedCaptureOffsets: capList.referencedCaptureOffsets)
referencedCaptureOffsets: capList.referencedCaptureOffsets,
value: value)
}

func dynamicMatch(
Expand Down
10 changes: 10 additions & 0 deletions Sources/_StringProcessing/RegexDSL/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ public struct RegexMatch<Match> {
let rawCaptures: [StructuredCapture]
let referencedCaptureOffsets: [ReferenceID: Int]

let value: Any?

public var match: Match {
if Match.self == (Substring, DynamicCaptures).self {
// FIXME(rdar://89449323): Compiler assertion
Expand All @@ -25,7 +27,15 @@ public struct RegexMatch<Match> {
} else if Match.self == Substring.self {
// FIXME: Plumb whole match (`.0`) through the matching engine.
return input[range] as! Match
} else if rawCaptures.isEmpty, value != nil {
// FIXME: This is a workaround for whole-match values not
// being modeled as part of captures. We might want to
// switch to a model where results are stored alongside captures.
return value! as! Match
} else {
guard value == nil else {
fatalError("FIXME: what would this mean?")
}
let typeErasedMatch = rawCaptures.existentialMatch(from: input[range])
return typeErasedMatch as! Match
}
Expand Down
17 changes: 8 additions & 9 deletions Tests/RegexTests/CustomTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,14 @@ extension RegexTests {
("55z", .match, nil),
("55z", .firstMatch, "55"))

// FIXME: Requires we return a value instead of a range
// customTest(
// Regex {
// Numbler()
// },
// ("ab123c", .firstMatch, 1),
// ("abc", .firstMatch, nil),
// ("55z", .match, nil),
// ("55z", .firstMatch, 5))
customTest(
Regex {
Numbler()
},
("ab123c", .firstMatch, 1),
("abc", .firstMatch, nil),
("55z", .match, nil),
("55z", .firstMatch, 5))

// TODO: Convert below tests to better infra. Right now
// it's hard because `Match` is constrained to be
Expand Down