Skip to content

Obtain match output elements without materializing the output. #469

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions Sources/_RegexParser/Utility/TypeConstruction.swift
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,62 @@ public enum TypeConstruction {
return _openExistential(childType, do: helper)
}
}

extension TypeConstruction {
  /// Returns the type formed by wrapping `base` in `depth` levels of
  /// `Optional`.
  ///
  /// - Parameters:
  ///   - base: The type to wrap.
  ///   - depth: The number of `Optional` layers to add. Must be non-negative;
  ///     `0` returns `base` unchanged. Defaults to `1`.
  /// - Returns: The metatype of `base` wrapped in `depth` optionals.
  public static func optionalType<Base>(
    of base: Base.Type, depth: Int = 1
  ) -> Any.Type {
    // A negative depth would otherwise fall into `default` and recurse with an
    // ever-decreasing depth, never reaching a terminating case.
    precondition(depth >= 0, "Optional depth must be non-negative")
    switch depth {
    case 0: return base
    case 1: return Base?.self
    case 2: return Base??.self
    case 3: return Base???.self
    case 4: return Base????.self
    default:
      // Add four layers at once and recurse for the remaining depth.
      return optionalType(of: Base????.self, depth: depth - 4)
    }
  }
}

extension MemoryLayout {
  /// Returns the element index that corresponds to the given tuple element key
  /// path.
  ///
  /// The index is recovered by comparing the key path's byte offset with the
  /// offset computed for each entry of `elementTypes`, aligning each element
  /// up to its own alignment before comparing.
  ///
  /// - Parameters:
  ///   - keyPath: The key path from a tuple to one of its elements.
  ///   - elementTypes: The element types of the tuple type, in order.
  /// - Returns: The index of the element located at the key path's byte
  ///   offset, or `nil` if the key path has no byte offset, if a zero-sized
  ///   element is encountered before a match, or if no element matches.
  // TODO: It is possible to get element types from the type metadata, but it's
  // more efficient to pass them in since we already know them in the matching
  // engine.
  public static func tupleElementIndex<ElementTypes: Collection>(
    of keyPath: PartialKeyPath<T>,
    elementTypes: ElementTypes
  ) -> Int? where ElementTypes.Element == Any.Type {
    guard let byteOffset = offset(of: keyPath) else {
      return nil
    }
    // NOTE(review): this fast path answers before any element is inspected, so
    // it is not covered by the zero-size check below -- confirm that a
    // zero-sized leading element cannot reach here.
    if byteOffset == 0 { return 0 }

    // Opens a metatype existential to read the element's size and alignment
    // mask (alignment - 1).
    func sizeAndAlignMask<Element>(_: Element.Type) -> (Int, Int) {
      (MemoryLayout<Element>.size, MemoryLayout<Element>.alignment - 1)
    }

    var currentOffset = 0
    for (index, type) in elementTypes.enumerated() {
      let (elementSize, alignMask) =
        _openExistential(type, do: sizeAndAlignMask)
      // The ABI of an offset-based key path only stores the byte offset, so
      // this doesn't work if there's a 0-sized element, e.g. `Void`,
      // `(Void, Void)`: `\(Void, Void).0 == \(Void, Void).1` evaluates to
      // `true`. (rdar://63819465)
      //
      // The check must use this element's size. Testing the unqualified
      // `size` before the element's size is bound would read
      // `MemoryLayout<T>.size`, i.e. the size of the whole tuple.
      if elementSize == 0 {
        return nil
      }
      // Align up the offset for this type.
      currentOffset = (currentOffset + alignMask) & ~alignMask
      // If it matches the offset we are looking for, `index` is the tuple
      // element index.
      if currentOffset == byteOffset {
        return index
      }
      // Advance to the past-the-end offset for this element.
      currentOffset += elementSize
    }
    return nil
  }
}
13 changes: 10 additions & 3 deletions Sources/_StringProcessing/Regex/AnyRegexOutput.swift
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ extension AnyRegexOutput: RandomAccessCollection {

/// The captured value, `nil` for no-capture.
///
/// Evaluates to `representation.value` when it is non-`nil`; otherwise falls
/// back to `substring`.
public var value: Any? {
  representation.value ?? substring
}

/// The type reported by this element's underlying representation.
internal var type: Any.Type {
  get {
    return representation.type
  }
}

/// The name of this capture, if it has one, otherwise `nil`.
Expand Down Expand Up @@ -263,4 +265,9 @@ extension AnyRegexOutput.ElementRepresentation {
optionalCount: optionalDepth
)
}

/// The type of this element: the dynamic type of `value` when one is present,
/// otherwise `Substring` wrapped in `optionalDepth` levels of `Optional`.
var type: Any.Type {
  if let value {
    // `Swift.` qualification avoids resolving to this property itself.
    return Swift.type(of: value)
  }
  return TypeConstruction.optionalType(
    of: Substring.self,
    depth: optionalDepth
  )
}
}
33 changes: 26 additions & 7 deletions Sources/_StringProcessing/Regex/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ extension Regex {

@available(SwiftStdlib 5.7, *)
extension Regex.Match {
/// The input string, as held by `anyRegexOutput`.
var input: String {
  get {
    return anyRegexOutput.input
  }
}

/// The output produced from the match operation.
public var output: Output {
if Output.self == AnyRegexOutput.self {
Expand All @@ -37,33 +41,48 @@ extension Regex.Match {
)

let output = AnyRegexOutput(
input: anyRegexOutput.input,
input: input,
elements: [wholeMatchCapture] + anyRegexOutput._elements
)

return output as! Output
} else if Output.self == Substring.self {
// FIXME: Plumb whole match (`.0`) through the matching engine.
return anyRegexOutput.input[range] as! Output
} else if anyRegexOutput.isEmpty, value != nil {
return input[range] as! Output
} else if anyRegexOutput.isEmpty, let value {
// FIXME: This is a workaround for whole-match values not
// being modeled as part of captures. We might want to
// switch to a model where results are alongside captures
return value! as! Output
return value as! Output
} else {
guard value == nil else {
fatalError("FIXME: what would this mean?")
}
let typeErasedMatch = anyRegexOutput.existentialOutput(
from: anyRegexOutput.input[range]
from: input[range]
)
return typeErasedMatch as! Output
}
}

/// The type of the whole-match element: the dynamic type of `value` when one
/// is present, otherwise `Substring`.
var wholeMatchType: Any.Type {
  if let value {
    return type(of: value)
  }
  // When there's no dedicated value for the whole match, it can only be a
  // `Substring`.
  return Substring.self
}

/// Accesses a capture by its name or number.
///
/// When the key path can be mapped to a tuple element index, the element is
/// read directly from the whole-match value or the stored captures. Otherwise
/// this falls back to reading the key path from `output`.
public subscript<T>(dynamicMember keyPath: KeyPath<Output, T>) -> T {
  // Note: We should be able to get the element offset from the key path
  // itself even at compile time. We need a better way of doing this.
  // TODO: Once the whole match is stored inside the capture list, these
  // offsets can be precomputed for all elements in a `Regex.Match`.
  guard let outputTupleOffset = MemoryLayout.tupleElementIndex(
    of: keyPath, elementTypes: [wholeMatchType] + anyRegexOutput.map(\.type)
  ) else {
    // The index could not be determined; read from the full output instead.
    return output[keyPath: keyPath]
  }
  if outputTupleOffset == 0 {
    // Index 0 is the whole match: the dedicated value if there is one,
    // otherwise the matched slice of the input.
    return value.map { $0 as! T } ?? (input[range] as! T)
  } else {
    // Captures are stored without the whole match, hence the `- 1`.
    return anyRegexOutput[outputTupleOffset - 1].value as! T
  }
}

/// Accesses a capture using the `.0` syntax, even when the match isn't a tuple.
Expand All @@ -83,7 +102,7 @@ extension Regex.Match {
}

return element.existentialOutputComponent(
from: anyRegexOutput.input[...]
from: input[...]
) as! Capture
}
}
Expand Down
14 changes: 13 additions & 1 deletion Tests/RegexBuilderTests/RegexDSLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,19 @@ class RegexDSLTests: XCTestCase {
CharacterClass.digit
}
}


try _testDSLCaptures(
("abcdef2", ("abcdef2", "f")),
matchType: (Substring, Substring??).self, ==)
{
Optionally {
ZeroOrMore {
Capture(CharacterClass.word)
}
CharacterClass.digit
}
}

try _testDSLCaptures(
("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"),
("aaaabbbcccdddeeefff", nil),
Expand Down