diff --git a/Sources/_RegexParser/Utility/TypeConstruction.swift b/Sources/_RegexParser/Utility/TypeConstruction.swift
index 54250c96b..39b45959a 100644
--- a/Sources/_RegexParser/Utility/TypeConstruction.swift
+++ b/Sources/_RegexParser/Utility/TypeConstruction.swift
@@ -139,3 +139,79 @@ public enum TypeConstruction {
     return _openExistential(childType, do: helper)
   }
 }
+
+extension TypeConstruction {
+  /// Returns the metatype of `base` wrapped in `depth` levels of `Optional`.
+  ///
+  /// - Parameters:
+  ///   - base: The type to wrap.
+  ///   - depth: The number of `Optional` layers to add; must not be negative.
+  ///     A depth of `0` returns `base` itself.
+  /// - Returns: `base` wrapped in `depth` optionals, type-erased to `Any.Type`.
+  public static func optionalType<Base>(
+    of base: Base.Type, depth: Int = 1
+  ) -> Any.Type {
+    // A negative depth would otherwise recurse through `default` forever.
+    precondition(depth >= 0, "Optional depth must not be negative")
+    switch depth {
+    case 0: return base
+    case 1: return Base?.self
+    case 2: return Base??.self
+    case 3: return Base???.self
+    case 4: return Base????.self
+    default:
+      // Peel off four levels at a time until one of the cases above applies.
+      return optionalType(of: Base????.self, depth: depth - 4)
+    }
+  }
+}
+
+extension MemoryLayout {
+  /// Returns the element index that corresponds to the given tuple element key
+  /// path.
+  ///
+  /// - Parameters:
+  ///   - keyPath: The key path from a tuple to one of its elements.
+  ///   - elementTypes: The element types of the tuple type, in order.
+  /// - Returns: The index of the element whose aligned byte offset equals the
+  ///   key path's offset, or `nil` if the key path has no offset, a zero-sized
+  ///   element is encountered first, or no element matches.
+  // TODO: It is possible to get element types from the type metadata, but it's
+  // more efficient to pass them in since we already know them in the matching
+  // engine.
+  public static func tupleElementIndex<ElementTypes: Collection>(
+    of keyPath: PartialKeyPath<T>,
+    elementTypes: ElementTypes
+  ) -> Int? where ElementTypes.Element == Any.Type {
+    guard let byteOffset = offset(of: keyPath) else {
+      return nil
+    }
+    // An offset of zero is always reported as the first element.
+    if byteOffset == 0 { return 0 }
+    var currentOffset = 0
+    for (index, type) in elementTypes.enumerated() {
+      func sizeAndAlignMask<T>(_: T.Type) -> (Int, Int) {
+        (MemoryLayout<T>.size, MemoryLayout<T>.alignment - 1)
+      }
+      // Bind this element's size and alignment mask before testing them.
+      let (size, alignMask) = _openExistential(type, do: sizeAndAlignMask)
+      // The ABI of an offset-based key path only stores the byte offset, so
+      // this doesn't work if there's a 0-sized element, e.g. `Void`,
+      // `(Void, Void)`. (rdar://63819465)
+      if size == 0 {
+        return nil
+      }
+      // Align up the offset for this type.
+      currentOffset = (currentOffset + alignMask) & ~alignMask
+      // If it matches the offset we are looking for, `index` is the tuple
+      // element index.
+      if currentOffset == byteOffset {
+        return index
+      }
+      // Advance to the past-the-end offset for this element.
+      currentOffset += size
+    }
+    return nil
+  }
+}
diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift
index df70e9fc7..acec2054e 100644
--- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift
+++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift
@@ -62,9 +62,12 @@ extension AnyRegexOutput: RandomAccessCollection {
 
     /// The captured value, `nil` for no-capture
     public var value: Any? {
-      // FIXME: Should this return the substring for default-typed
-      // values?
-      representation.value
+      representation.value ?? substring
+    }
+
+    /// The type reported by this element's underlying representation.
+    internal var type: Any.Type {
+      representation.type
     }
 
     /// The name of this capture, if it has one, otherwise `nil`.
@@ -263,4 +266,11 @@ extension AnyRegexOutput.ElementRepresentation {
       optionalCount: optionalDepth
     )
   }
+
+  /// The dynamic type of `value` when one is stored; otherwise `Substring`
+  /// wrapped in `optionalDepth` levels of `Optional`.
+  var type: Any.Type {
+    value.map { Swift.type(of: $0) }
+      ?? TypeConstruction.optionalType(of: Substring.self, depth: optionalDepth)
+  }
 }
diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift
index a9234020f..331057b9e 100644
--- a/Sources/_StringProcessing/Regex/Match.swift
+++ b/Sources/_StringProcessing/Regex/Match.swift
@@ -28,6 +28,11 @@ extension Regex {
 
 @available(SwiftStdlib 5.7, *)
 extension Regex.Match {
+  /// The input string stored in `anyRegexOutput`.
+  var input: String {
+    anyRegexOutput.input
+  }
+
   /// The output produced from the match operation.
   public var output: Output {
     if Output.self == AnyRegexOutput.self {
@@ -37,33 +42,52 @@ extension Regex.Match {
       )
 
       let output = AnyRegexOutput(
-        input: anyRegexOutput.input,
+        input: input,
         elements: [wholeMatchCapture] + anyRegexOutput._elements
       )
 
       return output as! Output
     } else if Output.self == Substring.self {
       // FIXME: Plumb whole match (`.0`) through the matching engine.
-      return anyRegexOutput.input[range] as! Output
-    } else if anyRegexOutput.isEmpty, value != nil {
+      return input[range] as! Output
+    } else if anyRegexOutput.isEmpty, let value {
       // FIXME: This is a workaround for whole-match values not
       // being modeled as part of captures. We might want to
       // switch to a model where results are alongside captures
-      return value! as! Output
+      return value as! Output
     } else {
       guard value == nil else {
         fatalError("FIXME: what would this mean?")
       }
       let typeErasedMatch = anyRegexOutput.existentialOutput(
-        from: anyRegexOutput.input[range]
+        from: input[range]
       )
       return typeErasedMatch as! Output
     }
   }
 
+  /// The dynamic type of `value` when present; otherwise `Substring.self`.
+  var wholeMatchType: Any.Type {
+    value.map { type(of: $0) } ?? Substring.self
+  }
+
   /// Accesses a capture by its name or number.
   public subscript<T>(dynamicMember keyPath: KeyPath<Output, T>) -> T {
-    output[keyPath: keyPath]
+    // Note: We should be able to get the element offset from the key path
+    // itself even at compile time. We need a better way of doing this.
+    guard let outputTupleOffset = MemoryLayout<Output>.tupleElementIndex(
+      of: keyPath, elementTypes: [wholeMatchType] + anyRegexOutput.map(\.type)
+    ) else {
+      // No element index could be determined; read through `output` instead.
+      return output[keyPath: keyPath]
+    }
+    if outputTupleOffset == 0 {
+      // Index 0 is the whole match: `value` if set, else the matched slice.
+      return value.map { $0 as! T } ?? (input[range] as! T)
+    } else {
+      // Capture elements are stored after the whole match, hence the `- 1`.
+      return anyRegexOutput[outputTupleOffset - 1].value as! T
+    }
   }
 
   /// Accesses a capture using the `.0` syntax, even when the match isn't a tuple.
@@ -83,7 +107,7 @@ extension Regex.Match {
     }
 
     return element.existentialOutputComponent(
-      from: anyRegexOutput.input[...]
+      from: input[...]
     ) as! Capture
   }
 }
diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index f325b579f..5a88adf6b 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -427,7 +427,19 @@ class RegexDSLTests: XCTestCase {
         CharacterClass.digit
       }
     }
-    
+
+    try _testDSLCaptures(
+      ("abcdef2", ("abcdef2", "f")),
+      matchType: (Substring, Substring??).self, ==)
+    {
+      Optionally {
+        ZeroOrMore {
+          Capture(CharacterClass.word)
+        }
+        CharacterClass.digit
+      }
+    }
+
     try _testDSLCaptures(
       ("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"),
       ("aaaabbbcccdddeeefff", nil),