Patch summary (free text ahead of the first file header):
  * ByteCodeGen.swift: emit assertions for .textSegment / .notTextSegment
    using isOnGraphemeClusterBoundary instead of throwing Unsupported
    (word-based segmentation is still marked FIXME in the added code).
  * DSL.swift: String/Substring build a .quotedLiteral node, Character builds
    an .atom(.char) node, and UnicodeScalar gains a RegexProtocol conformance.
  * MatchTests.swift / RegexDSLTests.swift: new cases for \b, \y, \Y and
    Anchor.textSegmentBoundary.

diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index 93dca17a8..d6389c1f6 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -99,12 +99,16 @@ extension Compiler.ByteCodeGen {
       }
 
     case .textSegment:
-      // This we should be able to do!
-      throw Unsupported(#"\y (text segment)"#)
+      builder.buildAssert { (input, pos, _) in
+        // FIXME: Grapheme or word based on options
+        input.isOnGraphemeClusterBoundary(pos)
+      }
 
     case .notTextSegment:
-      // This we should be able to do!
-      throw Unsupported(#"\Y (not text segment)"#)
+      builder.buildAssert { (input, pos, _) in
+        // FIXME: Grapheme or word based on options
+        !input.isOnGraphemeClusterBoundary(pos)
+      }
 
     case .startOfLine:
       builder.buildAssert { (input, pos, bounds) in
diff --git a/Sources/_StringProcessing/RegexDSL/DSL.swift b/Sources/_StringProcessing/RegexDSL/DSL.swift
index 17f006231..a21dce82d 100644
--- a/Sources/_StringProcessing/RegexDSL/DSL.swift
+++ b/Sources/_StringProcessing/RegexDSL/DSL.swift
@@ -17,8 +17,7 @@ extension String: RegexProtocol {
   public typealias Match = Substring
 
   public var regex: Regex {
-    let atoms = self.map { atom(.char($0)) }
-    return .init(ast: concat(atoms))
+    .init(node: .quotedLiteral(self))
   }
 }
 
@@ -26,8 +25,7 @@ extension Substring: RegexProtocol {
   public typealias Match = Substring
 
   public var regex: Regex {
-    let atoms = self.map { atom(.char($0)) }
-    return .init(ast: concat(atoms))
+    .init(node: .quotedLiteral(String(self)))
   }
 }
 
@@ -35,7 +33,15 @@ extension Character: RegexProtocol {
   public typealias Match = Substring
 
   public var regex: Regex {
-    .init(ast: atom(.char(self)))
+    .init(node: .atom(.char(self)))
+  }
+}
+
+extension UnicodeScalar: RegexProtocol {
+  public typealias Match = Substring
+
+  public var regex: Regex {
+    .init(node: .atom(.scalar(self)))
   }
 }
 
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index 3cd2df585..13b9c08d5 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -876,7 +876,8 @@ extension RegexTests {
       #"\d+\b"#,
       ("123", "123"),
       (" 123", "123"),
-      ("123 456", "123"))
+      ("123 456", "123"),
+      ("123A 456", "456"))
     firstMatchTests(
       #"\d+\b\s\b\d+"#,
       ("123", nil),
@@ -892,7 +893,18 @@ extension RegexTests {
     // TODO: \G and \K
 
     // TODO: Oniguruma \y and \Y
-
+    firstMatchTests(
+      #"\u{65}"#,             // Scalar 'e' is present in both:
+      ("Cafe\u{301}", "e"),   // composed and
+      ("Sol Cafe", "e"))      // standalone
+    firstMatchTests(
+      #"\u{65}\y"#,           // Grapheme boundary assertion
+      ("Cafe\u{301}", nil),
+      ("Sol Cafe", "e"))
+    firstMatchTests(
+      #"\u{65}\Y"#,           // Grapheme non-boundary assertion
+      ("Cafe\u{301}", "e"),
+      ("Sol Cafe", nil))
   }
 
   func testMatchGroups() {
diff --git a/Tests/RegexTests/RegexDSLTests.swift b/Tests/RegexTests/RegexDSLTests.swift
index 554ef905f..3ac21c8ca 100644
--- a/Tests/RegexTests/RegexDSLTests.swift
+++ b/Tests/RegexTests/RegexDSLTests.swift
@@ -280,6 +280,16 @@ class RegexDSLTests: XCTestCase {
       Anchor.endOfLine
     }
 
+    try _testDSLCaptures(
+      ("Cafe\u{301}", nil),
+      ("Cafe", "Cafe"),
+      matchType: Substring.self, ==)
+    {
+      oneOrMore(.word)
+      UnicodeScalar("e")
+      Anchor.textSegmentBoundary
+    }
+
     try _testDSLCaptures(
       ("aaaaa1", "aaaaa1"),
       ("aaaaa2", nil),