@@ -837,6 +837,30 @@ extension RegexTests {
837
837
var eComposed : String { " é " } // "é" written as one character; presumably the single precomposed scalar U+00E9 — TODO confirm
838
838
var eDecomposed : String { " e \u{301} " } // "e" followed by the scalar \u{301}, which combines with it into an equivalent "é"
839
839
840
+ func testIndividualScalars( ) {
841
+ // Expectation: A standalone Unicode scalar value in a regex literal can match either that
842
+ // specific scalar value or participate in matching as a character. NOTE(review): matchTest is
843
+ // defined outside this view; presumably match: nil means "no match expected" and xfail: true marks a known failure — confirm.
844
+
845
+ matchTest ( #"\u{65}\u{301}$"# , input: eDecomposed, match: eDecomposed) // pattern spells the same two scalars as the input; whole input expected
846
+ // FIXME: Decomposed character in regex literal doesn't match an equivalent character (here the spelling in eComposed), so this case is xfail
847
+ matchTest ( #"\u{65}\u{301}$"# , input: eComposed, match: eComposed,
848
+ xfail: true )
849
+
850
+ matchTest ( #"\u{65}"# , input: eDecomposed, match: " e " ) // unanchored lone scalar: only the "e" is the expected match
851
+ matchTest ( #"\u{65}$"# , input: eDecomposed, match: nil ) // with $ no match is expected, presumably because \u{301} still follows the "e" in the input
852
+ // FIXME: \y is unsupported, so this case is xfail (\y presumably denotes a grapheme-cluster boundary — confirm)
853
+ matchTest ( #"\u{65}\y"# , input: eDecomposed, match: nil ,
854
+ xfail: true )
855
+
856
+ // FIXME: Unicode scalars are only matched at the start of a grapheme cluster, so matching the trailing \u{301} on its own is xfail
857
+ matchTest ( #"\u{301}"# , input: eDecomposed, match: " \u{301} " ,
858
+ xfail: true )
859
+ // FIXME: \y is unsupported, so this case is xfail as well
860
+ matchTest ( #"\y\u{301}"# , input: eDecomposed, match: nil ,
861
+ xfail: true )
862
+ }
863
+
840
864
func testCanonicalEquivalence( ) throws {
841
865
// Expectation: Matching should use canonical equivalence whenever comparing
842
866
// characters, so a user can write characters using any equivalent spelling
@@ -846,20 +870,20 @@ extension RegexTests {
846
870
#"é$"# ,
847
871
( eComposed, eComposed) ,
848
872
( eDecomposed, eDecomposed) )
849
-
873
+
850
874
// FIXME: Decomposed character in regex literal doesn't match an equivalent character
851
875
matchTests (
852
876
#"e\u{301}$"# ,
853
877
( eComposed, eComposed) ,
854
878
( eDecomposed, eDecomposed) ,
855
879
xfail: true )
856
-
880
+
857
881
matchTests (
858
882
#"e$"# ,
859
883
( eComposed, nil ) ,
860
884
( eDecomposed, nil ) )
861
885
}
862
-
886
+
863
887
func testCanonicalEquivalenceCharacterClass( ) throws {
864
888
// Expectation: Character classes should match equivalent characters to the
865
889
// same degree, regardless of how they are spelled. Unicode "property
@@ -980,8 +1004,7 @@ extension RegexTests {
980
1004
xfail: true )
981
1005
matchTest ( #"e\O"# , input: eDecomposed, match: eDecomposed,
982
1006
xfail: true )
983
- // TODO: Should these two match or not?
984
- matchTest ( #"\O\u{301}"# , input: eComposed, match: eComposed,
1007
+ matchTest ( #"\O\u{301}"# , input: eComposed, match: nil ,
985
1008
xfail: true )
986
1009
matchTest ( #"e\O"# , input: eComposed, match: nil ,
987
1010
xfail: true )
@@ -993,5 +1016,9 @@ extension RegexTests {
993
1016
( eDecomposed, eDecomposed) ,
994
1017
xfail: true )
995
1018
}
1019
+
1020
+ // TODO: Add test for implied grapheme cluster requirement at group boundaries
1021
+
1022
+ // TODO: Add test for grapheme boundaries at start/end of match
996
1023
}
997
1024
0 commit comments