@@ -329,21 +329,23 @@ extension RegexTests {
329
329
parseTest ( #"\070"# , scalar ( " \u{38} " ) )
330
330
parseTest ( #"\07A"# , concat ( scalar ( " \u{7} " ) , " A " ) )
331
331
parseTest ( #"\08"# , concat ( scalar ( " \u{0} " ) , " 8 " ) )
332
- parseTest ( #"\0707"# , concat ( scalar ( " \u{38} " ) , " 7 " ) )
332
+ parseTest ( #"\0707"# , scalar ( " \u{1C7} " ) )
333
333
334
334
parseTest ( #"[\0]"# , charClass ( scalar_m ( " \u{0} " ) ) )
335
335
parseTest ( #"[\01]"# , charClass ( scalar_m ( " \u{1} " ) ) )
336
336
parseTest ( #"[\070]"# , charClass ( scalar_m ( " \u{38} " ) ) )
337
337
338
338
parseTest ( #"[\07A]"# , charClass ( scalar_m ( " \u{7} " ) , " A " ) )
339
339
parseTest ( #"[\08]"# , charClass ( scalar_m ( " \u{0} " ) , " 8 " ) )
340
- parseTest ( #"[\0707]"# , charClass ( scalar_m ( " \u{38 } " ) , " 7 " ) )
340
+ parseTest ( #"[\0707]"# , charClass ( scalar_m ( " \u{1C7 } " ) ) )
341
341
342
- parseTest ( #"[\1]"# , charClass ( scalar_m ( " \u{1} " ) ) )
343
- parseTest ( #"[\123]"# , charClass ( scalar_m ( " \u{53} " ) ) )
344
- parseTest ( #"[\101]"# , charClass ( scalar_m ( " \u{41} " ) ) )
345
- parseTest ( #"[\7777]"# , charClass ( scalar_m ( " \u{1FF} " ) , " 7 " ) )
346
- parseTest ( #"[\181]"# , charClass ( scalar_m ( " \u{1} " ) , " 8 " , " 1 " ) )
342
+ // TODO: PCRE treats these as octal sequences; we should warn and
343
+ // suggest that the user prefix them with 0.
344
+ parseTest ( #"[\1]"# , charClass ( " 1 " ) )
345
+ parseTest ( #"[\123]"# , charClass ( " 1 " , " 2 " , " 3 " ) )
346
+ parseTest ( #"[\101]"# , charClass ( " 1 " , " 0 " , " 1 " ) )
347
+ parseTest ( #"[\7777]"# , charClass ( " 7 " , " 7 " , " 7 " , " 7 " ) )
348
+ parseTest ( #"[\181]"# , charClass ( " 1 " , " 8 " , " 1 " ) )
347
349
348
350
// We take *up to* the first two valid digits for \x. If there are no valid digits, the result is 0.
349
351
parseTest ( #"\x"# , scalar ( " \u{0} " ) )
@@ -492,6 +494,10 @@ extension RegexTests {
492
494
#"a\Q \Q \\.\Eb"# ,
493
495
concat ( " a " , quote ( #" \Q \\."# ) , " b " ) )
494
496
497
+ // These follow the PCRE behavior.
498
+ parseTest ( #"\Q\\E"# , quote ( " \\ " ) )
499
+ parseTest ( #"\E"# , " E " )
500
+
495
501
parseTest ( #"a" ."b"# , concat ( " a " , quote ( " . " ) , " b " ) ,
496
502
syntax: . experimental)
497
503
parseTest ( #"a" .""b""# , concat ( " a " , quote ( " . " ) , quote ( " b " ) ) ,
@@ -797,11 +803,9 @@ extension RegexTests {
797
803
)
798
804
}
799
805
800
- // TODO: Some of these behaviors are unintuitive, we should likely warn on
801
- // some of them.
802
- parseTest ( #"\10"# , scalar ( " \u{8} " ) )
803
- parseTest ( #"\18"# , concat ( scalar ( " \u{1} " ) , " 8 " ) )
804
- parseTest ( #"\7777"# , concat ( scalar ( " \u{1FF} " ) , " 7 " ) )
806
+ parseTest ( #"\10"# , backreference ( . absolute( 10 ) ) )
807
+ parseTest ( #"\18"# , backreference ( . absolute( 18 ) ) )
808
+ parseTest ( #"\7777"# , backreference ( . absolute( 7777 ) ) )
805
809
parseTest ( #"\91"# , backreference ( . absolute( 91 ) ) )
806
810
807
811
parseTest (
@@ -813,21 +817,22 @@ extension RegexTests {
813
817
parseTest (
814
818
#"()()()()()()()()()\10()"# ,
815
819
concat ( Array ( repeating: capture ( empty ( ) ) , count: 9 )
816
- + [ scalar ( " \u{8} " ) , capture ( empty ( ) ) ] ) ,
820
+ + [ backreference ( . absolute ( 10 ) ) , capture ( empty ( ) ) ] ) ,
817
821
captures: . tuple( Array ( repeating: . atom( ) , count: 10 ) )
818
822
)
819
- parseTest ( #"()()\10"# ,
820
- concat ( capture ( empty ( ) ) , capture ( empty ( ) ) , scalar ( " \u{8} " ) ) ,
821
- captures: . tuple( . atom( ) , . atom( ) ) )
823
+ parseTest ( #"()()\10"# , concat (
824
+ capture ( empty ( ) ) , capture ( empty ( ) ) , backreference ( . absolute( 10 ) ) ) ,
825
+ captures: . tuple( . atom( ) , . atom( ) )
826
+ )
822
827
823
828
// A capture wrapping three empty captures (four capture groups in total).
824
829
let fourCaptures = capture (
825
830
concat ( capture ( empty ( ) ) , capture ( empty ( ) ) , capture ( empty ( ) ) )
826
831
)
827
832
parseTest (
828
833
// There are 9 capture groups in total here.
829
- #"((()()())(()()()))\10"# ,
830
- concat ( capture ( concat ( fourCaptures, fourCaptures) ) , scalar ( " \u{8} " ) ) ,
834
+ #"((()()())(()()()))\10"# , concat ( capture ( concat (
835
+ fourCaptures, fourCaptures) ) , backreference ( . absolute ( 10 ) ) ) ,
831
836
captures: . tuple( Array ( repeating: . atom( ) , count: 9 ) )
832
837
)
833
838
parseTest (
@@ -852,7 +857,7 @@ extension RegexTests {
852
857
concat ( Array ( repeating: capture ( empty ( ) ) , count: 40 ) + [ scalar ( " " ) ] ) ,
853
858
captures: . tuple( Array ( repeating: . atom( ) , count: 40 ) )
854
859
)
855
- parseTest ( #"\40"# , scalar ( " " ) )
860
+ parseTest ( #"\40"# , backreference ( . absolute ( 40 ) ) )
856
861
parseTest (
857
862
String ( repeating: " () " , count: 40 ) + #"\40"# ,
858
863
concat ( Array ( repeating: capture ( empty ( ) ) , count: 40 )
@@ -862,7 +867,7 @@ extension RegexTests {
862
867
863
868
parseTest ( #"\7"# , backreference ( . absolute( 7 ) ) )
864
869
865
- parseTest ( #"\11"# , scalar ( " \u{9} " ) )
870
+ parseTest ( #"\11"# , backreference ( . absolute ( 11 ) ) )
866
871
parseTest (
867
872
String ( repeating: " () " , count: 11 ) + #"\11"# ,
868
873
concat ( Array ( repeating: capture ( empty ( ) ) , count: 11 )
@@ -876,12 +881,11 @@ extension RegexTests {
876
881
captures: . tuple( Array ( repeating: . atom( ) , count: 11 ) )
877
882
)
878
883
879
- parseTest ( #"\0113"# , concat ( scalar ( " \u{9} " ) , " 3 " ) )
880
- parseTest ( #"\113"# , scalar ( " \u{4B} " ) )
881
- parseTest ( #"\377"# , scalar ( " \u{FF} " ) )
884
+ parseTest ( #"\0113"# , scalar ( " \u{4B} " ) )
885
+ parseTest ( #"\113"# , backreference ( . absolute ( 113 ) ) )
886
+ parseTest ( #"\377"# , backreference ( . absolute ( 377 ) ) )
882
887
parseTest ( #"\81"# , backreference ( . absolute( 81 ) ) )
883
888
884
-
885
889
parseTest ( #"\g1"# , backreference ( . absolute( 1 ) ) )
886
890
parseTest ( #"\g001"# , backreference ( . absolute( 1 ) ) )
887
891
parseTest ( #"\g52"# , backreference ( . absolute( 52 ) ) )
0 commit comments