10
10
11
11
use std:: borrow:: Cow ;
12
12
use std:: collections:: HashMap ;
13
- use std:: collections:: hash_map:: Iter ;
14
13
use std:: fmt;
15
14
use std:: ops:: Index ;
16
15
#[ cfg( feature = "pattern" ) ]
17
16
use std:: str:: pattern:: { Pattern , Searcher , SearchStep } ;
18
17
use std:: str:: FromStr ;
18
+ use std:: sync:: Arc ;
19
19
20
20
use exec:: { Exec , ExecBuilder } ;
21
21
use syntax;
@@ -186,6 +186,8 @@ pub struct ExNative {
186
186
#[ doc( hidden) ]
187
187
pub names : & ' static & ' static [ Option < & ' static str > ] ,
188
188
#[ doc( hidden) ]
189
+ pub groups : & ' static & ' static [ ( & ' static str , usize ) ] ,
190
+ #[ doc( hidden) ]
189
191
pub prog : fn ( & mut CaptureIdxs , & str , usize ) -> bool ,
190
192
}
191
193
@@ -395,9 +397,13 @@ impl Regex {
395
397
/// The `0`th capture group is always unnamed, so it must always be
396
398
/// accessed with `at(0)` or `[0]`.
397
399
pub fn captures < ' t > ( & self , text : & ' t str ) -> Option < Captures < ' t > > {
398
- let mut caps = self . alloc_captures ( ) ;
399
- if exec ( self , & mut caps, text, 0 ) {
400
- Some ( Captures :: new ( self , text, caps) )
400
+ let mut locs = self . alloc_captures ( ) ;
401
+ if exec ( self , & mut locs, text, 0 ) {
402
+ Some ( Captures {
403
+ text : text,
404
+ locs : locs,
405
+ named_groups : NamedGroups :: from_regex ( self )
406
+ } )
401
407
} else {
402
408
None
403
409
}
@@ -804,6 +810,71 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
804
810
}
805
811
}
806
812
813
+ enum NamedGroups {
814
+ Empty ,
815
+ Native ( & ' static [ ( & ' static str , usize ) ] ) ,
816
+ Dynamic ( Arc < HashMap < String , usize > > ) ,
817
+ }
818
+
819
+ impl NamedGroups {
820
+ fn from_regex ( regex : & Regex ) -> NamedGroups {
821
+ match * regex {
822
+ Regex :: Native ( ExNative { ref groups, .. } ) =>
823
+ NamedGroups :: Native ( groups) ,
824
+ Regex :: Dynamic ( ref exec) => {
825
+ let groups = exec. named_groups ( ) ;
826
+ if groups. is_empty ( ) {
827
+ NamedGroups :: Empty
828
+ } else {
829
+ NamedGroups :: Dynamic ( groups. clone ( ) )
830
+ }
831
+ }
832
+ }
833
+ }
834
+
835
+ fn pos ( & self , name : & str ) -> Option < usize > {
836
+ match * self {
837
+ NamedGroups :: Empty => None ,
838
+ NamedGroups :: Native ( groups) => {
839
+ groups. binary_search_by ( |& ( n, _) | n. cmp ( name) )
840
+ . ok ( ) . map ( |i| groups[ i] . 1 )
841
+ } ,
842
+ NamedGroups :: Dynamic ( ref groups) => {
843
+ groups. get ( name) . map ( |i| * i)
844
+ } ,
845
+ }
846
+ }
847
+
848
+ fn iter < ' n > ( & ' n self ) -> NamedGroupsIter < ' n > {
849
+ match * self {
850
+ NamedGroups :: Empty => NamedGroupsIter :: Empty ,
851
+ NamedGroups :: Native ( g) => NamedGroupsIter :: Native ( g. iter ( ) ) ,
852
+ NamedGroups :: Dynamic ( ref g) => NamedGroupsIter :: Dynamic ( g. iter ( ) ) ,
853
+ }
854
+ }
855
+ }
856
+
857
/// Iterator over the `(name, index)` pairs of a named-group table.
///
/// `'n` is the lifetime of the borrow of the table being iterated.
enum NamedGroupsIter<'n> {
    /// Yields nothing.
    Empty,
    /// Walks a static `(name, index)` slice in slice order.
    Native(::std::slice::Iter<'static, (&'static str, usize)>),
    /// Walks a borrowed name-to-index hash map in the map's own order.
    Dynamic(::std::collections::hash_map::Iter<'n, String, usize>),
}

impl<'n> Iterator for NamedGroupsIter<'n> {
    type Item = (&'n str, usize);

    /// Returns the next `(name, index)` pair, or `None` when exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        match *self {
            NamedGroupsIter::Empty => None,
            NamedGroupsIter::Native(ref mut it) => it.next().cloned(),
            NamedGroupsIter::Dynamic(ref mut it) => {
                it.next().map(|(name, &idx)| (&name[..], idx))
            }
        }
    }

    /// Forwards the wrapped iterator's bounds so that callers such as
    /// `collect` can size their buffers up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match *self {
            NamedGroupsIter::Empty => (0, Some(0)),
            NamedGroupsIter::Native(ref it) => it.size_hint(),
            NamedGroupsIter::Dynamic(ref it) => it.size_hint(),
        }
    }
}
877
+
807
878
/// Captures represents a group of captured strings for a single match.
808
879
///
809
880
/// The 0th capture always corresponds to the entire match. Each subsequent
@@ -818,34 +889,10 @@ impl<'r, 't> Iterator for RegexSplitsN<'r, 't> {
818
889
pub struct Captures < ' t > {
819
890
text : & ' t str ,
820
891
locs : Vec < Option < usize > > ,
821
- named : Option < HashMap < String , usize > > ,
892
+ named_groups : NamedGroups ,
822
893
}
823
894
824
895
impl < ' t > Captures < ' t > {
825
- fn new (
826
- re : & Regex ,
827
- search : & ' t str ,
828
- locs : Vec < Option < usize > > ,
829
- ) -> Captures < ' t > {
830
- let named =
831
- if re. captures_len ( ) == 0 {
832
- None
833
- } else {
834
- let mut named = HashMap :: new ( ) ;
835
- for ( i, name) in re. capture_names ( ) . enumerate ( ) {
836
- if let Some ( name) = name {
837
- named. insert ( name. to_owned ( ) , i) ;
838
- }
839
- }
840
- Some ( named)
841
- } ;
842
- Captures {
843
- text : search,
844
- locs : locs,
845
- named : named,
846
- }
847
- }
848
-
849
896
/// Returns the start and end positions of the Nth capture group.
850
897
/// Returns `None` if `i` is not a valid capture group or if the capture
851
898
/// group did not match anything.
@@ -874,15 +921,7 @@ impl<'t> Captures<'t> {
874
921
/// `name` isn't a valid capture group or didn't match anything, then
875
922
/// `None` is returned.
876
923
pub fn name ( & self , name : & str ) -> Option < & ' t str > {
877
- match self . named {
878
- None => None ,
879
- Some ( ref h) => {
880
- match h. get ( name) {
881
- None => None ,
882
- Some ( i) => self . at ( * i) ,
883
- }
884
- }
885
- }
924
+ self . named_groups . pos ( name) . and_then ( |i| self . at ( i) )
886
925
}
887
926
888
927
/// Creates an iterator of all the capture groups in order of appearance
@@ -895,7 +934,7 @@ impl<'t> Captures<'t> {
895
934
/// appearance in the regular expression. Positions are byte indices
896
935
/// in terms of the original string matched.
897
936
pub fn iter_pos ( & ' t self ) -> SubCapturesPos < ' t > {
898
- SubCapturesPos { idx : 0 , caps : self , }
937
+ SubCapturesPos { idx : 0 , locs : & self . locs }
899
938
}
900
939
901
940
/// Creates an iterator of all named groups as a tuple with the group
@@ -904,7 +943,7 @@ impl<'t> Captures<'t> {
904
943
pub fn iter_named ( & ' t self ) -> SubCapturesNamed < ' t > {
905
944
SubCapturesNamed {
906
945
caps : self ,
907
- inner : self . named . as_ref ( ) . map ( |n| n . iter ( ) ) ,
946
+ names : self . named_groups . iter ( )
908
947
}
909
948
}
910
949
@@ -978,16 +1017,16 @@ impl<'t> Index<&'t str> for Captures<'t> {
978
1017
/// An iterator over capture groups for a particular match of a regular
979
1018
/// expression.
980
1019
///
981
- /// `'t ` is the lifetime of the matched text .
982
- pub struct SubCaptures < ' t > {
1020
+ /// `'c ` is the lifetime of the captures .
1021
+ pub struct SubCaptures < ' c > {
983
1022
idx : usize ,
984
- caps : & ' t Captures < ' t > ,
1023
+ caps : & ' c Captures < ' c > ,
985
1024
}
986
1025
987
- impl < ' t > Iterator for SubCaptures < ' t > {
988
- type Item = Option < & ' t str > ;
1026
+ impl < ' c > Iterator for SubCaptures < ' c > {
1027
+ type Item = Option < & ' c str > ;
989
1028
990
- fn next ( & mut self ) -> Option < Option < & ' t str > > {
1029
+ fn next ( & mut self ) -> Option < Option < & ' c str > > {
991
1030
if self . idx < self . caps . len ( ) {
992
1031
self . idx += 1 ;
993
1032
Some ( self . caps . at ( self . idx - 1 ) )
@@ -1002,42 +1041,43 @@ impl<'t> Iterator for SubCaptures<'t> {
1002
1041
///
1003
1042
/// Positions are byte indices in terms of the original string matched.
1004
1043
///
1005
/// `'c` is the lifetime of the captures.
pub struct SubCapturesPos<'c> {
    /// Index into `locs` of the next start slot; advances by two per item.
    idx: usize,
    /// Capture slots, read in consecutive `(start, end)` pairs.
    locs: &'c [Option<usize>],
}

impl<'c> Iterator for SubCapturesPos<'c> {
    type Item = Option<(usize, usize)>;

    /// Returns the `(start, end)` byte offsets of the next capture group, or
    /// `Some(None)` when that group has no recorded offsets. Returns `None`
    /// once no complete slot pair is left.
    ///
    /// # Panics
    ///
    /// Panics if exactly one slot of a pair is set, which would mean the slot
    /// list is corrupt.
    fn next(&mut self) -> Option<Option<(usize, usize)>> {
        // Checked access on both slots: a dangling half pair at the end of the
        // list ends iteration instead of indexing out of bounds.
        let (start, end) = match (self.locs.get(self.idx), self.locs.get(self.idx + 1)) {
            (Some(&s), Some(&e)) => (s, e),
            _ => return None,
        };
        let span = match (start, end) {
            (Some(s), Some(e)) => Some((s, e)),
            (None, None) => None,
            _ => unreachable!(),
        };
        self.idx += 2;
        Some(span)
    }

    /// Reports the exact number of complete slot pairs still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.locs.len().saturating_sub(self.idx) / 2;
        (remaining, Some(remaining))
    }
}
1023
1066
1024
1067
/// An Iterator over named capture groups as a tuple with the group
1025
1068
/// name and the value.
1026
1069
///
1027
- /// `'t ` is the lifetime of the matched text .
1028
- pub struct SubCapturesNamed < ' t > {
1029
- caps : & ' t Captures < ' t > ,
1030
- inner : Option < Iter < ' t , String , usize > > ,
1070
+ /// `'c ` is the lifetime of the captures .
1071
+ pub struct SubCapturesNamed < ' c > {
1072
+ caps : & ' c Captures < ' c > ,
1073
+ names : NamedGroupsIter < ' c > ,
1031
1074
}
1032
1075
1033
- impl < ' t > Iterator for SubCapturesNamed < ' t > {
1034
- type Item = ( & ' t str , Option < & ' t str > ) ;
1076
+ impl < ' c > Iterator for SubCapturesNamed < ' c > {
1077
+ type Item = ( & ' c str , Option < & ' c str > ) ;
1035
1078
1036
- fn next ( & mut self ) -> Option < ( & ' t str , Option < & ' t str > ) > {
1037
- match self . inner . as_mut ( ) . map_or ( None , |it| it. next ( ) ) {
1038
- Some ( ( name, pos) ) => Some ( ( name, self . caps . at ( * pos) ) ) ,
1039
- None => None
1040
- }
1079
+ fn next ( & mut self ) -> Option < ( & ' c str , Option < & ' c str > ) > {
1080
+ self . names . next ( ) . map ( |( name, pos) | ( name, self . caps . at ( pos) ) )
1041
1081
}
1042
1082
}
1043
1083
@@ -1081,7 +1121,11 @@ impl<'r, 't> Iterator for FindCaptures<'r, 't> {
1081
1121
}
1082
1122
self . last_end = e;
1083
1123
self . last_match = Some ( self . last_end ) ;
1084
- Some ( Captures :: new ( self . re , self . search , caps) )
1124
+ Some ( Captures {
1125
+ text : self . search ,
1126
+ locs : caps,
1127
+ named_groups : NamedGroups :: from_regex ( self . re ) ,
1128
+ } )
1085
1129
}
1086
1130
}
1087
1131
0 commit comments