@@ -680,7 +680,7 @@ impl CharClass {
680
680
self . canonicalize ( )
681
681
}
682
682
683
- /// Canonicalze any sequence of ranges.
683
+ /// Canonicalize any sequence of ranges.
684
684
///
685
685
/// This is responsible for enforcing the canonical format invariants
686
686
/// as described on the docs for the `CharClass` type.
@@ -703,6 +703,43 @@ impl CharClass {
703
703
ordered
704
704
}
705
705
706
+ /// Calculate the intersection of two canonical character classes.
707
+ ///
708
+ /// The returned intersection is canonical.
709
+ fn intersection ( & self , other : & CharClass ) -> CharClass {
710
+ if self . ranges . is_empty ( ) || other. ranges . is_empty ( ) {
711
+ return CharClass :: empty ( ) ;
712
+ }
713
+
714
+ let mut intersection = CharClass :: empty ( ) ;
715
+
716
+ let mut iter_a = self . ranges . iter ( ) ;
717
+ let mut iter_b = other. ranges . iter ( ) ;
718
+ let mut a = iter_a. next ( ) . unwrap ( ) ;
719
+ let mut b = iter_b. next ( ) . unwrap ( ) ;
720
+ loop {
721
+ if let Some ( i) = a. intersection ( & b) {
722
+ intersection. ranges . push ( i) ;
723
+ }
724
+
725
+ // If the range with the smaller end didn't match this time,
726
+ // it won't ever match, so move on to the next one.
727
+ let ( iter, item) = if a. end < b. end {
728
+ ( & mut iter_a, & mut a)
729
+ } else {
730
+ ( & mut iter_b, & mut b)
731
+ } ;
732
+ if let Some ( v) = iter. next ( ) {
733
+ * item = v;
734
+ } else {
735
+ // No more ranges to check, done.
736
+ break ;
737
+ }
738
+ }
739
+
740
+ intersection. canonicalize ( )
741
+ }
742
+
706
743
/// Negates the character class.
707
744
///
708
745
/// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
@@ -801,6 +838,18 @@ impl ClassRange {
801
838
max ( self . start , other. start ) <= inc_char ( min ( self . end , other. end ) )
802
839
}
803
840
841
+ /// Returns the intersection of the two ranges if they have common
842
+ /// characters, `None` otherwise.
843
+ fn intersection ( & self , other : & ClassRange ) -> Option < ClassRange > {
844
+ let start = max ( self . start , other. start ) ;
845
+ let end = min ( self . end , other. end ) ;
846
+ if start <= end {
847
+ Some ( ClassRange :: new ( start, end) )
848
+ } else {
849
+ None
850
+ }
851
+ }
852
+
804
853
/// Creates a new range representing the union of `self` and `other`.
805
854
fn merge ( self , other : ClassRange ) -> ClassRange {
806
855
ClassRange {
@@ -1907,6 +1956,108 @@ mod tests {
1907
1956
] ) ) ;
1908
1957
}
1909
1958
1959
#[test]
fn class_intersection_empty() {
    // An empty class intersected with a one-character class should be empty.
    let empty = class(&[]);
    let single = class(&[('a', 'a')]);
    let expected = class(&[]);
    assert_intersection(empty, single, expected);
}
1965
+
1966
#[test]
fn class_intersection_single_equal() {
    // Two identical one-character classes intersect to that same class.
    let left = class(&[('a', 'a')]);
    let right = class(&[('a', 'a')]);
    let expected = class(&[('a', 'a')]);
    assert_intersection(left, right, expected);
}
1972
+
1973
#[test]
fn class_intersection_single_unequal() {
    // Distinct single characters have nothing in common.
    let only_a = class(&[('a', 'a')]);
    let only_b = class(&[('b', 'b')]);
    let expected = class(&[]);
    assert_intersection(only_a, only_b, expected);
}
1979
+
1980
#[test]
fn class_intersection_single_in_other() {
    // A single character that lies inside the other class's range survives.
    let single = class(&[('a', 'a')]);
    let wider = class(&[('a', 'c')]);
    let expected = class(&[('a', 'a')]);
    assert_intersection(single, wider, expected);
}
1986
+
1987
#[test]
fn class_intersection_range_in_other() {
    // A range sharing its start with a longer range is returned unchanged.
    let shorter = class(&[('a', 'b')]);
    let longer = class(&[('a', 'c')]);
    let expected = class(&[('a', 'b')]);
    assert_intersection(shorter, longer, expected);
}
1993
+
1994
#[test]
fn class_intersection_range_intersection() {
    // Ranges overlapping in exactly one character yield just that character.
    let left = class(&[('a', 'b')]);
    let right = class(&[('b', 'c')]);
    let expected = class(&[('b', 'b')]);
    assert_intersection(left, right, expected);
}
2000
+
2001
#[test]
fn class_intersection_only_adjacent() {
    // Ranges that merely touch ('b' next to 'c') share no characters.
    let left = class(&[('a', 'b')]);
    let right = class(&[('c', 'd')]);
    let expected = class(&[]);
    assert_intersection(left, right, expected);
}
2007
+
2008
#[test]
fn class_intersection_range_subset() {
    // A range strictly inside another is the whole intersection.
    let inner = class(&[('b', 'c')]);
    let outer = class(&[('a', 'd')]);
    let expected = class(&[('b', 'c')]);
    assert_intersection(inner, outer, expected);
}
2014
+
2015
#[test]
fn class_intersection_many_ranges_in_one_big() {
    // Several small ranges covered by one large range all come back intact.
    let pieces = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let cover = class(&[('a', 'h')]);
    let expected = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(pieces, cover, expected);
}
2023
+
2024
#[test]
fn class_intersection_many_ranges_same() {
    // Intersecting a multi-range class with an identical copy changes nothing.
    let left = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let right = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let expected = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    assert_intersection(left, right, expected);
}
2032
+
2033
#[test]
fn class_intersection_multiple_non_intersecting() {
    // Interleaved ranges that never overlap produce an empty result.
    let left = class(&[('a', 'b'), ('g', 'h')]);
    let right = class(&[('d', 'e'), ('k', 'l')]);
    let expected = class(&[]);
    assert_intersection(left, right, expected);
}
2039
+
2040
#[test]
fn class_intersection_non_intersecting_then_intersecting() {
    // Only the last range of the first class contains 'h'; earlier ranges
    // have to be skipped before the match is found.
    let many = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
    let only_h = class(&[('h', 'h')]);
    let expected = class(&[('h', 'h')]);
    assert_intersection(many, only_h, expected);
}
2046
+
2047
#[test]
fn class_intersection_adjacent_alternating() {
    // The two classes alternate along the alphabet, touching but never
    // overlapping, so nothing is shared.
    let left = class(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
    let right = class(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
    let expected = class(&[]);
    assert_intersection(left, right, expected);
}
2053
+
2054
#[test]
fn class_intersection_overlapping_alternating() {
    // Each range overlaps its neighbours on the other side; the expected
    // result is the single merged range 'b'..='f'.
    let left = class(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
    let right = class(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
    let expected = class(&[('b', 'f')]);
    assert_intersection(left, right, expected);
}
2060
+
1910
2061
#[ test]
1911
2062
fn class_canon_overlap_many_case_fold ( ) {
1912
2063
let cls = class ( & [
@@ -2056,4 +2207,10 @@ mod tests {
2056
2207
let expr = e ( "(?-u)[-./]" ) ;
2057
2208
assert_eq ! ( "(?-u:[-\\ .-/])" , expr. to_string( ) ) ;
2058
2209
}
2210
+
2211
+ fn assert_intersection ( cls1 : CharClass , cls2 : CharClass , expected : CharClass ) {
2212
+ // intersection operation should be commutative
2213
+ assert_eq ! ( cls1. intersection( & cls2) , expected) ;
2214
+ assert_eq ! ( cls2. intersection( & cls1) , expected) ;
2215
+ }
2059
2216
}
0 commit comments