@@ -29,3 +29,82 @@ mat!(uni_boundary_none, r"\d\b", "6δ", None);
29
29
// The character after the digit is U+1680 OGHAM SPACE MARK, spelled as an
// escape because it is visually indistinguishable from an ASCII space. The
// expected span says the digit matches at (0, 1), i.e. `\b` holds after "6".
mat!(uni_boundary_ogham, r"\d\b", "6\u{1680}", Some((0, 1)));
30
30
// Expected: the digit matches at (0, 1), i.e. `\B` holds between "6" and "δ".
mat!(
    uni_not_boundary_none,
    r"\d\B",
    "6δ",
    Some((0, 1))
);
31
31
// The character after the digit is U+1680 OGHAM SPACE MARK, spelled as an
// escape because it is visually indistinguishable from an ASCII space. The
// expected result is no match, i.e. `\B` does not hold after "6".
mat!(uni_not_boundary_ogham, r"\d\B", "6\u{1680}", None);
32
+
33
+ // Test general categories.
34
+ //
35
+ // This list is not exhaustive; there are too many categories and codepoints
+ // to cover by hand. TODO: write a script to generate more of these tests.
37
+ // The haystack is U+212B ANGSTROM SIGN, an uppercase letter that encodes to
+ // three UTF-8 bytes. It is written as an escape so the expected span (0, 3)
+ // cannot be invalidated by a tool folding the character to ASCII "A", which
+ // is a single byte and would match at (0, 1) instead.
+ mat!(
+     uni_class_gencat_cased_letter,
+     r"\p{Cased_Letter}",
+     "\u{212B}",
+     Some((0, 3))
+ );
39
+ // Each case pairs one `\p{...}` class with a haystack that begins with a
+ // single codepoint; the expected end offset equals that codepoint's UTF-8
+ // length. (`mat!` itself is defined outside this view.)
+ mat!(
+     uni_class_gencat_close_punctuation,
+     r"\p{Close_Punctuation}",
+     "❯",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_connector_punctuation,
+     r"\p{Connector_Punctuation}",
+     "⁀",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_control,
+     r"\p{Control}",
+     "\u{9f} ",
+     Some((0, 2))
+ );
45
+ // The haystack is U+FFE1 FULLWIDTH POUND SIGN, which encodes to three UTF-8
+ // bytes and so agrees with the expected span (0, 3). It is written as an
+ // escape because the look-alike "£" (U+00A3) is only two bytes long and
+ // would match at (0, 2) instead.
+ mat!(
+     uni_class_gencat_currency_symbol,
+     r"\p{Currency_Symbol}",
+     "\u{FFE1}",
+     Some((0, 3))
+ );
47
+ // Expected spans are (start, end) offsets; each end offset equals the UTF-8
+ // length of the first codepoint in the haystack.
+ mat!(
+     uni_class_gencat_dash_punctuation,
+     r"\p{Dash_Punctuation}",
+     "〰",
+     Some((0, 3))
+ );
+ // NOTE(review): the test name below spells "numer" (presumably "number");
+ // it is left unchanged so the test identifier stays stable.
+ mat!(
+     uni_class_gencat_decimal_numer,
+     r"\p{Decimal_Number}",
+     "𑓙",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_enclosing_mark,
+     r"\p{Enclosing_Mark}",
+     "\u{A672} ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_final_punctuation,
+     r"\p{Final_Punctuation}",
+     "⸡",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_format,
+     r"\p{Format}",
+     "\u{E007F} ",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_initial_punctuation,
+     r"\p{Initial_Punctuation}",
+     "⸜",
+     Some((0, 3))
+ );
59
+ // Letter, separator, and mark classes. Each expected end offset equals the
+ // UTF-8 length of the first codepoint in the haystack.
+ mat!(
+     uni_class_gencat_letter,
+     r"\p{Letter}",
+     "Έ",
+     Some((0, 2))
+ );
+ mat!(
+     uni_class_gencat_letter_number,
+     r"\p{Letter_Number}",
+     "ↂ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_line_separator,
+     r"\p{Line_Separator}",
+     "\u{2028} ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_lowercase_letter,
+     r"\p{Lowercase_Letter}",
+     "ϛ",
+     Some((0, 2))
+ );
+ mat!(
+     uni_class_gencat_mark,
+     r"\p{Mark}",
+     "\u{E01EF} ",
+     Some((0, 4))
+ );
+ // NOTE(review): in Unicode, `Math` is a binary property; the general
+ // category is named `Math_Symbol`. Confirm which one this test intends.
+ mat!(
+     uni_class_gencat_math,
+     r"\p{Math}",
+     "⋿",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_modifier_letter,
+     r"\p{Modifier_Letter}",
+     "𖭃",
+     Some((0, 4))
+ );
73
+ // Symbol, number, punctuation, and "other" classes. Each expected end offset
+ // equals the UTF-8 length of the first codepoint in the haystack.
+ mat!(
+     uni_class_gencat_modifier_symbol,
+     r"\p{Modifier_Symbol}",
+     "🏿",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_nonspacing_mark,
+     r"\p{Nonspacing_Mark}",
+     "\u{1E94A} ",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_number,
+     r"\p{Number}",
+     "⓿",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_open_punctuation,
+     r"\p{Open_Punctuation}",
+     "⦅",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_other,
+     r"\p{Other}",
+     "\u{bc9} ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_other_letter,
+     r"\p{Other_Letter}",
+     "ꓷ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_other_number,
+     r"\p{Other_Number}",
+     "㉏",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_other_punctuation,
+     r"\p{Other_Punctuation}",
+     "𞥞",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_other_symbol,
+     r"\p{Other_Symbol}",
+     "⅌",
+     Some((0, 3))
+ );
91
+ // Remaining classes covered in this section. Each expected end offset equals
+ // the UTF-8 length of the first codepoint in the haystack.
+ mat!(
+     uni_class_gencat_paragraph_separator,
+     r"\p{Paragraph_Separator}",
+     "\u{2029} ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_private_use,
+     r"\p{Private_Use}",
+     "\u{10FFFD} ",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_punctuation,
+     r"\p{Punctuation}",
+     "𑁍",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_separator,
+     r"\p{Separator}",
+     "\u{3000} ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_space_separator,
+     r"\p{Space_Separator}",
+     "\u{205F} ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_spacing_mark,
+     r"\p{Spacing_Mark}",
+     "\u{16F7E} ",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_symbol,
+     r"\p{Symbol}",
+     "⯈",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_titlecase_letter,
+     r"\p{Titlecase_Letter}",
+     "ῼ",
+     Some((0, 3))
+ );
+ mat!(
+     uni_class_gencat_unassigned,
+     r"\p{Unassigned}",
+     "\u{10FFFF} ",
+     Some((0, 4))
+ );
+ mat!(
+     uni_class_gencat_uppercase_letter,
+     r"\p{Uppercase_Letter}",
+     "Ꝋ",
+     Some((0, 3))
+ );
0 commit comments