Skip to content

Commit 7c17b7d

Browse files
committed
tests: add Unicode general category tests
1 parent 2711afe commit 7c17b7d

File tree

1 file changed

+79
-0
lines changed

1 file changed

+79
-0
lines changed

tests/unicode.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,82 @@ mat!(uni_boundary_none, r"\d\b", "6δ", None);
2929
mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
3030
mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1)));
3131
mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
32+
33+
// Test general categories.
34+
//
35+
// We should test more, but there are a lot. TODO: write a script to generate more of
36+
// these tests.
37+
mat!(uni_class_gencat_cased_letter,
38+
r"\p{Cased_Letter}", "Ａ", Some((0, 3)));
39+
mat!(uni_class_gencat_close_punctuation,
40+
r"\p{Close_Punctuation}", "❯", Some((0, 3)));
41+
mat!(uni_class_gencat_connector_punctuation,
42+
r"\p{Connector_Punctuation}", "⁀", Some((0, 3)));
43+
mat!(uni_class_gencat_control,
44+
r"\p{Control}", "\u{9f}", Some((0, 2)));
45+
mat!(uni_class_gencat_currency_symbol,
46+
r"\p{Currency_Symbol}", "￡", Some((0, 3)));
47+
mat!(uni_class_gencat_dash_punctuation,
48+
r"\p{Dash_Punctuation}", "〰", Some((0, 3)));
49+
mat!(uni_class_gencat_decimal_numer,
50+
r"\p{Decimal_Number}", "𑓙", Some((0, 4)));
51+
mat!(uni_class_gencat_enclosing_mark,
52+
r"\p{Enclosing_Mark}", "\u{A672}", Some((0, 3)));
53+
mat!(uni_class_gencat_final_punctuation,
54+
r"\p{Final_Punctuation}", "⸡", Some((0, 3)));
55+
mat!(uni_class_gencat_format,
56+
r"\p{Format}", "\u{E007F}", Some((0, 4)));
57+
mat!(uni_class_gencat_initial_punctuation,
58+
r"\p{Initial_Punctuation}", "⸜", Some((0, 3)));
59+
mat!(uni_class_gencat_letter,
60+
r"\p{Letter}", "Έ", Some((0, 2)));
61+
mat!(uni_class_gencat_letter_number,
62+
r"\p{Letter_Number}", "ↂ", Some((0, 3)));
63+
mat!(uni_class_gencat_line_separator,
64+
r"\p{Line_Separator}", "\u{2028}", Some((0, 3)));
65+
mat!(uni_class_gencat_lowercase_letter,
66+
r"\p{Lowercase_Letter}", "ϛ", Some((0, 2)));
67+
mat!(uni_class_gencat_mark,
68+
r"\p{Mark}", "\u{E01EF}", Some((0, 4)));
69+
mat!(uni_class_gencat_math,
70+
r"\p{Math}", "⋿", Some((0, 3)));
71+
mat!(uni_class_gencat_modifier_letter,
72+
r"\p{Modifier_Letter}", "𖭃", Some((0, 4)));
73+
mat!(uni_class_gencat_modifier_symbol,
74+
r"\p{Modifier_Symbol}", "🏿", Some((0, 4)));
75+
mat!(uni_class_gencat_nonspacing_mark,
76+
r"\p{Nonspacing_Mark}", "\u{1E94A}", Some((0, 4)));
77+
mat!(uni_class_gencat_number,
78+
r"\p{Number}", "⓿", Some((0, 3)));
79+
mat!(uni_class_gencat_open_punctuation,
80+
r"\p{Open_Punctuation}", "⦅", Some((0, 3)));
81+
mat!(uni_class_gencat_other,
82+
r"\p{Other}", "\u{bc9}", Some((0, 3)));
83+
mat!(uni_class_gencat_other_letter,
84+
r"\p{Other_Letter}", "ꓷ", Some((0, 3)));
85+
mat!(uni_class_gencat_other_number,
86+
r"\p{Other_Number}", "㉏", Some((0, 3)));
87+
mat!(uni_class_gencat_other_punctuation,
88+
r"\p{Other_Punctuation}", "𞥞", Some((0, 4)));
89+
mat!(uni_class_gencat_other_symbol,
90+
r"\p{Other_Symbol}", "⅌", Some((0, 3)));
91+
mat!(uni_class_gencat_paragraph_separator,
92+
r"\p{Paragraph_Separator}", "\u{2029}", Some((0, 3)));
93+
mat!(uni_class_gencat_private_use,
94+
r"\p{Private_Use}", "\u{10FFFD}", Some((0, 4)));
95+
mat!(uni_class_gencat_punctuation,
96+
r"\p{Punctuation}", "𑁍", Some((0, 4)));
97+
mat!(uni_class_gencat_separator,
98+
r"\p{Separator}", "\u{3000}", Some((0, 3)));
99+
mat!(uni_class_gencat_space_separator,
100+
r"\p{Space_Separator}", "\u{205F}", Some((0, 3)));
101+
mat!(uni_class_gencat_spacing_mark,
102+
r"\p{Spacing_Mark}", "\u{16F7E}", Some((0, 4)));
103+
mat!(uni_class_gencat_symbol,
104+
r"\p{Symbol}", "⯈", Some((0, 3)));
105+
mat!(uni_class_gencat_titlecase_letter,
106+
r"\p{Titlecase_Letter}", "ῼ", Some((0, 3)));
107+
mat!(uni_class_gencat_unassigned,
108+
r"\p{Unassigned}", "\u{10FFFF}", Some((0, 4)));
109+
mat!(uni_class_gencat_uppercase_letter,
110+
r"\p{Uppercase_Letter}", "Ꝋ", Some((0, 3)));

0 commit comments

Comments
 (0)