@@ -22,9 +22,11 @@ def fetch(f):
22
22
exit (1 )
23
23
24
24
25
- def load_general_categories (f ):
25
+ def load_unicode_data (f ):
26
26
fetch (f )
27
27
gencats = {}
28
+ canon_decomp = {}
29
+ compat_decomp = {}
28
30
curr_cat = ""
29
31
c_lo = 0
30
32
c_hi = 0
@@ -38,6 +40,18 @@ def load_general_categories(f):
38
40
39
41
code = int (code , 16 )
40
42
43
+ if decomp != "" :
44
+ if decomp .startswith ('<' ):
45
+ seq = []
46
+ for i in decomp .split ()[1 :]:
47
+ seq .append (int (i , 16 ))
48
+ compat_decomp [code ] = seq
49
+ else :
50
+ seq = []
51
+ for i in decomp .split ():
52
+ seq .append (int (i , 16 ))
53
+ canon_decomp [code ] = seq
54
+
41
55
if curr_cat == "" :
42
56
curr_cat = gencat
43
57
c_lo = code
@@ -53,7 +67,8 @@ def load_general_categories(f):
53
67
curr_cat = gencat
54
68
c_lo = code
55
69
c_hi = code
56
- return gencats
70
+
71
+ return (canon_decomp , compat_decomp , gencats )
57
72
58
73
59
74
def load_derived_core_properties (f ):
@@ -96,7 +111,7 @@ def escape_char(c):
96
111
return "'\\ u%4.4x'" % c
97
112
return "'\\ U%8.8x'" % c
98
113
99
- def emit_rust_module (f , mod , tbl ):
114
+ def emit_property_module (f , mod , tbl ):
100
115
f .write ("mod %s {\n " % mod )
101
116
keys = tbl .keys ()
102
117
keys .sort ()
@@ -120,53 +135,63 @@ def emit_rust_module(f, mod, tbl):
120
135
f .write (" }\n \n " )
121
136
f .write ("}\n " )
122
137
123
-
124
- def emit_cpp_module (f , mod , tbl ):
125
- keys = tbl .keys ()
126
- keys .sort ()
127
-
128
- for cat in keys :
129
-
130
- singles = []
131
- ranges = []
132
-
133
- for pair in tbl [cat ]:
134
- if pair [0 ] == pair [1 ]:
135
- singles .append (pair [0 ])
136
- else :
137
- ranges .append (pair )
138
-
139
- f .write ("bool %s_%s(unsigned c) {\n " % (mod , cat ))
140
- for pair in ranges :
141
- f .write (" if (0x%x <= c && c <= 0x%x) { return true; }\n "
142
- % pair )
143
- if len (singles ) > 0 :
144
- f .write (" switch (c) {\n " );
145
- for single in singles :
146
- f .write (" case 0x%x:\n " % single )
147
- f .write (" return true;\n " );
148
- f .write (" default:\n " );
149
- f .write (" return false;\n " );
150
- f .write (" }\n " )
151
- f .write ("return false;\n " )
152
- f .write ("}\n \n " )
153
-
154
-
155
- def emit_module (rf , cf , mod , tbl ):
156
- emit_rust_module (rf , mod , tbl )
157
- emit_cpp_module (cf , mod , tbl )
138
def _emit_decomp_arms(f, label, table):
    """Write one `alt c { ... }` block with an arm per decomposable char.

    `label` is used only in the generated comment.  `table` maps a code
    point to the list of code points it decomposes into.
    """
    f.write("        // %s decomposition\n" % label)
    f.write("        alt c {\n")
    # Sorted so the generated file is stable from run to run.
    for code in sorted(table):
        f.write("          %s {\n" % escape_char(code))
        for part in table[code]:
            # Recurse so that each piece is itself fully decomposed.
            f.write("            d(%s, i, k);\n" % escape_char(part))
        # Return once the decomposition has been emitted; without this the
        # generated code falls out of the `alt` and also emits `c` itself.
        f.write("            ret;\n")
        f.write("          }\n")
    f.write("          _ { }\n")
    f.write("        }\n\n")


def emit_decomp_module(f, canon, compat):
    """Write the Rust `decompose` module to `f`.

    f      -- writable file-like object receiving the generated source.
    canon  -- dict: code point -> list of code points (canonical
              decomposition).
    compat -- dict: code point -> list of code points (compatibility
              decomposition).

    The generated module exports `canonical` and `compatibility`; both
    delegate to a private recursive `d(c, i, k)` that calls `i` on each
    resulting char, with `k` selecting compatibility mode.
    """
    f.write("mod decompose {\n\n")
    f.write("    export canonical, compatibility;\n\n")
    f.write("    fn canonical(c: char, i: block(char)) "
            "{ d(c, i, false); }\n\n")
    f.write("    fn compatibility(c: char, i: block(char)) "
            "{ d(c, i, true); }\n\n")
    f.write("    fn d(c: char, i: block(char), k: bool) {\n")

    # Fast path in the generated code: chars up to \x7f are emitted as-is.
    f.write("        if c <= '\\x7f' { i(c); ret; }\n")

    # First check the canonical decompositions.
    _emit_decomp_arms(f, "Canonical", canon)

    # Bottom out if we're not doing compat.
    f.write("        if !k { i(c); ret; }\n\n")

    # Then check the compatibility decompositions.
    _emit_decomp_arms(f, "Compatibility", compat)

    # Finally bottom out.
    f.write("        i(c);\n")
    f.write("    }\n")
    f.write("}\n\n")
158
185
159
186
# Driver: regenerate the Rust source file from the Unicode data files.
r = "unicode.rs"

# Remove any previous output before regenerating it.
if os.path.exists(r):
    os.remove(r)

# `with` guarantees the output is flushed and closed even if an emitter fails.
with open(r, "w") as rf:
    (canon_decomp, compat_decomp, gencats) = load_unicode_data("UnicodeData.txt")
    emit_decomp_module(rf, canon_decomp, compat_decomp)
    emit_property_module(rf, "general_category", gencats)

    emit_property_module(rf, "derived_property",
                         load_derived_core_properties("DerivedCoreProperties.txt"))
0 commit comments