@@ -26,11 +26,15 @@ def fetch(f):
26
26
def load_unicode_data (f ):
27
27
fetch (f )
28
28
gencats = {}
29
+ combines = []
29
30
canon_decomp = {}
30
31
compat_decomp = {}
31
32
curr_cat = ""
33
+ curr_combine = ""
32
34
c_lo = 0
33
35
c_hi = 0
36
+ com_lo = 0
37
+ com_hi = 0
34
38
for line in fileinput .input (f ):
35
39
fields = line .split (";" )
36
40
if len (fields ) != 15 :
@@ -69,7 +73,21 @@ def load_unicode_data(f):
69
73
c_lo = code
70
74
c_hi = code
71
75
72
- return (canon_decomp , compat_decomp , gencats )
76
+ if curr_combine == "" :
77
+ curr_combine = combine
78
+ com_lo = code
79
+ com_hi = code
80
+
81
+ if curr_combine == combine :
82
+ com_hi = code
83
+ else :
84
+ if curr_combine != "0" :
85
+ combines .append ((com_lo , com_hi , curr_combine ))
86
+ curr_combine = combine
87
+ com_lo = code
88
+ com_hi = code
89
+
90
+ return (canon_decomp , compat_decomp , gencats , combines )
73
91
74
92
75
93
def load_derived_core_properties (f ):
@@ -178,50 +196,149 @@ def emit_property_module_old(f, mod, tbl):
178
196
f .write (" }\n \n " )
179
197
f .write ("}\n " )
180
198
181
- def emit_decomp_module (f , canon , compat ):
199
def format_table_content(f, content, indent):
    """Write a comma-separated table body to *f*, wrapped and indented.

    *content* is split on "," and the pieces are re-joined with ", ".
    Every output line starts with *indent* spaces.  A line that is followed
    by another one ends with "," and a newline; the last line is written
    without a trailing comma or newline so the caller can close the table.

    f       -- object with a write(str) method that receives the output.
    content -- string of table entries separated by ",".
    indent  -- number of spaces placed in front of every output line.
    """
    # A chunk is appended only while len(line) + len(chunk) stays below this
    # budget, which leaves room for the two-character ", " separator.
    width_budget = 98
    margin = " " * indent

    chunks = content.split(",")

    # The first chunk always opens the first line, even when it is too long
    # to fit.  Flushing the still-empty line instead would emit a stray ","
    # on a line of its own and lose the separator before the next chunk.
    line = margin + chunks[0]

    for chunk in chunks[1:]:
        if len(line) + len(chunk) < width_budget:
            line += ", " + chunk
        else:
            # Line is full: terminate it and start the next one with chunk.
            f.write(line + ",\n")
            line = margin + chunk

    f.write(line)
213
+
214
+ def emit_decomp_module (f , canon , compat , combine ):
182
215
canon_keys = canon .keys ()
183
216
canon_keys .sort ()
184
217
185
218
compat_keys = compat .keys ()
186
219
compat_keys .sort ()
187
- f .write ("mod decompose {\n \n " );
188
- f .write (" export canonical, compatibility;\n \n " )
189
- f .write (" fn canonical(c: char, i: block(char)) "
190
- + "{ d(c, i, false); }\n \n " )
191
- f .write (" fn compatibility(c: char, i: block(char)) "
192
- + "{ d(c, i, true); }\n \n " )
193
- f .write (" fn d(c: char, i: block(char), k: bool) {\n " )
220
+ f .write ("pub mod decompose {\n " );
221
+ f .write (" use option::Option;\n " );
222
+ f .write (" use option::{Some, None};\n " );
223
+ f .write (" use vec::ImmutableVector;\n " );
224
+ f .write ("""
225
+ fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
226
+ use cmp::{Equal, Less, Greater};
227
+ match r.bsearch(|&(val, _)| {
228
+ if c == val { Equal }
229
+ else if val < c { Less }
230
+ else { Greater }
231
+ }) {
232
+ Some(idx) => {
233
+ let (_, result) = r[idx];
234
+ Some(result)
235
+ }
236
+ None => None
237
+ }
238
+ }\n
239
+ """ )
194
240
195
- f .write (" if c <= '\\ x7f' { i(c); ret; }\n " )
241
+ f .write ("""
242
+ fn bsearch_range_value_table(c: char, r: &'static [(char, char, u8)]) -> u8 {
243
+ use cmp::{Equal, Less, Greater};
244
+ match r.bsearch(|&(lo, hi, _)| {
245
+ if lo <= c && c <= hi { Equal }
246
+ else if hi < c { Less }
247
+ else { Greater }
248
+ }) {
249
+ Some(idx) => {
250
+ let (_, _, result) = r[idx];
251
+ result
252
+ }
253
+ None => 0
254
+ }
255
+ }\n \n
256
+ """ )
196
257
197
- # First check the canonical decompositions
198
- f .write (" // Canonical decomposition\n " )
199
- f .write (" alt c {\n " )
258
+ f .write (" // Canonical decompositions\n " )
259
+ f .write (" static canonical_table : &'static [(char, &'static [char])] = &[\n " )
260
+ data = ""
261
+ first = True
200
262
for char in canon_keys :
201
- f .write (" %s {\n " % escape_char (char ))
263
+ if not first :
264
+ data += ","
265
+ first = False
266
+ data += "(%s,&[" % escape_char (char )
267
+ first2 = True
202
268
for d in canon [char ]:
203
- f .write (" d(%s, i, k);\n "
204
- % escape_char (d ))
205
- f .write (" }\n " )
269
+ if not first2 :
270
+ data += ","
271
+ first2 = False
272
+ data += escape_char (d )
273
+ data += "])"
274
+ format_table_content (f , data , 8 )
275
+ f .write ("\n ];\n \n " )
276
+
277
+ f .write (" // Compatibility decompositions\n " )
278
+ f .write (" static compatibility_table : &'static [(char, &'static [char])] = &[\n " )
279
+ data = ""
280
+ first = True
281
+ for char in compat_keys :
282
+ if not first :
283
+ data += ","
284
+ first = False
285
+ data += "(%s,&[" % escape_char (char )
286
+ first2 = True
287
+ for d in compat [char ]:
288
+ if not first2 :
289
+ data += ","
290
+ first2 = False
291
+ data += escape_char (d )
292
+ data += "])"
293
+ format_table_content (f , data , 8 )
294
+ f .write ("\n ];\n \n " )
295
+
296
+ f .write (" static combining_class_table : &'static [(char, char, u8)] = &[\n " )
297
+ ix = 0
298
+ for pair in combine :
299
+ f .write (ch_prefix (ix ))
300
+ f .write ("(%s, %s, %s)" % (escape_char (pair [0 ]), escape_char (pair [1 ]), pair [2 ]))
301
+ ix += 1
302
+ f .write ("\n ];\n " )
303
+
304
+ f .write (" pub fn canonical(c: char, i: &fn(char)) "
305
+ + "{ d(c, i, false); }\n \n " )
306
+ f .write (" pub fn compatibility(c: char, i: &fn(char)) "
307
+ + "{ d(c, i, true); }\n \n " )
308
+ f .write (" pub fn canonical_combining_class(c: char) -> u8 {\n "
309
+ + " bsearch_range_value_table(c, combining_class_table)\n "
310
+ + " }\n \n " )
311
+ f .write (" fn d(c: char, i: &fn(char), k: bool) {\n " )
312
+ f .write (" use iterator::Iterator;\n " );
206
313
207
- f .write (" _ { }\n " )
208
- f .write (" }\n \n " )
314
+ f .write (" if c <= '\\ x7f' { i(c); return; }\n " )
315
+
316
+ # First check the canonical decompositions
317
+ f .write ("""
318
+ match bsearch_table(c, canonical_table) {
319
+ Some(canon) => {
320
+ for x in canon.iter() {
321
+ d(*x, |b| i(b), k);
322
+ }
323
+ return;
324
+ }
325
+ None => ()
326
+ }\n \n """ )
209
327
210
328
# Bottom out if we're not doing compat.
211
- f .write (" if !k { i(c); ret ; }\n \n " )
329
+ f .write (" if !k { i(c); return ; }\n " )
212
330
213
331
# Then check the compatibility decompositions
214
- f .write (" // Compatibility decomposition\n " )
215
- f .write (" alt c {\n " )
216
- for char in compat_keys :
217
- f .write (" %s {\n " % escape_char (char ))
218
- for d in compat [char ]:
219
- f .write (" d(%s, i, k);\n "
220
- % escape_char (d ))
221
- f .write (" }\n " )
222
-
223
- f .write (" _ { }\n " )
224
- f .write (" }\n \n " )
332
+ f .write ("""
333
+ match bsearch_table(c, compatibility_table) {
334
+ Some(compat) => {
335
+ for x in compat.iter() {
336
+ d(*x, |b| i(b), k);
337
+ }
338
+ return;
339
+ }
340
+ None => ()
341
+ }\n \n """ )
225
342
226
343
# Finally bottom out.
227
344
f .write (" i(c);\n " )
@@ -234,7 +351,7 @@ def emit_decomp_module(f, canon, compat):
234
351
os .remove (i );
235
352
rf = open (r , "w" )
236
353
237
- (canon_decomp , compat_decomp , gencats ) = load_unicode_data ("UnicodeData.txt" )
354
+ (canon_decomp , compat_decomp , gencats , combines ) = load_unicode_data ("UnicodeData.txt" )
238
355
239
356
# Preamble
240
357
rf .write ('''// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
@@ -256,7 +373,7 @@ def emit_decomp_module(f, canon, compat):
256
373
257
374
emit_property_module (rf , "general_category" , gencats )
258
375
259
- # emit_decomp_module(rf, canon_decomp, compat_decomp)
376
+ emit_decomp_module (rf , canon_decomp , compat_decomp , combines )
260
377
261
378
derived = load_derived_core_properties ("DerivedCoreProperties.txt" )
262
379
emit_property_module (rf , "derived_property" , derived )
0 commit comments