22
22
23
23
import fileinput , re , os , sys , operator
24
24
25
- preamble = '''// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
25
+ preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
26
26
// file at the top-level directory of this distribution and at
27
27
// http://rust-lang.org/COPYRIGHT.
28
28
//
@@ -67,7 +67,6 @@ def is_surrogate(n):
67
67
def load_unicode_data (f ):
68
68
fetch (f )
69
69
gencats = {}
70
- combines = {}
71
70
72
71
udict = {};
73
72
range_start = - 1 ;
@@ -98,22 +97,15 @@ def load_unicode_data(f):
98
97
gencats [cat ] = []
99
98
gencats [cat ].append (code )
100
99
101
- # record combining class, if any
102
- if combine != "0" :
103
- if combine not in combines :
104
- combines [combine ] = []
105
- combines [combine ].append (code )
106
-
107
100
# generate Not_Assigned from Assigned
108
101
gencats ["Cn" ] = gen_unassigned (gencats ["Assigned" ])
109
102
# Assigned is not a real category
110
103
del (gencats ["Assigned" ])
111
104
# Other contains Not_Assigned
112
105
gencats ["C" ].extend (gencats ["Cn" ])
113
106
gencats = group_cats (gencats )
114
- combines = to_combines (group_cats (combines ))
115
107
116
- return ( gencats , combines )
108
+ return gencats
117
109
118
110
def group_cats (cats ):
119
111
cats_out = {}
@@ -150,14 +142,6 @@ def gen_unassigned(assigned):
150
142
return ([i for i in range (0 , 0xd800 ) if i not in assigned ] +
151
143
[i for i in range (0xe000 , 0x110000 ) if i not in assigned ])
152
144
153
def to_combines(combs):
    """Flatten a mapping of range lists into one list sorted by range start.

    Args:
        combs: dict mapping a key to an iterable of ``(lo, hi)`` pairs.
            (Presumably the key is a canonical combining class and the
            pairs are inclusive code point ranges -- confirm against the
            caller.)

    Returns:
        A new list of ``(lo, hi, key)`` tuples ordered by ``lo``.  The sort
        is stable, so entries sharing the same ``lo`` keep the order in
        which the mapping yielded them.
    """
    combs_out = [(lo, hi, comb)
                 for comb, ranges in combs.items()
                 for (lo, hi) in ranges]
    # Order by the start of each range only, exactly like the original key.
    combs_out.sort(key=operator.itemgetter(0))
    return combs_out
160
-
161
145
def format_table_content (f , content , indent ):
162
146
line = " " * indent
163
147
first = True
@@ -203,44 +187,12 @@ def load_properties(f, interestingprops):
203
187
if prop not in props :
204
188
props [prop ] = []
205
189
props [prop ].append ((d_lo , d_hi ))
206
- return props
207
190
208
- # load all widths of want_widths, except those in except_cats
209
- def load_east_asian_width (want_widths , except_cats ):
210
- f = "EastAsianWidth.txt"
211
- fetch (f )
212
- widths = {}
213
- re1 = re .compile ("^([0-9A-F]+);(\w+) +# (\w+)" )
214
- re2 = re .compile ("^([0-9A-F]+)\.\.([0-9A-F]+);(\w+) +# (\w+)" )
191
+ # optimize props if possible
192
+ for prop in props :
193
+ props [prop ] = group_cat (ungroup_cat (props [prop ]))
215
194
216
- for line in fileinput .input (f ):
217
- width = None
218
- d_lo = 0
219
- d_hi = 0
220
- cat = None
221
- m = re1 .match (line )
222
- if m :
223
- d_lo = m .group (1 )
224
- d_hi = m .group (1 )
225
- width = m .group (2 )
226
- cat = m .group (3 )
227
- else :
228
- m = re2 .match (line )
229
- if m :
230
- d_lo = m .group (1 )
231
- d_hi = m .group (2 )
232
- width = m .group (3 )
233
- cat = m .group (4 )
234
- else :
235
- continue
236
- if cat in except_cats or width not in want_widths :
237
- continue
238
- d_lo = int (d_lo , 16 )
239
- d_hi = int (d_hi , 16 )
240
- if width not in widths :
241
- widths [width ] = []
242
- widths [width ].append ((d_lo , d_hi ))
243
- return widths
195
+ return props
244
196
245
197
def escape_char(c):
    """Return integer ``c`` formatted as a ``'\\u{...}'`` escape in lowercase hex.

    The result is wrapped in single quotes so it can be written directly
    into the generated table source as a character literal.
    """
    # No space between the backslash and ``u``: the escape must be contiguous.
    return "'\\u{%x}'" % c
@@ -261,7 +213,7 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
261
213
format_table_content (f , data , 8 )
262
214
f .write ("\n ];\n \n " )
263
215
264
- def emit_property_module (f , mod , tbl , emit_fn ):
216
+ def emit_property_module (f , mod , tbl ):
265
217
f .write ("pub mod %s {\n " % mod )
266
218
keys = tbl .keys ()
267
219
keys .sort ()
@@ -286,43 +238,6 @@ def emit_regex_module(f, cats, w_data):
286
238
287
239
f .write ("}\n \n " )
288
240
289
def remove_from_wtable(wtable, val):
    """Return a copy of ``wtable`` with the single value ``val`` removed.

    Args:
        wtable: list of ``(lo, hi, width, width_cjk)`` tuples describing
            inclusive ranges.  Assumed to be ordered by ``lo`` -- the scan
            stops at the first entry that starts after ``val``.
        val: the value to cut out of whichever range(s) contain it.

    Returns:
        A new list.  Ranges entirely below ``val`` and everything from the
        first range that starts above ``val`` are carried over unchanged; a
        range containing ``val`` is shrunk, split in two, or dropped when it
        covered only ``val``.

    Unlike the previous implementation this does not pop entries off the
    caller's list, so ``wtable`` is left intact and the scan is linear
    instead of quadratic.
    """
    wtable_out = []
    for idx, entry in enumerate(wtable):
        if entry[1] < val:
            # Range ends before val: keep as is.
            wtable_out.append(entry)
        elif entry[0] > val:
            # Range starts after val: nothing further is inspected, the
            # remainder of the table is copied through untouched.
            wtable_out.extend(wtable[idx:])
            break
        else:
            (wt_lo, wt_hi, width, width_cjk) = entry
            # Keep whatever lies on either side of val; a range that was
            # exactly [val, val] contributes nothing.
            if wt_lo < val:
                wtable_out.append((wt_lo, val - 1, width, width_cjk))
            if wt_hi > val:
                wtable_out.append((val + 1, wt_hi, width, width_cjk))
    return wtable_out
310
-
311
-
312
-
313
def optimize_width_table(wtable):
    """Merge adjacent ranges of ``wtable`` that carry identical widths.

    Args:
        wtable: list of ``(lo, hi, width, width_cjk)`` tuples.  Two
            consecutive entries are merged when the second starts exactly
            one past the end of the first and both ``width`` and
            ``width_cjk`` match.

    Returns:
        A new list of tuples with such runs collapsed into single ranges.
        An empty table yields an empty list.

    Changes from the previous implementation:
      * the comparison now covers both width fields (``[2:4]``); the old
        ``[2:3]`` slice looked at ``width`` only, so ranges differing in
        ``width_cjk`` were merged and the earlier value was lost;
      * an empty ``wtable`` no longer raises ``IndexError``;
      * the caller's list is no longer consumed with ``pop(0)``.
    """
    wtable_out = []
    for entry in wtable:
        if wtable_out:
            prev = wtable_out[-1]
            contiguous = prev[1] == entry[0] - 1
            if contiguous and prev[2:4] == entry[2:4]:
                # Extend the pending range instead of starting a new one.
                wtable_out[-1] = (prev[0], entry[1], entry[2], entry[3])
                continue
        wtable_out.append(entry)
    return wtable_out
325
-
326
241
if __name__ == "__main__" :
327
242
r = "unicode.rs"
328
243
if os .path .exists (r ):
@@ -336,7 +251,7 @@ def optimize_width_table(wtable):
336
251
with open ("ReadMe.txt" ) as readme :
337
252
pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
338
253
unicode_version = re .search (pattern , readme .read ()).groups ()
339
- ( gencats , combines ) = load_unicode_data ("UnicodeData.txt" )
254
+ gencats = load_unicode_data ("UnicodeData.txt" )
340
255
want_derived = ["XID_Start" , "XID_Continue" , "Alphabetic" , "Lowercase" , "Uppercase" ]
341
256
other_derived = ["Default_Ignorable_Code_Point" , "Grapheme_Extend" ]
342
257
derived = load_properties ("DerivedCoreProperties.txt" , want_derived + other_derived )
@@ -346,11 +261,11 @@ def optimize_width_table(wtable):
346
261
347
262
# all of these categories will also be available as \p{} in libregex
348
263
allcats = []
349
- for (name , cat , pfuns ) in ("general_category" , gencats , [ "N" , "Cc" ] ), \
350
- ("derived_property" , derived , want_derived ), \
351
- ("script" , scripts , [] ), \
352
- ("property" , props , [ "White_Space" ] ):
353
- emit_property_module (rf , name , cat , pfuns )
264
+ for (name , cat ) in ("general_category" , gencats ), \
265
+ ("derived_property" , derived ), \
266
+ ("script" , scripts ), \
267
+ ("property" , props ):
268
+ emit_property_module (rf , name , cat )
354
269
allcats .extend (map (lambda x : (x , name ), cat ))
355
270
allcats .sort (key = lambda c : c [0 ])
356
271
0 commit comments