@@ -144,13 +144,212 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
144
144
NULL
145
145
};
146
146
147
+ /* The following perfect hashing table was amended from gperf, and hashing code was generated using gperf.
148
+ * The table was amended to refer to the table above such that it is lighter for the data cache.
149
+ * Command used: gperf encodings.txt --readonly-tables --null-strings --ignore-case
150
+ * The encodings.txt contains all the contents of the name fields of the mbfl_encoding_ptr_list table. */
151
+
152
/* Perfect-hash slot table: 187 slots, indexed by the key computed by
 * mbfl_name2encoding_perfect_hash() (valid keys are 0..186).
 * A non-negative entry is an index into mbfl_encoding_ptr_list; -1 marks a key
 * that no encoding name hashes to.
 * The stored values are small (largest is 77), so int8_t is wide enough.
 * The order of the entries IS the data: do not reorder or reflow by hand;
 * regenerate the table when encoding names are added, removed or renamed. */
+ static const int8_t mbfl_encoding_ptr_list_after_hashing [187 ] = {
153
+ -1 , -1 , -1 ,
154
+ 65 ,
155
+ 23 ,
156
+ 9 ,
157
+ -1 ,
158
+ 60 ,
159
+ 36 ,
160
+ -1 , -1 ,
161
+ 58 ,
162
+ 42 ,
163
+ -1 , -1 ,
164
+ 18 ,
165
+ 27 ,
166
+ 77 ,
167
+ 26 ,
168
+ 40 ,
169
+ 72 ,
170
+ 12 ,
171
+ 10 ,
172
+ 2 ,
173
+ 31 ,
174
+ -1 , -1 ,
175
+ 75 ,
176
+ 74 ,
177
+ 33 ,
178
+ 45 ,
179
+ -1 ,
180
+ 67 ,
181
+ 13 ,
182
+ -1 ,
183
+ 51 ,
184
+ 53 ,
185
+ 11 ,
186
+ 1 ,
187
+ -1 ,
188
+ 48 ,
189
+ 56 ,
190
+ -1 ,
191
+ 38 ,
192
+ 20 ,
193
+ 46 ,
194
+ 54 ,
195
+ -1 ,
196
+ 14 ,
197
+ 24 ,
198
+ 44 ,
199
+ 39 ,
200
+ 43 ,
201
+ -1 ,
202
+ 30 ,
203
+ 49 ,
204
+ 57 ,
205
+ 76 ,
206
+ -1 , -1 ,
207
+ 68 ,
208
+ 73 ,
209
+ 7 ,
210
+ 16 ,
211
+ -1 ,
212
+ 35 ,
213
+ 66 ,
214
+ -1 , -1 , -1 ,
215
+ 47 ,
216
+ 55 ,
217
+ -1 , -1 , -1 ,
218
+ 63 ,
219
+ 15 ,
220
+ 8 ,
221
+ 17 ,
222
+ -1 ,
223
+ 21 ,
224
+ 70 ,
225
+ -1 ,
226
+ 29 ,
227
+ 5 ,
228
+ 6 ,
229
+ 61 ,
230
+ -1 , -1 ,
231
+ 71 ,
232
+ 52 ,
233
+ 3 ,
234
+ 37 ,
235
+ -1 , -1 ,
236
+ 28 ,
237
+ -1 , -1 , -1 ,
238
+ 32 ,
239
+ 50 ,
240
+ 34 ,
241
+ -1 , -1 , -1 ,
242
+ 62 ,
243
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
244
+ 59 ,
245
+ 0 ,
246
+ -1 , -1 , -1 , -1 ,
247
+ 22 ,
248
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
249
+ 25 ,
250
+ 41 ,
251
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
252
+ 19 ,
253
+ -1 , -1 , -1 ,
254
+ 4 ,
255
+ -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
256
+ 69 ,
257
+ -1 , -1 , -1 , -1 ,
258
+ 64 ,
259
+ };
260
+
261
/* Compute the perfect-hash key of an encoding name (derived from gperf output).
 *
 * The key is the name length plus the association values of the bytes at
 * positions 6, 5, 4, 2 and 0 (each only when the name is long enough to have
 * that position) plus the association value of the last byte. Upper- and
 * lower-case ASCII letters share the same association value, so the key is
 * case-insensitive.
 *
 * str: name to hash; only the first `len` bytes are read.
 * len: number of bytes in `str`.
 *
 * Returns the hash key. The key is only a candidate slot: it can exceed the
 * largest valid key (186) for strings that are not encoding names, so the
 * caller must range-check it before indexing the slot table, and must still
 * compare the candidate's name against `str`.
 * An empty string yields 187, which is out of range, instead of reading
 * outside the buffer. */
static unsigned int mbfl_name2encoding_perfect_hash(const char *str, size_t len)
{
	/* Per-byte association values. 187 (one more than the largest valid key)
	 * is used for every byte that has no assigned value, which pushes the
	 * resulting key out of range. */
	static const unsigned char asso_values[] =
	{
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187,   0, 187, 187,   5,  20,
		  0,  15,  40,  10,  25,  70,   5,  60, 187, 187,
		187, 187, 187, 187, 187,  75,   5,   0,  20,   5,
		  0,  75,   5,   0,  40,  75,  20,   0,   0,   0,
		 35,  45,  50,   0,  75,   0, 187,   0, 187, 187,
		  0, 187, 187, 187, 187, 187, 187,  75,   5,   0,
		 20,   5,   0,  75,   5,   0,  40,  75,  20,   0,
		  0,   0,  35,  45,  50,   0,  75,   0, 187,   0,
		187, 187,   0, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187, 187, 187, 187, 187,
		187, 187, 187, 187, 187, 187
	};
	unsigned int hval;

	/* An empty name has no first or last byte: report an out-of-range key
	 * rather than reading str[-1] / str[6]. */
	if (len == 0) {
		return 187;
	}

	hval = (unsigned int) len;

	/* Each position contributes only when it exists in the name. Positions 3
	 * and 1 are not part of the hash. */
	if (len >= 7) {
		hval += asso_values[(unsigned char) str[6]];
	}
	if (len >= 6) {
		hval += asso_values[(unsigned char) str[5]];
	}
	if (len >= 5) {
		hval += asso_values[(unsigned char) str[4]];
	}
	if (len >= 3) {
		hval += asso_values[(unsigned char) str[2]];
	}
	hval += asso_values[(unsigned char) str[0]];

	/* The last byte always contributes, even if it was already counted above. */
	return hval + asso_values[(unsigned char) str[len - 1]];
}
316
+
317
/* Inclusive bounds on strlen(name) for which mbfl_name2encoding() attempts the
 * perfect-hash lookup; names outside this range are never hashed.
 * The development-time sanity check aborts if any name in
 * mbfl_encoding_ptr_list falls outside these bounds, so they presumably track
 * the shortest and longest encoding names; re-check them when the name list
 * changes. */
+ #define NAME_HASH_MIN_NAME_LENGTH 2
318
+ #define NAME_HASH_MAX_NAME_LENGTH 23
319
+
147
320
const mbfl_encoding * mbfl_name2encoding (const char * name )
148
321
{
149
- const mbfl_encoding * * encoding ;
322
+ const mbfl_encoding * const * encoding ;
150
323
324
+ /* Sanity check perfect hash for name.
325
+ * Never enable this in production, this is only a development-time sanity check! */
326
+ #if ZEND_DEBUG && 0
151
327
for (encoding = mbfl_encoding_ptr_list ; * encoding ; encoding ++ ) {
152
- if (strcasecmp ((* encoding )-> name , name ) == 0 ) {
153
- return * encoding ;
328
+ size_t name_length = strlen ((* encoding )-> name );
329
+ if (!(name_length <= NAME_HASH_MAX_NAME_LENGTH && name_length >= NAME_HASH_MIN_NAME_LENGTH )) {
330
+ fprintf (stderr , "name length is not satisfying bound check: %zu %s\n" , name_length , (* encoding )-> name );
331
+ abort ();
332
+ }
333
+ unsigned int key = mbfl_name2encoding_perfect_hash ((* encoding )-> name , name_length );
334
+ if (mbfl_encoding_ptr_list [mbfl_encoding_ptr_list_after_hashing [key ]] != * encoding ) {
335
+ fprintf (stderr , "mbfl_name2encoding_perfect_hash: key %u %s mismatch\n" , key , (* encoding )-> name );
336
+ abort ();
337
+ }
338
+ }
339
+ #endif
340
+
341
+ /* Use perfect hash lookup for name */
342
+ size_t name_len = strlen (name );
343
+ if (name_len <= NAME_HASH_MAX_NAME_LENGTH && name_len >= NAME_HASH_MIN_NAME_LENGTH ) {
344
+ unsigned int key = mbfl_name2encoding_perfect_hash (name , name_len );
345
+ if (key <= 186 ) {
346
+ int8_t offset = mbfl_encoding_ptr_list_after_hashing [key ];
347
+ if (offset >= 0 ) {
348
+ encoding = mbfl_encoding_ptr_list + offset ;
349
+ if (strcasecmp ((* encoding )-> name , name ) == 0 ) {
350
+ return * encoding ;
351
+ }
352
+ }
154
353
}
155
354
}
156
355
0 commit comments