@@ -285,24 +285,23 @@ size_t mbfl_buffer_illegalchars(mbfl_buffer_converter *convd)
285
285
/*
286
286
* encoding detector
287
287
*/
288
/*
 * Scoring callback for the encoding detector. It is the function handed to
 * mbfl_convert_filter_new() for every candidate encoding, and it is called
 * with each wchar `c` decoded from the test string. It always returns `c`
 * unchanged.
 *
 * This block is shown in diff form: a line starting with '-' is the removed
 * version and the following line starting with '+' is its replacement; bare
 * numbers are old/new line numbers left over from the diff view.
 *
 * '+' version: `void_data` is used as a pointer to the candidate's
 * mbfl_encoding_detector_data record, which is updated as follows:
 *   - (c & MBFL_WCSGROUP_THROUGH) nonzero : num_illegalchars is incremented
 *   - control or private-use codepoint    : score is increased by 10
 *   - punctuation                         : score is increased by 1
 *   - anything else                       : record left untouched
 * mbfl_encoding_detector_judge() skips candidates whose num_illegalchars is
 * nonzero and picks the lowest score among the rest.
 *
 * '-' version: the same two counters were kept on the convert filter itself
 * (filter->num_illegalchar, and filter->opaque reinterpreted as the score).
 */
- static int mbfl_estimate_encoding_likelihood (int c , void * data )
288
+ static int mbfl_estimate_encoding_likelihood (int c , void * void_data )
289
289
{
290
- mbfl_convert_filter * filter = * ((mbfl_convert_filter * * )data );
291
- uintptr_t * score = (uintptr_t * )(& filter -> opaque );
290
+ mbfl_encoding_detector_data * data = void_data ;
/* In the '+' version the pointer registered by mbfl_encoding_detector_new()
 * is an element of identd->filter_data, so no cast through the filter is
 * needed any more. */
292
291
293
292
/* Receive wchars decoded from test string using candidate encoding
294
293
* If the test string was invalid in the candidate encoding, we assume
295
294
* it's the wrong one. */
296
295
if (c & MBFL_WCSGROUP_THROUGH ) {
297
- filter -> num_illegalchar ++ ;
296
+ data -> num_illegalchars ++ ;
298
297
} else if (php_unicode_is_cntrl (c ) || php_unicode_is_private (c )) {
299
298
/* Otherwise, count how many control characters and 'private use'
300
299
* codepoints we see. Those are rarely used and may indicate that
301
300
* the candidate encoding is not the right one. */
302
- * score += 10 ;
301
+ data -> score += 10 ;
303
302
} else if (php_unicode_is_punct (c )) {
304
303
/* Punctuation is also less common than letters/digits */
305
- ( * score ) ++ ;
304
+ data -> score ++ ;
306
305
}
307
306
return c ; /* the input value is passed back to the caller unmodified */
308
307
}
@@ -315,14 +314,14 @@ mbfl_encoding_detector *mbfl_encoding_detector_new(const mbfl_encoding **elist,
315
314
316
315
mbfl_encoding_detector * identd = emalloc (sizeof (mbfl_encoding_detector ));
317
316
identd -> filter_list = ecalloc (elistsz , sizeof (mbfl_convert_filter * ));
317
+ identd -> filter_data = ecalloc (elistsz , sizeof (mbfl_encoding_detector_data ));
318
318
319
319
int filter_list_size = 0 ;
320
320
for (int i = 0 ; i < elistsz ; i ++ ) {
321
321
mbfl_convert_filter * filter = mbfl_convert_filter_new (elist [i ], & mbfl_encoding_wchar ,
322
- mbfl_estimate_encoding_likelihood , NULL , & identd -> filter_list [ i ]);
322
+ mbfl_estimate_encoding_likelihood , NULL , & identd -> filter_data [ filter_list_size ]);
323
323
if (filter ) {
324
324
identd -> filter_list [filter_list_size ++ ] = filter ;
325
- filter -> opaque = (void * )0 ;
326
325
}
327
326
}
328
327
identd -> filter_list_size = filter_list_size ;
@@ -336,6 +335,7 @@ void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd)
336
335
mbfl_convert_filter_delete (identd -> filter_list [i ]);
337
336
}
338
337
efree (identd -> filter_list );
338
+ efree (identd -> filter_data );
339
339
efree (identd );
340
340
}
341
341
@@ -351,7 +351,7 @@ int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *str
351
351
mbfl_convert_filter * filter = identd -> filter_list [i ];
352
352
if (!filter -> num_illegalchar ) {
353
353
(* filter -> filter_function )(* p , filter );
354
- if (filter -> num_illegalchar ) {
354
+ if (identd -> filter_data [ i ]. num_illegalchars ) {
355
355
bad ++ ;
356
356
}
357
357
}
@@ -374,14 +374,15 @@ int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *str
374
374
375
375
const mbfl_encoding * mbfl_encoding_detector_judge (mbfl_encoding_detector * identd )
376
376
{
377
- uintptr_t best_score = UINT_MAX ; /* Low score is 'better' */
377
+ size_t best_score = SIZE_MAX ; /* Low score is 'better' */
378
378
const mbfl_encoding * enc = NULL ;
379
379
380
380
for (int i = 0 ; i < identd -> filter_list_size ; i ++ ) {
381
381
mbfl_convert_filter * filter = identd -> filter_list [i ];
382
- if (!filter -> num_illegalchar && (uintptr_t )filter -> opaque < best_score ) {
382
+ mbfl_encoding_detector_data * data = & identd -> filter_data [i ];
383
+ if (!data -> num_illegalchars && data -> score < best_score ) {
383
384
enc = filter -> from ;
384
- best_score = ( uintptr_t ) filter -> opaque ;
385
+ best_score = data -> score ;
385
386
}
386
387
}
387
388
0 commit comments