@@ -370,90 +370,41 @@ static inline unsigned int get_next_char(
370
370
static enum entity_charset determine_charset (char * charset_hint )
371
371
{
372
372
size_t i ;
373
- enum entity_charset charset = cs_utf_8 ;
374
- size_t len = 0 ;
375
373
const zend_encoding * zenc ;
376
374
377
- /* Default is now UTF-8 */
378
- if (charset_hint == NULL )
379
- return cs_utf_8 ;
375
+ if (charset_hint && * charset_hint ) {
376
+ /* Explicitly passed charset */
377
+ goto det_charset ;
378
+ }
380
379
381
- if ((len = strlen (charset_hint )) != 0 ) {
380
+ charset_hint = get_default_charset ();
381
+ if (charset_hint && * charset_hint ) {
382
+ /* default_charset or internal_encoding */
382
383
goto det_charset ;
383
384
}
384
385
385
386
zenc = zend_multibyte_get_internal_encoding ();
386
387
if (zenc != NULL ) {
388
+ /* mbstring.internal_encoding or mb_internal_encoding() */
389
+ // TODO: We *shouldn't* be taking this into account anymore.
387
390
charset_hint = (char * )zend_multibyte_get_encoding_name (zenc );
388
- if (charset_hint != NULL && (len = strlen (charset_hint )) != 0 ) {
389
- if (len == sizeof ("auto" )- 1 && !memcmp ("auto" , charset_hint , sizeof ("auto" )- 1 )) {
390
- charset_hint = NULL ;
391
- len = 0 ;
392
- } else {
393
- goto det_charset ;
394
- }
395
- }
396
- }
397
-
398
- charset_hint = SG (default_charset );
399
- if (charset_hint != NULL && (len = strlen (charset_hint )) != 0 ) {
400
- goto det_charset ;
401
- }
402
-
403
- /* try to detect the charset for the locale */
404
- #if HAVE_NL_LANGINFO && defined(CODESET )
405
- charset_hint = nl_langinfo (CODESET );
406
- if (charset_hint != NULL && (len = strlen (charset_hint )) != 0 ) {
407
- goto det_charset ;
408
- }
409
- #endif
410
-
411
- /* try to figure out the charset from the locale */
412
- {
413
- char * localename ;
414
- char * dot , * at ;
415
-
416
- /* lang[_territory][.codeset][@modifier] */
417
- localename = setlocale (LC_CTYPE , NULL );
418
-
419
- dot = strchr (localename , '.' );
420
- if (dot ) {
421
- dot ++ ;
422
- /* locale specifies a codeset */
423
- at = strchr (dot , '@' );
424
- if (at )
425
- len = at - dot ;
426
- else
427
- len = strlen (dot );
428
- charset_hint = dot ;
429
- } else {
430
- /* no explicit name; see if the name itself
431
- * is the charset */
432
- charset_hint = localename ;
433
- len = strlen (charset_hint );
434
- }
435
391
}
436
392
437
393
det_charset :
438
-
439
394
if (charset_hint ) {
440
- int found = 0 ;
441
-
395
+ size_t len = strlen (charset_hint );
442
396
/* now walk the charset map and look for the codeset */
443
397
for (i = 0 ; i < sizeof (charset_map )/sizeof (charset_map [0 ]); i ++ ) {
444
398
if (len == charset_map [i ].codeset_len &&
445
399
zend_binary_strcasecmp (charset_hint , len , charset_map [i ].codeset , len ) == 0 ) {
446
- charset = charset_map [i ].charset ;
447
- found = 1 ;
448
- break ;
400
+ return charset_map [i ].charset ;
449
401
}
450
402
}
451
- if (!found ) {
452
- php_error_docref (NULL , E_WARNING , "Charset `%s' not supported, assuming utf-8" ,
453
- charset_hint );
454
- }
403
+
404
+ php_error_docref (NULL , E_WARNING , "Charset `%s' not supported, assuming utf-8" ,
405
+ charset_hint );
455
406
}
456
- return charset ;
407
+ return cs_utf_8 ;
457
408
}
458
409
/* }}} */
459
410
@@ -1384,7 +1335,6 @@ PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldle
1384
1335
static void php_html_entities (INTERNAL_FUNCTION_PARAMETERS , int all )
1385
1336
{
1386
1337
zend_string * str , * hint_charset = NULL ;
1387
- char * default_charset ;
1388
1338
zend_long flags = ENT_COMPAT ;
1389
1339
zend_string * replaced ;
1390
1340
zend_bool double_encode = 1 ;
@@ -1397,10 +1347,9 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
1397
1347
Z_PARAM_BOOL (double_encode );
1398
1348
ZEND_PARSE_PARAMETERS_END ();
1399
1349
1400
- if (!hint_charset ) {
1401
- default_charset = get_default_charset ();
1402
- }
1403
- replaced = php_escape_html_entities_ex ((unsigned char * )ZSTR_VAL (str ), ZSTR_LEN (str ), all , (int ) flags , (hint_charset ? ZSTR_VAL (hint_charset ) : default_charset ), double_encode );
1350
+ replaced = php_escape_html_entities_ex (
1351
+ (unsigned char * )ZSTR_VAL (str ), ZSTR_LEN (str ), all , (int ) flags ,
1352
+ hint_charset ? ZSTR_VAL (hint_charset ) : NULL , double_encode );
1404
1353
RETVAL_STR (replaced );
1405
1354
}
1406
1355
/* }}} */
@@ -1462,7 +1411,6 @@ PHP_FUNCTION(htmlspecialchars_decode)
1462
1411
PHP_FUNCTION (html_entity_decode )
1463
1412
{
1464
1413
zend_string * str , * hint_charset = NULL ;
1465
- char * default_charset ;
1466
1414
zend_long quote_style = ENT_COMPAT ;
1467
1415
zend_string * replaced ;
1468
1416
@@ -1473,10 +1421,8 @@ PHP_FUNCTION(html_entity_decode)
1473
1421
Z_PARAM_STR (hint_charset )
1474
1422
ZEND_PARSE_PARAMETERS_END ();
1475
1423
1476
- if (!hint_charset ) {
1477
- default_charset = get_default_charset ();
1478
- }
1479
- replaced = php_unescape_html_entities (str , 1 /*all*/ , (int )quote_style , (hint_charset ? ZSTR_VAL (hint_charset ) : default_charset ));
1424
+ replaced = php_unescape_html_entities (
1425
+ str , 1 /*all*/ , (int )quote_style , hint_charset ? ZSTR_VAL (hint_charset ) : NULL );
1480
1426
1481
1427
if (replaced ) {
1482
1428
RETURN_STR (replaced );
0 commit comments