Skip to content

Commit 4a51262

Browse files
committed
Don't respect mbstring.internal_encoding in htmlentities()
htmlentities() has nothing to do with mbstring and should not depend on mbstring's ini settings. It should respect only the global default_charset and internal_encoding settings, which is exactly why those settings were introduced.
1 parent c50cfc4 commit 4a51262

File tree

2 files changed

+23
-23
lines changed

2 files changed

+23
-23
lines changed

ext/standard/html.c

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -365,36 +365,18 @@ static inline unsigned int get_next_char(
365365
/* }}} */
366366

367367
/* {{{ entity_charset determine_charset
368-
* returns the charset identifier based on current locale or a hint.
369-
* defaults to UTF-8 */
368+
* Returns the charset identifier based on an explicitly provided charset,
369+
* the internal_encoding and default_charset ini settings, or UTF-8 by default. */
370370
static enum entity_charset determine_charset(char *charset_hint, zend_bool quiet)
371371
{
372-
size_t i;
373-
const zend_encoding *zenc;
374-
375-
if (charset_hint && *charset_hint) {
376-
/* Explicitly passed charset */
377-
goto det_charset;
372+
if (!charset_hint || !*charset_hint) {
373+
charset_hint = get_default_charset();
378374
}
379375

380-
charset_hint = get_default_charset();
381376
if (charset_hint && *charset_hint) {
382-
/* default_charset or internal_encoding */
383-
goto det_charset;
384-
}
385-
386-
zenc = zend_multibyte_get_internal_encoding();
387-
if (zenc != NULL) {
388-
/* mbstring.internal_encoding or mb_internal_encoding() */
389-
// TODO: We *shouldn't* be taking this into account anymore.
390-
charset_hint = (char *)zend_multibyte_get_encoding_name(zenc);
391-
}
392-
393-
det_charset:
394-
if (charset_hint) {
395377
size_t len = strlen(charset_hint);
396378
/* now walk the charset map and look for the codeset */
397-
for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) {
379+
for (size_t i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) {
398380
if (len == charset_map[i].codeset_len &&
399381
zend_binary_strcasecmp(charset_hint, len, charset_map[i].codeset, len) == 0) {
400382
return charset_map[i].charset;
@@ -406,6 +388,7 @@ static enum entity_charset determine_charset(char *charset_hint, zend_bool quiet
406388
charset_hint);
407389
}
408390
}
391+
409392
return cs_utf_8;
410393
}
411394
/* }}} */
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
--TEST--
2+
htmlentities() should not be influenced by mb_internal_encoding()
3+
--INI--
4+
default_charset=
5+
internal_encoding=
6+
mbstring.internal_encoding=ISO-8859-1
7+
--FILE--
8+
<?php
9+
10+
var_dump(htmlentities('äöü'));
11+
12+
?>
13+
--EXPECT--
14+
PHP Deprecated: PHP Startup: Use of mbstring.internal_encoding is deprecated in Unknown on line 0
15+
16+
Deprecated: PHP Startup: Use of mbstring.internal_encoding is deprecated in Unknown on line 0
17+
string(18) "&auml;&ouml;&uuml;"

0 commit comments

Comments
 (0)