Skip to content

Commit c50cfc4

Browse files
committed
Add quiet parameter to internal HTML entities API
In some places, we need to make sure that no warnings are emitted because of an unknown encoding. The error reporting code tried to avoid this by determining a "safe charset", but that approach introduces subtle discrepancies in which charset is picked (normally internal_encoding takes precedence). Avoid the problem by suppressing the warning in the first place, via a new quiet parameter. While here, apply the fallback logic that retries with substitution characters more consistently when printing error messages, so that parts of an error message are no longer skipped entirely.
1 parent d6ac8b2 commit c50cfc4

File tree

8 files changed

+42
-55
lines changed

8 files changed

+42
-55
lines changed

ext/filter/sanitizing_filters.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,9 @@ void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
251251
} else {
252252
quotes = ENT_NOQUOTES;
253253
}
254-
buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
254+
buf = php_escape_html_entities_ex(
255+
(unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
256+
/* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
255257
zval_ptr_dtor(value);
256258
ZVAL_STR(value, buf);
257259
}

ext/standard/html.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ static inline unsigned int get_next_char(
367367
/* {{{ entity_charset determine_charset
368368
* returns the charset identifier based on current locale or a hint.
369369
* defaults to UTF-8 */
370-
static enum entity_charset determine_charset(char *charset_hint)
370+
static enum entity_charset determine_charset(char *charset_hint, zend_bool quiet)
371371
{
372372
size_t i;
373373
const zend_encoding *zenc;
@@ -401,8 +401,10 @@ static enum entity_charset determine_charset(char *charset_hint)
401401
}
402402
}
403403

404-
php_error_docref(NULL, E_WARNING, "Charset `%s' not supported, assuming utf-8",
405-
charset_hint);
404+
if (!quiet) {
405+
php_error_docref(NULL, E_WARNING, "Charset `%s' not supported, assuming utf-8",
406+
charset_hint);
407+
}
406408
}
407409
return cs_utf_8;
408410
}
@@ -1006,7 +1008,7 @@ PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int fl
10061008
}
10071009

10081010
if (all) {
1009-
charset = determine_charset(hint_charset);
1011+
charset = determine_charset(hint_charset, /* quiet */ 0);
10101012
} else {
10111013
charset = cs_8859_1; /* charset shouldn't matter, use ISO-8859-1 for performance */
10121014
}
@@ -1030,9 +1032,9 @@ PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int fl
10301032
}
10311033
/* }}} */
10321034

1033-
PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset)
1035+
PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset)
10341036
{
1035-
return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1);
1037+
return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1, /* quiet */ 0);
10361038
}
10371039

10381040
/* {{{ find_entity_for_char */
@@ -1042,7 +1044,7 @@ static inline void find_entity_for_char(
10421044
const entity_stage1_row *table,
10431045
const unsigned char **entity,
10441046
size_t *entity_len,
1045-
unsigned char *old,
1047+
const unsigned char *old,
10461048
size_t oldlen,
10471049
size_t *cursor)
10481050
{
@@ -1118,11 +1120,11 @@ static inline void find_entity_for_char_basic(
11181120

11191121
/* {{{ php_escape_html_entities
11201122
*/
1121-
PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode)
1123+
PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode, zend_bool quiet)
11221124
{
11231125
size_t cursor, maxlen, len;
11241126
zend_string *replaced;
1125-
enum entity_charset charset = determine_charset(hint_charset);
1127+
enum entity_charset charset = determine_charset(hint_charset, quiet);
11261128
int doctype = flags & ENT_HTML_DOC_TYPE_MASK;
11271129
entity_table_opt entity_table;
11281130
const enc_to_uni *to_uni_table = NULL;
@@ -1132,7 +1134,7 @@ PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldle
11321134
size_t replacement_len = 0;
11331135

11341136
if (all) { /* replace with all named entities */
1135-
if (CHARSET_PARTIAL_SUPPORT(charset)) {
1137+
if (!quiet && CHARSET_PARTIAL_SUPPORT(charset)) {
11361138
php_error_docref(NULL, E_NOTICE, "Only basic entities "
11371139
"substitution is supported for multi-byte encodings other than UTF-8; "
11381140
"functionality is equivalent to htmlspecialchars");
@@ -1349,7 +1351,7 @@ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
13491351

13501352
replaced = php_escape_html_entities_ex(
13511353
(unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), all, (int) flags,
1352-
hint_charset ? ZSTR_VAL(hint_charset) : NULL, double_encode);
1354+
hint_charset ? ZSTR_VAL(hint_charset) : NULL, double_encode, /* quiet */ 0);
13531355
RETVAL_STR(replaced);
13541356
}
13551357
/* }}} */
@@ -1519,7 +1521,7 @@ PHP_FUNCTION(get_html_translation_table)
15191521
Z_PARAM_STRING(charset_hint, charset_hint_len)
15201522
ZEND_PARSE_PARAMETERS_END();
15211523

1522-
charset = determine_charset(charset_hint);
1524+
charset = determine_charset(charset_hint, /* quiet */ 0);
15231525
doctype = flags & ENT_HTML_DOC_TYPE_MASK;
15241526
LIMIT_ALL(all, doctype, charset);
15251527

ext/standard/html.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@
4444

4545
void register_html_constants(INIT_FUNC_ARGS);
4646

47-
PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset);
48-
PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode);
47+
PHPAPI zend_string *php_escape_html_entities(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset);
48+
PHPAPI zend_string *php_escape_html_entities_ex(const unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode, zend_bool quiet);
4949
PHPAPI zend_string *php_unescape_html_entities(zend_string *str, int all, int flags, char *hint_charset);
5050
PHPAPI unsigned int php_next_utf8_char(const unsigned char *str, size_t str_len, size_t *cursor, int *status);
5151

ext/standard/tests/strings/bug68996.phpt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@ html_errors=1
66
<?php
77
fopen("\xfc\x63", "r");
88
?>
9-
--EXPECTF--
9+
--EXPECT--
1010
<br />
11-
<b>Warning</b>: : Failed to open stream: No such file or directory in <b>%sbug68996.php</b> on line <b>%d</b><br />
11+
<b>Warning</b>: fopen(�c): Failed to open stream: No such file or directory in <b>/home/nikic/php-src/ext/standard/tests/strings/bug68996.php</b> on line <b>2</b><br />

ext/standard/url_scanner_ex.re

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -758,9 +758,9 @@ static inline int php_url_scanner_add_var_impl(char *name, size_t name_len, char
758758
smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
759759
encoded = php_raw_url_encode(value, value_len);
760760
smart_str_appendl(&svalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
761-
encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
761+
encoded = php_escape_html_entities_ex((unsigned char*)name, name_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
762762
smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
763-
encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
763+
encoded = php_escape_html_entities_ex((unsigned char*)value, value_len, 0, ENT_QUOTES|ENT_SUBSTITUTE, NULL, /* double_encode */ 0, /* quiet */ 1);
764764
smart_str_appendl(&hvalue, ZSTR_VAL(encoded), ZSTR_LEN(encoded)); zend_string_free(encoded);
765765
} else {
766766
smart_str_appendl(&sname, name, name_len);
@@ -860,7 +860,7 @@ static inline int php_url_scanner_reset_var_impl(zend_string *name, int encode,
860860
encoded = php_raw_url_encode(ZSTR_VAL(name), ZSTR_LEN(name));
861861
smart_str_appendl(&sname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
862862
zend_string_free(encoded);
863-
encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), 0);
863+
encoded = php_escape_html_entities_ex((unsigned char *)ZSTR_VAL(name), ZSTR_LEN(name), 0, ENT_QUOTES|ENT_SUBSTITUTE, SG(default_charset), /* double_encode */ 0, /* quiet */ 1);
864864
smart_str_appendl(&hname, ZSTR_VAL(encoded), ZSTR_LEN(encoded));
865865
zend_string_free(encoded);
866866
} else {

main/main.c

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -95,31 +95,6 @@ PHPAPI size_t core_globals_offset;
9595

9696
#define SAFE_FILENAME(f) ((f)?(f):"-")
9797

98-
static char *get_safe_charset_hint(void) {
99-
ZEND_TLS char *lastHint = NULL;
100-
ZEND_TLS char *lastCodeset = NULL;
101-
char *hint = SG(default_charset);
102-
size_t len = strlen(hint);
103-
size_t i = 0;
104-
105-
if (lastHint == SG(default_charset)) {
106-
return lastCodeset;
107-
}
108-
109-
lastHint = hint;
110-
lastCodeset = NULL;
111-
112-
for (i = 0; i < sizeof(charset_map)/sizeof(charset_map[0]); i++) {
113-
if (len == charset_map[i].codeset_len
114-
&& zend_binary_strcasecmp(hint, len, charset_map[i].codeset, len) == 0) {
115-
lastCodeset = (char*)charset_map[i].codeset;
116-
break;
117-
}
118-
}
119-
120-
return lastCodeset;
121-
}
122-
12398
/* {{{ PHP_INI_MH
12499
*/
125100
static PHP_INI_MH(OnSetFacility)
@@ -937,6 +912,19 @@ PHPAPI size_t php_printf(const char *format, ...)
937912
}
938913
/* }}} */
939914

915+
static zend_string *escape_html(const char *buffer, size_t buffer_len) {
916+
zend_string *result = php_escape_html_entities_ex(
917+
(const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT,
918+
/* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1);
919+
if (!result || ZSTR_LEN(result) == 0) {
920+
/* Retry with substituting invalid chars on fail. */
921+
result = php_escape_html_entities_ex(
922+
(const unsigned char *) buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS,
923+
/* charset_hint */ NULL, /* double_encode */ 1, /* quiet */ 1);
924+
}
925+
return result;
926+
}
927+
940928
/* {{{ php_verror */
941929
/* php_verror is called from php_error_docref<n> functions.
942930
* Its purpose is to unify error messages and automatically generate clickable
@@ -962,12 +950,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ
962950
buffer_len = (int)vspprintf(&buffer, 0, format, args);
963951

964952
if (PG(html_errors)) {
965-
replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint());
966-
/* Retry with substituting invalid chars on fail. */
967-
if (!replace_buffer || ZSTR_LEN(replace_buffer) < 1) {
968-
replace_buffer = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT | ENT_HTML_SUBSTITUTE_ERRORS, get_safe_charset_hint());
969-
}
970-
953+
replace_buffer = escape_html(buffer, buffer_len);
971954
efree(buffer);
972955

973956
if (replace_buffer) {
@@ -1032,7 +1015,7 @@ PHPAPI ZEND_COLD void php_verror(const char *docref, const char *params, int typ
10321015
}
10331016

10341017
if (PG(html_errors)) {
1035-
replace_origin = php_escape_html_entities((unsigned char*)origin, origin_len, 0, ENT_COMPAT, get_safe_charset_hint());
1018+
replace_origin = escape_html(origin, origin_len);
10361019
efree(origin);
10371020
origin = ZSTR_VAL(replace_origin);
10381021
}
@@ -1335,7 +1318,7 @@ static ZEND_COLD void php_error_cb(int orig_type, const char *error_filename, co
13351318

13361319
if (PG(html_errors)) {
13371320
if (type == E_ERROR || type == E_PARSE) {
1338-
zend_string *buf = php_escape_html_entities((unsigned char*)buffer, buffer_len, 0, ENT_COMPAT, get_safe_charset_hint());
1321+
zend_string *buf = escape_html(buffer, buffer_len);
13391322
php_printf("%s<br />\n<b>%s</b>: %s in <b>%s</b> on line <b>%" PRIu32 "</b><br />\n%s", STR_PRINT(prepend_string), error_type_str, ZSTR_VAL(buf), error_filename, error_lineno, STR_PRINT(append_string));
13401323
zend_string_free(buf);
13411324
} else {

sapi/cli/php_cli_server.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1978,7 +1978,7 @@ static int php_cli_server_send_error_page(php_cli_server *server, php_cli_server
19781978
php_cli_server_content_sender_ctor(&client->content_sender);
19791979
client->content_sender_initialized = 1;
19801980

1981-
escaped_request_uri = php_escape_html_entities_ex((unsigned char *)client->request.request_uri, client->request.request_uri_len, 0, ENT_QUOTES, NULL, 0);
1981+
escaped_request_uri = php_escape_html_entities_ex((unsigned char *)client->request.request_uri, client->request.request_uri_len, 0, ENT_QUOTES, NULL, /* double_encode */ 0, /* quiet */ 0);
19821982

19831983
{
19841984
static const char prologue_template[] = "<!doctype html><html><head><title>%d %s</title>";

sapi/fpm/fpm/fpm_status.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ int fpm_status_handle_request(void) /* {{{ */
517517
if (!encode) {
518518
query_string = proc.query_string;
519519
} else {
520-
tmp_query_string = php_escape_html_entities_ex((unsigned char *)proc.query_string, strlen(proc.query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, 1);
520+
tmp_query_string = php_escape_html_entities_ex((unsigned char *)proc.query_string, strlen(proc.query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, /* double_encode */ 1, /* quiet */ 0);
521521
query_string = ZSTR_VAL(tmp_query_string);
522522
}
523523
}

0 commit comments

Comments
 (0)