Skip to content

Commit 7dd332f

Browse files
committed
Refactor mb_substitute_character()
Using the new Fast ZPP API for string|int|null This also fixes Bug #79448 which was too disruptive to fix in PHP 7.x
1 parent 28af364 commit 7dd332f

9 files changed

+289
-160
lines changed

ext/mbstring/mbstring.c

Lines changed: 40 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1527,7 +1527,7 @@ PHP_FUNCTION(mb_detect_order)
15271527

15281528
static inline int php_mb_check_code_point(zend_long cp)
15291529
{
1530-
if (cp <= 0 || cp >= 0x110000) {
1530+
if (cp < 0 || cp >= 0x110000) {
15311531
/* Out of Unicode range */
15321532
return 0;
15331533
}
@@ -1544,61 +1544,58 @@ static inline int php_mb_check_code_point(zend_long cp)
15441544
return 1;
15451545
}
15461546

1547-
/* {{{ proto mixed mb_substitute_character([mixed substchar])
1547+
/* {{{ proto string|int|true mb_substitute_character([string|int|null substitute_character])
15481548
Sets the current substitute_character or returns the current substitute_character */
15491549
PHP_FUNCTION(mb_substitute_character)
15501550
{
1551-
zval *arg1 = NULL;
1551+
zend_string *substitute_character = NULL;
1552+
zend_long substitute_codepoint;
1553+
zend_bool substitute_is_null = 1;
15521554

1553-
if (zend_parse_parameters(ZEND_NUM_ARGS(), "|z", &arg1) == FAILURE) {
1554-
RETURN_THROWS();
1555-
}
1555+
ZEND_PARSE_PARAMETERS_START(0, 1)
1556+
Z_PARAM_OPTIONAL
1557+
Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
1558+
ZEND_PARSE_PARAMETERS_END();
15561559

1557-
if (!arg1) {
1560+
if (substitute_is_null) {
15581561
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
15591562
RETURN_STRING("none");
1560-
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1563+
}
1564+
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
15611565
RETURN_STRING("long");
1562-
} else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1566+
}
1567+
if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
15631568
RETURN_STRING("entity");
1564-
} else {
1565-
RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
15661569
}
1567-
} else {
1568-
RETVAL_TRUE;
1569-
1570-
switch (Z_TYPE_P(arg1)) {
1571-
case IS_STRING:
1572-
if (strncasecmp("none", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
1573-
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1574-
} else if (strncasecmp("long", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
1575-
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1576-
} else if (strncasecmp("entity", Z_STRVAL_P(arg1), Z_STRLEN_P(arg1)) == 0) {
1577-
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1578-
} else {
1579-
convert_to_long_ex(arg1);
1570+
RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1571+
}
15801572

1581-
if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
1582-
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1583-
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
1584-
} else {
1585-
php_error_docref(NULL, E_WARNING, "Unknown character");
1586-
RETURN_FALSE;
1587-
}
1588-
}
1589-
break;
1590-
default:
1591-
convert_to_long_ex(arg1);
1592-
if (php_mb_check_code_point(Z_LVAL_P(arg1))) {
1593-
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1594-
MBSTRG(current_filter_illegal_substchar) = Z_LVAL_P(arg1);
1595-
} else {
1596-
php_error_docref(NULL, E_WARNING, "Unknown character");
1597-
RETURN_FALSE;
1598-
}
1599-
break;
1573+
if (substitute_character != NULL) {
1574+
if (zend_string_equals_literal_ci(substitute_character, "none")) {
1575+
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1576+
RETURN_TRUE;
1577+
}
1578+
if (zend_string_equals_literal_ci(substitute_character, "long")) {
1579+
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1580+
RETURN_TRUE;
1581+
}
1582+
if (zend_string_equals_literal_ci(substitute_character, "entity")) {
1583+
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1584+
RETURN_TRUE;
16001585
}
1586+
/* Invalid string value */
1587+
zend_argument_value_error(1, "must be 'none', 'long', 'entity' or a valid codepoint");
1588+
RETURN_THROWS();
1589+
}
1590+
/* Integer codepoint passed */
1591+
if (!php_mb_check_code_point(substitute_codepoint)) {
1592+
zend_argument_value_error(1, "is not a valid codepoint");
1593+
RETURN_THROWS();
16011594
}
1595+
1596+
MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1597+
MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
1598+
RETURN_TRUE;
16021599
}
16031600
/* }}} */
16041601

ext/mbstring/mbstring.stub.php

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ function mb_http_output(string $encoding = UNKNOWN): string|bool {}
1212

1313
function mb_detect_order(array|string $encoding = UNKNOWN): array|bool {}
1414

15-
/** @param string|int $substchar */
16-
function mb_substitute_character($substchar = UNKNOWN): string|int|bool {}
15+
function mb_substitute_character(string|int|null $substitute_character = null): string|int|bool {}
1716

1817
function mb_preferred_mime_name(string $encoding): string|false {}
1918

ext/mbstring/mbstring_arginfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_detect_order, 0, 0, MAY_BE_AR
1919
ZEND_END_ARG_INFO()
2020

2121
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_substitute_character, 0, 0, MAY_BE_STRING|MAY_BE_LONG|MAY_BE_BOOL)
22-
ZEND_ARG_INFO(0, substchar)
22+
ZEND_ARG_TYPE_MASK(0, substitute_character, MAY_BE_STRING|MAY_BE_LONG|MAY_BE_NULL, "null")
2323
ZEND_END_ARG_INFO()
2424

2525
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_preferred_mime_name, 0, 1, MAY_BE_STRING|MAY_BE_FALSE)

ext/mbstring/tests/bug69079.phpt

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,27 +9,32 @@ mb_internal_encoding('UTF-8');
99
var_dump(mb_substitute_character(0x1F600));
1010
var_dump(bin2hex(mb_scrub("\xff")));
1111
mb_substitute_character(0x3f); // Reset to '?', as the next call will fail
12-
var_dump(mb_substitute_character(0xD800)); // Surrogate (illegal)
12+
try {
13+
var_dump(mb_substitute_character(0xD800)); // Surrogate (illegal)
14+
} catch (\ValueError $e) {
15+
echo $e->getMessage() . \PHP_EOL;
16+
}
1317
var_dump(bin2hex(mb_scrub("\xff")));
1418

1519
mb_internal_encoding('EUC-JP-2004');
1620

1721
mb_substitute_character(0x63); // Reset to '?', as the next call will fail
18-
mb_substitute_character(0x8fa1ef); // EUC-JP-2004 encoding of U+50AA (illegal)
22+
try {
23+
mb_substitute_character(0x8fa1ef); // EUC-JP-2004 encoding of U+50AA (illegal)
24+
} catch (\ValueError $e) {
25+
echo $e->getMessage() . \PHP_EOL;
26+
}
1927
var_dump(bin2hex(mb_scrub("\x8d")));
2028

2129
mb_substitute_character(0x50aa);
2230
var_dump(bin2hex(mb_scrub("\x8d")));
2331

2432
?>
25-
--EXPECTF--
33+
--EXPECT--
2634
bool(true)
2735
string(8) "f09f9880"
28-
29-
Warning: mb_substitute_character(): Unknown character in %s on line %d
30-
bool(false)
36+
mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
3137
string(2) "3f"
32-
33-
Warning: mb_substitute_character(): Unknown character in %s on line %d
38+
mb_substitute_character(): Argument #1 ($substitute_character) is not a valid codepoint
3439
string(2) "63"
3540
string(6) "8fa1ef"

ext/mbstring/tests/mb_substitute_character.phpt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,13 @@ var_dump(mb_substitute_character('entity'));
2323
var_dump(mb_substitute_character());
2424
var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
2525

26-
var_dump(mb_substitute_character('BAD_NAME'));
26+
try {
27+
var_dump(mb_substitute_character('BAD_NAME'));
28+
} catch (\ValueError $e) {
29+
echo $e->getMessage() . \PHP_EOL;
30+
}
2731
?>
28-
--EXPECTF--
32+
--EXPECT--
2933
bool(true)
3034
int(12356)
3135
string(8) "82a282a0"
@@ -38,6 +42,4 @@ string(4) "82a0"
3842
bool(true)
3943
string(6) "entity"
4044
string(20) "262378323636303b82a0"
41-
42-
Warning: mb_substitute_character(): Unknown character in %s on line %d
43-
bool(false)
45+
mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint

ext/mbstring/tests/mb_substitute_character_basic.phpt

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ Test mb_substitute_character() function : basic functionality
33
--SKIPIF--
44
<?php
55
extension_loaded('mbstring') or die('skip');
6-
function_exists('mb_substitute_character') or die("skip mb_substitute_character() is not available in this build");
76
?>
87
--FILE--
98
<?php
@@ -22,12 +21,19 @@ var_dump( mb_substitute_character(66) );
2221
var_dump( mb_substitute_character() );
2322
var_dump( mb_substitute_character(1234) );
2423
var_dump( mb_substitute_character() );
25-
var_dump( mb_substitute_character("none") );
24+
var_dump( mb_substitute_character('none') );
2625
var_dump( mb_substitute_character() );
27-
var_dump( mb_substitute_character("b") );
26+
// Check string case insensitivity
27+
var_dump( mb_substitute_character('LoNg') );
28+
var_dump( mb_substitute_character() );
29+
try {
30+
var_dump( mb_substitute_character("b") );
31+
} catch (\ValueError $e) {
32+
echo $e->getMessage() . \PHP_EOL;
33+
}
2834

2935
?>
30-
--EXPECTF--
36+
--EXPECT--
3137
*** Testing mb_substitute_character() : basic functionality ***
3238
int(63)
3339
bool(true)
@@ -36,6 +42,6 @@ bool(true)
3642
int(1234)
3743
bool(true)
3844
string(4) "none"
39-
40-
Warning: mb_substitute_character(): Unknown character in %s on line %d
41-
bool(false)
45+
bool(true)
46+
string(4) "long"
47+
mb_substitute_character(): Argument #1 ($substitute_character) must be 'none', 'long', 'entity' or a valid codepoint

ext/mbstring/tests/mb_substitute_character_variation2.phpt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ Test mb_substitute_character() function : variation unmappable out char for conv
33
--SKIPIF--
44
<?php
55
extension_loaded('mbstring') or die('skip');
6-
function_exists('mb_substitute_character') or die("skip mb_substitute_character() is not available in this build");
76
?>
87
--FILE--
98
<?php

0 commit comments

Comments
 (0)