From f979af6d5cfa5d5145b3f3467a38b4b5612916af Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 30 Sep 2020 16:52:04 +0200 Subject: [PATCH 1/3] Allow empty needle in grapheme_str*pos, grapheme_str*str --- ext/intl/grapheme/grapheme_string.c | 23 +----- ext/intl/grapheme/grapheme_util.c | 11 +++ ext/intl/tests/grapheme_empty.phpt | 124 +++++++++++----------------- 3 files changed, 60 insertions(+), 98 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index e45330e15085e..a4e17dc29ed12 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -124,11 +124,6 @@ PHP_FUNCTION(grapheme_strpos) /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */ - if (needle_len == 0) { - zend_argument_value_error(2, "cannot be empty"); - RETURN_THROWS(); - } - if (offset >= 0 && grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0) { /* quick check to see if the string might be there * I realize that 'offset' is 'grapheme count offset' but will work in spite of that @@ -178,11 +173,6 @@ PHP_FUNCTION(grapheme_stripos) /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */ - if (needle_len == 0) { - zend_argument_value_error(2, "cannot be empty"); - RETURN_THROWS(); - } - is_ascii = ( grapheme_ascii_check((unsigned char*)haystack, haystack_len) >= 0 ); if ( is_ascii ) { @@ -244,11 +234,6 @@ PHP_FUNCTION(grapheme_strrpos) /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */ - if (needle_len == 0) { - zend_argument_value_error(2, "cannot be empty"); - RETURN_THROWS(); - } - is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0; if ( is_ascii ) { @@ -304,11 +289,6 @@ PHP_FUNCTION(grapheme_strripos) /* the offset is 'grapheme count offset' so it still might be invalid - we'll check it later */ - if (needle_len == 0) { - zend_argument_value_error(2, "cannot be empty"); - RETURN_THROWS(); - } - is_ascii = grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0; if ( is_ascii ) { @@ -573,8 +553,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas } if (needle_len == 0) { - zend_argument_value_error(2, "cannot be empty"); - RETURN_THROWS(); + RETURN_STRINGL(haystack, part ? 0 : haystack_len); } if ( !f_ignore_case ) { diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 8633fddf5bc10..729e74250df05 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -126,6 +126,17 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, ubrk_setText(bi, uhaystack, uhaystack_len, &status); STRPOS_CHECK_STATUS(status, "Failed to set up iterator"); + if (uneedle_len == 0) { + offset_pos = grapheme_get_haystack_offset(bi, last ? uhaystack_len : offset); + if (offset_pos == -1) { + zend_argument_value_error(3, "must be contained in argument #1 ($haystack)"); + ret_pos = -1; + goto finish; + } + ret_pos = offset_pos; + goto finish; + } + status = U_ZERO_ERROR; src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status); STRPOS_CHECK_STATUS(status, "Error creating search object"); diff --git a/ext/intl/tests/grapheme_empty.phpt b/ext/intl/tests/grapheme_empty.phpt index 31b3d8cc4393f..490c63cdca703 100644 --- a/ext/intl/tests/grapheme_empty.phpt +++ b/ext/intl/tests/grapheme_empty.phpt @@ -7,82 +7,54 @@ Test grapheme_strpos-alike functions with empty needle ini_set("intl.error_level", E_WARNING); -try { - var_dump(grapheme_strpos("abc", "", -1)); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strpos("abc", "")); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strpos("abc", "", -1)); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_stripos("abc", "")); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_stripos("abc", "", -1)); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strrpos("abc", "")); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strrpos("abc", "", -1)); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strripos("abc", "")); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strripos("abc", "", 1)); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_strstr("abc", "")); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} - -try { - var_dump(grapheme_stristr("abc", "")); -} catch (ValueError $exception) { - echo $exception->getMessage() . "\n"; -} +var_dump(grapheme_strpos("abc", "")); +var_dump(grapheme_strpos("abc", "", -1)); +var_dump(grapheme_stripos("abc", "")); +var_dump(grapheme_stripos("abc", "", -1)); +var_dump(grapheme_strrpos("abc", "")); +var_dump(grapheme_strrpos("abc", "", -1)); +var_dump(grapheme_strripos("abc", "")); +var_dump(grapheme_strripos("abc", "", 1)); +var_dump(grapheme_strstr("abc", "")); +var_dump(grapheme_strstr("abc", "", true)); +var_dump(grapheme_stristr("abc", "")); +var_dump(grapheme_stristr("abc", "", true)); +var_dump(grapheme_strpos("äbc", "")); +var_dump(grapheme_strpos("äbc", "", -1)); +var_dump(grapheme_stripos("äbc", "")); +var_dump(grapheme_stripos("äbc", "", -1)); +var_dump(grapheme_strrpos("äbc", "")); +var_dump(grapheme_strrpos("äbc", "", -1)); +var_dump(grapheme_strripos("äbc", "")); +var_dump(grapheme_strripos("äbc", "", 1)); +var_dump(grapheme_strstr("äbc", "")); +var_dump(grapheme_strstr("äbc", "", true)); +var_dump(grapheme_stristr("äbc", "")); +var_dump(grapheme_stristr("äbc", "", true)); ?> --EXPECT-- -grapheme_strpos(): Argument #2 ($needle) cannot be empty -grapheme_strpos(): Argument #2 ($needle) cannot be empty -grapheme_strpos(): Argument #2 ($needle) cannot be empty -grapheme_stripos(): Argument #2 ($needle) cannot be empty -grapheme_stripos(): Argument #2 ($needle) cannot be empty -grapheme_strrpos(): Argument #2 ($needle) cannot be empty -grapheme_strrpos(): Argument #2 ($needle) cannot be empty -grapheme_strripos(): Argument #2 ($needle) cannot be empty -grapheme_strripos(): Argument #2 ($needle) cannot be empty -grapheme_strstr(): Argument #2 ($needle) cannot be empty -grapheme_stristr(): Argument #2 ($needle) cannot be empty +int(0) +int(2) +int(0) +int(2) +int(3) +int(2) +int(3) +int(3) +string(3) "abc" +string(0) "" +string(3) "abc" +string(0) "" +int(0) +int(2) +int(0) +int(2) +int(3) +int(3) +int(3) +int(3) +string(4) "äbc" +string(0) "" +string(4) "äbc" +string(0) "" From 23cb5106fc734a0595abce23b4821c1d0f923593 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Thu, 1 Oct 2020 15:34:26 +0200 Subject: [PATCH 2/3] Remove obsolete code as it is now handled by grapheme_strpos_utf16 --- ext/intl/grapheme/grapheme_string.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index a4e17dc29ed12..7d8df14d2e807 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -552,10 +552,6 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas RETURN_THROWS(); } - if (needle_len == 0) { - RETURN_STRINGL(haystack, part ? 0 : haystack_len); - } - if ( !f_ignore_case ) { /* ASCII optimization: quick check to see if the string might be there */ From b3403804b6c25bb3dc181fb522b3b2a66f4289ad Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Thu, 1 Oct 2020 15:36:39 +0200 Subject: [PATCH 3/3] Bring handling of negative offsets for grapheme_strr*pos in line with mb_strr*pos --- ext/intl/grapheme/grapheme_util.c | 2 +- ext/intl/tests/grapheme_empty.phpt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 729e74250df05..2625a93263e87 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -127,7 +127,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, STRPOS_CHECK_STATUS(status, "Failed to set up iterator"); if (uneedle_len == 0) { - offset_pos = grapheme_get_haystack_offset(bi, last ? uhaystack_len : offset); + offset_pos = grapheme_get_haystack_offset(bi, last && offset >= 0 ? uhaystack_len : offset); if (offset_pos == -1) { zend_argument_value_error(3, "must be contained in argument #1 ($haystack)"); ret_pos = -1; diff --git a/ext/intl/tests/grapheme_empty.phpt b/ext/intl/tests/grapheme_empty.phpt index 490c63cdca703..873abf624e2f4 100644 --- a/ext/intl/tests/grapheme_empty.phpt +++ b/ext/intl/tests/grapheme_empty.phpt @@ -51,7 +51,7 @@ int(2) int(0) int(2) int(3) -int(3) +int(2) int(3) int(3) string(4) "äbc"