From e1935e9efb6bba1cce8b94d5680bc6a899e68875 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sat, 7 Dec 2019 02:44:34 +0100 Subject: [PATCH 1/3] Allow empty needles in mb_strpos and mb_strstr function family. MBstring analogous implementation to 6d578482a933be7597b686b59a935b316161d251 --- ext/mbstring/libmbfl/mbfl/mbfilter.c | 30 +++++-- ext/mbstring/libmbfl/mbfl/mbfilter.h | 2 +- ext/mbstring/mbstring.c | 24 ------ .../tests/mb_stripos_empty_needle.phpt | 86 +++++++++++++++++++ .../tests/mb_stristr_empty_needle.phpt | 37 ++++++++ .../tests/mb_strpos_empty_needle.phpt | 86 +++++++++++++++++++ .../tests/mb_strrchr_empty_needle.phpt | 37 ++++++++ .../tests/mb_strripos_empty_needle.phpt | 86 +++++++++++++++++++ .../tests/mb_strrpos_empty_needle.phpt | 86 +++++++++++++++++++ .../tests/mb_strstr_empty_needle.phpt | 37 ++++++++ 10 files changed, 481 insertions(+), 30 deletions(-) create mode 100644 ext/mbstring/tests/mb_stripos_empty_needle.phpt create mode 100644 ext/mbstring/tests/mb_stristr_empty_needle.phpt create mode 100644 ext/mbstring/tests/mb_strpos_empty_needle.phpt create mode 100644 ext/mbstring/tests/mb_strrchr_empty_needle.phpt create mode 100644 ext/mbstring/tests/mb_strripos_empty_needle.phpt create mode 100644 ext/mbstring/tests/mb_strrpos_empty_needle.phpt create mode 100644 ext/mbstring/tests/mb_strstr_empty_needle.phpt diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 1cb6d28e7b4e0..31b586a4f64be 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -656,7 +656,7 @@ filter_count_output(int c, void *data) } size_t -mbfl_strlen(mbfl_string *string) +mbfl_strlen(const mbfl_string *string) { size_t len, n, k; unsigned char *p; @@ -855,13 +855,33 @@ mbfl_strpos( needle_u8 = needle; } - if (needle_u8->len < 1) { - result = (size_t) -8; + result = (size_t) -1; + if (haystack_u8->len < needle_u8->len) { goto out; } - result = (size_t) -1; - if (haystack_u8->len < needle_u8->len) { + if (needle_u8->len == 0) { + /* Out of bound offset */ + if ( + (offset > 0 && offset > mbfl_strlen(haystack_u8)) + || (offset < 0 && -offset > mbfl_strlen(haystack_u8)) + ) { + return -16; + } + + if (reverse) { + if (offset < 0) { + result = (size_t) -offset; + } else { + result = mbfl_strlen(haystack_u8) - offset;; + } + } else { + if (offset < 0) { + result = mbfl_strlen(haystack_u8) + offset; + } else { + result = (size_t) offset; + } + } goto out; } diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index 0966e2df44b57..edba946f72087 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -193,7 +193,7 @@ static inline int mbfl_is_error(size_t len) { * strlen */ MBFLAPI extern size_t -mbfl_strlen(mbfl_string *string); +mbfl_strlen(const mbfl_string *string); /* * oddlen diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a0dbb3a30ea9e..1615dc2178cf3 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2110,11 +2110,6 @@ PHP_FUNCTION(mb_strpos) } } - if (needle.len == 0) { - php_error_docref(NULL, E_WARNING, "Empty delimiter"); - RETURN_FALSE; - } - n = mbfl_strpos(&haystack, &needle, offset, reverse); if (!mbfl_is_error(n)) { RETVAL_LONG(n); @@ -2189,11 +2184,6 @@ PHP_FUNCTION(mb_stripos) RETURN_THROWS(); } - if (needle.len == 0) { - php_error_docref(NULL, E_WARNING, "Empty delimiter"); - RETURN_FALSE; - } - n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding); if (!mbfl_is_error(n)) { @@ -2246,11 +2236,6 @@ PHP_FUNCTION(mb_strstr) RETURN_FALSE; } - if (needle.len == 0) { - php_error_docref(NULL, E_WARNING, "Empty delimiter"); - RETURN_FALSE; - } - n = mbfl_strpos(&haystack, &needle, 0, 0); if (!mbfl_is_error(n)) { if (part) { @@ -2350,11 +2335,6 @@ PHP_FUNCTION(mb_stristr) RETURN_FALSE; } - if (!needle.len) { - php_error_docref(NULL, E_WARNING, "Empty delimiter"); - RETURN_FALSE; - } - n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding); if (mbfl_is_error(n)) { RETURN_FALSE; @@ -4849,10 +4829,6 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol break; } - if (needle.len == 0) { - break; - } - if (offset != 0) { size_t haystack_char_len = mbfl_strlen(&haystack); diff --git a/ext/mbstring/tests/mb_stripos_empty_needle.phpt b/ext/mbstring/tests/mb_stripos_empty_needle.phpt new file mode 100644 index 0000000000000..31e21f1cd5c91 --- /dev/null +++ b/ext/mbstring/tests/mb_stripos_empty_needle.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test mb_stripos() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +-- ASCII string without offset -- +int(0) + +-- ASCII string with in range positive offset -- +int(2) + +-- ASCII string with in range negative offset -- +int(5) + +-- ASCII string with out of bound positive offset -- + +Warning: mb_stripos(): Offset not contained in string in %s on line %d +bool(false) + +-- ASCII string with out of bound negative offset -- + +Warning: mb_stripos(): Offset not contained in string in %s on line %d +bool(false) + +-- Multi-byte string without offset -- +int(0) + +-- Multi-byte string with in range positive offset -- +int(2) + +-- Multi-byte string with in range negative offset -- +int(19) + +-- Multi-byte string with out of bound positive offset -- + +Warning: mb_stripos(): Offset not contained in string in %s on line %d +bool(false) + +-- Multi-byte string with out of bound negative offset -- + +Warning: mb_stripos(): Offset not contained in string in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/mb_stristr_empty_needle.phpt b/ext/mbstring/tests/mb_stristr_empty_needle.phpt new file mode 100644 index 0000000000000..60850a942c5cb --- /dev/null +++ b/ext/mbstring/tests/mb_stristr_empty_needle.phpt @@ -0,0 +1,37 @@ +--TEST-- +Test mb_stristr() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECT-- +-- ASCII string -- +string(7) "abc def" +string(7) "abc def" +string(0) "" + +-- Multibyte string -- +string(53) "日本語テキストです。0123456789。" +string(53) "日本語テキストです。0123456789。" +string(0) "" diff --git a/ext/mbstring/tests/mb_strpos_empty_needle.phpt b/ext/mbstring/tests/mb_strpos_empty_needle.phpt new file mode 100644 index 0000000000000..31612647d641f --- /dev/null +++ b/ext/mbstring/tests/mb_strpos_empty_needle.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test mb_strpos() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +-- ASCII string without offset -- +int(0) + +-- ASCII string with in range positive offset -- +int(2) + +-- ASCII string with in range negative offset -- +int(5) + +-- ASCII string with out of bound positive offset -- + +Warning: mb_strpos(): Offset not contained in string in %s on line %d +bool(false) + +-- ASCII string with out of bound negative offset -- + +Warning: mb_strpos(): Offset not contained in string in %s on line %d +bool(false) + +-- Multi-byte string without offset -- +int(0) + +-- Multi-byte string with in range positive offset -- +int(2) + +-- Multi-byte string with in range negative offset -- +int(19) + +-- Multi-byte string with out of bound positive offset -- + +Warning: mb_strpos(): Offset not contained in string in %s on line %d +bool(false) + +-- Multi-byte string with out of bound negative offset -- + +Warning: mb_strpos(): Offset not contained in string in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/mb_strrchr_empty_needle.phpt b/ext/mbstring/tests/mb_strrchr_empty_needle.phpt new file mode 100644 index 0000000000000..1efacc90d23eb --- /dev/null +++ b/ext/mbstring/tests/mb_strrchr_empty_needle.phpt @@ -0,0 +1,37 @@ +--TEST-- +Test mb_strrchr() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECT-- +-- ASCII string -- +string(0) "" +string(0) "" +string(0) "" + +-- Multibyte string -- +string(0) "" +string(0) "" +string(0) "" diff --git a/ext/mbstring/tests/mb_strripos_empty_needle.phpt b/ext/mbstring/tests/mb_strripos_empty_needle.phpt new file mode 100644 index 0000000000000..6f4c575468643 --- /dev/null +++ b/ext/mbstring/tests/mb_strripos_empty_needle.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test mb_strripos() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +-- ASCII string without offset -- +int(7) + +-- ASCII string with in range positive offset -- +int(5) + +-- ASCII string with in range negative offset -- +int(2) + +-- ASCII string with out of bound positive offset -- + +Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) + +-- ASCII string with out of bound negative offset -- + +Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) + +-- Multi-byte string without offset -- +int(21) + +-- Multi-byte string with in range positive offset -- +int(19) + +-- Multi-byte string with in range negative offset -- +int(2) + +-- Multi-byte string with out of bound positive offset -- + +Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) + +-- Multi-byte string with out of bound negative offset -- + +Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/mb_strrpos_empty_needle.phpt b/ext/mbstring/tests/mb_strrpos_empty_needle.phpt new file mode 100644 index 0000000000000..78b591ee2f7b5 --- /dev/null +++ b/ext/mbstring/tests/mb_strrpos_empty_needle.phpt @@ -0,0 +1,86 @@ +--TEST-- +Test mb_strrpos() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECTF-- +-- ASCII string without offset -- +int(7) + +-- ASCII string with in range positive offset -- +int(5) + +-- ASCII string with in range negative offset -- +int(2) + +-- ASCII string with out of bound positive offset -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) + +-- ASCII string with out of bound negative offset -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) + +-- Multi-byte string without offset -- +int(21) + +-- Multi-byte string with in range positive offset -- +int(19) + +-- Multi-byte string with in range negative offset -- +int(2) + +-- Multi-byte string with out of bound positive offset -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) + +-- Multi-byte string with out of bound negative offset -- + +Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/mb_strstr_empty_needle.phpt b/ext/mbstring/tests/mb_strstr_empty_needle.phpt new file mode 100644 index 0000000000000..fd72a7707e374 --- /dev/null +++ b/ext/mbstring/tests/mb_strstr_empty_needle.phpt @@ -0,0 +1,37 @@ +--TEST-- +Test mb_strstr() function : with empty needle +--SKIPIF-- + +--FILE-- + +--EXPECT-- +-- ASCII string -- +string(7) "abc def" +string(7) "abc def" +string(0) "" + +-- Multibyte string -- +string(53) "日本語テキストです。0123456789。" +string(53) "日本語テキストです。0123456789。" +string(0) "" From b4c3f25149f3f6052298f46cc0679db23f1bef01 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 13 Dec 2019 19:58:37 +0100 Subject: [PATCH 2/3] Save haystack length, and move out of bound offset check outside a specialized path --- ext/mbstring/libmbfl/mbfl/mbfilter.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 31b586a4f64be..98822847262a3 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -817,6 +817,7 @@ mbfl_strpos( int reverse) { size_t result; + size_t haystack_length; mbfl_string _haystack_u8, _needle_u8; const mbfl_string *haystack_u8, *needle_u8 = NULL; const unsigned char *u8_tbl; @@ -855,29 +856,31 @@ mbfl_strpos( needle_u8 = needle; } + /* Check if offset is out of bound */ + haystack_length = mbfl_strlen(haystack_u8); + if ( + (offset > 0 && offset > haystack_length) + || (offset < 0 && -offset > haystack_length) + ) { + result = -16; + goto out; + } + result = (size_t) -1; if (haystack_u8->len < needle_u8->len) { goto out; } if (needle_u8->len == 0) { - /* Out of bound offset */ - if ( - (offset > 0 && offset > mbfl_strlen(haystack_u8)) - || (offset < 0 && -offset > mbfl_strlen(haystack_u8)) - ) { - return -16; - } - if (reverse) { if (offset < 0) { result = (size_t) -offset; } else { - result = mbfl_strlen(haystack_u8) - offset;; + result = haystack_length - offset;; } } else { if (offset < 0) { - result = mbfl_strlen(haystack_u8) + offset; + result = haystack_length + offset; } else { result = (size_t) offset; } From 290e9f10e1ddb1b80fda625434144990e899ed38 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 7 Jan 2020 17:25:36 +0100 Subject: [PATCH 3/3] Logic fix for reverse case and move mbfl_strlen call to only be done when needle length is 0 --- ext/mbstring/libmbfl/mbfl/mbfilter.c | 25 +++++++++---------- .../tests/mb_strripos_empty_needle.phpt | 8 +++--- .../tests/mb_strrpos_empty_needle.phpt | 8 +++--- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 98822847262a3..07b5dd3e52f2d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -817,7 +817,6 @@ mbfl_strpos( int reverse) { size_t result; - size_t haystack_length; mbfl_string _haystack_u8, _needle_u8; const mbfl_string *haystack_u8, *needle_u8 = NULL; const unsigned char *u8_tbl; @@ -856,27 +855,27 @@ mbfl_strpos( needle_u8 = needle; } - /* Check if offset is out of bound */ - haystack_length = mbfl_strlen(haystack_u8); - if ( - (offset > 0 && offset > haystack_length) - || (offset < 0 && -offset > haystack_length) - ) { - result = -16; - goto out; - } - result = (size_t) -1; if (haystack_u8->len < needle_u8->len) { goto out; } if (needle_u8->len == 0) { + size_t haystack_length = mbfl_strlen(haystack_u8); + /* Check if offset is out of bound */ + if ( + (offset > 0 && offset > haystack_length) + || (offset < 0 && -offset > haystack_length) + ) { + result = -16; + goto out; + } + if (reverse) { if (offset < 0) { - result = (size_t) -offset; + result = haystack_length + offset; } else { - result = haystack_length - offset;; + result = haystack_length; } } else { if (offset < 0) { diff --git a/ext/mbstring/tests/mb_strripos_empty_needle.phpt b/ext/mbstring/tests/mb_strripos_empty_needle.phpt index 6f4c575468643..2eaf8cbe1ec1b 100644 --- a/ext/mbstring/tests/mb_strripos_empty_needle.phpt +++ b/ext/mbstring/tests/mb_strripos_empty_needle.phpt @@ -51,10 +51,10 @@ var_dump(mb_strripos($string_mb, '', -150)); int(7) -- ASCII string with in range positive offset -- -int(5) +int(7) -- ASCII string with in range negative offset -- -int(2) +int(5) -- ASCII string with out of bound positive offset -- @@ -70,10 +70,10 @@ bool(false) int(21) -- Multi-byte string with in range positive offset -- -int(19) +int(21) -- Multi-byte string with in range negative offset -- -int(2) +int(19) -- Multi-byte string with out of bound positive offset -- diff --git a/ext/mbstring/tests/mb_strrpos_empty_needle.phpt b/ext/mbstring/tests/mb_strrpos_empty_needle.phpt index 78b591ee2f7b5..a56c9c364b8f2 100644 --- a/ext/mbstring/tests/mb_strrpos_empty_needle.phpt +++ b/ext/mbstring/tests/mb_strrpos_empty_needle.phpt @@ -51,10 +51,10 @@ var_dump(mb_strrpos($string_mb, '', -150)); int(7) -- ASCII string with in range positive offset -- -int(5) +int(7) -- ASCII string with in range negative offset -- -int(2) +int(5) -- ASCII string with out of bound positive offset -- @@ -70,10 +70,10 @@ bool(false) int(21) -- Multi-byte string with in range positive offset -- -int(19) +int(21) -- Multi-byte string with in range negative offset -- -int(2) +int(19) -- Multi-byte string with out of bound positive offset --