Skip to content

Commit ef287bf

Browse files
authored
Minor refactoring of std string extension (#8196)
Mainly using more appropriate types, early returns, and moving the happy path to the primary scope (failure path is guarded by ``UNEXPECTED`` macros.
1 parent 820f695 commit ef287bf

File tree

3 files changed

+63
-70
lines changed

3 files changed

+63
-70
lines changed

ext/standard/php_string.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ PHP_MINIT_FUNCTION(string_intrin);
3333
strnatcmp_ex(a, strlen(a), b, strlen(b), 0)
3434
#define strnatcasecmp(a, b) \
3535
strnatcmp_ex(a, strlen(a), b, strlen(b), 1)
36-
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, int fold_case);
36+
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, bool is_case_insensitive);
3737
PHPAPI struct lconv *localeconv_r(struct lconv *out);
3838
PHPAPI char *php_strtoupper(char *s, size_t len);
3939
PHPAPI char *php_strtolower(char *s, size_t len);

ext/standard/string.c

Lines changed: 60 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,6 @@ void register_string_constants(INIT_FUNC_ARGS)
8989
}
9090
/* }}} */
9191

92-
int php_tag_find(char *tag, size_t len, const char *set);
93-
9492
/* this is read-only, so it's ok */
9593
ZEND_SET_ALIGNED(16, static const char hexconvtab[]) = "0123456789abcdef";
9694

@@ -695,11 +693,11 @@ PHP_FUNCTION(strcoll)
695693
* it needs to be incrementing.
696694
* Returns: FAILURE/SUCCESS whether the input was correct (i.e. no range errors)
697695
*/
698-
static inline int php_charmask(const unsigned char *input, size_t len, char *mask)
696+
static inline zend_result php_charmask(const unsigned char *input, size_t len, char *mask)
699697
{
700698
const unsigned char *end;
701699
unsigned char c;
702-
int result = SUCCESS;
700+
zend_result result = SUCCESS;
703701

704702
memset(mask, 0, 256);
705703
for (end = input+len; input < end; input++) {
@@ -1144,7 +1142,7 @@ PHP_FUNCTION(explode)
11441142
PHPAPI void php_implode(const zend_string *glue, HashTable *pieces, zval *return_value)
11451143
{
11461144
zval *tmp;
1147-
int numelems;
1145+
uint32_t numelems;
11481146
zend_string *str;
11491147
char *cptr;
11501148
size_t len = 0;
@@ -1606,7 +1604,7 @@ PHP_FUNCTION(pathinfo)
16061604
zval tmp;
16071605
char *path, *dirname;
16081606
size_t path_len;
1609-
int have_basename;
1607+
bool have_basename;
16101608
zend_long opt = PHP_PATHINFO_ALL;
16111609
zend_string *ret = NULL;
16121610

@@ -1744,16 +1742,14 @@ PHP_FUNCTION(stristr)
17441742

17451743
found = php_stristr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(haystack), ZSTR_LEN(needle));
17461744

1747-
if (found) {
1748-
found_offset = found - ZSTR_VAL(haystack);
1749-
if (part) {
1750-
RETVAL_STRINGL(ZSTR_VAL(haystack), found_offset);
1751-
} else {
1752-
RETVAL_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
1753-
}
1754-
} else {
1755-
RETVAL_FALSE;
1745+
if (UNEXPECTED(!found)) {
1746+
RETURN_FALSE;
1747+
}
1748+
found_offset = found - ZSTR_VAL(haystack);
1749+
if (part) {
1750+
RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
17561751
}
1752+
RETURN_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
17571753
}
17581754
/* }}} */
17591755

@@ -1774,15 +1770,14 @@ PHP_FUNCTION(strstr)
17741770

17751771
found = php_memnstr(ZSTR_VAL(haystack), ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
17761772

1777-
if (found) {
1778-
found_offset = found - ZSTR_VAL(haystack);
1779-
if (part) {
1780-
RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1781-
} else {
1782-
RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
1783-
}
1773+
if (UNEXPECTED(!found)) {
1774+
RETURN_FALSE;
17841775
}
1785-
RETURN_FALSE;
1776+
found_offset = found - ZSTR_VAL(haystack);
1777+
if (part) {
1778+
RETURN_STRINGL(ZSTR_VAL(haystack), found_offset);
1779+
}
1780+
RETURN_STRINGL(ZSTR_VAL(haystack) + found_offset, ZSTR_LEN(haystack) - found_offset);
17861781
}
17871782
/* }}} */
17881783

@@ -1863,11 +1858,10 @@ PHP_FUNCTION(strpos)
18631858
ZSTR_VAL(needle), ZSTR_LEN(needle),
18641859
ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
18651860

1866-
if (found) {
1867-
RETURN_LONG(found - ZSTR_VAL(haystack));
1868-
} else {
1861+
if (UNEXPECTED(!found)) {
18691862
RETURN_FALSE;
18701863
}
1864+
RETURN_LONG(found - ZSTR_VAL(haystack));
18711865
}
18721866
/* }}} */
18731867

@@ -1896,11 +1890,10 @@ PHP_FUNCTION(stripos)
18961890
found = (char*)php_memnistr(ZSTR_VAL(haystack) + offset,
18971891
ZSTR_VAL(needle), ZSTR_LEN(needle), ZSTR_VAL(haystack) + ZSTR_LEN(haystack));
18981892

1899-
if (found) {
1900-
RETVAL_LONG(found - ZSTR_VAL(haystack));
1901-
} else {
1902-
RETVAL_FALSE;
1893+
if (UNEXPECTED(!found)) {
1894+
RETURN_FALSE;
19031895
}
1896+
RETURN_LONG(found - ZSTR_VAL(haystack));
19041897
}
19051898
/* }}} */
19061899

@@ -1940,11 +1933,12 @@ PHP_FUNCTION(strrpos)
19401933
}
19411934
}
19421935

1943-
if ((found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e))) {
1944-
RETURN_LONG(found - ZSTR_VAL(haystack));
1945-
}
1936+
found = zend_memnrstr(p, ZSTR_VAL(needle), ZSTR_LEN(needle), e);
19461937

1947-
RETURN_FALSE;
1938+
if (UNEXPECTED(!found)) {
1939+
RETURN_FALSE;
1940+
}
1941+
RETURN_LONG(found - ZSTR_VAL(haystack));
19481942
}
19491943
/* }}} */
19501944

@@ -2043,12 +2037,11 @@ PHP_FUNCTION(strrchr)
20432037
ZEND_PARSE_PARAMETERS_END();
20442038

20452039
found = zend_memrchr(ZSTR_VAL(haystack), *ZSTR_VAL(needle), ZSTR_LEN(haystack));
2046-
if (found) {
2047-
found_offset = found - ZSTR_VAL(haystack);
2048-
RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
2049-
} else {
2040+
if (UNEXPECTED(!found)) {
20502041
RETURN_FALSE;
20512042
}
2043+
found_offset = found - ZSTR_VAL(haystack);
2044+
RETURN_STRINGL(found, ZSTR_LEN(haystack) - found_offset);
20522045
}
20532046
/* }}} */
20542047

@@ -2782,7 +2775,7 @@ static void php_strtr_array(zval *return_value, zend_string *input, HashTable *p
27822775
zend_ulong num_key;
27832776
zend_string *str_key;
27842777
size_t len, pos, old_pos;
2785-
int num_keys = 0;
2778+
bool has_num_keys = false;
27862779
size_t minlen = 128*1024;
27872780
size_t maxlen = 0;
27882781
HashTable str_hash;
@@ -2799,10 +2792,10 @@ static void php_strtr_array(zval *return_value, zend_string *input, HashTable *p
27992792
/* check if original array has numeric keys */
28002793
ZEND_HASH_FOREACH_STR_KEY(pats, str_key) {
28012794
if (UNEXPECTED(!str_key)) {
2802-
num_keys = 1;
2795+
has_num_keys = true;
28032796
} else {
28042797
len = ZSTR_LEN(str_key);
2805-
if (UNEXPECTED(len < 1)) {
2798+
if (UNEXPECTED(len == 0)) {
28062799
php_error_docref(NULL, E_WARNING, "Ignoring replacement of empty string");
28072800
continue;
28082801
} else if (UNEXPECTED(len > slen)) {
@@ -2821,7 +2814,7 @@ static void php_strtr_array(zval *return_value, zend_string *input, HashTable *p
28212814
}
28222815
} ZEND_HASH_FOREACH_END();
28232816

2824-
if (UNEXPECTED(num_keys)) {
2817+
if (UNEXPECTED(has_num_keys)) {
28252818
zend_string *key_used;
28262819
/* we have to rebuild HashTable with numeric keys */
28272820
zend_hash_init(&str_hash, zend_hash_num_elements(pats), NULL, NULL, 0);
@@ -2961,7 +2954,7 @@ static zend_always_inline zend_long count_chars(const char *p, zend_long length,
29612954
/* }}} */
29622955

29632956
/* {{{ php_char_to_str_ex */
2964-
static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, int case_sensitivity, zend_long *replace_count)
2957+
static zend_string* php_char_to_str_ex(zend_string *str, char from, char *to, size_t to_len, bool case_sensitivity, zend_long *replace_count)
29652958
{
29662959
zend_string *result;
29672960
size_t char_count;
@@ -3319,7 +3312,7 @@ PHP_FUNCTION(strtr)
33193312
ZSTR_VAL(str_key)[0],
33203313
ZSTR_VAL(replace),
33213314
ZSTR_LEN(replace),
3322-
1,
3315+
/* case_sensitive */ true,
33233316
NULL));
33243317
} else {
33253318
zend_long dummy;
@@ -3455,7 +3448,7 @@ PHP_FUNCTION(similar_text)
34553448
{
34563449
zend_string *t1, *t2;
34573450
zval *percent = NULL;
3458-
int ac = ZEND_NUM_ARGS();
3451+
bool compute_percentage = ZEND_NUM_ARGS() >= 3;
34593452
size_t sim;
34603453

34613454
ZEND_PARSE_PARAMETERS_START(2, 3)
@@ -3466,7 +3459,7 @@ PHP_FUNCTION(similar_text)
34663459
ZEND_PARSE_PARAMETERS_END();
34673460

34683461
if (ZSTR_LEN(t1) + ZSTR_LEN(t2) == 0) {
3469-
if (ac > 2) {
3462+
if (compute_percentage) {
34703463
ZEND_TRY_ASSIGN_REF_DOUBLE(percent, 0);
34713464
}
34723465

@@ -3475,7 +3468,7 @@ PHP_FUNCTION(similar_text)
34753468

34763469
sim = php_similar_char(ZSTR_VAL(t1), ZSTR_LEN(t1), ZSTR_VAL(t2), ZSTR_LEN(t2));
34773470

3478-
if (ac > 2) {
3471+
if (compute_percentage) {
34793472
ZEND_TRY_ASSIGN_REF_DOUBLE(percent, sim * 200.0 / (ZSTR_LEN(t1) + ZSTR_LEN(t2)));
34803473
}
34813474

@@ -3893,8 +3886,7 @@ static zend_always_inline quad_word aarch64_contains_slash_chars(uint8x16_t x) {
38933886

38943887
static zend_always_inline char *aarch64_add_slashes(quad_word res, const char *source, char *target)
38953888
{
3896-
int i = 0;
3897-
for (; i < 16; i++) {
3889+
for (int i = 0; i < 16; i++) {
38983890
char s = source[i];
38993891
if (res.mem[i] == 0)
39003892
*target++ = s;
@@ -4156,7 +4148,7 @@ PHPAPI void php_stripslashes(zend_string *str)
41564148
/* {{{ php_str_replace_in_subject */
41574149
static zend_long php_str_replace_in_subject(
41584150
zend_string *search_str, HashTable *search_ht, zend_string *replace_str, HashTable *replace_ht,
4159-
zend_string *subject_str, zval *result, int case_sensitivity
4151+
zend_string *subject_str, zval *result, bool case_sensitivity
41604152
) {
41614153
zval *search_entry;
41624154
zend_string *tmp_result;
@@ -4317,7 +4309,7 @@ static zend_long php_str_replace_in_subject(
43174309
/* }}} */
43184310

43194311
/* {{{ php_str_replace_common */
4320-
static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, int case_sensitivity)
4312+
static void php_str_replace_common(INTERNAL_FUNCTION_PARAMETERS, bool case_sensitivity)
43214313
{
43224314
zend_string *search_str;
43234315
HashTable *search_ht;
@@ -4832,10 +4824,11 @@ PHP_FUNCTION(parse_str)
48324824
* 0 start tag
48334825
* 1 first non-whitespace char seen
48344826
*/
4835-
int php_tag_find(char *tag, size_t len, const char *set) {
4827+
static bool php_tag_find(char *tag, size_t len, const char *set) {
48364828
char c, *n;
48374829
const char *t;
4838-
int state=0, done=0;
4830+
int state = 0;
4831+
bool done = 0;
48394832
char *norm;
48404833

48414834
if (len == 0) {
@@ -5387,7 +5380,7 @@ PHP_FUNCTION(count_chars)
53875380
/* }}} */
53885381

53895382
/* {{{ php_strnatcmp */
5390-
static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
5383+
static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, bool is_case_insensitive)
53915384
{
53925385
zend_string *s1, *s2;
53935386

@@ -5398,7 +5391,7 @@ static void php_strnatcmp(INTERNAL_FUNCTION_PARAMETERS, int fold_case)
53985391

53995392
RETURN_LONG(strnatcmp_ex(ZSTR_VAL(s1), ZSTR_LEN(s1),
54005393
ZSTR_VAL(s2), ZSTR_LEN(s2),
5401-
fold_case));
5394+
is_case_insensitive));
54025395
}
54035396
/* }}} */
54045397

@@ -5409,11 +5402,18 @@ PHP_FUNCTION(strnatcmp)
54095402
}
54105403
/* }}} */
54115404

5405+
/* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5406+
PHP_FUNCTION(strnatcasecmp)
5407+
{
5408+
php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5409+
}
5410+
/* }}} */
5411+
54125412
/* {{{ Returns numeric formatting information based on the current locale */
54135413
PHP_FUNCTION(localeconv)
54145414
{
54155415
zval grouping, mon_grouping;
5416-
int len, i;
5416+
size_t len, i;
54175417

54185418
ZEND_PARSE_PARAMETERS_NONE();
54195419

@@ -5427,14 +5427,14 @@ PHP_FUNCTION(localeconv)
54275427
localeconv_r( &currlocdata );
54285428

54295429
/* Grab the grouping data out of the array */
5430-
len = (int)strlen(currlocdata.grouping);
5430+
len = strlen(currlocdata.grouping);
54315431

54325432
for (i = 0; i < len; i++) {
54335433
add_index_long(&grouping, i, currlocdata.grouping[i]);
54345434
}
54355435

54365436
/* Grab the monetary grouping data out of the array */
5437-
len = (int)strlen(currlocdata.mon_grouping);
5437+
len = strlen(currlocdata.mon_grouping);
54385438

54395439
for (i = 0; i < len; i++) {
54405440
add_index_long(&mon_grouping, i, currlocdata.mon_grouping[i]);
@@ -5463,13 +5463,6 @@ PHP_FUNCTION(localeconv)
54635463
}
54645464
/* }}} */
54655465

5466-
/* {{{ Returns the result of case-insensitive string comparison using 'natural' algorithm */
5467-
PHP_FUNCTION(strnatcasecmp)
5468-
{
5469-
php_strnatcmp(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
5470-
}
5471-
/* }}} */
5472-
54735466
/* {{{ Returns the number of times a substring occurs in the string */
54745467
PHP_FUNCTION(substr_count)
54755468
{
@@ -5741,7 +5734,7 @@ PHP_FUNCTION(str_rot13)
57415734
}
57425735
/* }}} */
57435736

5744-
static void php_string_shuffle(char *str, zend_long len) /* {{{ */
5737+
static void php_string_shuffle(char *str, size_t len) /* {{{ */
57455738
{
57465739
zend_long n_elems, rnd_idx, n_left;
57475740
char temp;
@@ -5777,7 +5770,7 @@ PHP_FUNCTION(str_shuffle)
57775770

57785771
RETVAL_STRINGL(ZSTR_VAL(arg), ZSTR_LEN(arg));
57795772
if (Z_STRLEN_P(return_value) > 1) {
5780-
php_string_shuffle(Z_STRVAL_P(return_value), (zend_long) Z_STRLEN_P(return_value));
5773+
php_string_shuffle(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value));
57815774
}
57825775
}
57835776
/* }}} */

ext/standard/strnatcmp.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ compare_left(char const **a, char const *aend, char const **b, char const *bend)
8585
/* }}} */
8686

8787
/* {{{ strnatcmp_ex */
88-
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, int fold_case)
88+
PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len, bool is_case_insensitive)
8989
{
9090
unsigned char ca, cb;
9191
char const *ap, *bp;
@@ -146,7 +146,7 @@ PHPAPI int strnatcmp_ex(char const *a, size_t a_len, char const *b, size_t b_len
146146
}
147147
}
148148

149-
if (fold_case) {
149+
if (is_case_insensitive) {
150150
ca = toupper((int)(unsigned char)ca);
151151
cb = toupper((int)(unsigned char)cb);
152152
}

0 commit comments

Comments
 (0)