Skip to content

UTF-8 flag cleanup, and str_repeat #10490

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions Zend/zend_operators.c
Original file line number Diff line number Diff line change
Expand Up @@ -1955,11 +1955,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval
size_t op2_len = Z_STRLEN_P(op2);
size_t result_len = op1_len + op2_len;
zend_string *result_str;
uint32_t flags = 0;

if (ZSTR_IS_VALID_UTF8(Z_STR_P(op1)) && ZSTR_IS_VALID_UTF8(Z_STR_P(op2))) {
flags = IS_STR_VALID_UTF8;
}
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2));

if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) {
zend_throw_error(NULL, "String size overflow");
Expand Down
8 changes: 3 additions & 5 deletions Zend/zend_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,9 @@ ZEND_API zend_string* ZEND_FASTCALL zend_interned_string_find_permanent(zend_str
return zend_interned_string_ht_lookup(str, &interned_strings_permanent);
}

static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent) {
uint32_t flags = 0;
if (ZSTR_IS_VALID_UTF8(str)) {
flags = IS_STR_VALID_UTF8;
}
static zend_string* ZEND_FASTCALL zend_init_string_for_interning(zend_string *str, bool persistent)
{
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(str);
zend_ulong h = ZSTR_H(str);
zend_string_delref(str);
str = zend_string_init(ZSTR_VAL(str), ZSTR_LEN(str), persistent);
Expand Down
21 changes: 21 additions & 0 deletions Zend/zend_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,27 @@ END_EXTERN_C()
/* Masks the GC flags of `s` with IS_STR_INTERNED. The result is nonzero when
 * the flag is set; it is the masked bit itself, not a normalized 0/1. */
#define ZSTR_IS_INTERNED(s) (GC_FLAGS(s) & IS_STR_INTERNED)
/* Masks the GC flags of `s` with IS_STR_VALID_UTF8. The result is nonzero when
 * the flag is set; it is the masked bit itself, not a normalized 0/1. */
#define ZSTR_IS_VALID_UTF8(s) (GC_FLAGS(s) & IS_STR_VALID_UTF8)

/* Properties, encoded as GC flag bits, that still hold on the result of
 * concatenating two strings when both inputs have them.
 * Example: concatenating two valid UTF-8 strings yields a valid UTF-8 string. */
#define ZSTR_COPYABLE_CONCAT_PROPERTIES (IS_STR_VALID_UTF8)

/* Yields the copyable concat properties that are set on string `s`. */
#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(s) (ZSTR_COPYABLE_CONCAT_PROPERTIES & GC_FLAGS(s))
/* Yields the copyable concat properties that are set on both `s1` and `s2`. */
#define ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(s1, s2) (ZSTR_GET_COPYABLE_CONCAT_PROPERTIES(s1) & GC_FLAGS(s2))

/* Adds to `out` every copyable concat property (see
 * ZSTR_COPYABLE_CONCAT_PROPERTIES) that is set on `in`.
 * `out` is captured in a temporary before `in` is read. The temporaries carry
 * a macro-specific prefix so that an `in` expression mentioning a caller
 * variable named `_out` or `properties` is not captured by the macro's own
 * locals. */
#define ZSTR_COPY_CONCAT_PROPERTIES(out, in) do { \
	zend_string *zstr_ccp_out_ = (out); \
	uint32_t zstr_ccp_props_ = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES((in)); \
	GC_ADD_FLAGS(zstr_ccp_out_, zstr_ccp_props_); \
} while (0)

/* Adds to `out` every copyable concat property that is set on both `in1` and
 * `in2`. `out` is captured in a temporary before the inputs are read; the
 * temporaries are prefixed for the same capture-avoidance reason as in
 * ZSTR_COPY_CONCAT_PROPERTIES. */
#define ZSTR_COPY_CONCAT_PROPERTIES_BOTH(out, in1, in2) do { \
	zend_string *zstr_ccp_out_ = (out); \
	uint32_t zstr_ccp_props_ = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH((in1), (in2)); \
	GC_ADD_FLAGS(zstr_ccp_out_, zstr_ccp_props_); \
} while (0)

/* Expands to zend_empty_string. */
#define ZSTR_EMPTY_ALLOC() zend_empty_string
/* Expands to the entry of zend_one_char_string at index `c`. */
#define ZSTR_CHAR(c) zend_one_char_string[c]
/* Expands to the entry of zend_known_strings at index `idx`. */
#define ZSTR_KNOWN(idx) zend_known_strings[idx]
Expand Down
16 changes: 3 additions & 13 deletions Zend/zend_vm_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,11 +384,7 @@ ZEND_VM_HANDLER(8, ZEND_CONCAT, CONST|TMPVAR|CV, CONST|TMPVAR|CV, SPEC(NO_CONST_
zend_string *op1_str = Z_STR_P(op1);
zend_string *op2_str = Z_STR_P(op2);
zend_string *str;
uint32_t flags = 0;

if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) {
flags = IS_STR_VALID_UTF8;
}
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str);

if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) {
if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) {
Expand Down Expand Up @@ -3147,11 +3143,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP
zend_string *op1_str = Z_STR_P(op1);
zend_string *op2_str = Z_STR_P(op2);
zend_string *str;
uint32_t flags = 0;

if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) {
flags = IS_STR_VALID_UTF8;
}
uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_str, op2_str);

if (OP1_TYPE != IS_CONST && UNEXPECTED(ZSTR_LEN(op1_str) == 0)) {
if (OP2_TYPE == IS_CONST || OP2_TYPE == IS_CV) {
Expand Down Expand Up @@ -3248,9 +3240,7 @@ ZEND_VM_COLD_CONSTCONST_HANDLER(53, ZEND_FAST_CONCAT, CONST|TMPVAR|CV, CONST|TMP
memcpy(ZSTR_VAL(str), ZSTR_VAL(op1_str), ZSTR_LEN(op1_str));
memcpy(ZSTR_VAL(str) + ZSTR_LEN(op1_str), ZSTR_VAL(op2_str), ZSTR_LEN(op2_str)+1);

if (ZSTR_IS_VALID_UTF8(op1_str) && ZSTR_IS_VALID_UTF8(op2_str)) {
GC_ADD_FLAGS(str, IS_STR_VALID_UTF8);
}
ZSTR_COPY_CONCAT_PROPERTIES_BOTH(str, op1_str, op2_str);
ZVAL_NEW_STR(EX_VAR(opline->result.var), str);
if (OP1_TYPE != IS_CONST) {
zend_string_release_ex(op1_str, 0);
Expand Down
Loading