-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Use HT for recursion protection in JSON encode #7589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -93,19 +93,26 @@ static inline void php_json_encode_double(smart_str *buf, double d, int options) | |
} | ||
/* }}} */ | ||
|
||
#define PHP_JSON_HASH_PROTECT_RECURSION(_tmp_ht) \ | ||
do { \ | ||
if (_tmp_ht) { \ | ||
GC_TRY_PROTECT_RECURSION(_tmp_ht); \ | ||
} \ | ||
} while (0) | ||
|
||
#define PHP_JSON_HASH_UNPROTECT_RECURSION(_tmp_ht) \ | ||
do { \ | ||
if (_tmp_ht) { \ | ||
GC_TRY_UNPROTECT_RECURSION(_tmp_ht); \ | ||
} \ | ||
} while (0) | ||
/*
 * Registers `rc` in the encoder's `recursive` table while it is being
 * encoded.
 *
 * Values flagged GC_IMMUTABLE are not tracked and always yield SUCCESS.
 * Otherwise the address of `rc` is used as the integer key of an empty
 * element: when the insertion yields an element, a reference is taken on
 * `rc` and SUCCESS is returned. When it yields none (presumably because the
 * key is already present, i.e. `rc` is already being encoded further up the
 * call chain) FAILURE is returned and the refcount is left untouched.
 */
static inline zend_result php_json_protect_recursion(php_json_encoder *encoder, zend_refcounted *rc)
{
	const uintptr_t key = (uintptr_t) rc;

	if (!(GC_FLAGS(rc) & GC_IMMUTABLE)) {
		if (!zend_hash_index_add_empty_element(&encoder->recursive, key)) {
			return FAILURE;
		}
		/* Hold a reference for as long as the entry stays in the table. */
		GC_ADDREF(rc);
	}

	return SUCCESS;
}
|
||
static inline void php_json_unprotect_recursion(php_json_encoder *encoder, zend_refcounted *rc) | ||
{ | ||
if (GC_FLAGS(rc) & GC_IMMUTABLE) { | ||
return; | ||
} | ||
GC_DELREF(rc); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, the original code incremented and later decremented the reference count of the properties table, to prevent the properties table from being freed during iteration. Now that we're referencing the object itself, your PR calls GC_ADDREF and GC_DELREF to prevent the object from being freed during iteration. If this PR were to be reopened, or if someone were to base code on this in the future, would it need to check whether GC_DELREF returns 0 and free the array/object in question, to avoid leaks if jsonSerialize removed the last reference to a value as a side effect? I.e. call rc_dtor_func if the count is unexpectedly 0.
|
||
zend_hash_index_del(&encoder->recursive, (uintptr_t) rc); | ||
} | ||
|
||
static int php_json_encode_array(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* {{{ */ | ||
{ | ||
|
@@ -126,14 +133,12 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso | |
zval *prop; | ||
int i; | ||
|
||
if (GC_IS_RECURSIVE(obj)) { | ||
if (php_json_protect_recursion(encoder, (zend_refcounted *) obj) == FAILURE) { | ||
encoder->error_code = PHP_JSON_ERROR_RECURSION; | ||
smart_str_appendl(buf, "null", 4); | ||
return FAILURE; | ||
} | ||
|
||
PHP_JSON_HASH_PROTECT_RECURSION(obj); | ||
|
||
smart_str_appendc(buf, '{'); | ||
|
||
++encoder->depth; | ||
|
@@ -174,12 +179,12 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso | |
|
||
if (php_json_encode_zval(buf, prop, options, encoder) == FAILURE && | ||
!(options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR)) { | ||
PHP_JSON_HASH_UNPROTECT_RECURSION(obj); | ||
php_json_unprotect_recursion(encoder, (zend_refcounted *) obj); | ||
return FAILURE; | ||
} | ||
} | ||
|
||
PHP_JSON_HASH_UNPROTECT_RECURSION(obj); | ||
php_json_unprotect_recursion(encoder, (zend_refcounted *) obj); | ||
if (encoder->depth > encoder->max_depth) { | ||
encoder->error_code = PHP_JSON_ERROR_DEPTH; | ||
if (!(options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR)) { | ||
|
@@ -199,15 +204,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso | |
r = PHP_JSON_OUTPUT_OBJECT; | ||
} | ||
|
||
if (myht && GC_IS_RECURSIVE(myht)) { | ||
if (myht && php_json_protect_recursion(encoder, (zend_refcounted *) myht)) { | ||
encoder->error_code = PHP_JSON_ERROR_RECURSION; | ||
smart_str_appendl(buf, "null", 4); | ||
zend_release_properties(prop_ht); | ||
return FAILURE; | ||
} | ||
|
||
PHP_JSON_HASH_PROTECT_RECURSION(myht); | ||
|
||
if (r == PHP_JSON_OUTPUT_ARRAY) { | ||
smart_str_appendc(buf, '['); | ||
} else { | ||
|
@@ -277,14 +280,18 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso | |
|
||
if (php_json_encode_zval(buf, data, options, encoder) == FAILURE && | ||
!(options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR)) { | ||
PHP_JSON_HASH_UNPROTECT_RECURSION(myht); | ||
if (myht) { | ||
php_json_unprotect_recursion(encoder, (zend_refcounted *) myht); | ||
} | ||
zend_release_properties(prop_ht); | ||
return FAILURE; | ||
} | ||
} ZEND_HASH_FOREACH_END(); | ||
} | ||
|
||
PHP_JSON_HASH_UNPROTECT_RECURSION(myht); | ||
if (myht) { | ||
php_json_unprotect_recursion(encoder, (zend_refcounted *) myht); | ||
} | ||
|
||
if (encoder->depth > encoder->max_depth) { | ||
encoder->error_code = PHP_JSON_ERROR_DEPTH; | ||
|
@@ -529,20 +536,17 @@ static int php_json_escape_string( | |
static int php_json_encode_serializable_object(smart_str *buf, zval *val, int options, php_json_encoder *encoder) /* {{{ */ | ||
{ | ||
zend_class_entry *ce = Z_OBJCE_P(val); | ||
HashTable* myht = Z_OBJPROP_P(val); | ||
zval retval, fname; | ||
int return_code; | ||
|
||
if (myht && GC_IS_RECURSIVE(myht)) { | ||
if (php_json_protect_recursion(encoder, Z_COUNTED_P(val)) == FAILURE) { | ||
encoder->error_code = PHP_JSON_ERROR_RECURSION; | ||
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { | ||
smart_str_appendl(buf, "null", 4); | ||
} | ||
return FAILURE; | ||
} | ||
|
||
PHP_JSON_HASH_PROTECT_RECURSION(myht); | ||
|
||
ZVAL_STRING(&fname, "jsonSerialize"); | ||
|
||
if (FAILURE == call_user_function(NULL, val, &fname, &retval, 0, NULL) || Z_TYPE(retval) == IS_UNDEF) { | ||
|
@@ -554,7 +558,7 @@ static int php_json_encode_serializable_object(smart_str *buf, zval *val, int op | |
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { | ||
smart_str_appendl(buf, "null", 4); | ||
} | ||
PHP_JSON_HASH_UNPROTECT_RECURSION(myht); | ||
php_json_unprotect_recursion(encoder, Z_COUNTED_P(val)); | ||
return FAILURE; | ||
} | ||
|
||
|
@@ -566,19 +570,19 @@ static int php_json_encode_serializable_object(smart_str *buf, zval *val, int op | |
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { | ||
smart_str_appendl(buf, "null", 4); | ||
} | ||
PHP_JSON_HASH_UNPROTECT_RECURSION(myht); | ||
php_json_unprotect_recursion(encoder, Z_COUNTED_P(val)); | ||
return FAILURE; | ||
} | ||
|
||
if ((Z_TYPE(retval) == IS_OBJECT) && | ||
(Z_OBJ(retval) == Z_OBJ_P(val))) { | ||
/* Handle the case where jsonSerialize does: return $this; by going straight to encode array */ | ||
PHP_JSON_HASH_UNPROTECT_RECURSION(myht); | ||
php_json_unprotect_recursion(encoder, Z_COUNTED_P(val)); | ||
return_code = php_json_encode_array(buf, &retval, options, encoder); | ||
} else { | ||
/* All other types, encode as normal */ | ||
return_code = php_json_encode_zval(buf, &retval, options, encoder); | ||
PHP_JSON_HASH_UNPROTECT_RECURSION(myht); | ||
php_json_unprotect_recursion(encoder, Z_COUNTED_P(val)); | ||
} | ||
|
||
zval_ptr_dtor(&retval); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,11 +26,20 @@ struct _php_json_encoder { | |
int depth; | ||
int max_depth; | ||
php_json_error_code error_code; | ||
HashTable recursive; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Separately from the previous comment, there's the question of whether the JSON recursion protection should be per-request (in request globals, set up and torn down in RINIT/RSHUTDOWN) rather than per call to the JSON encoder. My preference is for the former. E.g. a JsonSerializable::jsonSerialize implementation calling |
||
}; | ||
|
||
static inline void php_json_encode_init(php_json_encoder *encoder) | ||
{ | ||
memset(encoder, 0, sizeof(php_json_encoder)); | ||
encoder->depth = 0; | ||
encoder->max_depth = 0; | ||
encoder->error_code = 0; | ||
zend_hash_init(&encoder->recursive, 0, NULL, NULL, 0); | ||
} | ||
|
||
/*
 * Releases the encoder's `recursive` hash table, i.e. the table that
 * php_json_encode_init() sets up with zend_hash_init().
 */
static inline void php_json_encode_destroy(php_json_encoder *encoder)
{
	HashTable *tracked = &encoder->recursive;

	zend_hash_destroy(tracked);
}
|
||
int php_json_encode_zval(smart_str *buf, zval *val, int options, php_json_encoder *encoder); | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Leaving a note if anyone bases functionality on this in the future or restores this:
As I discovered in #7690, using pointers directly as hash indexes leads to a lot of hash collisions and performance issues. Shifting the pointer right by ZEND_MM_ALIGNED_OFFSET_LOG2 before using it as the index helps noticeably (and works with both malloc and emalloc).
(E.g. if 44-byte zend_array instances (on 64-bit platforms) are in practice aligned to 16 bytes by emalloc on a given platform, then the low 4 bits of every such pointer are zero, so all of them land in the same 1 out of every 16 hash buckets and collide.)