Skip to content

Improve performance of WeakReference/WeakMap. Avoid hash collisions on pointers. #7690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 64 additions & 45 deletions Zend/zend_weakrefs.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,16 @@ typedef struct _zend_weakmap_iterator {
uint32_t ht_iter;
} zend_weakmap_iterator;

/* The EG(weakrefs) ht is a map from object address a tagged pointer, that may be one of
* zend_weakref*, zend_weakmap* or HashTable*. */
/* EG(weakrefs) is a map from a key corresponding to a zend_object pointer to all the WeakReference and/or WeakMap entries relating to that pointer.
*
* 1. For a single WeakReference,
* the HashTable's corresponding value's tag is a ZEND_WEAKREF_TAG_REF and the pointer is a singleton WeakReference instance (zend_weakref *) for that zend_object pointer (from WeakReference::create()).
* 2. For a single WeakMap, the HashTable's corresponding value's tag is a ZEND_WEAKREF_TAG_MAP and the pointer is a WeakMap instance (zend_weakmap *).
* 3. For multiple values associated with the same zend_object pointer, the HashTable entry's tag is a ZEND_WEAKREF_TAG_HT with a HashTable mapping
* tagged pointers of at most 1 WeakReference and 1 or more WeakMaps to the same tagged pointer.
*
* ZEND_MM_ALIGNED_OFFSET_LOG2 is at least 2 on supported architectures (pointers to the objects in question are aligned to 4 bytes (1<<2) even on 32-bit systems),
* i.e. the least two significant bits of the pointer can be used as a tag (ZEND_WEAKREF_TAG_*). */
#define ZEND_WEAKREF_TAG_REF 0
#define ZEND_WEAKREF_TAG_MAP 1
#define ZEND_WEAKREF_TAG_HT 2
Expand All @@ -56,38 +64,40 @@ static zend_object_handlers zend_weakmap_handlers;
#define zend_weakmap_fetch(z) zend_weakmap_from(Z_OBJ_P(z))

static inline void zend_weakref_unref_single(
void *ptr, uintptr_t tag, zend_ulong obj_addr)
void *ptr, uintptr_t tag, zend_object *object)
{
if (tag == ZEND_WEAKREF_TAG_REF) {
/* Unreferencing WeakReference (at ptr) singleton that pointed to object. */
zend_weakref *wr = ptr;
wr->referent = NULL;
} else {
/* unreferencing WeakMap entry (at ptr) with a key of object. */
ZEND_ASSERT(tag == ZEND_WEAKREF_TAG_MAP);
zend_hash_index_del((HashTable *) ptr, obj_addr);
zend_hash_index_del((HashTable *) ptr, zend_object_to_weakref_key(object));
}
}

static void zend_weakref_unref(zend_ulong obj_addr, void *tagged_ptr) {
static void zend_weakref_unref(zend_object *object, void *tagged_ptr) {
void *ptr = ZEND_WEAKREF_GET_PTR(tagged_ptr);
uintptr_t tag = ZEND_WEAKREF_GET_TAG(tagged_ptr);
if (tag == ZEND_WEAKREF_TAG_HT) {
HashTable *ht = ptr;
ZEND_HASH_MAP_FOREACH_PTR(ht, tagged_ptr) {
zend_weakref_unref_single(
ZEND_WEAKREF_GET_PTR(tagged_ptr), ZEND_WEAKREF_GET_TAG(tagged_ptr), obj_addr);
ZEND_WEAKREF_GET_PTR(tagged_ptr), ZEND_WEAKREF_GET_TAG(tagged_ptr), object);
} ZEND_HASH_FOREACH_END();
zend_hash_destroy(ht);
FREE_HASHTABLE(ht);
} else {
zend_weakref_unref_single(ptr, tag, obj_addr);
zend_weakref_unref_single(ptr, tag, object);
}
}

static void zend_weakref_register(zend_object *object, void *payload) {
GC_ADD_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);

zend_ulong obj_addr = (zend_ulong) object;
zval *zv = zend_hash_index_lookup(&EG(weakrefs), obj_addr);
zend_ulong obj_key = zend_object_to_weakref_key(object);
zval *zv = zend_hash_index_lookup(&EG(weakrefs), obj_key);
if (Z_TYPE_P(zv) == IS_NULL) {
ZVAL_PTR(zv, payload);
return;
Expand All @@ -105,25 +115,28 @@ static void zend_weakref_register(zend_object *object, void *payload) {
zend_hash_init(ht, 0, NULL, NULL, 0);
zend_hash_index_add_new_ptr(ht, (zend_ulong) tagged_ptr, tagged_ptr);
zend_hash_index_add_new_ptr(ht, (zend_ulong) payload, payload);
zend_hash_index_update_ptr(
&EG(weakrefs), obj_addr, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_HT));
/* Replace the single WeakMap or WeakReference entry in EG(weakrefs) with a HashTable with 2 entries in place. */
ZVAL_PTR(zv, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_HT));
}

static void zend_weakref_unregister(zend_object *object, void *payload, bool weakref_free) {
zend_ulong obj_addr = (zend_ulong) object;
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_addr);
zend_ulong obj_key = zend_object_to_weakref_key(object);
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_key);
ZEND_ASSERT(tagged_ptr && "Weakref not registered?");

void *ptr = ZEND_WEAKREF_GET_PTR(tagged_ptr);
uintptr_t tag = ZEND_WEAKREF_GET_TAG(tagged_ptr);
if (tag != ZEND_WEAKREF_TAG_HT) {
ZEND_ASSERT(tagged_ptr == payload);
zend_hash_index_del(&EG(weakrefs), obj_addr);
zend_hash_index_del(&EG(weakrefs), obj_key);
GC_DEL_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);

/* Do this last, as it may destroy the object. */
if (weakref_free) {
zend_weakref_unref_single(ptr, tag, obj_addr);
zend_weakref_unref_single(ptr, tag, object);
} else {
/* The optimization of skipping unref is only used in the destructor of WeakMap */
ZEND_ASSERT(ZEND_WEAKREF_GET_TAG(payload) == ZEND_WEAKREF_TAG_MAP);
}
return;
}
Expand All @@ -139,26 +152,29 @@ static void zend_weakref_unregister(zend_object *object, void *payload, bool wea
GC_DEL_FLAGS(object, IS_OBJ_WEAKLY_REFERENCED);
zend_hash_destroy(ht);
FREE_HASHTABLE(ht);
zend_hash_index_del(&EG(weakrefs), obj_addr);
zend_hash_index_del(&EG(weakrefs), obj_key);
}

/* Do this last, as it may destroy the object. */
if (weakref_free) {
zend_weakref_unref_single(
ZEND_WEAKREF_GET_PTR(payload), ZEND_WEAKREF_GET_TAG(payload), obj_addr);
ZEND_WEAKREF_GET_PTR(payload), ZEND_WEAKREF_GET_TAG(payload), object);
} else {
/* The optimization of skipping unref is only used in the destructor of WeakMap */
ZEND_ASSERT(ZEND_WEAKREF_GET_TAG(payload) == ZEND_WEAKREF_TAG_MAP);
}
}

ZEND_API zval *zend_weakrefs_hash_add(HashTable *ht, zend_object *key, zval *pData) {
zval *zv = zend_hash_index_add(ht, (zend_ulong) key, pData);
zval *zv = zend_hash_index_add(ht, zend_object_to_weakref_key(key), pData);
if (zv) {
zend_weakref_register(key, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_MAP));
}
return zv;
}

ZEND_API zend_result zend_weakrefs_hash_del(HashTable *ht, zend_object *key) {
zval *zv = zend_hash_index_find(ht, (zend_ulong) key);
zval *zv = zend_hash_index_find(ht, zend_object_to_weakref_key(key));
if (zv) {
zend_weakref_unregister(key, ZEND_WEAKREF_ENCODE(ht, ZEND_WEAKREF_TAG_MAP), 1);
return SUCCESS;
Expand All @@ -170,17 +186,19 @@ void zend_weakrefs_init(void) {
zend_hash_init(&EG(weakrefs), 8, NULL, NULL, 0);
}

/* This is called when the object is garbage collected
* to remove all WeakReference and WeakMap entries weakly referencing that object. */
void zend_weakrefs_notify(zend_object *object) {
/* Annoyingly we can't use the HT destructor here, because we need access to the key (which
* is the object address), which is not provided to the dtor. */
zend_ulong obj_addr = (zend_ulong) object;
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_addr);
const zend_ulong obj_key = zend_object_to_weakref_key(object);
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), obj_key);
#if ZEND_DEBUG
ZEND_ASSERT(tagged_ptr && "Tracking of the IS_OBJ_WEAKLY_REFERENCE flag should be precise");
#endif
if (tagged_ptr) {
zend_weakref_unref(obj_addr, tagged_ptr);
zend_hash_index_del(&EG(weakrefs), obj_addr);
zend_weakref_unref(object, tagged_ptr);
zend_hash_index_del(&EG(weakrefs), obj_key);
}
}

Expand All @@ -199,7 +217,7 @@ static zend_object* zend_weakref_new(zend_class_entry *ce) {
}

static zend_always_inline bool zend_weakref_find(zend_object *referent, zval *return_value) {
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), (zend_ulong) referent);
void *tagged_ptr = zend_hash_index_find_ptr(&EG(weakrefs), zend_object_to_weakref_key(referent));
if (!tagged_ptr) {
return 0;
}
Expand Down Expand Up @@ -295,13 +313,13 @@ static zend_object *zend_weakmap_create_object(zend_class_entry *ce)
static void zend_weakmap_free_obj(zend_object *object)
{
zend_weakmap *wm = zend_weakmap_from(object);
zend_ulong obj_addr;
ZEND_HASH_MAP_FOREACH_NUM_KEY(&wm->ht, obj_addr) {
zend_ulong obj_key;
ZEND_HASH_MAP_FOREACH_NUM_KEY(&wm->ht, obj_key) {
/* Optimization: Don't call zend_weakref_unref_single to free individual entries from wm->ht when unregistering (which would do a hash table lookup, call zend_hash_index_del, and skip over any bucket collisions).
* Let freeing the corresponding values for WeakMap entries be done in zend_hash_destroy, freeing objects sequentially.
* The performance difference is notable for larger WeakMaps with worse cache locality. */
zend_weakref_unregister(
(zend_object *) obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 0);
zend_weakref_key_to_object(obj_key), ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 0);
} ZEND_HASH_FOREACH_END();
zend_hash_destroy(&wm->ht);
zend_object_std_dtor(&wm->std);
Expand All @@ -320,12 +338,12 @@ static zval *zend_weakmap_read_dimension(zend_object *object, zval *offset, int
}

zend_weakmap *wm = zend_weakmap_from(object);
zend_object *obj_key = Z_OBJ_P(offset);
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) obj_key);
zend_object *obj_addr = Z_OBJ_P(offset);
zval *zv = zend_hash_index_find(&wm->ht, zend_object_to_weakref_key(obj_addr));
if (zv == NULL) {
if (type != BP_VAR_IS) {
zend_throw_error(NULL,
"Object %s#%d not contained in WeakMap", ZSTR_VAL(obj_key->ce->name), obj_key->handle);
"Object %s#%d not contained in WeakMap", ZSTR_VAL(obj_addr->ce->name), obj_addr->handle);
return NULL;
}
return NULL;
Expand All @@ -350,10 +368,11 @@ static void zend_weakmap_write_dimension(zend_object *object, zval *offset, zval
}

zend_weakmap *wm = zend_weakmap_from(object);
zend_object *obj_key = Z_OBJ_P(offset);
zend_object *obj_addr = Z_OBJ_P(offset);
zend_ulong obj_key = zend_object_to_weakref_key(obj_addr);
Z_TRY_ADDREF_P(value);

zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) obj_key);
zval *zv = zend_hash_index_find(&wm->ht, obj_key);
if (zv) {
/* Because the destructors can have side effects such as resizing or rehashing the WeakMap storage,
* free the zval only after overwriting the original value. */
Expand All @@ -364,8 +383,8 @@ static void zend_weakmap_write_dimension(zend_object *object, zval *offset, zval
return;
}

zend_weakref_register(obj_key, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP));
zend_hash_index_add_new(&wm->ht, (zend_ulong) obj_key, value);
zend_weakref_register(obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP));
zend_hash_index_add_new(&wm->ht, obj_key, value);
}

/* int return and check_empty due to Object Handler API */
Expand All @@ -377,7 +396,7 @@ static int zend_weakmap_has_dimension(zend_object *object, zval *offset, int che
}

zend_weakmap *wm = zend_weakmap_from(object);
zval *zv = zend_hash_index_find(&wm->ht, (zend_ulong) Z_OBJ_P(offset));
zval *zv = zend_hash_index_find(&wm->ht, zend_object_to_weakref_key(Z_OBJ_P(offset)));
if (!zv) {
return 0;
}
Expand All @@ -396,13 +415,13 @@ static void zend_weakmap_unset_dimension(zend_object *object, zval *offset)
}

zend_weakmap *wm = zend_weakmap_from(object);
zend_object *obj_key = Z_OBJ_P(offset);
if (!zend_hash_index_exists(&wm->ht, (zend_ulong) Z_OBJ_P(offset))) {
zend_object *obj_addr = Z_OBJ_P(offset);
if (!zend_hash_index_exists(&wm->ht, zend_object_to_weakref_key(obj_addr))) {
/* Object not in WeakMap, do nothing. */
return;
}

zend_weakref_unregister(obj_key, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 1);
zend_weakref_unregister(obj_addr, ZEND_WEAKREF_ENCODE(&wm->ht, ZEND_WEAKREF_TAG_MAP), 1);
}

static int zend_weakmap_count_elements(zend_object *object, zend_long *count)
Expand All @@ -423,10 +442,10 @@ static HashTable *zend_weakmap_get_properties_for(zend_object *object, zend_prop
ALLOC_HASHTABLE(ht);
zend_hash_init(ht, zend_hash_num_elements(&wm->ht), NULL, ZVAL_PTR_DTOR, 0);

zend_ulong obj_addr;
zend_ulong obj_key;
zval *val;
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&wm->ht, obj_addr, val) {
zend_object *obj = (zend_object*)obj_addr;
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&wm->ht, obj_key, val) {
zend_object *obj = zend_weakref_key_to_object(obj_key);
zval pair;
array_init(&pair);

Expand Down Expand Up @@ -460,11 +479,11 @@ static zend_object *zend_weakmap_clone_obj(zend_object *old_object)
zend_weakmap *new_wm = zend_weakmap_from(new_object);
zend_hash_copy(&new_wm->ht, &old_wm->ht, NULL);

zend_ulong obj_addr;
zend_ulong obj_key;
zval *val;
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&new_wm->ht, obj_addr, val) {
ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(&new_wm->ht, obj_key, val) {
zend_weakref_register(
(zend_object *) obj_addr, ZEND_WEAKREF_ENCODE(new_wm, ZEND_WEAKREF_TAG_MAP));
zend_weakref_key_to_object(obj_key), ZEND_WEAKREF_ENCODE(new_wm, ZEND_WEAKREF_TAG_MAP));
zval_add_ref(val);
} ZEND_HASH_FOREACH_END();
return new_object;
Expand Down Expand Up @@ -511,7 +530,7 @@ static void zend_weakmap_iterator_get_current_key(zend_object_iterator *obj_iter
ZEND_ASSERT(0 && "Must have integer key");
}

ZVAL_OBJ_COPY(key, (zend_object *) num_key);
ZVAL_OBJ_COPY(key, zend_weakref_key_to_object(num_key));
}

static void zend_weakmap_iterator_move_forward(zend_object_iterator *obj_iter)
Expand Down
22 changes: 22 additions & 0 deletions Zend/zend_weakrefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#ifndef ZEND_WEAKREFS_H
#define ZEND_WEAKREFS_H

#include "zend_alloc.h"

BEGIN_EXTERN_C()

extern ZEND_API zend_class_entry *zend_ce_weakref;
Expand All @@ -40,6 +42,26 @@ static zend_always_inline void *zend_weakrefs_hash_add_ptr(HashTable *ht, zend_o
}
}

/* Because php uses the raw numbers as a hash function, raw pointers will lead to hash collisions.
* We have a guarantee that the lowest ZEND_MM_ALIGNED_OFFSET_LOG2 bits of a pointer are zero.
*
* E.g. On most 64-bit platforms, pointers are aligned to 8 bytes, so the least significant 3 bits are always 0 and can be discarded.
*
* NOTE: This function is only used for EG(weakrefs) and zend_weakmap->ht.
* It is not used for the HashTable instances associated with ZEND_WEAKREF_TAG_HT tags (created in zend_weakref_register, which uses ZEND_WEAKREF_ENCODE instead).
* The ZEND_WEAKREF_TAG_HT instances are used to disambiguate between multiple weak references to the same zend_object.
*/
static zend_always_inline zend_ulong zend_object_to_weakref_key(const zend_object *object)
{
ZEND_ASSERT(((uintptr_t)object) % ZEND_MM_ALIGNMENT == 0);
return ((uintptr_t) object) >> ZEND_MM_ALIGNMENT_LOG2;
}

static zend_always_inline zend_object *zend_weakref_key_to_object(zend_ulong key)
{
return (zend_object *) (((uintptr_t) key) << ZEND_MM_ALIGNMENT_LOG2);
}

END_EXTERN_C()

#endif
Expand Down
6 changes: 3 additions & 3 deletions ext/zend_test/test.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,9 @@ PHP_RINIT_FUNCTION(zend_test)

PHP_RSHUTDOWN_FUNCTION(zend_test)
{
zend_ulong objptr;
ZEND_HASH_FOREACH_NUM_KEY(&ZT_G(global_weakmap), objptr) {
zend_weakrefs_hash_del(&ZT_G(global_weakmap), (zend_object *)(uintptr_t)objptr);
zend_ulong obj_key;
ZEND_HASH_FOREACH_NUM_KEY(&ZT_G(global_weakmap), obj_key) {
zend_weakrefs_hash_del(&ZT_G(global_weakmap), zend_weakref_key_to_object(obj_key));
} ZEND_HASH_FOREACH_END();
zend_hash_destroy(&ZT_G(global_weakmap));
return SUCCESS;
Expand Down