Skip to content

Reduce HT_MAX_SIZE to account for the max load factor of 0.5 #10242

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Zend/zend_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ static zend_always_inline void zend_hash_real_init_mixed_ex(HashTable *ht)
void *data;
uint32_t nSize = ht->nTableSize;

ZEND_ASSERT(HT_SIZE_TO_MASK(nSize));

if (UNEXPECTED(GC_FLAGS(ht) & IS_ARRAY_PERSISTENT)) {
data = pemalloc(HT_SIZE_EX(nSize, HT_SIZE_TO_MASK(nSize)), 1);
} else if (EXPECTED(nSize == HT_MIN_SIZE)) {
Expand Down Expand Up @@ -341,6 +343,8 @@ ZEND_API void ZEND_FASTCALL zend_hash_packed_to_hash(HashTable *ht)
Bucket *old_buckets = ht->arData;
uint32_t nSize = ht->nTableSize;

ZEND_ASSERT(HT_SIZE_TO_MASK(nSize));

HT_ASSERT_RC1(ht);
HT_FLAGS(ht) &= ~HASH_FLAG_PACKED;
new_data = pemalloc(HT_SIZE_EX(nSize, HT_SIZE_TO_MASK(nSize)), GC_FLAGS(ht) & IS_ARRAY_PERSISTENT);
Expand Down Expand Up @@ -369,7 +373,11 @@ ZEND_API void ZEND_FASTCALL zend_hash_to_packed(HashTable *ht)
ZEND_API void ZEND_FASTCALL zend_hash_extend(HashTable *ht, uint32_t nSize, bool packed)
{
HT_ASSERT_RC1(ht);

if (nSize == 0) return;

ZEND_ASSERT(HT_SIZE_TO_MASK(nSize));

if (UNEXPECTED(HT_FLAGS(ht) & HASH_FLAG_UNINITIALIZED)) {
if (nSize > ht->nTableSize) {
ht->nTableSize = zend_hash_check_size(nSize);
Expand Down Expand Up @@ -1207,6 +1215,8 @@ static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht)
uint32_t nSize = ht->nTableSize + ht->nTableSize;
Bucket *old_buckets = ht->arData;

ZEND_ASSERT(HT_SIZE_TO_MASK(nSize));

ht->nTableSize = nSize;
new_data = pemalloc(HT_SIZE_EX(nSize, HT_SIZE_TO_MASK(nSize)), GC_FLAGS(ht) & IS_ARRAY_PERSISTENT);
ht->nTableMask = HT_SIZE_TO_MASK(ht->nTableSize);
Expand Down
13 changes: 10 additions & 3 deletions Zend/zend_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,16 +400,23 @@ struct _zend_array {
#define HT_MIN_MASK ((uint32_t) -2)
#define HT_MIN_SIZE 8

/* HT_MAX_SIZE is chosen to satisfy the following constraints:
* - HT_SIZE_TO_MASK(HT_MAX_SIZE) != 0
* - HT_SIZE_EX(HT_MAX_SIZE, HT_SIZE_TO_MASK(HT_MAX_SIZE)) does not overflow or
* wrapparound, and is <= the addressable space size
* - HT_MAX_SIZE must be a power of two:
* (nTableSize<HT_MAX_SIZE ? nTableSize+nTableSize : nTableSize) <= HT_MAX_SIZE
*/
#if SIZEOF_SIZE_T == 4
# define HT_MAX_SIZE 0x04000000 /* small enough to avoid overflow checks */
# define HT_MAX_SIZE 0x02000000
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is SSIZE_MAX/(sizeof(Bucket)+2*sizeof(uint32_t)) (0x3ffffff) rounded down to the closest power of two

# define HT_HASH_TO_BUCKET_EX(data, idx) \
((Bucket*)((char*)(data) + (idx)))
# define HT_IDX_TO_HASH(idx) \
((idx) * sizeof(Bucket))
# define HT_HASH_TO_IDX(idx) \
((idx) / sizeof(Bucket))
#elif SIZEOF_SIZE_T == 8
# define HT_MAX_SIZE 0x80000000
# define HT_MAX_SIZE 0x40000000
# define HT_HASH_TO_BUCKET_EX(data, idx) \
((data) + (idx))
# define HT_IDX_TO_HASH(idx) \
Expand All @@ -428,7 +435,7 @@ struct _zend_array {
#define HT_SIZE_TO_MASK(nTableSize) \
((uint32_t)(-((nTableSize) + (nTableSize))))
#define HT_HASH_SIZE(nTableMask) \
(((size_t)(uint32_t)-(int32_t)(nTableMask)) * sizeof(uint32_t))
(((size_t)-(uint32_t)(nTableMask)) * sizeof(uint32_t))
Comment on lines -431 to +438
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(int32_t)(nTableMask) is implementation defined when nTableMask has its max value (uint32_t)INT32_MAX+1.

I think that the (int32_t) cast is not necessary here, and that -(uint32_t)(nTableMask) gives the same result without being implementation defined.

#define HT_DATA_SIZE(nTableSize) \
((size_t)(nTableSize) * sizeof(Bucket))
#define HT_SIZE_EX(nTableSize, nTableMask) \
Expand Down