From 49a7db0c90cc7b286879f7d4ae9e191a4c5bcac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Mon, 6 Feb 2023 17:41:50 +0100 Subject: [PATCH 1/3] random: Add `max_offset` local to Randomizer::getBytesFromString() --- ext/random/randomizer.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/ext/random/randomizer.c b/ext/random/randomizer.c index 75d3e7fb6ddee..98925c63d1fc7 100644 --- a/ext/random/randomizer.c +++ b/ext/random/randomizer.c @@ -388,6 +388,7 @@ PHP_METHOD(Random_Randomizer, getBytesFromString) ZEND_PARSE_PARAMETERS_END(); const size_t source_length = ZSTR_LEN(source); + const size_t max_offset = source_length - 1; if (source_length < 1) { zend_argument_value_error(1, "cannot be empty"); @@ -401,9 +402,9 @@ PHP_METHOD(Random_Randomizer, getBytesFromString) retval = zend_string_alloc(length, 0); - if (source_length > 0x100) { + if (max_offset > 0xff) { while (total_size < length) { - uint64_t offset = randomizer->algo->range(randomizer->status, 0, source_length - 1); + uint64_t offset = randomizer->algo->range(randomizer->status, 0, max_offset); if (EG(exception)) { zend_string_free(retval); @@ -414,21 +415,21 @@ PHP_METHOD(Random_Randomizer, getBytesFromString) } } else { uint64_t mask; - if (source_length <= 0x1) { + if (max_offset < 0x1) { mask = 0x0; - } else if (source_length <= 0x2) { + } else if (max_offset < 0x2) { mask = 0x1; - } else if (source_length <= 0x4) { + } else if (max_offset < 0x4) { mask = 0x3; - } else if (source_length <= 0x8) { + } else if (max_offset < 0x8) { mask = 0x7; - } else if (source_length <= 0x10) { + } else if (max_offset < 0x10) { mask = 0xF; - } else if (source_length <= 0x20) { + } else if (max_offset < 0x20) { mask = 0x1F; - } else if (source_length <= 0x40) { + } else if (max_offset < 0x40) { mask = 0x3F; - } else if (source_length <= 0x80) { + } else if (max_offset < 0x80) { mask = 0x7F; } else { mask = 0xFF; @@ -445,7 +446,7 @@ PHP_METHOD(Random_Randomizer, getBytesFromString) for (size_t i = 0; i < randomizer->status->last_generated_size; i++) { uint64_t offset = (result >> (i * 8)) & mask; - if (offset >= source_length) { + if (offset > max_offset) { if (++failures > PHP_RANDOM_RANGE_ATTEMPTS) { zend_string_free(retval); zend_throw_error(random_ce_Random_BrokenRandomEngineError, "Failed to generate an acceptable random number in %d attempts", PHP_RANDOM_RANGE_ATTEMPTS); From 4925a1d06682f0a0f6b1d7ee6364f93846738d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Mon, 6 Feb 2023 18:37:20 +0100 Subject: [PATCH 2/3] random: Use branchless implementation for mask generation in Randomizer::getBytesFromString() This was benchmarked against clzl with a standalone script with random inputs and is slightly faster. clzl requires an additional branch to handle the source_length = 1 / max_offset = 0 case. --- ext/random/randomizer.c | 27 +++------- .../methods/getBytesFromString_fast_path.phpt | 50 +++++++++++++++++++ 2 files changed, 57 insertions(+), 20 deletions(-) diff --git a/ext/random/randomizer.c b/ext/random/randomizer.c index 98925c63d1fc7..6c46ef189a2d3 100644 --- a/ext/random/randomizer.c +++ b/ext/random/randomizer.c @@ -414,26 +414,13 @@ PHP_METHOD(Random_Randomizer, getBytesFromString) ZSTR_VAL(retval)[total_size++] = ZSTR_VAL(source)[offset]; } } else { - uint64_t mask; - if (max_offset < 0x1) { - mask = 0x0; - } else if (max_offset < 0x2) { - mask = 0x1; - } else if (max_offset < 0x4) { - mask = 0x3; - } else if (max_offset < 0x8) { - mask = 0x7; - } else if (max_offset < 0x10) { - mask = 0xF; - } else if (max_offset < 0x20) { - mask = 0x1F; - } else if (max_offset < 0x40) { - mask = 0x3F; - } else if (max_offset < 0x80) { - mask = 0x7F; - } else { - mask = 0xFF; - } + uint64_t mask = max_offset; + // Copy the top-most bit into all lower bits. + // Shifting by 4 is sufficient, because max_offset + // is guaranteed to be an 8-bit integer. + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; int failures = 0; while (total_size < length) { diff --git a/ext/random/tests/03_randomizer/methods/getBytesFromString_fast_path.phpt b/ext/random/tests/03_randomizer/methods/getBytesFromString_fast_path.phpt index fe8fcbeb3873a..84c8ec611db80 100644 --- a/ext/random/tests/03_randomizer/methods/getBytesFromString_fast_path.phpt +++ b/ext/random/tests/03_randomizer/methods/getBytesFromString_fast_path.phpt @@ -47,6 +47,32 @@ for ($i = 1; $i <= strlen($allBytes); $i *= 2) { echo PHP_EOL; } +// Test lengths that are one more than the powers of two. For these +// the maximum offset will be a power of two and thus a minimal number +// of bits will be set in the offset. +for ($i = 1; ($i + 1) <= strlen($allBytes); $i *= 2) { + $oneMore = $i + 1; + + echo "{$oneMore}:", PHP_EOL; + + $wrapper = new TestWrapperEngine($xoshiro); + $r = new Randomizer($wrapper); + $result = $r->getBytesFromString(substr($allBytes, 0, $oneMore), 20000); + + $count = []; + for ($j = 0; $j < strlen($result); $j++) { + $b = $result[$j]; + $count[ord($b)] ??= 0; + $count[ord($b)]++; + } + + // We expect that each possible value appears at least once, if + // not is is very likely that some bits were erroneously masked away. + var_dump(count($count)); + + echo PHP_EOL; +} + echo "Slow Path:", PHP_EOL; $wrapper = new TestWrapperEngine($xoshiro); @@ -107,6 +133,30 @@ int(128) int(2500) int(256) +2: +int(2) + +3: +int(3) + +5: +int(5) + +9: +int(9) + +17: +int(17) + +33: +int(33) + +65: +int(65) + +129: +int(129) + Slow Path: int(20000) int(256) From c50203cfb31bf086df953ed3d55ab0d7d778a292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20D=C3=BCsterhus?= Date: Tue, 7 Feb 2023 13:34:18 +0100 Subject: [PATCH 3/3] [ci skip] Improve comment for masking in Randomizer::getBytesFromString() --- ext/random/randomizer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/random/randomizer.c b/ext/random/randomizer.c index 6c46ef189a2d3..6248d97f5b1d6 100644 --- a/ext/random/randomizer.c +++ b/ext/random/randomizer.c @@ -417,7 +417,8 @@ PHP_METHOD(Random_Randomizer, getBytesFromString) uint64_t mask = max_offset; // Copy the top-most bit into all lower bits. // Shifting by 4 is sufficient, because max_offset - // is guaranteed to be an 8-bit integer. + // is guaranteed to fit in an 8-bit integer at this + // point. mask |= mask >> 1; mask |= mask >> 2; mask |= mask >> 4;