From 784629c9343b1e1f1e68560f18f5463d712f765a Mon Sep 17 00:00:00 2001 From: K Date: Sun, 25 Apr 2021 12:33:23 +0200 Subject: [PATCH 1/4] simplify unpack logic - move endiannes check to compile time - remove php_unpack function - the compiler take care of sign extension --- ext/standard/pack.c | 135 +++++++++++++++----------------------------- 1 file changed, 45 insertions(+), 90 deletions(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index b14eb64d2923..fee6f92740fb 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -55,6 +55,8 @@ /* Whether machine is little endian */ char machine_little_endian; +#define IS_LITTLE_ENDIAN (*(unsigned char *)&(uint16_t){1}) + /* Mapping of byte from char (8bit) to long for machine endian */ static int byte_map[1]; @@ -677,23 +679,6 @@ PHP_FUNCTION(pack) } /* }}} */ -/* {{{ php_unpack */ -static zend_long php_unpack(char *data, size_t size, int issigned, int *map) -{ - zend_long result; - char *cresult = (char *) &result; - size_t i; - - result = issigned ? -1 : 0; - - for (i = 0; i < size; i++) { - cresult[map[i]] = *data++; - } - - return result; -} -/* }}} */ - /* unpack() is based on Perl's unpack(), but is modified a bit from there. * Rather than depending on error-prone ordered lists or syntactically * unpleasant pass-by-reference, we return an object with named parameters @@ -1003,108 +988,78 @@ PHP_FUNCTION(unpack) break; } - case 'c': - case 'C': { - int issigned = (type == 'c') ? (input[inputpos] & 0x80) : 0; - zend_long v = php_unpack(&input[inputpos], 1, issigned, byte_map); + case 'c': /* signed */ + case 'C': { /* unsigned */ + uint8_t x = input[inputpos]; + zend_long v = (type == 'c') ? (int8_t) x : x; add_assoc_long(return_value, n, v); break; } - case 's': - case 'S': - case 'n': - case 'v': { - zend_long v; - int issigned = 0; - int *map = machine_endian_short_map; + case 's': /* signed machine endian */ + case 'S': /* unsigned machine endian */ + case 'n': /* unsigned big endian */ + case 'v': { /* unsigned little endian */ + zend_long v = 0; + uint16_t x; + memcpy(&x, &input[inputpos], 2); if (type == 's') { - issigned = input[inputpos + (machine_little_endian ? 1 : 0)] & 0x80; - } else if (type == 'n') { - map = big_endian_short_map; - } else if (type == 'v') { - map = little_endian_short_map; + v = (int16_t) x; + } else if ((type == 'n' && IS_LITTLE_ENDIAN) || (type == 'v' && !IS_LITTLE_ENDIAN)) { + v = php_pack_reverse_int32(x); + } else { + v = x; } - v = php_unpack(&input[inputpos], 2, issigned, map); add_assoc_long(return_value, n, v); break; } - case 'i': - case 'I': { - zend_long v; - int issigned = 0; - - if (type == 'i') { - issigned = input[inputpos + (machine_little_endian ? (sizeof(int) - 1) : 0)] & 0x80; - } - - v = php_unpack(&input[inputpos], sizeof(int), issigned, int_map); + case 'i': /* signed integer, machine size, machine endian */ + case 'I': { /* unsigned integer, machine size, machine endian */ + unsigned int x; + memcpy(&x, &input[inputpos], sizeof(unsigned int)); + zend_long v = (type == 'i') ? (int) x : x; add_assoc_long(return_value, n, v); break; } - case 'l': - case 'L': - case 'N': - case 'V': { - int issigned = 0; - int *map = machine_endian_long_map; + case 'l': /* signed machine endian */ + case 'L': /* unsigned machine endian */ + case 'N': /* unsigned big endian */ + case 'V': { /* unsigned little endian */ zend_long v = 0; + uint32_t x; + memcpy(&x, &input[inputpos], 4); - if (type == 'l' || type == 'L') { - issigned = input[inputpos + (machine_little_endian ? 3 : 0)] & 0x80; - } else if (type == 'N') { - issigned = input[inputpos] & 0x80; - map = big_endian_long_map; - } else if (type == 'V') { - issigned = input[inputpos + 3] & 0x80; - map = little_endian_long_map; - } - - if (SIZEOF_ZEND_LONG > 4 && issigned) { - v = ~INT_MAX; + if (type == 'l') { + v = (int32_t) x; + } else if ((type == 'N' && IS_LITTLE_ENDIAN) || (type == 'V' && !IS_LITTLE_ENDIAN)) { + v = php_pack_reverse_int32(x); + } else { + v = x; } - v |= php_unpack(&input[inputpos], 4, issigned, map); - if (SIZEOF_ZEND_LONG > 4) { - if (type == 'l') { - v = (signed int) v; - } else { - v = (unsigned int) v; - } - } add_assoc_long(return_value, n, v); break; } #if SIZEOF_ZEND_LONG > 4 - case 'q': - case 'Q': - case 'J': - case 'P': { - int issigned = 0; - int *map = machine_endian_longlong_map; + case 'q': /* signed machine endian */ + case 'Q': /* unsigned machine endian */ + case 'J': /* unsigned big endian */ + case 'P': { /* unsigned little endian */ zend_long v = 0; - - if (type == 'q' || type == 'Q') { - issigned = input[inputpos + (machine_little_endian ? 7 : 0)] & 0x80; - } else if (type == 'J') { - issigned = input[inputpos] & 0x80; - map = big_endian_longlong_map; - } else if (type == 'P') { - issigned = input[inputpos + 7] & 0x80; - map = little_endian_longlong_map; - } - - v = php_unpack(&input[inputpos], 8, issigned, map); + uint64_t x; + memcpy(&x, &input[inputpos], 8); if (type == 'q') { - v = (zend_long) v; + v = (int64_t) x; + } else if ((type == 'J' && IS_LITTLE_ENDIAN) || (type == 'P' && !IS_LITTLE_ENDIAN)) { + v = php_pack_reverse_int64(x); } else { - v = (zend_ulong) v; + v = x; } add_assoc_long(return_value, n, v); From c2c344d42106d40a74f316ce9308262592463945 Mon Sep 17 00:00:00 2001 From: K Date: Sun, 25 Apr 2021 14:34:22 +0200 Subject: [PATCH 2/4] fix --- ext/standard/pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index fee6f92740fb..dd3bb0cb56b2 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -1007,7 +1007,7 @@ PHP_FUNCTION(unpack) if (type == 's') { v = (int16_t) x; } else if ((type == 'n' && IS_LITTLE_ENDIAN) || (type == 'v' && !IS_LITTLE_ENDIAN)) { - v = php_pack_reverse_int32(x); + v = php_pack_reverse_int32(x) >> 16; } else { v = x; } From cd0b69ee802f402383c39873d5fc6b3f792eb678 Mon Sep 17 00:00:00 2001 From: K Date: Sun, 25 Apr 2021 17:29:28 +0200 Subject: [PATCH 3/4] reuse WORDS_BIGENDIAN --- ext/standard/pack.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index dd3bb0cb56b2..df8f867f1ce4 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -52,10 +52,11 @@ } \ outputpos += (a)*(b); -/* Whether machine is little endian */ -char machine_little_endian; - -#define IS_LITTLE_ENDIAN (*(unsigned char *)&(uint16_t){1}) +#ifdef WORDS_BIGENDIAN +#define MACHINE_LITTLE_ENDIAN 0 +#else +#define MACHINE_LITTLE_ENDIAN 1 +#endif /* Mapping of byte from char (8bit) to long for machine endian */ static int byte_map[1]; @@ -1006,7 +1007,7 @@ PHP_FUNCTION(unpack) if (type == 's') { v = (int16_t) x; - } else if ((type == 'n' && IS_LITTLE_ENDIAN) || (type == 'v' && !IS_LITTLE_ENDIAN)) { + } else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) { v = php_pack_reverse_int32(x) >> 16; } else { v = x; @@ -1035,7 +1036,7 @@ PHP_FUNCTION(unpack) if (type == 'l') { v = (int32_t) x; - } else if ((type == 'N' && IS_LITTLE_ENDIAN) || (type == 'V' && !IS_LITTLE_ENDIAN)) { + } else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) { v = php_pack_reverse_int32(x); } else { v = x; @@ -1056,7 +1057,7 @@ PHP_FUNCTION(unpack) if (type == 'q') { v = (int64_t) x; - } else if ((type == 'J' && IS_LITTLE_ENDIAN) || (type == 'P' && !IS_LITTLE_ENDIAN)) { + } else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) { v = php_pack_reverse_int64(x); } else { v = x; @@ -1156,12 +1157,9 @@ PHP_FUNCTION(unpack) /* {{{ PHP_MINIT_FUNCTION */ PHP_MINIT_FUNCTION(pack) { - int machine_endian_check = 1; int i; - machine_little_endian = ((char *)&machine_endian_check)[0]; - - if (machine_little_endian) { + if (MACHINE_LITTLE_ENDIAN) { /* Where to get lo to hi bytes from */ byte_map[0] = 0; From 79fbfcbeb5507d7cc976b7cb1b9684bbd444ff45 Mon Sep 17 00:00:00 2001 From: K Date: Mon, 26 Apr 2021 17:13:15 +0200 Subject: [PATCH 4/4] unaligned loads and php_pack_reverse_int16 --- ext/standard/pack.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/ext/standard/pack.c b/ext/standard/pack.c index df8f867f1ce4..910fac51dacf 100644 --- a/ext/standard/pack.c +++ b/ext/standard/pack.c @@ -58,6 +58,11 @@ #define MACHINE_LITTLE_ENDIAN 1 #endif +typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t); +typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t); +typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t); +typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint); + /* Mapping of byte from char (8bit) to long for machine endian */ static int byte_map[1]; @@ -96,6 +101,11 @@ static void php_pack(zval *val, size_t size, int *map, char *output) } /* }}} */ +static inline uint16_t php_pack_reverse_int16(uint16_t arg) +{ + return ((arg & 0xFF) << 8) | ((arg >> 8) & 0xFF); +} + /* {{{ php_pack_reverse_int32 */ static inline uint32_t php_pack_reverse_int32(uint32_t arg) { @@ -1002,13 +1012,12 @@ PHP_FUNCTION(unpack) case 'n': /* unsigned big endian */ case 'v': { /* unsigned little endian */ zend_long v = 0; - uint16_t x; - memcpy(&x, &input[inputpos], 2); + uint16_t x = *((unaligned_uint16_t*) &input[inputpos]); if (type == 's') { v = (int16_t) x; } else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) { - v = php_pack_reverse_int32(x) >> 16; + v = php_pack_reverse_int16(x); } else { v = x; } @@ -1019,8 +1028,7 @@ PHP_FUNCTION(unpack) case 'i': /* signed integer, machine size, machine endian */ case 'I': { /* unsigned integer, machine size, machine endian */ - unsigned int x; - memcpy(&x, &input[inputpos], sizeof(unsigned int)); + unsigned int x = *((unaligned_uint*) &input[inputpos]); zend_long v = (type == 'i') ? (int) x : x; add_assoc_long(return_value, n, v); break; @@ -1031,8 +1039,7 @@ PHP_FUNCTION(unpack) case 'N': /* unsigned big endian */ case 'V': { /* unsigned little endian */ zend_long v = 0; - uint32_t x; - memcpy(&x, &input[inputpos], 4); + uint32_t x = *((unaligned_uint32_t*) &input[inputpos]); if (type == 'l') { v = (int32_t) x; @@ -1052,8 +1059,7 @@ PHP_FUNCTION(unpack) case 'J': /* unsigned big endian */ case 'P': { /* unsigned little endian */ zend_long v = 0; - uint64_t x; - memcpy(&x, &input[inputpos], 8); + uint64_t x = *((unaligned_uint64_t*) &input[inputpos]); if (type == 'q') { v = (int64_t) x;