Skip to content

Commit efe79e0

Browse files
authored
Simplify unpack logic (#6908)
- move endianness check to compile time - remove php_unpack function - the compiler takes care of sign extension
1 parent 66ad709 commit efe79e0

File tree

1 file changed

+55
-96
lines changed

1 file changed

+55
-96
lines changed

ext/standard/pack.c

Lines changed: 55 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,16 @@
5252
} \
5353
outputpos += (a)*(b);
5454

55-
/* Whether machine is little endian */
56-
char machine_little_endian;
55+
#ifdef WORDS_BIGENDIAN
56+
#define MACHINE_LITTLE_ENDIAN 0
57+
#else
58+
#define MACHINE_LITTLE_ENDIAN 1
59+
#endif
60+
61+
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
62+
typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
63+
typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
64+
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
5765

5866
/* Mapping of byte from char (8bit) to long for machine endian */
5967
static int byte_map[1];
@@ -93,6 +101,11 @@ static void php_pack(zval *val, size_t size, int *map, char *output)
93101
}
94102
/* }}} */
95103

104+
static inline uint16_t php_pack_reverse_int16(uint16_t arg)
105+
{
106+
return ((arg & 0xFF) << 8) | ((arg >> 8) & 0xFF);
107+
}
108+
96109
/* {{{ php_pack_reverse_int32 */
97110
static inline uint32_t php_pack_reverse_int32(uint32_t arg)
98111
{
@@ -677,23 +690,6 @@ PHP_FUNCTION(pack)
677690
}
678691
/* }}} */
679692

680-
/* {{{ php_unpack */
681-
static zend_long php_unpack(char *data, size_t size, int issigned, int *map)
682-
{
683-
zend_long result;
684-
char *cresult = (char *) &result;
685-
size_t i;
686-
687-
result = issigned ? -1 : 0;
688-
689-
for (i = 0; i < size; i++) {
690-
cresult[map[i]] = *data++;
691-
}
692-
693-
return result;
694-
}
695-
/* }}} */
696-
697693
/* unpack() is based on Perl's unpack(), but is modified a bit from there.
698694
* Rather than depending on error-prone ordered lists or syntactically
699695
* unpleasant pass-by-reference, we return an object with named parameters
@@ -1003,108 +999,74 @@ PHP_FUNCTION(unpack)
1003999
break;
10041000
}
10051001

1006-
case 'c':
1007-
case 'C': {
1008-
int issigned = (type == 'c') ? (input[inputpos] & 0x80) : 0;
1009-
zend_long v = php_unpack(&input[inputpos], 1, issigned, byte_map);
1002+
case 'c': /* signed */
1003+
case 'C': { /* unsigned */
1004+
uint8_t x = input[inputpos];
1005+
zend_long v = (type == 'c') ? (int8_t) x : x;
10101006
add_assoc_long(return_value, n, v);
10111007
break;
10121008
}
10131009

1014-
case 's':
1015-
case 'S':
1016-
case 'n':
1017-
case 'v': {
1018-
zend_long v;
1019-
int issigned = 0;
1020-
int *map = machine_endian_short_map;
1010+
case 's': /* signed machine endian */
1011+
case 'S': /* unsigned machine endian */
1012+
case 'n': /* unsigned big endian */
1013+
case 'v': { /* unsigned little endian */
1014+
zend_long v = 0;
1015+
uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
10211016

10221017
if (type == 's') {
1023-
issigned = input[inputpos + (machine_little_endian ? 1 : 0)] & 0x80;
1024-
} else if (type == 'n') {
1025-
map = big_endian_short_map;
1026-
} else if (type == 'v') {
1027-
map = little_endian_short_map;
1018+
v = (int16_t) x;
1019+
} else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
1020+
v = php_pack_reverse_int16(x);
1021+
} else {
1022+
v = x;
10281023
}
10291024

1030-
v = php_unpack(&input[inputpos], 2, issigned, map);
10311025
add_assoc_long(return_value, n, v);
10321026
break;
10331027
}
10341028

1035-
case 'i':
1036-
case 'I': {
1037-
zend_long v;
1038-
int issigned = 0;
1039-
1040-
if (type == 'i') {
1041-
issigned = input[inputpos + (machine_little_endian ? (sizeof(int) - 1) : 0)] & 0x80;
1042-
}
1043-
1044-
v = php_unpack(&input[inputpos], sizeof(int), issigned, int_map);
1029+
case 'i': /* signed integer, machine size, machine endian */
1030+
case 'I': { /* unsigned integer, machine size, machine endian */
1031+
unsigned int x = *((unaligned_uint*) &input[inputpos]);
1032+
zend_long v = (type == 'i') ? (int) x : x;
10451033
add_assoc_long(return_value, n, v);
10461034
break;
10471035
}
10481036

1049-
case 'l':
1050-
case 'L':
1051-
case 'N':
1052-
case 'V': {
1053-
int issigned = 0;
1054-
int *map = machine_endian_long_map;
1037+
case 'l': /* signed machine endian */
1038+
case 'L': /* unsigned machine endian */
1039+
case 'N': /* unsigned big endian */
1040+
case 'V': { /* unsigned little endian */
10551041
zend_long v = 0;
1042+
uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
10561043

1057-
if (type == 'l' || type == 'L') {
1058-
issigned = input[inputpos + (machine_little_endian ? 3 : 0)] & 0x80;
1059-
} else if (type == 'N') {
1060-
issigned = input[inputpos] & 0x80;
1061-
map = big_endian_long_map;
1062-
} else if (type == 'V') {
1063-
issigned = input[inputpos + 3] & 0x80;
1064-
map = little_endian_long_map;
1065-
}
1066-
1067-
if (SIZEOF_ZEND_LONG > 4 && issigned) {
1068-
v = ~INT_MAX;
1044+
if (type == 'l') {
1045+
v = (int32_t) x;
1046+
} else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
1047+
v = php_pack_reverse_int32(x);
1048+
} else {
1049+
v = x;
10691050
}
10701051

1071-
v |= php_unpack(&input[inputpos], 4, issigned, map);
1072-
if (SIZEOF_ZEND_LONG > 4) {
1073-
if (type == 'l') {
1074-
v = (signed int) v;
1075-
} else {
1076-
v = (unsigned int) v;
1077-
}
1078-
}
10791052
add_assoc_long(return_value, n, v);
10801053
break;
10811054
}
10821055

10831056
#if SIZEOF_ZEND_LONG > 4
1084-
case 'q':
1085-
case 'Q':
1086-
case 'J':
1087-
case 'P': {
1088-
int issigned = 0;
1089-
int *map = machine_endian_longlong_map;
1057+
case 'q': /* signed machine endian */
1058+
case 'Q': /* unsigned machine endian */
1059+
case 'J': /* unsigned big endian */
1060+
case 'P': { /* unsigned little endian */
10901061
zend_long v = 0;
1091-
1092-
if (type == 'q' || type == 'Q') {
1093-
issigned = input[inputpos + (machine_little_endian ? 7 : 0)] & 0x80;
1094-
} else if (type == 'J') {
1095-
issigned = input[inputpos] & 0x80;
1096-
map = big_endian_longlong_map;
1097-
} else if (type == 'P') {
1098-
issigned = input[inputpos + 7] & 0x80;
1099-
map = little_endian_longlong_map;
1100-
}
1101-
1102-
v = php_unpack(&input[inputpos], 8, issigned, map);
1062+
uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
11031063

11041064
if (type == 'q') {
1105-
v = (zend_long) v;
1065+
v = (int64_t) x;
1066+
} else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
1067+
v = php_pack_reverse_int64(x);
11061068
} else {
1107-
v = (zend_ulong) v;
1069+
v = x;
11081070
}
11091071

11101072
add_assoc_long(return_value, n, v);
@@ -1201,12 +1163,9 @@ PHP_FUNCTION(unpack)
12011163
/* {{{ PHP_MINIT_FUNCTION */
12021164
PHP_MINIT_FUNCTION(pack)
12031165
{
1204-
int machine_endian_check = 1;
12051166
int i;
12061167

1207-
machine_little_endian = ((char *)&machine_endian_check)[0];
1208-
1209-
if (machine_little_endian) {
1168+
if (MACHINE_LITTLE_ENDIAN) {
12101169
/* Where to get lo to hi bytes from */
12111170
byte_map[0] = 0;
12121171

0 commit comments

Comments
 (0)