Skip to content

simplify unpack logic #6908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 6, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 55 additions & 96 deletions ext/standard/pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,16 @@
} \
outputpos += (a)*(b);

/* Whether machine is little endian */
char machine_little_endian;
#ifdef WORDS_BIGENDIAN
#define MACHINE_LITTLE_ENDIAN 0
#else
#define MACHINE_LITTLE_ENDIAN 1
#endif

typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);

/* Mapping of byte from char (8bit) to long for machine endian */
static int byte_map[1];
Expand Down Expand Up @@ -93,6 +101,11 @@ static void php_pack(zval *val, size_t size, int *map, char *output)
}
/* }}} */

static inline uint16_t php_pack_reverse_int16(uint16_t arg)
{
return ((arg & 0xFF) << 8) | ((arg >> 8) & 0xFF);
}

/* {{{ php_pack_reverse_int32 */
static inline uint32_t php_pack_reverse_int32(uint32_t arg)
{
Expand Down Expand Up @@ -677,23 +690,6 @@ PHP_FUNCTION(pack)
}
/* }}} */

/* {{{ php_unpack */
static zend_long php_unpack(char *data, size_t size, int issigned, int *map)
{
zend_long result;
char *cresult = (char *) &result;
size_t i;

result = issigned ? -1 : 0;

for (i = 0; i < size; i++) {
cresult[map[i]] = *data++;
}

return result;
}
/* }}} */

/* unpack() is based on Perl's unpack(), but is modified a bit from there.
* Rather than depending on error-prone ordered lists or syntactically
* unpleasant pass-by-reference, we return an object with named parameters
Expand Down Expand Up @@ -1003,108 +999,74 @@ PHP_FUNCTION(unpack)
break;
}

case 'c':
case 'C': {
int issigned = (type == 'c') ? (input[inputpos] & 0x80) : 0;
zend_long v = php_unpack(&input[inputpos], 1, issigned, byte_map);
case 'c': /* signed */
case 'C': { /* unsigned */
uint8_t x = input[inputpos];
zend_long v = (type == 'c') ? (int8_t) x : x;
add_assoc_long(return_value, n, v);
break;
}

case 's':
case 'S':
case 'n':
case 'v': {
zend_long v;
int issigned = 0;
int *map = machine_endian_short_map;
case 's': /* signed machine endian */
case 'S': /* unsigned machine endian */
case 'n': /* unsigned big endian */
case 'v': { /* unsigned little endian */
zend_long v = 0;
uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);

if (type == 's') {
issigned = input[inputpos + (machine_little_endian ? 1 : 0)] & 0x80;
} else if (type == 'n') {
map = big_endian_short_map;
} else if (type == 'v') {
map = little_endian_short_map;
v = (int16_t) x;
} else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
v = php_pack_reverse_int16(x);
} else {
v = x;
}

v = php_unpack(&input[inputpos], 2, issigned, map);
add_assoc_long(return_value, n, v);
break;
}

case 'i':
case 'I': {
zend_long v;
int issigned = 0;

if (type == 'i') {
issigned = input[inputpos + (machine_little_endian ? (sizeof(int) - 1) : 0)] & 0x80;
}

v = php_unpack(&input[inputpos], sizeof(int), issigned, int_map);
case 'i': /* signed integer, machine size, machine endian */
case 'I': { /* unsigned integer, machine size, machine endian */
unsigned int x = *((unaligned_uint*) &input[inputpos]);
zend_long v = (type == 'i') ? (int) x : x;
add_assoc_long(return_value, n, v);
break;
}

case 'l':
case 'L':
case 'N':
case 'V': {
int issigned = 0;
int *map = machine_endian_long_map;
case 'l': /* signed machine endian */
case 'L': /* unsigned machine endian */
case 'N': /* unsigned big endian */
case 'V': { /* unsigned little endian */
zend_long v = 0;
uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);

if (type == 'l' || type == 'L') {
issigned = input[inputpos + (machine_little_endian ? 3 : 0)] & 0x80;
} else if (type == 'N') {
issigned = input[inputpos] & 0x80;
map = big_endian_long_map;
} else if (type == 'V') {
issigned = input[inputpos + 3] & 0x80;
map = little_endian_long_map;
}

if (SIZEOF_ZEND_LONG > 4 && issigned) {
v = ~INT_MAX;
if (type == 'l') {
v = (int32_t) x;
} else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
v = php_pack_reverse_int32(x);
} else {
v = x;
}

v |= php_unpack(&input[inputpos], 4, issigned, map);
if (SIZEOF_ZEND_LONG > 4) {
if (type == 'l') {
v = (signed int) v;
} else {
v = (unsigned int) v;
}
}
add_assoc_long(return_value, n, v);
break;
}

#if SIZEOF_ZEND_LONG > 4
case 'q':
case 'Q':
case 'J':
case 'P': {
int issigned = 0;
int *map = machine_endian_longlong_map;
case 'q': /* signed machine endian */
case 'Q': /* unsigned machine endian */
case 'J': /* unsigned big endian */
case 'P': { /* unsigned little endian */
zend_long v = 0;

if (type == 'q' || type == 'Q') {
issigned = input[inputpos + (machine_little_endian ? 7 : 0)] & 0x80;
} else if (type == 'J') {
issigned = input[inputpos] & 0x80;
map = big_endian_longlong_map;
} else if (type == 'P') {
issigned = input[inputpos + 7] & 0x80;
map = little_endian_longlong_map;
}

v = php_unpack(&input[inputpos], 8, issigned, map);
uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);

if (type == 'q') {
v = (zend_long) v;
v = (int64_t) x;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Far from being a C expert but isn't the following:

Suggested change
v = (int64_t) x;
v = *(int64_t*) &x;

the correct way of type-punning? As IIRC signed/unsigned casts are UB if the value exceeds the range of a type.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For casts between integers, the existing code is fine. What you may have in mind is a bitwise cast between float and int.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, I double checked the standard and apparently unsigned to signed is UB when the value doesn't fit, but almost all compiler do two complement (according to https://unspecified.wordpress.com/category/c-3/ and if I read this correctly).

So should only be an issue on unicorn compilers

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The standard (6.3.1.3 §3) mentions this conversion produce a result which is implementation-defined, not UB.
Is it OK to keep it like this?

} else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
v = php_pack_reverse_int64(x);
} else {
v = (zend_ulong) v;
v = x;
}

add_assoc_long(return_value, n, v);
Expand Down Expand Up @@ -1201,12 +1163,9 @@ PHP_FUNCTION(unpack)
/* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(pack)
{
int machine_endian_check = 1;
int i;

machine_little_endian = ((char *)&machine_endian_check)[0];

if (machine_little_endian) {
if (MACHINE_LITTLE_ENDIAN) {
/* Where to get lo to hi bytes from */
byte_map[0] = 0;

Expand Down