Skip to content

Commit 21422e8

Browse files
authored
Optimize unpack() for named fields (#6958)
Create name using either zend_init_string_fast (no repetitions) or by concatenating the name with zend_print_ulong_to_buf. This is much more efficient than using snprintf. We also avoid repeated strlen() calculations.
1 parent 4522dcb commit 21422e8

File tree

2 files changed

+106
-39
lines changed

2 files changed

+106
-39
lines changed

ext/standard/pack.c

Lines changed: 64 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -739,32 +739,32 @@ PHP_FUNCTION(unpack)
739739
while (formatlen-- > 0) {
740740
char type = *(format++);
741741
char c;
742-
int arg = 1, argb;
742+
int repetitions = 1, argb;
743743
char *name;
744744
int namelen;
745-
int size=0;
745+
int size = 0;
746746

747747
/* Handle format arguments if any */
748748
if (formatlen > 0) {
749749
c = *format;
750750

751751
if (c >= '0' && c <= '9') {
752-
arg = atoi(format);
752+
repetitions = atoi(format);
753753

754754
while (formatlen > 0 && *format >= '0' && *format <= '9') {
755755
format++;
756756
formatlen--;
757757
}
758758
} else if (c == '*') {
759-
arg = -1;
759+
repetitions = -1;
760760
format++;
761761
formatlen--;
762762
}
763763
}
764764

765765
/* Get of new value in array */
766766
name = format;
767-
argb = arg;
767+
argb = repetitions;
768768

769769
while (formatlen > 0 && *format != '/') {
770770
formatlen--;
@@ -780,9 +780,9 @@ PHP_FUNCTION(unpack)
780780
/* Never use any input */
781781
case 'X':
782782
size = -1;
783-
if (arg < 0) {
783+
if (repetitions < 0) {
784784
php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
785-
arg = 1;
785+
repetitions = 1;
786786
}
787787
break;
788788

@@ -793,14 +793,14 @@ PHP_FUNCTION(unpack)
793793
case 'a':
794794
case 'A':
795795
case 'Z':
796-
size = arg;
797-
arg = 1;
796+
size = repetitions;
797+
repetitions = 1;
798798
break;
799799

800800
case 'h':
801801
case 'H':
802-
size = (arg > 0) ? (arg + (arg % 2)) / 2 : arg;
803-
arg = 1;
802+
size = (repetitions > 0) ? (repetitions + (repetitions % 2)) / 2 : repetitions;
803+
repetitions = 1;
804804
break;
805805

806806
/* Use 1 byte of input */
@@ -870,18 +870,9 @@ PHP_FUNCTION(unpack)
870870
RETURN_FALSE;
871871
}
872872

873-
/* Do actual unpacking */
874-
for (i = 0; i != arg; i++ ) {
875-
/* Space for name + number, safe as namelen is ensured <= 200 */
876-
char n[256];
877873

878-
if (arg != 1 || namelen == 0) {
879-
/* Need to add element number to name */
880-
snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1);
881-
} else {
882-
/* Truncate name to next format code or end of string */
883-
snprintf(n, sizeof(n), "%.*s", namelen, name);
884-
}
874+
/* Do actual unpacking */
875+
for (i = 0; i != repetitions; i++ ) {
885876

886877
if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
887878
php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
@@ -890,6 +881,22 @@ PHP_FUNCTION(unpack)
890881
}
891882

892883
if ((inputpos + size) <= inputlen) {
884+
885+
zend_string* real_name;
886+
zval val;
887+
888+
if (repetitions == 1 && namelen > 0) {
889+
/* Use a part of the formatarg argument directly as the name. */
890+
real_name = zend_string_init_fast(name, namelen);
891+
892+
} else {
893+
/* Need to add the 1-based element number to the name */
894+
char buf[MAX_LENGTH_OF_LONG + 1];
895+
char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
896+
size_t digits = buf + sizeof(buf) - 1 - res;
897+
real_name = zend_string_concat2(name, namelen, res, digits);
898+
}
899+
893900
switch ((int) type) {
894901
case 'a': {
895902
/* a will not strip any trailing whitespace or null padding */
@@ -902,7 +909,8 @@ PHP_FUNCTION(unpack)
902909

903910
size = len;
904911

905-
add_assoc_stringl(return_value, n, &input[inputpos], len);
912+
ZVAL_STRINGL(&val, &input[inputpos], len);
913+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
906914
break;
907915
}
908916
case 'A': {
@@ -928,7 +936,8 @@ PHP_FUNCTION(unpack)
928936
break;
929937
}
930938

931-
add_assoc_stringl(return_value, n, &input[inputpos], len + 1);
939+
ZVAL_STRINGL(&val, &input[inputpos], len + 1);
940+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
932941
break;
933942
}
934943
/* New option added for Z to remain in-line with the Perl implementation */
@@ -952,7 +961,8 @@ PHP_FUNCTION(unpack)
952961
}
953962
len = s;
954963

955-
add_assoc_stringl(return_value, n, &input[inputpos], len);
964+
ZVAL_STRINGL(&val, &input[inputpos], len);
965+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
956966
break;
957967
}
958968

@@ -995,15 +1005,19 @@ PHP_FUNCTION(unpack)
9951005
}
9961006

9971007
ZSTR_VAL(buf)[len] = '\0';
998-
add_assoc_str(return_value, n, buf);
1008+
1009+
ZVAL_STR(&val, buf);
1010+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
9991011
break;
10001012
}
10011013

10021014
case 'c': /* signed */
10031015
case 'C': { /* unsigned */
10041016
uint8_t x = input[inputpos];
10051017
zend_long v = (type == 'c') ? (int8_t) x : x;
1006-
add_assoc_long(return_value, n, v);
1018+
1019+
ZVAL_LONG(&val, v);
1020+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
10071021
break;
10081022
}
10091023

@@ -1022,15 +1036,18 @@ PHP_FUNCTION(unpack)
10221036
v = x;
10231037
}
10241038

1025-
add_assoc_long(return_value, n, v);
1039+
ZVAL_LONG(&val, v);
1040+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
10261041
break;
10271042
}
10281043

10291044
case 'i': /* signed integer, machine size, machine endian */
10301045
case 'I': { /* unsigned integer, machine size, machine endian */
10311046
unsigned int x = *((unaligned_uint*) &input[inputpos]);
10321047
zend_long v = (type == 'i') ? (int) x : x;
1033-
add_assoc_long(return_value, n, v);
1048+
1049+
ZVAL_LONG(&val, v);
1050+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
10341051
break;
10351052
}
10361053

@@ -1049,7 +1066,9 @@ PHP_FUNCTION(unpack)
10491066
v = x;
10501067
}
10511068

1052-
add_assoc_long(return_value, n, v);
1069+
ZVAL_LONG(&val, v);
1070+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1071+
10531072
break;
10541073
}
10551074

@@ -1069,7 +1088,8 @@ PHP_FUNCTION(unpack)
10691088
v = x;
10701089
}
10711090

1072-
add_assoc_long(return_value, n, v);
1091+
ZVAL_LONG(&val, v);
1092+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
10731093
break;
10741094
}
10751095
#endif
@@ -1088,7 +1108,8 @@ PHP_FUNCTION(unpack)
10881108
memcpy(&v, &input[inputpos], sizeof(float));
10891109
}
10901110

1091-
add_assoc_double(return_value, n, (double)v);
1111+
ZVAL_DOUBLE(&val, v);
1112+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
10921113
break;
10931114
}
10941115

@@ -1105,7 +1126,9 @@ PHP_FUNCTION(unpack)
11051126
} else {
11061127
memcpy(&v, &input[inputpos], sizeof(double));
11071128
}
1108-
add_assoc_double(return_value, n, v);
1129+
1130+
ZVAL_DOUBLE(&val, v);
1131+
zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
11091132
break;
11101133
}
11111134

@@ -1116,33 +1139,35 @@ PHP_FUNCTION(unpack)
11161139
case 'X':
11171140
if (inputpos < size) {
11181141
inputpos = -size;
1119-
i = arg - 1; /* Break out of for loop */
1142+
i = repetitions - 1; /* Break out of for loop */
11201143

1121-
if (arg >= 0) {
1144+
if (repetitions >= 0) {
11221145
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
11231146
}
11241147
}
11251148
break;
11261149

11271150
case '@':
1128-
if (arg <= inputlen) {
1129-
inputpos = arg;
1151+
if (repetitions <= inputlen) {
1152+
inputpos = repetitions;
11301153
} else {
11311154
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
11321155
}
11331156

1134-
i = arg - 1; /* Done, break out of for loop */
1157+
i = repetitions - 1; /* Done, break out of for loop */
11351158
break;
11361159
}
11371160

1161+
zend_string_release(real_name);
1162+
11381163
inputpos += size;
11391164
if (inputpos < 0) {
11401165
if (size != -1) { /* only print warning if not working with * */
11411166
php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
11421167
}
11431168
inputpos = 0;
11441169
}
1145-
} else if (arg < 0) {
1170+
} else if (repetitions < 0) {
11461171
/* Reached end of input for '*' repeater */
11471172
break;
11481173
} else {
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
--TEST--
2+
test unpack() to array with named keys
3+
--FILE--
4+
<?php
5+
$str = pack('VVV', 0x00010203, 0x04050607, 0x08090a0b);
6+
print_r(unpack('Vaa/Vbb/Vcc', $str));
7+
print_r(unpack('V2aa/Vcc', $str));
8+
print_r(unpack('V3aa', $str));
9+
print_r(unpack('V*aa', $str));
10+
print_r(unpack('V*', $str));
11+
?>
12+
--EXPECT--
13+
Array
14+
(
15+
[aa] => 66051
16+
[bb] => 67438087
17+
[cc] => 134810123
18+
)
19+
Array
20+
(
21+
[aa1] => 66051
22+
[aa2] => 67438087
23+
[cc] => 134810123
24+
)
25+
Array
26+
(
27+
[aa1] => 66051
28+
[aa2] => 67438087
29+
[aa3] => 134810123
30+
)
31+
Array
32+
(
33+
[aa1] => 66051
34+
[aa2] => 67438087
35+
[aa3] => 134810123
36+
)
37+
Array
38+
(
39+
[1] => 66051
40+
[2] => 67438087
41+
[3] => 134810123
42+
)

0 commit comments

Comments
 (0)