Skip to content

Commit c1ce43d

Browse files
committed
php_json_escape_string() optimization
1 parent 52f92b5 commit c1ce43d

File tree

2 files changed

+69
-35
lines changed

2 files changed

+69
-35
lines changed

Zend/zend_smart_str.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
smart_str_appendl_ex((dest), (src), strlen(src), (what))
2828
#define smart_str_appends(dest, src) \
2929
smart_str_appendl((dest), (src), strlen(src))
30+
/* Shorthand for smart_str_extend_ex() with 0 passed as its third
 * (`persistent`) argument. Evaluates to whatever smart_str_extend_ex()
 * returns. */
#define smart_str_extend(dest, len) \
31+
smart_str_extend_ex((dest), (len), 0)
3032
#define smart_str_appendc(dest, c) \
3133
smart_str_appendc_ex((dest), (c), 0)
3234
#define smart_str_appendl(dest, src, len) \
@@ -71,6 +73,13 @@ static zend_always_inline size_t smart_str_alloc(smart_str *str, size_t len, zen
7173
return len;
7274
}
7375

76+
/*
 * Extends the length of `dest` and returns a pointer to the position where
 * the string ended before the extension, so the caller can write the new
 * bytes there directly. This function itself writes nothing into that region.
 *
 * dest       - smart string to extend
 * len        - size passed to smart_str_alloc() as the amount to add
 * persistent - forwarded unchanged to smart_str_alloc()
 *
 * Returns ZSTR_VAL(dest->s) offset by the length the string had before
 * ZSTR_LEN(dest->s) is overwritten with smart_str_alloc()'s return value.
 *
 * NOTE(review): assumes smart_str_alloc() guarantees dest->s is non-NULL with
 * room for `len` more bytes and returns the resulting total length; confirm
 * against its definition.
 */
static zend_always_inline char* smart_str_extend_ex(smart_str *dest, size_t len, zend_bool persistent) {
77+
size_t new_len = smart_str_alloc(dest, len, persistent);
78+
char *ret = ZSTR_VAL(dest->s) + ZSTR_LEN(dest->s); /* taken after the alloc call, before the length is updated: points at the old end */
79+
ZSTR_LEN(dest->s) = new_len;
80+
return ret;
81+
}
82+
7483
static zend_always_inline void smart_str_free_ex(smart_str *str, zend_bool persistent) {
7584
if (str->s) {
7685
zend_string_release_ex(str->s, persistent);

ext/json/json_encoder.c

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
static const char digits[] = "0123456789abcdef";
3434

3535
static int php_json_escape_string(
36-
smart_str *buf, char *s, size_t len,
36+
smart_str *buf, const char *s, size_t len,
3737
int options, php_json_encoder *encoder);
3838

3939
static int php_json_determine_array_type(zval *val) /* {{{ */
@@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso
250250
/* }}} */
251251

252252
static int php_json_escape_string(
253-
smart_str *buf, char *s, size_t len,
253+
smart_str *buf, const char *s, size_t len,
254254
int options, php_json_encoder *encoder) /* {{{ */
255255
{
256256
int status;
257257
unsigned int us;
258258
size_t pos, checkpoint;
259+
char *dst;
259260

260261
if (len == 0) {
261262
smart_str_appendl(buf, "\"\"", 2);
@@ -287,72 +288,89 @@ static int php_json_escape_string(
287288

288289
do {
289290
us = (unsigned char)s[pos];
290-
if (us >= 0x80) {
291-
int utf8_sub = 0;
292-
size_t prev_pos = pos;
293-
291+
if (UNEXPECTED(us >= 0x80)) {
292+
if (pos) {
293+
smart_str_appendl(buf, s, pos);
294+
s += pos;
295+
pos = 0;
296+
}
294297
us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
298+
len -= pos;
295299

296300
/* check whether UTF8 character is correct */
297-
if (status != SUCCESS) {
301+
if (UNEXPECTED(status != SUCCESS)) {
302+
s += pos;
303+
pos = 0;
298304
if (options & PHP_JSON_INVALID_UTF8_IGNORE) {
299305
/* ignore invalid UTF8 character */
300306
continue;
301307
} else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
302308
/* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
303-
us = 0xfffd;
304-
utf8_sub = 1;
305-
} else {
306-
if (buf->s) {
307-
ZSTR_LEN(buf->s) = checkpoint;
309+
if (options & PHP_JSON_UNESCAPED_UNICODE) {
310+
smart_str_appendl(buf, "\xef\xbf\xbd", 3);
311+
} else {
312+
smart_str_appendl(buf, "\\ufffd", 6);
308313
}
314+
continue;
315+
} else {
316+
ZSTR_LEN(buf->s) = checkpoint;
309317
encoder->error_code = PHP_JSON_ERROR_UTF8;
310318
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
311319
smart_str_appendl(buf, "null", 4);
312320
}
313321
return FAILURE;
314322
}
315-
}
316323

317324
/* Escape U+2028/U+2029 line terminators, UNLESS both
318325
JSON_UNESCAPED_UNICODE and
319326
JSON_UNESCAPED_LINE_TERMINATORS were provided */
320-
if ((options & PHP_JSON_UNESCAPED_UNICODE)
327+
} else if ((options & PHP_JSON_UNESCAPED_UNICODE)
321328
&& ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
322329
|| us < 0x2028 || us > 0x2029)) {
323-
if (utf8_sub) {
324-
smart_str_appendl(buf, "\xef\xbf\xbd", 3);
325-
} else {
326-
smart_str_appendl(buf, s + prev_pos, pos - prev_pos);
327-
}
330+
smart_str_appendl(buf, s, pos);
331+
s += pos;
332+
pos = 0;
328333
continue;
329334
}
330335
/* From http://en.wikipedia.org/wiki/UTF16 */
331336
if (us >= 0x10000) {
332337
unsigned int next_us;
338+
333339
us -= 0x10000;
334340
next_us = (unsigned short)((us & 0x3ff) | 0xdc00);
335341
us = (unsigned short)((us >> 10) | 0xd800);
336-
smart_str_appendl(buf, "\\u", 2);
337-
smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
338-
smart_str_appendc(buf, digits[(us & 0xf00) >> 8]);
339-
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
340-
smart_str_appendc(buf, digits[(us & 0xf)]);
342+
dst = smart_str_extend(buf, 6);
343+
dst[0] = '\\';
344+
dst[1] = 'u';
345+
dst[2] = digits[(us >> 12) & 0xf];
346+
dst[3] = digits[(us >> 8) & 0xf];
347+
dst[4] = digits[(us >> 4) & 0xf];
348+
dst[5] = digits[us & 0xf];
341349
us = next_us;
342350
}
343-
smart_str_appendl(buf, "\\u", 2);
344-
smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
345-
smart_str_appendc(buf, digits[(us & 0xf00) >> 8]);
346-
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
347-
smart_str_appendc(buf, digits[(us & 0xf)]);
351+
dst = smart_str_extend(buf, 6);
352+
dst[0] = '\\';
353+
dst[1] = 'u';
354+
dst[2] = digits[(us >> 12) & 0xf];
355+
dst[3] = digits[(us >> 8) & 0xf];
356+
dst[4] = digits[(us >> 4) & 0xf];
357+
dst[5] = digits[us & 0xf];
358+
s += pos;
359+
pos = 0;
348360
} else {
349361
static const uint32_t charmap[4] = {
350362
0xffffffff, 0x500080c4, 0x10000000, 0x00000000};
351363

352-
pos++;
364+
len--;
353365
if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
354-
smart_str_appendc(buf, (unsigned char) us);
366+
pos++;
355367
} else {
368+
if (pos) {
369+
smart_str_appendl(buf, s, pos);
370+
s += pos;
371+
pos = 0;
372+
}
373+
s++;
356374
switch (us) {
357375
case '"':
358376
if (options & PHP_JSON_HEX_QUOT) {
@@ -428,15 +446,22 @@ static int php_json_escape_string(
428446

429447
default:
430448
ZEND_ASSERT(us < ' ');
431-
smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1);
432-
smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
433-
smart_str_appendc(buf, digits[(us & 0xf)]);
449+
dst = smart_str_extend(buf, 6);
450+
dst[0] = '\\';
451+
dst[1] = 'u';
452+
dst[2] = '0';
453+
dst[3] = '0';
454+
dst[4] = digits[(us >> 4) & 0xf];
455+
dst[5] = digits[us & 0xf];
434456
break;
435457
}
436458
}
437459
}
438-
} while (pos < len);
460+
} while (len);
439461

462+
if (EXPECTED(pos)) {
463+
smart_str_appendl(buf, s, pos);
464+
}
440465
smart_str_appendc(buf, '"');
441466

442467
return SUCCESS;

0 commit comments

Comments
 (0)