|
33 | 33 | static const char digits[] = "0123456789abcdef";
|
34 | 34 |
|
35 | 35 | static int php_json_escape_string(
|
36 |
| - smart_str *buf, char *s, size_t len, |
| 36 | + smart_str *buf, const char *s, size_t len, |
37 | 37 | int options, php_json_encoder *encoder);
|
38 | 38 |
|
39 | 39 | static int php_json_determine_array_type(zval *val) /* {{{ */
|
@@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso
|
250 | 250 | /* }}} */
|
251 | 251 |
|
252 | 252 | static int php_json_escape_string(
|
253 |
| - smart_str *buf, char *s, size_t len, |
| 253 | + smart_str *buf, const char *s, size_t len, |
254 | 254 | int options, php_json_encoder *encoder) /* {{{ */
|
255 | 255 | {
|
256 | 256 | int status;
|
257 | 257 | unsigned int us;
|
258 | 258 | size_t pos, checkpoint;
|
| 259 | + char *dst; |
259 | 260 |
|
260 | 261 | if (len == 0) {
|
261 | 262 | smart_str_appendl(buf, "\"\"", 2);
|
@@ -287,72 +288,89 @@ static int php_json_escape_string(
|
287 | 288 |
|
288 | 289 | do {
|
289 | 290 | us = (unsigned char)s[pos];
|
290 |
| - if (us >= 0x80) { |
291 |
| - int utf8_sub = 0; |
292 |
| - size_t prev_pos = pos; |
293 |
| - |
| 291 | + if (UNEXPECTED(us >= 0x80)) { |
| 292 | + if (pos) { |
| 293 | + smart_str_appendl(buf, s, pos); |
| 294 | + s += pos; |
| 295 | + pos = 0; |
| 296 | + } |
294 | 297 | us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
|
| 298 | + len -= pos; |
295 | 299 |
|
296 | 300 | /* check whether UTF8 character is correct */
|
297 |
| - if (status != SUCCESS) { |
| 301 | + if (UNEXPECTED(status != SUCCESS)) { |
| 302 | + s += pos; |
| 303 | + pos = 0; |
298 | 304 | if (options & PHP_JSON_INVALID_UTF8_IGNORE) {
|
299 | 305 | /* ignore invalid UTF8 character */
|
300 | 306 | continue;
|
301 | 307 | } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
|
302 | 308 | /* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
|
303 |
| - us = 0xfffd; |
304 |
| - utf8_sub = 1; |
305 |
| - } else { |
306 |
| - if (buf->s) { |
307 |
| - ZSTR_LEN(buf->s) = checkpoint; |
| 309 | + if (options & PHP_JSON_UNESCAPED_UNICODE) { |
| 310 | + smart_str_appendl(buf, "\xef\xbf\xbd", 3); |
| 311 | + } else { |
| 312 | + smart_str_appendl(buf, "\\ufffd", 6); |
308 | 313 | }
|
| 314 | + continue; |
| 315 | + } else { |
| 316 | + ZSTR_LEN(buf->s) = checkpoint; |
309 | 317 | encoder->error_code = PHP_JSON_ERROR_UTF8;
|
310 | 318 | if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
|
311 | 319 | smart_str_appendl(buf, "null", 4);
|
312 | 320 | }
|
313 | 321 | return FAILURE;
|
314 | 322 | }
|
315 |
| - } |
316 | 323 |
|
317 | 324 | /* Escape U+2028/U+2029 line terminators, UNLESS both
|
318 | 325 | JSON_UNESCAPED_UNICODE and
|
319 | 326 | JSON_UNESCAPED_LINE_TERMINATORS were provided */
|
320 |
| - if ((options & PHP_JSON_UNESCAPED_UNICODE) |
| 327 | + } else if ((options & PHP_JSON_UNESCAPED_UNICODE) |
321 | 328 | && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
|
322 | 329 | || us < 0x2028 || us > 0x2029)) {
|
323 |
| - if (utf8_sub) { |
324 |
| - smart_str_appendl(buf, "\xef\xbf\xbd", 3); |
325 |
| - } else { |
326 |
| - smart_str_appendl(buf, s + prev_pos, pos - prev_pos); |
327 |
| - } |
| 330 | + smart_str_appendl(buf, s, pos); |
| 331 | + s += pos; |
| 332 | + pos = 0; |
328 | 333 | continue;
|
329 | 334 | }
|
330 | 335 | /* From http://en.wikipedia.org/wiki/UTF16 */
|
331 | 336 | if (us >= 0x10000) {
|
332 | 337 | unsigned int next_us;
|
| 338 | + |
333 | 339 | us -= 0x10000;
|
334 | 340 | next_us = (unsigned short)((us & 0x3ff) | 0xdc00);
|
335 | 341 | us = (unsigned short)((us >> 10) | 0xd800);
|
336 |
| - smart_str_appendl(buf, "\\u", 2); |
337 |
| - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); |
338 |
| - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); |
339 |
| - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); |
340 |
| - smart_str_appendc(buf, digits[(us & 0xf)]); |
| 342 | + dst = smart_str_extend(buf, 6); |
| 343 | + dst[0] = '\\'; |
| 344 | + dst[1] = 'u'; |
| 345 | + dst[2] = digits[(us >> 12) & 0xf]; |
| 346 | + dst[3] = digits[(us >> 8) & 0xf]; |
| 347 | + dst[4] = digits[(us >> 4) & 0xf]; |
| 348 | + dst[5] = digits[us & 0xf]; |
341 | 349 | us = next_us;
|
342 | 350 | }
|
343 |
| - smart_str_appendl(buf, "\\u", 2); |
344 |
| - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); |
345 |
| - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); |
346 |
| - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); |
347 |
| - smart_str_appendc(buf, digits[(us & 0xf)]); |
| 351 | + dst = smart_str_extend(buf, 6); |
| 352 | + dst[0] = '\\'; |
| 353 | + dst[1] = 'u'; |
| 354 | + dst[2] = digits[(us >> 12) & 0xf]; |
| 355 | + dst[3] = digits[(us >> 8) & 0xf]; |
| 356 | + dst[4] = digits[(us >> 4) & 0xf]; |
| 357 | + dst[5] = digits[us & 0xf]; |
| 358 | + s += pos; |
| 359 | + pos = 0; |
348 | 360 | } else {
|
349 | 361 | static const uint32_t charmap[4] = {
|
350 | 362 | 0xffffffff, 0x500080c4, 0x10000000, 0x00000000};
|
351 | 363 |
|
352 |
| - pos++; |
| 364 | + len--; |
353 | 365 | if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
|
354 |
| - smart_str_appendc(buf, (unsigned char) us); |
| 366 | + pos++; |
355 | 367 | } else {
|
| 368 | + if (pos) { |
| 369 | + smart_str_appendl(buf, s, pos); |
| 370 | + s += pos; |
| 371 | + pos = 0; |
| 372 | + } |
| 373 | + s++; |
356 | 374 | switch (us) {
|
357 | 375 | case '"':
|
358 | 376 | if (options & PHP_JSON_HEX_QUOT) {
|
@@ -428,15 +446,22 @@ static int php_json_escape_string(
|
428 | 446 |
|
429 | 447 | default:
|
430 | 448 | ZEND_ASSERT(us < ' ');
|
431 |
| - smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1); |
432 |
| - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); |
433 |
| - smart_str_appendc(buf, digits[(us & 0xf)]); |
| 449 | + dst = smart_str_extend(buf, 6); |
| 450 | + dst[0] = '\\'; |
| 451 | + dst[1] = 'u'; |
| 452 | + dst[2] = '0'; |
| 453 | + dst[3] = '0'; |
| 454 | + dst[4] = digits[(us >> 4) & 0xf]; |
| 455 | + dst[5] = digits[us & 0xf]; |
434 | 456 | break;
|
435 | 457 | }
|
436 | 458 | }
|
437 | 459 | }
|
438 |
| - } while (pos < len); |
| 460 | + } while (len); |
439 | 461 |
|
| 462 | + if (EXPECTED(pos)) { |
| 463 | + smart_str_appendl(buf, s, pos); |
| 464 | + } |
440 | 465 | smart_str_appendc(buf, '"');
|
441 | 466 |
|
442 | 467 | return SUCCESS;
|
|
0 commit comments