|
19 | 19 | #include <ctype.h>
|
20 | 20 | #include <sys/types.h>
|
21 | 21 |
|
22 |
| -#ifdef __SSE2__ |
23 |
| -#include <emmintrin.h> |
24 |
| -#endif |
25 |
| - |
26 | 22 | #include "php.h"
|
27 | 23 |
|
28 | 24 | #include "url.h"
|
29 | 25 | #include "file.h"
|
| 26 | +#include "zend_simd.h" |
30 | 27 |
|
31 | 28 | /* {{{ free_url */
|
32 | 29 | PHPAPI void php_url_free(php_url *theurl)
|
@@ -460,53 +457,53 @@ static zend_always_inline zend_string *php_url_encode_impl(const char *s, size_t
|
460 | 457 | start = zend_string_safe_alloc(3, len, 0, 0);
|
461 | 458 | to = (unsigned char*)ZSTR_VAL(start);
|
462 | 459 |
|
463 |
| -#ifdef __SSE2__ |
| 460 | +#ifdef ZEND_HAVE_VECTOR_128 |
464 | 461 | while (from + 16 < end) {
|
465 |
| - __m128i mask; |
| 462 | + zend_vec_8x16_t mask; |
466 | 463 | uint32_t bits;
|
467 |
| - const __m128i _A = _mm_set1_epi8('A' - 1); |
468 |
| - const __m128i Z_ = _mm_set1_epi8('Z' + 1); |
469 |
| - const __m128i _a = _mm_set1_epi8('a' - 1); |
470 |
| - const __m128i z_ = _mm_set1_epi8('z' + 1); |
471 |
| - const __m128i _zero = _mm_set1_epi8('0' - 1); |
472 |
| - const __m128i nine_ = _mm_set1_epi8('9' + 1); |
473 |
| - const __m128i dot = _mm_set1_epi8('.'); |
474 |
| - const __m128i minus = _mm_set1_epi8('-'); |
475 |
| - const __m128i under = _mm_set1_epi8('_'); |
476 |
| - |
477 |
| - __m128i in = _mm_loadu_si128((__m128i *)from); |
478 |
| - |
479 |
| - __m128i gt = _mm_cmpgt_epi8(in, _A); |
480 |
| - __m128i lt = _mm_cmplt_epi8(in, Z_); |
481 |
| - mask = _mm_and_si128(lt, gt); /* upper */ |
482 |
| - gt = _mm_cmpgt_epi8(in, _a); |
483 |
| - lt = _mm_cmplt_epi8(in, z_); |
484 |
| - mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* lower */ |
485 |
| - gt = _mm_cmpgt_epi8(in, _zero); |
486 |
| - lt = _mm_cmplt_epi8(in, nine_); |
487 |
| - mask = _mm_or_si128(mask, _mm_and_si128(lt, gt)); /* number */ |
488 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, dot)); |
489 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, minus)); |
490 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, under)); |
| 464 | + const zend_vec_8x16_t _A = zend_vec_set_8x16('A' - 1); |
| 465 | + const zend_vec_8x16_t Z_ = zend_vec_set_8x16('Z' + 1); |
| 466 | + const zend_vec_8x16_t _a = zend_vec_set_8x16('a' - 1); |
| 467 | + const zend_vec_8x16_t z_ = zend_vec_set_8x16('z' + 1); |
| 468 | + const zend_vec_8x16_t _zero = zend_vec_set_8x16('0' - 1); |
| 469 | + const zend_vec_8x16_t nine_ = zend_vec_set_8x16('9' + 1); |
| 470 | + const zend_vec_8x16_t dot = zend_vec_set_8x16('.'); |
| 471 | + const zend_vec_8x16_t minus = zend_vec_set_8x16('-'); |
| 472 | + const zend_vec_8x16_t under = zend_vec_set_8x16('_'); |
| 473 | + |
| 474 | + zend_vec_8x16_t in = zend_vec_loadu_8x16(from); |
| 475 | + |
| 476 | + zend_vec_8x16_t gt = zend_vec_cmpgt_8x16(in, _A); |
| 477 | + zend_vec_8x16_t lt = zend_vec_cmplt_8x16(in, Z_); |
| 478 | + mask = zend_vec_and_8x16(lt, gt); /* upper */ |
| 479 | + gt = zend_vec_cmpgt_8x16(in, _a); |
| 480 | + lt = zend_vec_cmplt_8x16(in, z_); |
| 481 | + mask = zend_vec_or_8x16(mask, zend_vec_and_8x16(lt, gt)); /* lower */ |
| 482 | + gt = zend_vec_cmpgt_8x16(in, _zero); |
| 483 | + lt = zend_vec_cmplt_8x16(in, nine_); |
| 484 | + mask = zend_vec_or_8x16(mask, zend_vec_and_8x16(lt, gt)); /* number */ |
| 485 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, dot)); |
| 486 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, minus)); |
| 487 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, under)); |
491 | 488 |
|
492 | 489 | if (!raw) {
|
493 |
| - const __m128i blank = _mm_set1_epi8(' '); |
494 |
| - __m128i eq = _mm_cmpeq_epi8(in, blank); |
495 |
| - if (_mm_movemask_epi8(eq)) { |
496 |
| - in = _mm_add_epi8(in, _mm_and_si128(eq, _mm_set1_epi8('+' - ' '))); |
497 |
| - mask = _mm_or_si128(mask, eq); |
| 490 | + const zend_vec_8x16_t blank = zend_vec_set_8x16(' '); |
| 491 | + zend_vec_8x16_t eq = zend_vec_cmpeq_8x16(in, blank); |
| 492 | + if (zend_vec_movemask_8x16(eq)) { |
| 493 | + in = zend_vec_add_8x16(in, zend_vec_and_8x16(eq, zend_vec_set_8x16('+' - ' '))); |
| 494 | + mask = zend_vec_or_8x16(mask, eq); |
498 | 495 | }
|
499 | 496 | }
|
500 | 497 | if (raw) {
|
501 |
| - const __m128i wavy = _mm_set1_epi8('~'); |
502 |
| - mask = _mm_or_si128(mask, _mm_cmpeq_epi8(in, wavy)); |
| 498 | + const zend_vec_8x16_t wavy = zend_vec_set_8x16('~'); |
| 499 | + mask = zend_vec_or_8x16(mask, zend_vec_cmpeq_8x16(in, wavy)); |
503 | 500 | }
|
504 |
| - if (((bits = _mm_movemask_epi8(mask)) & 0xffff) == 0xffff) { |
505 |
| - _mm_storeu_si128((__m128i*)to, in); |
| 501 | + if (((bits = zend_vec_movemask_8x16(mask)) & 0xffff) == 0xffff) { |
| 502 | + zend_vec_storeu_8x16(to, in); |
506 | 503 | to += 16;
|
507 | 504 | } else {
|
508 | 505 | unsigned char xmm[16];
|
509 |
| - _mm_storeu_si128((__m128i*)xmm, in); |
| 506 | + zend_vec_storeu_8x16(xmm, in); |
510 | 507 | for (size_t i = 0; i < sizeof(xmm); i++) {
|
511 | 508 | if ((bits & (0x1 << i))) {
|
512 | 509 | *to++ = xmm[i];
|
|
0 commit comments