@@ -1715,13 +1715,85 @@ PHP_FUNCTION(mb_str_split)
1715
1715
}
1716
1716
}
1717
1717
1718
+ #ifdef __SSE2__
1719
/* Horizontal sum of the sixteen unsigned bytes held in a 128-bit XMM register
 *
 * Thanks to StackOverflow user 'Paul R' (https://stackoverflow.com/users/253056/paul-r)
 * From: https://stackoverflow.com/questions/36998538/fastest-way-to-horizontally-sum-sse-unsigned-byte-vector
 *
 * Each byte of `v` is treated as an 8-bit unsigned integer; the total of all 16
 * (at most 16 * 255 = 4080) is returned in an ordinary scalar register */
static inline uint32_t _mm_sum_epu8(const __m128i v)
{
	/* SSE2 has no dedicated "add up all the bytes" instruction, but _mm_sad_epu8
	 * comes close: it takes the absolute differences between corresponding bytes
	 * of two XMM registers and, separately for the low 8 and the high 8 bytes,
	 * adds those differences together, leaving one 16-bit total at the bottom of
	 * each 64-bit half of the result
	 * Against an all-zero operand, every "difference" is simply the byte from `v` */
	const __m128i zero = _mm_setzero_si128();
	const __m128i partial_sums = _mm_sad_epu8(v, zero);

	/* The two partial sums still sit in separate halves of the register
	 * _mm_cvtsi128_si32 reads the low 32 bits (the sum of bytes 0-7)
	 * _mm_extract_epi16 with index 4 reads the 16-bit word which starts the
	 * high half (the sum of bytes 8-15); adding the two gives the full total */
	int low_half = _mm_cvtsi128_si32(partial_sums);
	int high_half = _mm_extract_epi16(partial_sums, 4);
	return low_half + high_half;
}
1738
+ #endif
1739
+
1740
/* Count the codepoints in the `len` bytes of UTF-8 text starting at `p`
 *
 * This assumes that the input is valid UTF-8
 * In UTF-8, the only bytes which do not start a new codepoint are 0x80-0xBF (continuation bytes)
 * Interpreted as signed integers, those are all byte values less than -64
 * A fast way to get the length of a UTF-8 string is to start with its byte length,
 * then subtract off the number of continuation bytes
 *
 * Returns `len` minus the number of continuation bytes found (so 0 for empty input) */
static size_t mb_fast_strlen_utf8(unsigned char *p, size_t len)
{
	unsigned char *e = p + len;

#ifdef __SSE2__
	if (len >= sizeof(__m128i)) {
		const __m128i threshold = _mm_set1_epi8(-64);
		const __m128i delta = _mm_set1_epi8(1);
		__m128i counter = _mm_setzero_si128(); /* Vector of 16 continuation-byte counters */

		int reset_counter = 255;
		do {
			__m128i operand = _mm_loadu_si128((__m128i*)p); /* Load 16 bytes (unaligned load) */
			__m128i lt = _mm_cmplt_epi8(operand, threshold); /* 0xFF in each lane holding a continuation byte */
			counter = _mm_add_epi8(counter, _mm_and_si128(lt, delta)); /* Add 1 to the counter for each such lane */

			/* Each counter is only 8 bits wide and grows by at most 1 per iteration,
			 * so every 255 iterations, fold them into `len` and reset them to zero
			 * before any of them can wrap around */
			if (--reset_counter == 0) {
				len -= _mm_sum_epu8(counter);
				counter = _mm_setzero_si128();
				reset_counter = 255;
			}

			p += sizeof(__m128i);
			/* Test the number of bytes left instead of forming `p + 16`; near the end
			 * of the buffer that pointer would lie more than one element past the
			 * end, which is undefined behaviour even if it is never dereferenced
			 * `p` never passes `e` here, so `e - p` is non-negative */
		} while ((size_t)(e - p) >= sizeof(__m128i));

		len -= _mm_sum_epu8(counter); /* Fold in any remaining non-zero values in the 16 counters */
	}
#endif

	/* Check for continuation bytes in whatever is left: the final 0-15 bytes after
	 * the vector loop, or the whole input when the vector loop did not run
	 * Continuation bytes are exactly those of the form 10xxxxxx; testing the top two
	 * bits avoids relying on how out-of-range values convert to `signed char` */
	while (p < e) {
		if ((*p++ & 0xC0) == 0x80) {
			len--;
		}
	}

	return len;
}
1786
+
1718
1787
static size_t mb_get_strlen (zend_string * string , const mbfl_encoding * encoding )
1719
1788
{
1720
1789
unsigned int char_len = encoding -> flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2 | MBFL_ENCTYPE_WCS4 );
1721
1790
if (char_len ) {
1722
1791
return ZSTR_LEN (string ) / char_len ;
1792
+ } else if (php_mb_is_no_encoding_utf8 (encoding -> no_encoding ) && GC_FLAGS (string ) & IS_STR_VALID_UTF8 ) {
1793
+ return mb_fast_strlen_utf8 ((unsigned char * )ZSTR_VAL (string ), ZSTR_LEN (string ));
1723
1794
}
1724
1795
1796
+
1725
1797
uint32_t wchar_buf [128 ];
1726
1798
unsigned char * in = (unsigned char * )ZSTR_VAL (string );
1727
1799
size_t in_len = ZSTR_LEN (string );
@@ -1789,14 +1861,7 @@ static unsigned char* offset_to_pointer_utf8(unsigned char *str, unsigned char *
1789
1861
}
1790
1862
1791
1863
/* Convert a pointer into a UTF-8 string to a codepoint offset: returns how many
 * codepoints begin in the bytes from `start` up to (but not including) `pos`
 * NOTE(review): the byte distance `pos - start` is handed on as an unsigned length,
 * so this relies on `pos` never being before `start`; confirm against callers */
static size_t pointer_to_offset_utf8(unsigned char *start, unsigned char *pos) {
	size_t byte_distance = pos - start;
	return mb_fast_strlen_utf8(start, byte_distance);
}
1801
1866
1802
1867
static size_t mb_find_strpos (zend_string * haystack , zend_string * needle , const mbfl_encoding * enc , ssize_t offset , bool reverse )
0 commit comments