@@ -54,7 +54,36 @@ static _locale_t current_locale = NULL;
54
54
55
55
#define TYPE_PAIR (t1 ,t2 ) (((t1) << 4) | (t2))
56
56
57
- static const unsigned char tolower_map [256 ] = {
57
#ifdef __SSE2__
#define HAVE_BLOCKCONV

/*
 * Common code for SSE2 accelerated character case conversion.
 *
 * The macros below convert BLOCKCONV_STRIDE bytes at a time. They are
 * statement macros that DECLARE locals (all prefixed "blconv_") in the
 * caller's scope, so they cannot be wrapped in do { } while (0) and each
 * may be expanded at most once per scope:
 *
 *   BLOCKCONV_INIT_RANGE(start, end)  inclusive byte range to convert
 *   BLOCKCONV_INIT_DELTA(delta)       value added to every byte in range
 *   BLOCKCONV_LOAD(input)             load one block, compute the match mask
 *   BLOCKCONV_FOUND()                 non-zero if any loaded byte is in range
 *   BLOCKCONV_STORE(dest)             store the converted block
 *
 * The range test uses signed 8-bit comparisons, so bytes >= 0x80 compare as
 * negative and never match. start - 1 and end + 1 must therefore both be
 * representable as a non-negative signed char (true for 'A'..'Z' and
 * 'a'..'z').
 *
 * Loads and stores are unaligned; input and dest may be the same buffer.
 */

#define BLOCKCONV_INIT_RANGE(start, end) \
	const __m128i blconv_start_minus_1 = _mm_set1_epi8((start) - 1); \
	const __m128i blconv_end_plus_1 = _mm_set1_epi8((end) + 1);

#define BLOCKCONV_STRIDE sizeof(__m128i)

#define BLOCKCONV_INIT_DELTA(delta) \
	const __m128i blconv_delta = _mm_set1_epi8((delta));

/* The cast keeps const so that read-only inputs do not lose qualifiers. */
#define BLOCKCONV_LOAD(input) \
	__m128i blconv_operand = _mm_loadu_si128((const __m128i *)(input)); \
	__m128i blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \
	__m128i blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \
	__m128i blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt);

#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle)

/* Bytes outside the range get 0 added, i.e. are copied through unchanged. */
#define BLOCKCONV_STORE(dest) \
	__m128i blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \
	__m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
	_mm_storeu_si128((__m128i *)(dest), blconv_result);

#endif /* __SSE2__ */
85
+
86
+ ZEND_API const unsigned char zend_tolower_map [256 ] = {
58
87
0x00 ,0x01 ,0x02 ,0x03 ,0x04 ,0x05 ,0x06 ,0x07 ,0x08 ,0x09 ,0x0a ,0x0b ,0x0c ,0x0d ,0x0e ,0x0f ,
59
88
0x10 ,0x11 ,0x12 ,0x13 ,0x14 ,0x15 ,0x16 ,0x17 ,0x18 ,0x19 ,0x1a ,0x1b ,0x1c ,0x1d ,0x1e ,0x1f ,
60
89
0x20 ,0x21 ,0x22 ,0x23 ,0x24 ,0x25 ,0x26 ,0x27 ,0x28 ,0x29 ,0x2a ,0x2b ,0x2c ,0x2d ,0x2e ,0x2f ,
@@ -73,7 +102,25 @@ static const unsigned char tolower_map[256] = {
73
102
0xf0 ,0xf1 ,0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xf6 ,0xf7 ,0xf8 ,0xf9 ,0xfa ,0xfb ,0xfc ,0xfd ,0xfe ,0xff
74
103
};
75
104
76
- #define zend_tolower_ascii (c ) (tolower_map[(unsigned char)(c)])
105
+ ZEND_API const unsigned char zend_toupper_map [256 ] = {
106
+ 0x00 ,0x01 ,0x02 ,0x03 ,0x04 ,0x05 ,0x06 ,0x07 ,0x08 ,0x09 ,0x0a ,0x0b ,0x0c ,0x0d ,0x0e ,0x0f ,
107
+ 0x10 ,0x11 ,0x12 ,0x13 ,0x14 ,0x15 ,0x16 ,0x17 ,0x18 ,0x19 ,0x1a ,0x1b ,0x1c ,0x1d ,0x1e ,0x1f ,
108
+ 0x20 ,0x21 ,0x22 ,0x23 ,0x24 ,0x25 ,0x26 ,0x27 ,0x28 ,0x29 ,0x2a ,0x2b ,0x2c ,0x2d ,0x2e ,0x2f ,
109
+ 0x30 ,0x31 ,0x32 ,0x33 ,0x34 ,0x35 ,0x36 ,0x37 ,0x38 ,0x39 ,0x3a ,0x3b ,0x3c ,0x3d ,0x3e ,0x3f ,
110
+ 0x40 ,0x41 ,0x42 ,0x43 ,0x44 ,0x45 ,0x46 ,0x47 ,0x48 ,0x49 ,0x4a ,0x4b ,0x4c ,0x4d ,0x4e ,0x4f ,
111
+ 0x50 ,0x51 ,0x52 ,0x53 ,0x54 ,0x55 ,0x56 ,0x57 ,0x58 ,0x59 ,0x5a ,0x5b ,0x5c ,0x5d ,0x5e ,0x5f ,
112
+ 0x60 ,0x41 ,0x42 ,0x43 ,0x44 ,0x45 ,0x46 ,0x47 ,0x48 ,0x49 ,0x4a ,0x4b ,0x4c ,0x4d ,0x4e ,0x4f ,
113
+ 0x50 ,0x51 ,0x52 ,0x53 ,0x54 ,0x55 ,0x56 ,0x57 ,0x58 ,0x59 ,0x5a ,0x7b ,0x7c ,0x7d ,0x7e ,0x7f ,
114
+ 0x80 ,0x81 ,0x82 ,0x83 ,0x84 ,0x85 ,0x86 ,0x87 ,0x88 ,0x89 ,0x8a ,0x8b ,0x8c ,0x8d ,0x8e ,0x8f ,
115
+ 0x90 ,0x91 ,0x92 ,0x93 ,0x94 ,0x95 ,0x96 ,0x97 ,0x98 ,0x99 ,0x9a ,0x9b ,0x9c ,0x9d ,0x9e ,0x9f ,
116
+ 0xa0 ,0xa1 ,0xa2 ,0xa3 ,0xa4 ,0xa5 ,0xa6 ,0xa7 ,0xa8 ,0xa9 ,0xaa ,0xab ,0xac ,0xad ,0xae ,0xaf ,
117
+ 0xb0 ,0xb1 ,0xb2 ,0xb3 ,0xb4 ,0xb5 ,0xb6 ,0xb7 ,0xb8 ,0xb9 ,0xba ,0xbb ,0xbc ,0xbd ,0xbe ,0xbf ,
118
+ 0xc0 ,0xc1 ,0xc2 ,0xc3 ,0xc4 ,0xc5 ,0xc6 ,0xc7 ,0xc8 ,0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,
119
+ 0xd0 ,0xd1 ,0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xd6 ,0xd7 ,0xd8 ,0xd9 ,0xda ,0xdb ,0xdc ,0xdd ,0xde ,0xdf ,
120
+ 0xe0 ,0xe1 ,0xe2 ,0xe3 ,0xe4 ,0xe5 ,0xe6 ,0xe7 ,0xe8 ,0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,
121
+ 0xf0 ,0xf1 ,0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xf6 ,0xf7 ,0xf8 ,0xf9 ,0xfa ,0xfb ,0xfc ,0xfd ,0xfe ,0xff
122
+ };
123
+
77
124
78
125
/**
79
126
* Functions using locale lowercase:
@@ -2665,22 +2712,16 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
2665
2712
unsigned char * p = (unsigned char * )str ;
2666
2713
unsigned char * q = (unsigned char * )dest ;
2667
2714
unsigned char * end = p + length ;
2668
- #ifdef __SSE2__
2669
- if (length >= 16 ) {
2670
- const __m128i _A = _mm_set1_epi8 ('A' - 1 );
2671
- const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
2672
- const __m128i delta = _mm_set1_epi8 ('a' - 'A' );
2715
+ #ifdef HAVE_BLOCKCONV
2716
+ if (length >= BLOCKCONV_STRIDE ) {
2717
+ BLOCKCONV_INIT_RANGE ('A' , 'Z' );
2718
+ BLOCKCONV_INIT_DELTA ('a' - 'A' );
2673
2719
do {
2674
- __m128i op = _mm_loadu_si128 ((__m128i * )p );
2675
- __m128i gt = _mm_cmpgt_epi8 (op , _A );
2676
- __m128i lt = _mm_cmplt_epi8 (op , Z_ );
2677
- __m128i mingle = _mm_and_si128 (gt , lt );
2678
- __m128i add = _mm_and_si128 (mingle , delta );
2679
- __m128i lower = _mm_add_epi8 (op , add );
2680
- _mm_storeu_si128 ((__m128i * )q , lower );
2681
- p += 16 ;
2682
- q += 16 ;
2683
- } while (p + 16 <= end );
2720
+ BLOCKCONV_LOAD (p );
2721
+ BLOCKCONV_STORE (q );
2722
+ p += BLOCKCONV_STRIDE ;
2723
+ q += BLOCKCONV_STRIDE ;
2724
+ } while (p + BLOCKCONV_STRIDE <= end );
2684
2725
}
2685
2726
#endif
2686
2727
while (p < end ) {
@@ -2689,6 +2730,28 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
2689
2730
}
2690
2731
/* }}} */
2691
2732
2733
+ static zend_always_inline void zend_str_toupper_impl (char * dest , const char * str , size_t length ) /* {{{ */ {
2734
+ unsigned char * p = (unsigned char * )str ;
2735
+ unsigned char * q = (unsigned char * )dest ;
2736
+ unsigned char * end = p + length ;
2737
+ #ifdef HAVE_BLOCKCONV
2738
+ if (length >= BLOCKCONV_STRIDE ) {
2739
+ BLOCKCONV_INIT_RANGE ('a' , 'z' );
2740
+ BLOCKCONV_INIT_DELTA ('A' - 'a' );
2741
+ do {
2742
+ BLOCKCONV_LOAD (p );
2743
+ BLOCKCONV_STORE (q );
2744
+ p += BLOCKCONV_STRIDE ;
2745
+ q += BLOCKCONV_STRIDE ;
2746
+ } while (p + BLOCKCONV_STRIDE <= end );
2747
+ }
2748
+ #endif
2749
+ while (p < end ) {
2750
+ * q ++ = zend_toupper_ascii (* p ++ );
2751
+ }
2752
+ }
2753
+ /* }}} */
2754
+
2692
2755
ZEND_API char * ZEND_FASTCALL zend_str_tolower_copy (char * dest , const char * source , size_t length ) /* {{{ */
2693
2756
{
2694
2757
zend_str_tolower_impl (dest , source , length );
@@ -2697,18 +2760,39 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *sourc
2697
2760
}
2698
2761
/* }}} */
2699
2762
2763
+ ZEND_API char * ZEND_FASTCALL zend_str_toupper_copy (char * dest , const char * source , size_t length ) /* {{{ */
2764
+ {
2765
+ zend_str_toupper_impl (dest , source , length );
2766
+ dest [length ] = '\0' ;
2767
+ return dest ;
2768
+ }
2769
+ /* }}} */
2770
+
2700
2771
/*
 * Return a newly emalloc()ed, NUL-terminated lowercase copy of the
 * `length` bytes at `source`.
 */
ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup(const char *source, size_t length) /* {{{ */
{
	char *copy = (char *) emalloc(length + 1);

	return zend_str_tolower_copy(copy, source, length);
}
2704
2775
/* }}} */
2705
2776
2777
+ ZEND_API char * ZEND_FASTCALL zend_str_toupper_dup (const char * source , size_t length ) /* {{{ */
2778
+ {
2779
+ return zend_str_toupper_copy ((char * )emalloc (length + 1 ), source , length );
2780
+ }
2781
+ /* }}} */
2782
+
2706
2783
/* Lowercase the first `length` bytes of `str` in place. */
ZEND_API void ZEND_FASTCALL zend_str_tolower(char *str, size_t length) /* {{{ */
{
	/* Source and destination are the same buffer. */
	zend_str_tolower_impl(str, str, length);
}
2710
2787
/* }}} */
2711
2788
2789
+ ZEND_API void ZEND_FASTCALL zend_str_toupper (char * str , size_t length ) /* {{{ */
2790
+ {
2791
+ zend_str_toupper_impl (str , (const char * )str , length );
2792
+ }
2793
+ /* }}} */
2794
+
2795
+
2712
2796
ZEND_API char * ZEND_FASTCALL zend_str_tolower_dup_ex (const char * source , size_t length ) /* {{{ */
2713
2797
{
2714
2798
const unsigned char * p = (const unsigned char * )source ;
@@ -2733,38 +2817,57 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
2733
2817
}
2734
2818
/* }}} */
2735
2819
2820
+ ZEND_API char * ZEND_FASTCALL zend_str_toupper_dup_ex (const char * source , size_t length ) /* {{{ */
2821
+ {
2822
+ const unsigned char * p = (const unsigned char * )source ;
2823
+ const unsigned char * end = p + length ;
2824
+
2825
+ while (p < end ) {
2826
+ if (* p != zend_toupper_ascii (* p )) {
2827
+ char * res = (char * )emalloc (length + 1 );
2828
+ unsigned char * r ;
2829
+
2830
+ if (p != (const unsigned char * )source ) {
2831
+ memcpy (res , source , p - (const unsigned char * )source );
2832
+ }
2833
+ r = (unsigned char * )p + (res - source );
2834
+ zend_str_toupper_impl ((char * )r , (const char * )p , end - p );
2835
+ res [length ] = '\0' ;
2836
+ return res ;
2837
+ }
2838
+ p ++ ;
2839
+ }
2840
+ return NULL ;
2841
+ }
2842
+ /* }}} */
2843
+
2736
2844
ZEND_API zend_string * ZEND_FASTCALL zend_string_tolower_ex (zend_string * str , bool persistent ) /* {{{ */
2737
2845
{
2738
2846
size_t length = ZSTR_LEN (str );
2739
2847
unsigned char * p = (unsigned char * ) ZSTR_VAL (str );
2740
2848
unsigned char * end = p + length ;
2741
2849
2742
- #ifdef __SSE2__
2743
- while (p + 16 <= end ) {
2744
- const __m128i _A = _mm_set1_epi8 ('A' - 1 );
2745
- const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
2746
- __m128i op = _mm_loadu_si128 ((__m128i * )p );
2747
- __m128i gt = _mm_cmpgt_epi8 (op , _A );
2748
- __m128i lt = _mm_cmplt_epi8 (op , Z_ );
2749
- __m128i mingle = _mm_and_si128 (gt , lt );
2750
- if (_mm_movemask_epi8 (mingle )) {
2850
+ #ifdef HAVE_BLOCKCONV
2851
+ BLOCKCONV_INIT_RANGE ('A' , 'Z' );
2852
+ while (p + BLOCKCONV_STRIDE <= end ) {
2853
+ BLOCKCONV_LOAD (p );
2854
+ if (BLOCKCONV_FOUND ()) {
2751
2855
zend_string * res = zend_string_alloc (length , persistent );
2752
2856
memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
2753
2857
unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2754
2858
2755
2859
/* Lowercase the chunk we already compared. */
2756
- const __m128i delta = _mm_set1_epi8 ('a' - 'A' );
2757
- __m128i add = _mm_and_si128 (mingle , delta );
2758
- __m128i lower = _mm_add_epi8 (op , add );
2759
- _mm_storeu_si128 ((__m128i * ) q , lower );
2860
+ BLOCKCONV_INIT_DELTA ('a' - 'A' );
2861
+ BLOCKCONV_STORE (q );
2760
2862
2761
2863
/* Lowercase the rest of the string. */
2762
- p += 16 ; q += 16 ;
2864
+ p += BLOCKCONV_STRIDE ;
2865
+ q += BLOCKCONV_STRIDE ;
2763
2866
zend_str_tolower_impl ((char * ) q , (const char * ) p , end - p );
2764
2867
ZSTR_VAL (res )[length ] = '\0' ;
2765
2868
return res ;
2766
2869
}
2767
- p += 16 ;
2870
+ p += BLOCKCONV_STRIDE ;
2768
2871
}
2769
2872
#endif
2770
2873
@@ -2787,6 +2890,55 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, boo
2787
2890
}
2788
2891
/* }}} */
2789
2892
2893
+ ZEND_API zend_string * ZEND_FASTCALL zend_string_toupper_ex (zend_string * str , bool persistent ) /* {{{ */
2894
+ {
2895
+ size_t length = ZSTR_LEN (str );
2896
+ unsigned char * p = (unsigned char * ) ZSTR_VAL (str );
2897
+ unsigned char * end = p + length ;
2898
+
2899
+ #ifdef HAVE_BLOCKCONV
2900
+ BLOCKCONV_INIT_RANGE ('a' , 'z' );
2901
+ while (p + BLOCKCONV_STRIDE <= end ) {
2902
+ BLOCKCONV_LOAD (p );
2903
+ if (BLOCKCONV_FOUND ()) {
2904
+ zend_string * res = zend_string_alloc (length , persistent );
2905
+ memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
2906
+ unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2907
+
2908
+ /* Uppercase the chunk we already compared. */
2909
+ BLOCKCONV_INIT_DELTA ('A' - 'a' );
2910
+ BLOCKCONV_STORE (q );
2911
+
2912
+ /* Uppercase the rest of the string. */
2913
+ p += BLOCKCONV_STRIDE ;
2914
+ q += BLOCKCONV_STRIDE ;
2915
+ zend_str_toupper_impl ((char * ) q , (const char * ) p , end - p );
2916
+ ZSTR_VAL (res )[length ] = '\0' ;
2917
+ return res ;
2918
+ }
2919
+ p += BLOCKCONV_STRIDE ;
2920
+ }
2921
+ #endif
2922
+
2923
+ while (p < end ) {
2924
+ if (* p != zend_toupper_ascii (* p )) {
2925
+ zend_string * res = zend_string_alloc (length , persistent );
2926
+ memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
2927
+
2928
+ unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2929
+ while (p < end ) {
2930
+ * q ++ = zend_toupper_ascii (* p ++ );
2931
+ }
2932
+ ZSTR_VAL (res )[length ] = '\0' ;
2933
+ return res ;
2934
+ }
2935
+ p ++ ;
2936
+ }
2937
+
2938
+ return zend_string_copy (str );
2939
+ }
2940
+ /* }}} */
2941
+
2790
2942
ZEND_API int ZEND_FASTCALL zend_binary_strcmp (const char * s1 , size_t len1 , const char * s2 , size_t len2 ) /* {{{ */
2791
2943
{
2792
2944
int retval ;
0 commit comments