@@ -259,6 +259,9 @@ unsafe fn test_simd() {
259
259
test_mm_insert_epi16 ( ) ;
260
260
test_mm_shuffle_epi8 ( ) ;
261
261
262
+ #[ cfg( not( jit) ) ]
263
+ test_mm_cmpestri ( ) ;
264
+
262
265
test_mm256_shuffle_epi8 ( ) ;
263
266
test_mm256_permute2x128_si256 ( ) ;
264
267
test_mm256_permutevar8x32_epi32 ( ) ;
@@ -430,6 +433,31 @@ unsafe fn test_mm_shuffle_epi8() {
430
433
assert_eq_m128i ( r, expected) ;
431
434
}
432
435
436
/// Loads up to 16 bytes of `s` into an `__m128i`, zero-padding the tail.
///
/// A `&[u8]` shorter than 16 bytes cannot currently be `load`ed directly,
/// which makes short strings awkward to work with. Rather than loading and
/// then mutating the `__m128i`, the bytes are copied into a zeroed 16-byte
/// local buffer and that buffer is loaded instead.
///
/// # Panics
///
/// Panics if `s` is longer than 16 bytes.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE4.2.
#[cfg(not(jit))]
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
    assert!(s.len() <= 16);
    let mut buf = [0u8; 16];
    // Bounds-checked copy into the prefix; the remaining bytes stay zero.
    buf[..s.len()].copy_from_slice(s);
    // SAFETY: `buf` is exactly 16 readable bytes, and the unaligned load
    // places no alignment requirement on the pointer.
    _mm_loadu_si128(buf.as_ptr() as *const _)
}

450
+
451
+ #[ cfg( not( jit) ) ]
452
+ #[ cfg( target_arch = "x86_64" ) ]
453
+ #[ target_feature( enable = "sse4.2" ) ]
454
+ unsafe fn test_mm_cmpestri ( ) {
455
+ let a = str_to_m128i ( b"bar - garbage" ) ;
456
+ let b = str_to_m128i ( b"foobar" ) ;
457
+ let i = _mm_cmpestri :: < _SIDD_CMP_EQUAL_ORDERED > ( a, 3 , b, 6 ) ;
458
+ assert_eq ! ( 3 , i) ;
459
+ }
460
+
433
461
#[ cfg( target_arch = "x86_64" ) ]
434
462
#[ target_feature( enable = "avx2" ) ]
435
463
unsafe fn test_mm256_shuffle_epi8 ( ) {
0 commit comments