From 105ce134043a7de9022791ede87d01a9ed680613 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 4 Mar 2023 11:40:00 +0200 Subject: [PATCH] Fix failure of AVX2-accelerated mb_check_encoding on 32-bit MS Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Ilija Tovilo for noticing and reporting this problem. Thanks also to Michael Voříšek for finding the StackOverflow post which explained the reason for the failure. --- ext/mbstring/mbstring.c | 7 ++++++- ext/mbstring/tests/utf_encodings.phpt | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 165c0c28eef6..0ea582022333 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -5155,7 +5155,12 @@ static bool mb_fast_check_utf8_avx2(zend_string *str) goto check_operand; case 7: case 8: - operand = _mm256_set_epi64x(0, 0, 0, *((int64_t*)p)); + /* This was originally: operand = _mm256_set_epi64x(0, 0, 0, *((int64_t*)p)); + * However, that caused test failures on 32-bit MS Windows + * (Bad 7/8-byte UTF-8 strings would be wrongly passed through as 'valid') + * It seems this is caused by a bug in MS Visual C++ + * Ref: https://stackoverflow.com/questions/37509129/potential-bug-in-visual-studio-c-compiler-or-in-intel-intrinsics-avx2-mm256-s */ + operand = _mm256_set_epi32(0, 0, 0, 0, 0, 0, ((int32_t*)p)[1], ((int32_t*)p)[0]); goto check_operand; case 9: operand = _mm256_set_m128i(_mm_setzero_si128(), _mm_srli_si128(_mm_loadu_si128((__m128i*)(p - 6)), 6)); diff --git a/ext/mbstring/tests/utf_encodings.phpt b/ext/mbstring/tests/utf_encodings.phpt index 3f450871972f..634d070ea27a 100644 --- a/ext/mbstring/tests/utf_encodings.phpt +++ b/ext/mbstring/tests/utf_encodings.phpt @@ -5,7 +5,6 @@ mbstring --SKIPIF-- --FILE--